Let set up the basic Chameleon Configuration


In [1]:
import chi, os, time
from chi import lease
from chi import server

PROJECT_NAME = os.getenv('OS_PROJECT_NAME') # change this if you need to
chi.use_site("CHI@UC")
chi.set("project_name", PROJECT_NAME)
username = os.getenv('USER') # all exp resources will have this prefix

ModuleNotFoundError: No module named 'chi'

Set the `NODE_TYPE` to resource server where you performed your experiment

In [2]:
NODE_TYPE="gpu_rtx_6000"

Lets access the resources where our evaluation was conducted

In [3]:
l = lease.get_lease(f"colab-{username}-{NODE_TYPE}-v2")
reservation_id = lease.get_node_reservation(l["id"])
server_id = server.get_server_id(f"colab-{username}-{NODE_TYPE}-v2")
server.wait_for_active(server_id)
reserved_fip = [d['addr'] for d in chi.server.show_server(server_id).addresses['sharednet1'] if d['OS-EXT-IPS:type']=='floating'][0]

### Transfer raw results from the resource server to the jupyter server

In [7]:
from chi import ssh
node = ssh.Remote(reserved_fip)

In [64]:
# Define the remote directory path and the local path to download to
remote_directory = 'results/'
archive_name='covost2_results.tar.gz'


The ssh implementation provided by `python-chi` as a wrapper over Fabric, only allows for single file transfer. In order to transfer the directory we would nead to archive the entire remote directory then transfer the archive file

In [66]:
node.run(f'tar -czf {archive_name} -C {remote_directory} .')

<Result cmd='tar -czf covost2_results.tar.gz -C results/ .' exited=0>

In [67]:
node.get(archive_name)

<fabric.transfer.Result at 0x7f001be87730>

In [69]:
import tarfile
with tarfile.open(archive_name) as tar:
    tar.extractall(path=remote_directory)

## Reproduced results summaries

### CoVoST2 evaluation summary

Lets first summarize the results of the CoVoST2 evaluation. We have the scores for four models on this data for X→eng translations for 21 languages. These models are: 

1) Whisper 
2) XLS-R
3) Seamless Medium
4) Seamless Large


#### Divide language in different categories


While evaluating performance in terms of translation capabilities, we need to divide our languages between high, mid and low resource categories depending on what amount of data is available in each language. This distribution has been provided by Babu et al.,2021 in their XLS-R [paper](https://arxiv.org/pdf/2111.09296.pdf).

In [35]:
res_levels=["low_res","mid_res","high_res"]

high_res=['ca','de','fr','es']
mid_res=['zh-CN','fa','it','ru','pt']
low_res=['mn','ta','lv','et','cy','sl','ja','tr','ar','nl','sv-SE','id']


In [73]:
import collections
def resource_level_results(scores,model_name):
    res_scores=collections.defaultdict(float)
    for level in res_levels:
        for lang in eval(level):
            res_scores[level]+=scores[lang]
        res_scores['all']+=res_scores[level]
        res_scores[level]/=len(eval(level))
    res_scores['all']/=21.0
    return {
      "Model":model_name,
      "High" : round(res_scores["high_res"],1),
      "Mid" : round(res_scores["mid_res"],1),
      "Low" : round(res_scores["low_res"],1),
      "All" : round(res_scores['all'],1)
    }

In [151]:
import os, json
re_results=[]
score_directory='results/covost2/scores/'
#read all the json files
for files in os.listdir(score_directory):
    with open(score_directory+files) as f:
        scores=json.load(f)
        model_name=files.split('.')[0]
        re_results.append(resource_level_results(scores,model_name))

re_results_df=pd.DataFrame(re_results)




In [164]:
re_results_df.to_json('claims/re_covost_summary.json',orient='split')

## Whisper Claim 1

In [179]:
#read the claim data
import pandas as pd
claims_df= pd.read_json('claims/whisper_claim_1.json',orient='split')

In [165]:
claims_df

Unnamed: 0,Model,High,Mid,Low,All
0,XMEF-X,34.2,20.2,5.9,14.7
1,XLS-R (2B),36.1,27.7,15.1,22.1
2,mSLAM-CTC (2B),37.8,29.6,18.5,24.8
3,Maestro,38.2,31.3,18.4,25.2
4,Zero-Shot Whisper,36.2,32.6,25.2,29.1


In [178]:
re_results_df= pd.read_json('claims/re_covost_summary.json',orient='split')
re_results_df

Unnamed: 0,Model,High,Mid,Low,All
0,Seamless Medium,37.3,33.6,28.3,31.3
1,Seamless Large,39.3,36.2,31.9,34.3
2,XLS-R (2B),36.0,27.8,15.4,22.3
3,Whisper Large-v2,35.2,32.6,23.8,28.1


In [167]:
from fuzzywuzzy import process

In [168]:
def apply_brackets(claims_df, avoid_cols=["Model"]):
    df=claims_df.copy()
    for col in df.columns:
        if col not in avoid_cols:
            df[col]=df[col].apply(lambda x: f"({x})")
    return df

In [169]:
def fuzzy_combine_results(claims_df, re_results_df, value_cols=['High', 'Mid', 'Low', 'All']):
    combined_results=claims_df.copy()

    # Create a set to keep track of matched models from final_results_df
    matched_models = set()

    # ensuring that each model from final_results_df is only used once
    for index, claim_model in claims_df['Model'].items():

        lookup_list=[model for model in re_results_df['Model'] if model not in matched_models]

        if not lookup_list:
            break

        # Extract best match that hasn't been used already
        best_match, score = process.extractOne(
            claim_model,
            lookup_list
            
        )

        # If a good match is found and not already used
        if best_match and score >= 50:
            # Mark this model as matched to prevent further matches
            matched_models.add(best_match)

            # For each score column, append the final_results value to the claims value
            for col in value_cols:
                final_value = re_results_df.loc[re_results_df['Model'] == best_match, col].iloc[0]
                combined_results.at[index, col] = f"{final_value} {combined_results.at[index, col]}"

    return combined_results
    
        

In [170]:
merged_df=fuzzy_combine_results(apply_brackets(claims_df), re_results_df)
merged_df

Unnamed: 0,Model,High,Mid,Low,All
0,XMEF-X,(34.2),(20.2),(5.9),(14.7)
1,XLS-R (2B),36.0 (36.1),27.8 (27.7),15.4 (15.1),22.3 (22.1)
2,mSLAM-CTC (2B),(37.8),(29.6),(18.5),(24.8)
3,Maestro,(38.2),(31.3),(18.4),(25.2)
4,Zero-Shot Whisper,35.2 (36.2),32.6 (32.6),23.8 (25.2),28.1 (29.1)


In [171]:
merged_df.to_json('claims/re_whisper_claim_1.json',orient='split')

## Seamless Claim 1

In [180]:
#read the claim data
claims_df= pd.read_json('claims/seamless_claim_1.json',orient='split')

In [181]:
claims_df

Unnamed: 0,Model,size,FLEURS X→eng (n=81),FLEURS eng→X (n=88),CoVoST 2 X→eng (n=21),CoVoST 2 eng→X (n=15)
0,XLS-R-2B-S2T,2.6B,,x,22.1,27.8
1,WHISPER-LARGE-v2,1.5B,17.9,x,29.1,x
2,AUDIOPaLM-2-8B-AST,8.0B,19.7,x,37.8,x
3,SEAMLESSM4T-MEDIUM,1.2B,20.9,19.2,29.8,26.6
4,SEAMLESSM4T-LARGE,2.3B,24.0,21.5,34.1,30.6


#### CoVoST2 X→eng related entries

In [182]:
re_results_df=pd.read_json('claims/re_covost_summary.json',orient='split')
re_results_df

Unnamed: 0,Model,High,Mid,Low,All
0,Seamless Medium,37.3,33.6,28.3,31.3
1,Seamless Large,39.3,36.2,31.9,34.3
2,XLS-R (2B),36.0,27.8,15.4,22.3
3,Whisper Large-v2,35.2,32.6,23.8,28.1


In [183]:
#keep only Model and All columns and rename All column to CoVoST 2 X→eng (n=21)

re_results_df=re_results_df[['Model','All']]
re_results_df.rename(columns = {'All':'CoVoST 2 X→eng (n=21)'}, inplace = True)

re_results_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  re_results_df.rename(columns = {'All':'CoVoST 2 X→eng (n=21)'}, inplace = True)


Unnamed: 0,Model,CoVoST 2 X→eng (n=21)
0,Seamless Medium,31.3
1,Seamless Large,34.3
2,XLS-R (2B),22.3
3,Whisper Large-v2,28.1


In [184]:
merged_df=fuzzy_combine_results(apply_brackets(claims_df,["Model","size"]), re_results_df,value_cols=['CoVoST 2 X→eng (n=21)'])

In [185]:
merged_df

Unnamed: 0,Model,size,FLEURS X→eng (n=81),FLEURS eng→X (n=88),CoVoST 2 X→eng (n=21),CoVoST 2 eng→X (n=15)
0,XLS-R-2B-S2T,2.6B,(),(x),22.3 (22.1),(27.8)
1,WHISPER-LARGE-v2,1.5B,(17.9),(x),28.1 (29.1),(x)
2,AUDIOPaLM-2-8B-AST,8.0B,(19.7),(x),(37.8),(x)
3,SEAMLESSM4T-MEDIUM,1.2B,(20.9),(19.2),31.3 (29.8),(26.6)
4,SEAMLESSM4T-LARGE,2.3B,(24.0),(21.5),34.3 (34.1),(30.6)
