Let's set up the basic Chameleon configuration.


In [1]:
import chi, os, time
from chi import lease
from chi import server

# Chameleon project name, read from the environment -- change this if you need to.
PROJECT_NAME = os.environ.get('OS_PROJECT_NAME')

# Select the site first, then register the project on the chi context.
chi.use_site("CHI@UC")
chi.set("project_name", PROJECT_NAME)

# All experiment resources carry this user name as part of their name.
username = os.environ.get('USER')

Now using CHI@UC:
URL: https://chi.uc.chameleoncloud.org
Location: Argonne National Laboratory, Lemont, Illinois, USA
Support contact: help@chameleoncloud.org


Set `NODE_TYPE` to the node type of the server on which you performed your experiment.

In [2]:
# Node type of the reserved server; it is embedded in the lease/server name looked up below.
NODE_TYPE = "gpu_rtx_6000"

Let's access the resources where our evaluation was conducted.

In [3]:
# The lease and the server are both looked up under the same name, so build it once.
resource_name = f"colab-{username}-{NODE_TYPE}-v2"

l = lease.get_lease(resource_name)
reservation_id = lease.get_node_reservation(l["id"])
server_id = server.get_server_id(resource_name)
server.wait_for_active(server_id)

# Collect the addresses on 'sharednet1' that are marked as floating and keep the first one.
floating_ips = [
    d['addr']
    for d in server.show_server(server_id).addresses['sharednet1']
    if d['OS-EXT-IPS:type'] == 'floating'
]
if not floating_ips:
    # Fail with an explicit message instead of an IndexError from an empty list.
    raise RuntimeError(f"No floating IP is associated with server {resource_name!r}")
reserved_fip = floating_ips[0]

In [7]:
from chi import ssh
# Remote handle addressed by the floating IP found above; the cells below use it
# for `node.run` (remote commands) and `node.get` (file download).
node = ssh.Remote(reserved_fip)

In [64]:
# Directory on the remote node whose contents get archived; the same relative
# path is reused locally as the extraction target.
remote_directory = 'results/'

# File name of the tarball created on the remote node and then downloaded.
archive_name = 'covost2_results.tar.gz'


The SSH implementation provided by `python-chi`, a wrapper over Fabric, only allows single-file transfers. To transfer a whole directory, we need to archive the entire remote directory and then transfer the archive file.

In [66]:
# Pack the contents of the remote results directory (-C switches into it, '.' selects
# everything inside) into one gzip-compressed tarball on the remote node.
archive_command = f'tar -czf {archive_name} -C {remote_directory} .'
node.run(archive_command)

<Result cmd='tar -czf covost2_results.tar.gz -C results/ .' exited=0>

In [67]:
# Download the archive from the remote node. No local destination is passed; the
# extraction cell below opens it under the same name relative to the working directory.
node.get(archive_name)

<fabric.transfer.Result at 0x7f001be87730>

In [69]:
import tarfile

# Unpack the downloaded archive into the local directory named like the remote one.
with tarfile.open(archive_name) as tar:
    if hasattr(tarfile, 'data_filter'):
        # Extraction filters are available: 'data' rejects members that would land
        # outside the destination (absolute paths, '..', escaping links) and avoids
        # the DeprecationWarning emitted on Python 3.12+ when no filter is given.
        tar.extractall(path=remote_directory, filter='data')
    else:
        # Older Python without extraction-filter support: fall back to plain extraction.
        tar.extractall(path=remote_directory)

## Divide languages into different categories


When evaluating translation performance, we need to divide our languages into high-, mid- and low-resource categories depending on how much data is available for each language. This distribution is provided by Babu et al., 2021 in their XLS-R [paper](https://arxiv.org/pdf/2111.09296.pdf).

In [71]:
# Names of the three module-level language lists defined below, in processing order.
res_levels = "low_res mid_res high_res".split()

# Language codes grouped by how much data is available for each language
# (split taken from the XLS-R paper referenced above).
high_res = "ca de fr es".split()
mid_res = "zh-CN fa it ru pt".split()
low_res = "mn ta lv et cy sl ja tr ar nl sv-SE id".split()


In [72]:
def resource_level_results(scores,model_name):
    """Average per-language scores within each resource level and over all languages.

    Args:
        scores: Mapping from language code to a numeric score. It must contain every
            code of the module-level lists ``low_res``, ``mid_res`` and ``high_res``
            that belong to a level named in ``res_levels``.
        model_name: Label stored under the ``"Model"`` key of the result.

    Returns:
        dict with the keys ``"Model"``, ``"High"``, ``"Mid"``, ``"Low"`` and ``"All"``.
        The four averages are rounded to one decimal place. ``"All"`` is the mean
        over every individual language, not the mean of the three level averages.

    Raises:
        KeyError: if ``scores`` lacks one of the required language codes.
    """
    # Explicit name -> list mapping instead of eval() on the level names.
    level_langs = {"low_res": low_res, "mid_res": mid_res, "high_res": high_res}

    averages = {}
    total_score = 0.0
    total_langs = 0
    for level in res_levels:
        langs = level_langs[level]
        level_sum = sum(scores[lang] for lang in langs)
        total_score += level_sum
        total_langs += len(langs)
        averages[level] = level_sum / len(langs)

    # Divide by the number of languages actually summed rather than a hard-coded 21.
    overall = total_score / total_langs

    return {
      "Model":model_name,
      # A level absent from res_levels reports 0.0.
      "High" : round(averages.get("high_res", 0.0),1),
      "Mid" : round(averages.get("mid_res", 0.0),1),
      "Low" : round(averages.get("low_res", 0.0),1),
      "All" : round(overall,1)
    }

In [73]:
# Starts empty; no cell in this part of the notebook fills it.
final_results = list()

# Every language code, ordered low -> mid -> high resource.
lang_codes = [*low_res, *mid_res, *high_res]

In [74]:
import pandas as pd

# Per-model scores for the High / Mid / Low / All groupings, one row per model.
# NOTE(review): presumably transcribed from a published results table -- verify
# the numbers against the source before relying on them.
_score_columns = ["Model", "High", "Mid", "Low", "All"]
_score_rows = [
    ("XMEF-X", 34.2, 20.2, 5.9, 14.7),
    ("XLS-R (2B)", 36.1, 27.7, 15.1, 22.1),
    ("mSLAM-CTC (2B)", 37.8, 29.6, 18.5, 24.8),
    ("Maestro", 38.2, 31.3, 18.4, 25.2),
    ("Zero-Shot Whisper", 36.2, 32.6, 25.2, 29.1),
]

# Pivot the rows into the column-oriented mapping the DataFrame is built from.
data = {
    column: [row[position] for row in _score_rows]
    for position, column in enumerate(_score_columns)
}

df = pd.DataFrame(data)

# Serialise to a JSON string in 'split' layout (separate columns / index / data).
json_data = df.to_json(orient='split')
json_data


'{"columns":["Model","High","Mid","Low","All"],"index":[0,1,2,3,4],"data":[["XMEF-X",34.2,20.2,5.9,14.7],["XLS-R (2B)",36.1,27.7,15.1,22.1],["mSLAM-CTC (2B)",37.8,29.6,18.5,24.8],["Maestro",38.2,31.3,18.4,25.2],["Zero-Shot Whisper",36.2,32.6,25.2,29.1]]}'

In [76]:
from io import StringIO

# Round-trip check: parse the 'split'-oriented JSON string back into a DataFrame.
# The string is wrapped in StringIO because passing literal JSON text straight to
# pd.read_json is deprecated in recent pandas releases.
pd.read_json(StringIO(json_data), orient='split')

Unnamed: 0,Model,High,Mid,Low,All
0,XMEF-X,34.2,20.2,5.9,14.7
1,XLS-R (2B),36.1,27.7,15.1,22.1
2,mSLAM-CTC (2B),37.8,29.6,18.5,24.8
3,Maestro,38.2,31.3,18.4,25.2
4,Zero-Shot Whisper,36.2,32.6,25.2,29.1


In [79]:
import json
import os

claim_path = 'claims/whisper_claim_covost2.json'

# Create the target directory if it is missing so that open() does not fail.
os.makedirs(os.path.dirname(claim_path), exist_ok=True)

# NOTE: json_data is already a JSON string (the result of DataFrame.to_json), so
# json.dump stores it as one JSON string literal. A reader has to json.load() the
# file first and then parse the resulting string (e.g. with pd.read_json using
# orient='split'). The on-disk format is deliberately left unchanged here.
with open(claim_path, 'w') as f:
    json.dump(json_data, f)