In [1]:
import os
import sys
import json
import pandas as pd

from collections import defaultdict

In [2]:
glue = ['mnli'] #["cola", "mnli", "mrpc", "qnli", "qqp", "rte", "sst2", 'squad', 'squad_v2']

dirs_per_task_topk = defaultdict(list)
dirs_per_task_trimmed = defaultdict(list)

for task in glue:
    for dirs in os.listdir(f"../../../../MoEBERT-results/{task}"):
        if dirs.startswith("new_moebert_k2_experiment"):
            #print(dirs)
            if 'trl_' in dirs:
                dirs_per_task_trimmed[task].append(f"../../../../MoEBERT-results/{task}/{dirs}")
            else:
                # add the full path to the list
                dirs_per_task_topk[task].append(f"../../../../MoEBERT-results/{task}/{dirs}")    

In [3]:
def get_best_metric(list, metric_name):
    best_metric = 0
    best_epoch = 0
    for item_dict in list:
        if f'eval_{metric_name}' in item_dict:
            if item_dict[f'eval_{metric_name}'] > best_metric:
                best_metric = item_dict[f'eval_{metric_name}']
                best_epoch = item_dict['epoch']
    return best_metric, best_epoch

In [4]:
# create a dataframe with task, dis, wdec given the path
# for each of them, get the best metric, given the model in the path

metric_for_task = {
    "cola": "matthews_correlation",
    "mnli": "accuracy",
    "mrpc": "accuracy",
    "qnli": "accuracy",
    "qqp": "f1",
    "rte": "accuracy",
    "sst2": "accuracy",
    "squad" : "f1",
    "squad_v2": "f1",
}

df_results = pd.DataFrame(columns=["task", "dis", "wdec", "value", "epoch", "seed"])

for task in glue:
    for path in dirs_per_task_topk[task]:
        #print(path)
        try:
            with open(f"{path}/model/trainer_state.json", "r") as f:
                results = json.load(f)
            best_result, best_epoch = get_best_metric(results['log_history'], metric_for_task[task])
        except FileNotFoundError:
            try:
                with open(f"{path}/all_results.json", "r") as f:
                    best_epoch = 10
                    best_result = json.load(f)[metric_for_task[task]]
            except FileNotFoundError:
                best_result = 0
                best_epoch = 0
        dis = path.split("/")[-1].split("_")[6]
        wdec = path.split("/")[-1].split("_")[8]
        try:
            seed = path.split("/")[-1].split("_")[10]
        except:
            seed = "0"
        df_results.loc[len(df_results)] = pd.Series({"task": task, "dis": dis, "wdec": wdec, "value": best_result, "epoch": best_epoch, "seed": seed})
#         df_results = df_results.append({"task": task, "dis": dis, "wdec": wdec, "value": best_result, "epoch": best_epoch, "seed": seed}, ignore_index=True)


In [5]:
path.split("/")[-1].split("_")

['new',
 'moebert',
 'k2',
 'experiment',
 'topk',
 'dis',
 '1',
 'wdec',
 '0.1',
 'seed',
 '2']

In [6]:
# number of experiments per task
print(df_results.groupby("task").count()['value'])

task
mnli    15
Name: value, dtype: int64


In [7]:
# get the best value for each task, and the corresponding dis and wdec and epoch
best_results = pd.DataFrame(columns=["task", "dis", "wdec", "value", "epoch", "seed"])
for task in glue:
    best_results = pd.concat([best_results, pd.DataFrame([df_results[df_results["task"] == task].sort_values(by="value", ascending=False).iloc[0]])], axis=0)
#     best_results = best_results.append(df_results[df_results["task"] == task].sort_values(by="value", ascending=False).iloc[0])

In [8]:
best_results

Unnamed: 0,task,dis,wdec,value,epoch,seed
5,mnli,1,0,0.851404,8.8,2


In [9]:
df_results

Unnamed: 0,task,dis,wdec,value,epoch,seed
0,mnli,5,0.0,0.847742,8.8,2
1,mnli,5,0.1,0.846928,9.78,2
2,mnli,2,0.01,0.849573,8.8,2
3,mnli,4,0.1,0.847539,8.15,2
4,mnli,2,0.1,0.848861,7.82,2
5,mnli,1,0.0,0.851404,8.8,2
6,mnli,3,0.1,0.848352,8.8,2
7,mnli,3,0.0,0.850692,8.8,2
8,mnli,1,0.01,0.849166,7.82,2
9,mnli,4,0.0,0.848047,8.8,2


## Trimmed Lasso Gate results

In [10]:
df_results_trimmed = pd.DataFrame(columns=["task", "dis", "wdec", "trl", "value", "epoch", "seed"])

for task in glue:
    for path in dirs_per_task_trimmed[task]:
        try:
            with open(f"{path}/model/trainer_state.json", "r") as f:
                results = json.load(f)
            best_result, best_epoch = get_best_metric(results['log_history'], metric_for_task[task])
            print(best_result, best_epoch)
        except FileNotFoundError:
            try:
                with open(f"{path}/all_results.json", "r") as f:
                    best_epoch = 10
                    best_result = json.load(f)[metric_for_task[task]]
            except FileNotFoundError:
                best_result = 0
                best_epoch = 0
        dis = path.split("/")[-1].split("_")[5]
        wdec = path.split("/")[-1].split("_")[7]
        trl = path.split("/")[-1].split("_")[9]
        try:
            seed = path.split("/")[-1].split("_")[11]
        except:
            seed = "0"
        df_results_trimmed.loc[len(df_results_trimmed)] = pd.Series({"task": task, "dis": dis, "wdec": wdec, "trl": trl, "value": best_result, "epoch": best_epoch, "seed": seed})
#         df_results_trimmed = df_results_trimmed.append({"task": task, "dis": dis, "wdec": wdec, "trl": trl, "value": best_result, "epoch": best_epoch, "seed": seed}, ignore_index=True)


0.8512001627339301 6.52
0.8513018714401953 6.52
0.8461538461538461 3.1
0.8475386493083807 5.87
0.852013832384052 6.52
0.8482506102522376 8.15
0.8474369406021155 5.87
0.8463181448331977 7.17
0.8484540276647681 8.8
0.8509967453213995 8.15
0.8483523189585028 6.52
0.8479454841334418 8.8
0.8497762408462164 7.17
0.8495728234336859 7.17
0.849979658258747 8.15
0.8481489015459723 6.52
0.8465215622457283 6.84
0.8502847843775427 8.15
0.8478437754271766 5.87
0.8470301057770545 5.87
0.8491659886086249 8.15
0.8473352318958502 8.15
0.8469283970707893 5.87
0.8492676973148902 8.15
0.8479454841334418 6.52
0.8477420667209113 5.87
0.8461538461538461 3.1
0.8510984540276648 7.17
0.8485557363710333 6.52
0.8490642799023597 8.15
0.849979658258747 7.17
0.846826688364524 5.87
0.8463181448331977 5.87
0.8487591537835639 8.15
0.8521155410903173 6.52
0.8478437754271766 9.78
0.8478437754271766 5.87
0.849979658258747 7.17
0.8473352318958502 6.52
0.8489625711960944 5.87
0.8478437754271766 9.78
0.8461538461538461 3.1
0.

In [16]:
df_results_trimmed.sort_values(by=['dis','wdec','trl'])

Unnamed: 0,task,dis,wdec,trl,value,epoch,seed
2,mnli,1,0.0,0.0001,0.851302,6.52,2
79,mnli,1,0.0,0.001,0.851404,7.17,2
45,mnli,1,0.0,0.01,0.849675,8.15,2
65,mnli,1,0.0,0.1,0.851709,7.17,2
66,mnli,1,0.0,1.0,0.85181,7.17,2
10,mnli,1,0.0,5.0,0.850997,8.15,2
36,mnli,1,0.01,0.0001,0.852116,6.52,2
13,mnli,1,0.01,0.001,0.849776,7.17,2
53,mnli,1,0.01,0.01,0.850285,7.17,2
60,mnli,1,0.01,0.1,0.850081,7.17,2


In [12]:
print(df_results_trimmed.groupby("task").count()['value'])

task
mnli    80
Name: value, dtype: int64


In [13]:
# get the best value for each task, and the corresponding dis and wdec and epoch
best_results_trimmed = pd.DataFrame(columns=["task", "dis", "wdec", "trl", "value", "epoch", "seed"])
for task in glue:
    try:
        best_results_trimmed = pd.concat([best_results_trimmed, pd.DataFrame([df_results_trimmed[df_results_trimmed["task"] == task].sort_values(by="value", ascending=False).iloc[0]])], ignore_index=True)
#         best_results_trimmed = best_results_trimmed.append(df_results_trimmed[df_results_trimmed["task"] == task].sort_values(by="value", ascending=False).iloc[0], ignore_index=True)
    except IndexError:
        #print(task)
        pass

In [14]:
print(best_results_trimmed.to_markdown())

|    | task   |   dis |   wdec |    trl |    value |   epoch |   seed |
|---:|:-------|------:|-------:|-------:|---------:|--------:|-------:|
|  0 | mnli   |     1 |   0.01 | 0.0001 | 0.852116 |    6.52 |      2 |


In [15]:
best_results_trimmed.reset_index(drop=True, inplace=True)

In [16]:
best_results_trimmed

Unnamed: 0,task,dis,wdec,trl,value,epoch,seed
0,mnli,1,0.01,0.0001,0.852116,6.52,2


In [17]:
best_results[best_results['task'] != 'mnli'].reset_index(drop=True)

Unnamed: 0,task,dis,wdec,value,epoch,seed


In [None]:
# Winning models (when we do better than the best baseline)
best_results_trimmed[best_results_trimmed['value'] >= best_results[best_results['task'] != 'mnli'].reset_index(drop=True)['value']]

In [16]:
df_results_trimmed = pd.DataFrame(columns=["task", "dis", "wdec", "trl", "value", "epoch", "seed"])

for task in glue:
    for path in dirs_per_task_trimmed[task]:
        try:
            with open(f"{path}/model/all_results.json", "r") as f:
                best_epoch = 10
                best_result = json.load(f)[metric_for_task[task]]
        except FileNotFoundError:
            best_result = 0
            best_epoch = 0
        dis = path.split("/")[-1].split("_")[5]
        wdec = path.split("/")[-1].split("_")[7]
        trl = path.split("/")[-1].split("_")[9]
        try:
            seed = path.split("/")[-1].split("_")[11]
        except:
            seed = "0"
        df_results_trimmed = df_results_trimmed.append({"task": task, "dis": dis, "wdec": wdec, "trl": trl, "value": best_result, "epoch": best_epoch, "seed": seed}, ignore_index=True)


KeyError: 'accuracy'

In [17]:
df_results_trimmed

Unnamed: 0,task,dis,wdec,trl,value,epoch,seed


In [45]:
task_names = ['mnli']   #(rte mrpc sst2 qnli qqp qnli cola) # mnli not included
trimmed_lasso_regs = [0.0001]    #(0.0001 0.001 0.01 0.1 1.0 5.0 10.0)
weight_decays = [0, 0.01, 0.1]
distillations = [1, 2, 3, 4, 5]
gates = ['lasso']



In [54]:
df = pd.DataFrame(columns=["task", "distillation","weight_decay","trimmed_lasso_reg","gate"])
for TASK_ID in range(0,15):
    task = task_names[TASK_ID%1]
    weight_decay = weight_decays[TASK_ID%3]
    distillation = distillations[TASK_ID%5]
    trimmed_lasso_reg = trimmed_lasso_regs[TASK_ID%1]
    gate = gates[TASK_ID%1]
    df.loc[len(df)] = pd.Series(
        {
            "task": task,
            "distillation": distillation,
            "weight_decay": weight_decay,
            "trimmed_lasso_reg": trimmed_lasso_reg,
            "gate": gate,
        }
    )


In [55]:
df

Unnamed: 0,task,distillation,weight_decay,trimmed_lasso_reg,gate
0,mnli,1,0.0,0.0001,lasso
1,mnli,2,0.01,0.0001,lasso
2,mnli,3,0.1,0.0001,lasso
3,mnli,4,0.0,0.0001,lasso
4,mnli,5,0.01,0.0001,lasso
5,mnli,1,0.1,0.0001,lasso
6,mnli,2,0.0,0.0001,lasso
7,mnli,3,0.01,0.0001,lasso
8,mnli,4,0.1,0.0001,lasso
9,mnli,5,0.0,0.0001,lasso


In [15]:
pd.set_option('display.max_rows', 500)

In [57]:
df.drop_duplicates().shape

(15, 5)