In [1]:
import os
import sys
import json
import pandas as pd

from collections import defaultdict

In [2]:
# Tasks whose result folders are scanned. The longer list is the disabled
# alternative kept from the original cell:
# ["cola", "mnli", "mrpc", "qnli", "qqp", "rte", "sst2", 'squad', 'squad_v2']
glue = ['squad', 'squad_v2']

# task -> list of run-folder paths, one mapping per experiment family.
dirs_per_task_topk = defaultdict(list)
dirs_per_task_trimmed = defaultdict(list)

for task in glue:
    task_root = f"../../results/{task}"
    for run_name in os.listdir(task_root):
        run_path = f"{task_root}/{run_name}"
        if run_name.startswith("new_moebert_k2_experiment_topk"):
            # Top-k runs carry the longer "_topk" prefix, so test it first.
            dirs_per_task_topk[task].append(run_path)
        elif run_name.startswith("new_moebert_k2_experiment") and 'trl_' in run_name:
            # Remaining experiment folders count as "trimmed" only when their
            # name contains a 'trl_' field; all other folders are ignored.
            dirs_per_task_trimmed[task].append(run_path)

In [3]:
def get_best_metric(list, metric_name):
    """Return the highest ``eval_<metric_name>`` value in a log history.

    Args:
        list: Iterable of log-entry dicts. (The parameter name shadows the
            builtin ``list``; it is kept unchanged for backward compatibility.)
        metric_name: Metric name without the ``eval_`` prefix.

    Returns:
        A ``(best_metric, best_epoch)`` tuple. ``best_epoch`` is the ``'epoch'``
        value of the first entry that reaches the maximum. When no entry
        contains the metric key, ``(0, 0)`` is returned.

    Raises:
        KeyError: If the winning entry has no ``'epoch'`` key.
    """
    metric_key = f'eval_{metric_name}'
    # ``None`` means "no evaluation entry seen yet". Seeding with 0 would hide
    # metrics that are negative or exactly zero.
    best_metric = None
    best_epoch = 0
    for item_dict in list:
        if metric_key not in item_dict:
            continue
        value = item_dict[metric_key]
        # Strict comparison keeps the earliest epoch on ties.
        if best_metric is None or value > best_metric:
            best_metric = value
            best_epoch = item_dict['epoch']
    if best_metric is None:
        return 0, 0
    return best_metric, best_epoch

def get_best_metric_squad(list, metric_name):
    """Return the highest ``'f1'`` among SQuAD-style evaluation log entries.

    Only entries that contain an ``'exact_match'`` or ``'exact'`` key are
    considered. ``metric_name`` is accepted but not used: the score is always
    read from the ``'f1'`` key.

    Returns:
        ``(best_f1, best_epoch)``; ``(0, 0)`` when no entry beats a score of 0.
    """
    top_f1, top_epoch = 0, 0
    for entry in list:
        is_squad_eval = 'exact_match' in entry or 'exact' in entry
        if not is_squad_eval:
            continue
        f1_score = entry['f1']
        # Strict comparison: the first entry reaching the maximum wins.
        if f1_score > top_f1:
            top_f1, top_epoch = f1_score, entry['epoch']
    return top_f1, top_epoch



In [4]:
# Build one row per top-k run: the task, the hyper-parameters parsed from the
# run folder name (dis, wdec, seed), the best evaluation value and its epoch.

# Metric key reported for each task.
metric_for_task = {
    "cola": "matthews_correlation",
    "mnli": "accuracy",
    "mrpc": "accuracy",
    "qnli": "accuracy",
    "qqp": "f1",
    "rte": "accuracy",
    "sst2": "accuracy",
    "squad" : "f1",
    "squad_v2": "f1",
}

# Rows are gathered in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and growing a frame row by
# row is quadratic.
topk_records = []

for task in glue:
    for path in dirs_per_task_topk[task]:
        try:
            # Preferred source: the per-epoch evaluation history.
            with open(f"{path}/model/trainer_state.json", "r") as f:
                results = json.load(f)
            # NOTE(review): the SQuAD-specific scorer is used for every task;
            # it only looks at entries with 'exact_match'/'exact' keys.
            best_result, best_epoch = get_best_metric_squad(results['log_history'], metric_for_task[task])
        except FileNotFoundError:
            try:
                # Fallback: a summary file with no per-epoch history.
                with open(f"{path}/all_results.json", "r") as f:
                    best_result = json.load(f)[metric_for_task[task]]
                # presumably the last epoch of a 10-epoch run - TODO confirm
                best_epoch = 10
            except FileNotFoundError:
                # Neither file exists: record the run with zeroed results.
                best_result = 0
                best_epoch = 0

        # Folder name fields are separated by "_"; positions 6, 8 and 10 hold
        # the dis, wdec and seed values respectively.
        name_parts = path.split("/")[-1].split("_")
        dis = name_parts[6]
        wdec = name_parts[8]
        # Folder names without a seed field default to seed "0".
        seed = name_parts[10] if len(name_parts) > 10 else "0"

        topk_records.append({"task": task, "dis": dis, "wdec": wdec, "value": best_result, "epoch": best_epoch, "seed": seed})

df_results = pd.DataFrame(topk_records, columns=["task", "dis", "wdec", "value", "epoch", "seed"])


In [5]:
# How many top-k runs were collected per task (non-null `value` entries).
print(df_results.groupby("task")["value"].count())

task
squad       15
squad_v2    15
Name: value, dtype: int64


In [6]:
# Show the collected top-k results table (one row per run folder).
df_results

Unnamed: 0,task,dis,wdec,value,epoch,seed
0,squad,1,0.1,88.056702,10.0,2
1,squad,3,0.01,88.25336,10.0,2
2,squad,5,0.1,88.217408,9.0,2
3,squad,4,0.0,88.334108,10.0,2
4,squad,3,0.0,88.330693,9.0,2
5,squad,1,0.0,88.240684,10.0,2
6,squad,2,0.01,88.267063,9.0,2
7,squad,4,0.1,88.252166,10.0,2
8,squad,5,0.01,88.235082,10.0,2
9,squad,5,0.0,88.251521,9.0,2


In [7]:
# For every task keep the single run with the highest `value`, together with
# its dis / wdec / epoch / seed columns.
# The winning rows are picked by index label and selected in one go, because
# DataFrame.append was removed in pandas 2.0.
best_row_labels = []
for task in glue:
    task_rows = df_results[df_results["task"] == task]
    if task_rows.empty:
        # No run was collected for this task; skip it.
        continue
    best_row_labels.append(task_rows.sort_values(by="value", ascending=False).index[0])

best_results = df_results.loc[best_row_labels].reset_index(drop=True)

In [8]:
# Print the best top-k run per task as a Markdown table.
print(best_results.to_markdown())

|    | task     |   dis |   wdec |   value |   epoch |   seed |
|---:|:---------|------:|-------:|--------:|--------:|-------:|
|  0 | squad    |     4 |   0.01 | 88.3786 |       9 |      2 |
|  1 | squad_v2 |     4 |   0.01 | 78.7283 |       6 |      2 |


## Trimmed Lasso Gate results

In [9]:
# Build one row per trimmed-lasso run: the task, the hyper-parameters parsed
# from the run folder name (dis, wdec, trl, seed), the best evaluation value
# and its epoch.
# Rows are gathered in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and growing a frame row by
# row is quadratic.
trimmed_records = []

for task in glue:
    for path in dirs_per_task_trimmed[task]:
        try:
            # Preferred source: the per-epoch evaluation history.
            with open(f"{path}/model/trainer_state.json", "r") as f:
                results = json.load(f)
            best_result, best_epoch = get_best_metric_squad(results['log_history'], metric_for_task[task])
        except FileNotFoundError:
            try:
                # Fallback: a summary file with no per-epoch history.
                with open(f"{path}/all_results.json", "r") as f:
                    best_result = json.load(f)[metric_for_task[task]]
                # presumably the last epoch of a 10-epoch run - TODO confirm
                best_epoch = 10
            except FileNotFoundError:
                # Neither file exists: record the run with zeroed results.
                best_result = 0
                best_epoch = 0

        # Folder name fields are separated by "_"; positions 5, 7, 9 and 11
        # hold the dis, wdec, trl and seed values respectively.
        name_parts = path.split("/")[-1].split("_")
        dis = name_parts[5]
        wdec = name_parts[7]
        trl = name_parts[9]
        # Folder names without a seed field default to seed "0".
        seed = name_parts[11] if len(name_parts) > 11 else "0"

        trimmed_records.append({"task": task, "dis": dis, "wdec": wdec, "trl": trl, "value": best_result, "epoch": best_epoch, "seed": seed})

df_results_trimmed = pd.DataFrame(trimmed_records, columns=["task", "dis", "wdec", "trl", "value", "epoch", "seed"])


In [10]:
# How many trimmed-lasso runs were collected per task (non-null `value` entries).
print(df_results_trimmed.groupby("task")["value"].count())

task
squad       101
squad_v2    100
Name: value, dtype: int64


In [11]:
# For every task keep the single trimmed-lasso run with the highest `value`,
# together with its dis / wdec / trl / epoch / seed columns.
# The winning rows are picked by index label and selected in one go, because
# DataFrame.append was removed in pandas 2.0.
best_trimmed_labels = []
for task in glue:
    task_rows = df_results_trimmed[df_results_trimmed["task"] == task]
    if task_rows.empty:
        # No run was collected for this task; skip it.
        continue
    best_trimmed_labels.append(task_rows.sort_values(by="value", ascending=False).index[0])

best_results_trimmed = df_results_trimmed.loc[best_trimmed_labels].reset_index(drop=True)

In [12]:
# Print the best trimmed-lasso run per task as a Markdown table.
print(best_results_trimmed.to_markdown())

|    | task     |   dis |   wdec |    trl |   value |   epoch |   seed |
|---:|:---------|------:|-------:|-------:|--------:|--------:|-------:|
|  0 | squad    |     5 |   0.01 | 5      | 88.4301 |      10 |      2 |
|  1 | squad_v2 |     2 |   0    | 0.0001 | 79.2045 |       7 |      2 |
