In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
from collections import OrderedDict
import pandas as pd
import re
from tqdm import tqdm

from io import StringIO
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

In [2]:
def em_and_f1(p_url, acc, id="L1", rounded_to_num=5):
    """Scrape one metric value from a rendered result page.

    The page at ``p_url`` is downloaded, the ``<tr>`` whose ``id`` attribute
    equals ``id`` is located, and every decimal number in that row's text
    cell is extracted.

    Args:
        p_url: URL of the page to fetch.
        acc: When truthy, return the first decimal number found in the row
            (callers in this notebook store it as EM); otherwise return the
            second one (stored as F1).
        id: ``id`` attribute of the table row to read. The name shadows the
            builtin but is kept so existing keyword callers keep working.
        rounded_to_num: Number of decimal places passed to ``round``.

    Returns:
        The selected number as a rounded ``float``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        ValueError: If the row, its text cell, or the requested number is
            missing from the page.
    """
    # A timeout keeps one stalled request from hanging the whole notebook run.
    page = requests.get(p_url, timeout=30)
    page.raise_for_status()

    # Pin the parser so the result does not depend on which optional parsers
    # happen to be installed (and to avoid bs4's GuessedAtParserWarning).
    soup = BeautifulSoup(page.content, "html.parser")

    row = soup.find('tr', attrs={'id': id})
    if row is None:
        raise ValueError(f"No <tr id={id!r}> found in {p_url}")

    cell = row.find(attrs={'class': 'overflow-visible whitespace-pre px-3'})
    if cell is None:
        raise ValueError(f"Row {id!r} of {p_url} has no metric text cell")

    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    numbers = re.findall(r"\d+\.\d+", cell.text)

    position = 0 if acc else 1
    if len(numbers) <= position:
        raise ValueError(
            f"Expected at least {position + 1} decimal number(s) in row {id!r} "
            f"of {p_url}, found {len(numbers)}"
        )

    return round(float(numbers[position]), rounded_to_num)

# Intermediate-task transfer learning (ITTL)

In [3]:
# URL pieces for the ITTL result pages; take_all_url_ittl() reads both globals when it is called.
# NOTE: the "Inference with NLI validation" section further down rebinds `prefix` and `suffix`,
# so this cell must be re-run before ITTL URLs are built again.
prefix = "https://huggingface.co/muhammadravi251001/fine-tuned-DatasetQAS-"
suffix = "-LR-1e-05/blob/main/results/evaluation/metric_result.txt"

In [4]:
def take_all_url_ittl(data):
    """Build the de-duplicated list of ITTL result-page URLs for one dataset.

    Every (ITTL, freeze) combination is enumerated, but a run without ITTL is
    always paired with "without-freeze", so the two without-ITTL combinations
    collapse into one URL. The URLs are assembled from the module-level
    ``prefix`` and ``suffix`` strings. The number of URLs is printed.

    Args:
        data: Dataset name as it appears in the model repository name.

    Returns:
        List of unique URLs in first-seen order (the without-ITTL run first).
    """
    combinations = [
        (ittl_tag, "without-freeze" if ittl_tag == "without-ITTL" else freeze_tag)
        for ittl_tag in ("without-ITTL", "with-ITTL")
        for freeze_tag in ("without-freeze", "with-freeze")
    ]

    # OrderedDict.fromkeys drops repeated URLs while keeping their order.
    url_arr = list(OrderedDict.fromkeys(
        f"{prefix}{data}-with-xlm-roberta-large-{ittl_tag}-{freeze_tag}{suffix}"
        for ittl_tag, freeze_tag in combinations
    ))

    print("Amount of url:", len(url_arr))
    return url_arr

In [5]:
def extract_values_ittl(url, data):
    """Derive the ITTL/freeze flags from a URL and scrape its EM and F1.

    Args:
        url: Result-page URL; the flags are read from the substrings
            "with-ITTL" and "with-freeze".
        data: Dataset name; "IDK-MRC", "TYDI-QA-ID" and "Squad-ID" are mapped
            to their lower-case short forms, any other value is returned as is.

    Returns:
        Tuple ``(data, ittl, freeze, EM, F1)`` where the flags are booleans
        and the metrics come from ``em_and_f1``.
    """
    uses_ittl = re.search(r'with-ITTL', url) is not None
    uses_freeze = re.search(r'with-freeze', url) is not None

    # Each em_and_f1 call fetches the page again: one request per metric.
    exact_match = em_and_f1(url, True)
    f1_score = em_and_f1(url, False)

    short_names = {
        "IDK-MRC": "idkmrc",
        "TYDI-QA-ID": "tydiqaid",
        "Squad-ID": "squadid",
    }

    return short_names.get(data, data), uses_ittl, uses_freeze, exact_match, f1_score

In [6]:
def create_table_ittl(url_list, data):
    """Scrape every ITTL result page and assemble the results into a table.

    Args:
        url_list: Result-page URLs. Row 0 is labelled as the baseline, so the
            first URL is assumed to be the without-ITTL run (which is what
            ``take_all_url_ittl`` puts first).
        data: Dataset name forwarded to ``extract_values_ittl``.

    Returns:
        DataFrame with columns ``data``, ``ITTL``, ``Freeze``, ``EM``, ``F1``.
        ``ITTL`` and ``Freeze`` are object columns holding booleans, except in
        row 0 where both hold the string "Baseline". An empty ``url_list``
        yields an empty frame.
    """
    data_list = [
        extract_values_ittl(url, data)
        for url in tqdm(url_list, desc="Processing URLs", unit="URL")
    ]

    df = pd.DataFrame(data_list, columns=['data', 'ITTL', 'Freeze', 'EM', 'F1'])

    # Writing a string into a bool column is an incompatible-dtype assignment
    # (deprecated in recent pandas), so widen the flag columns explicitly first.
    flag_columns = ['ITTL', 'Freeze']
    df = df.astype({column: object for column in flag_columns})

    # Guard against .loc enlargement creating a phantom row in an empty frame.
    if not df.empty:
        df.loc[0, flag_columns] = "Baseline"

    return df

## IDK-MRC

In [7]:
# IDK-MRC: scrape EM/F1 for every ITTL run, export the table to Excel, then display it.
data = "IDK-MRC"
url_list = take_all_url_ittl(data)
# create_table_ittl already returns a DataFrame; the pd.DataFrame(...) wrapper is redundant.
table = pd.DataFrame(create_table_ittl(url_list, data))
# Relative filename: the workbook lands in the current working directory.
table.to_excel(f'{data}-ittl.xlsx', index=False)
table

Amount of url: 3


Processing URLs: 100%|███████████████████████████████████████████████████████████████████| 3/3 [00:04<00:00,  1.53s/URL]


Unnamed: 0,data,ITTL,Freeze,EM,F1
0,idkmrc,Baseline,Baseline,77.94811,84.63667
1,idkmrc,True,False,77.94811,84.23181
2,idkmrc,True,True,78.30189,85.14123


In [8]:
# Show the ITTL table ordered by EM, highest first.
# Relies on `data` and `table` still holding the values set by the cell above.
print(f"{data} sorted by EM")
sorted_table = table.sort_values(by='EM', ascending=False)
sorted_table

IDK-MRC sorted by EM


Unnamed: 0,data,ITTL,Freeze,EM,F1
2,idkmrc,True,True,78.30189,85.14123
0,idkmrc,Baseline,Baseline,77.94811,84.63667
1,idkmrc,True,False,77.94811,84.23181


In [9]:
# Show the ITTL table ordered by F1, highest first (overwrites `sorted_table`).
print(f"{data} sorted by F1")
sorted_table = table.sort_values(by='F1', ascending=False)
sorted_table

IDK-MRC sorted by F1


Unnamed: 0,data,ITTL,Freeze,EM,F1
2,idkmrc,True,True,78.30189,85.14123
0,idkmrc,Baseline,Baseline,77.94811,84.63667
1,idkmrc,True,False,77.94811,84.23181


## TYDI-QA-ID

In [10]:
# TYDI-QA-ID: scrape EM/F1 for every ITTL run, export the table to Excel, then display it.
# Rebinds the notebook-level `data`, `url_list` and `table` from the previous dataset.
data = "TYDI-QA-ID"
url_list = take_all_url_ittl(data)
# create_table_ittl already returns a DataFrame; the pd.DataFrame(...) wrapper is redundant.
table = pd.DataFrame(create_table_ittl(url_list, data))
# Relative filename: the workbook lands in the current working directory.
table.to_excel(f'{data}-ittl.xlsx', index=False)
table

Amount of url: 3


Processing URLs: 100%|███████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.00URL/s]


Unnamed: 0,data,ITTL,Freeze,EM,F1
0,tydiqaid,Baseline,Baseline,66.51109,78.26768
1,tydiqaid,True,False,65.11085,76.39569
2,tydiqaid,True,True,66.51109,77.58959


In [11]:
# Show the ITTL table ordered by EM, highest first.
# Relies on `data` and `table` still holding the values set by the cell above.
print(f"{data} sorted by EM")
sorted_table = table.sort_values(by='EM', ascending=False)
sorted_table

TYDI-QA-ID sorted by EM


Unnamed: 0,data,ITTL,Freeze,EM,F1
0,tydiqaid,Baseline,Baseline,66.51109,78.26768
2,tydiqaid,True,True,66.51109,77.58959
1,tydiqaid,True,False,65.11085,76.39569


In [12]:
# Show the ITTL table ordered by F1, highest first (overwrites `sorted_table`).
print(f"{data} sorted by F1")
sorted_table = table.sort_values(by='F1', ascending=False)
sorted_table

TYDI-QA-ID sorted by F1


Unnamed: 0,data,ITTL,Freeze,EM,F1
0,tydiqaid,Baseline,Baseline,66.51109,78.26768
2,tydiqaid,True,True,66.51109,77.58959
1,tydiqaid,True,False,65.11085,76.39569


## SQUAD-ID

In [13]:
# Squad-ID: scrape EM/F1 for every ITTL run, export the table to Excel, then display it.
# Rebinds the notebook-level `data`, `url_list` and `table` from the previous dataset.
data = "Squad-ID"
url_list = take_all_url_ittl(data)
# create_table_ittl already returns a DataFrame; the pd.DataFrame(...) wrapper is redundant.
table = pd.DataFrame(create_table_ittl(url_list, data))
# Relative filename: the workbook lands in the current working directory.
table.to_excel(f'{data}-ittl.xlsx', index=False)
table

Amount of url: 3


Processing URLs: 100%|███████████████████████████████████████████████████████████████████| 3/3 [00:03<00:00,  1.00s/URL]


Unnamed: 0,data,ITTL,Freeze,EM,F1
0,squadid,Baseline,Baseline,49.54021,66.69283
1,squadid,True,False,49.94148,67.20374
2,squadid,True,True,50.40963,67.63862


In [14]:
# Show the ITTL table ordered by EM, highest first.
# Relies on `data` and `table` still holding the values set by the cell above.
print(f"{data} sorted by EM")
sorted_table = table.sort_values(by='EM', ascending=False)
sorted_table

Squad-ID sorted by EM


Unnamed: 0,data,ITTL,Freeze,EM,F1
2,squadid,True,True,50.40963,67.63862
1,squadid,True,False,49.94148,67.20374
0,squadid,Baseline,Baseline,49.54021,66.69283


In [15]:
# Show the ITTL table ordered by F1, highest first (overwrites `sorted_table`).
print(f"{data} sorted by F1")
sorted_table = table.sort_values(by='F1', ascending=False)
sorted_table

Squad-ID sorted by F1


Unnamed: 0,data,ITTL,Freeze,EM,F1
2,squadid,True,True,50.40963,67.63862
1,squadid,True,False,49.94148,67.20374
0,squadid,Baseline,Baseline,49.54021,66.69283


# Inference with NLI validation

In [16]:
# URL pieces for the FilteringNLI result pages, read by take_all_url_nli_validator().
# NOTE: this rebinds the `prefix`/`suffix` globals that take_all_url_ittl() also reads, so the
# ITTL cells above give wrong URLs if they are re-run after this cell.
prefix = "https://huggingface.co/muhammadravi251001/fine-tuned-FilteringNLI-"
suffix = "/blob/main/results/evaluation/metric_result.txt"

# Tails for the per-example output CSV. take_all_url_nli_validator() uses the /raw/ form for
# idkmrc and tydiqaid and the /resolve/ form for squadid.
suffix_df = "/raw/main/results/output/output_df.csv"
suffix_squadid_df = "/resolve/main/results/output/output_df.csv"

In [17]:
def take_all_url_nli_validator(data, df=False):
    """Enumerate the FilteringNLI result URLs for one dataset.

    The first URL is always the baseline run
    (``indonli_mnli-{data}-TQ2-TS4-MS3-VA0-TH0.0``). It is followed by one URL
    per configuration, ordered by MSC, then TQ, MS, VA and TH:

    * for the "indonli" and "indonli_mnli" MSCs only ``VA1``/``TH0.0`` exists;
    * for the dataset-specific MSC, ``VA1`` is paired with ``TH0.0`` only,
      ``VA2`` and ``VA3`` are paired with each threshold 0.25/0.5/0.75, and
      ``VA3`` is skipped when MS is 1.

    URLs are assembled from the module-level ``prefix`` and suffix strings,
    and the number of URLs is printed.

    Args:
        data: Dataset short name used in the repository name.
        df: When false, build URLs of the metric page (``suffix``). When true,
            build URLs of the output CSV, using ``suffix_df`` for "idkmrc" and
            "tydiqaid" and ``suffix_squadid_df`` for "squadid".

    Returns:
        List of URLs, baseline first.

    Raises:
        ValueError: If ``df`` is true and ``data`` is not one of the three
            known datasets (previously this surfaced as ``UnboundLocalError``).
    """
    # Resolve the URL tail once instead of repeating the branch in the loop.
    if not df:
        tail = suffix
    elif data in ("idkmrc", "tydiqaid"):
        tail = suffix_df
    elif data == "squadid":
        tail = suffix_squadid_df
    else:
        raise ValueError(
            f"Unknown dataset {data!r}: expected 'idkmrc', 'tydiqaid' or "
            f"'squadid' when df=True"
        )

    def build_url(msc, tq, msi, var, th):
        # TS is fixed to 4 for every run listed here.
        return f"{prefix}{msc}-{data}-TQ{tq}-TS4-MS{msi}-VA{var}-TH{th}{tail}"

    msc_arr = ["indonli", "indonli_mnli", f"indonli_mnli_{data}-nli"]
    tq_arr = [1, 2]
    msi_arr = [1, 2, 3]
    var_arr = [1, 2, 3]
    th_arr = [0.25, 0.5, 0.75]

    url_arr = [build_url("indonli_mnli", 2, 3, 0, 0.0)]

    # Each configuration is generated exactly once, so no de-duplication pass
    # is needed and the loop variables are never overwritten.
    for msc in msc_arr:
        for tq in tq_arr:
            for msi in msi_arr:
                if msc in ("indonli", "indonli_mnli"):
                    url_arr.append(build_url(msc, tq, msi, 1, 0.0))
                    continue

                for var in var_arr:
                    if var == 1:
                        url_arr.append(build_url(msc, tq, msi, var, 0.0))
                    elif var == 3 and msi == 1:
                        continue
                    else:
                        url_arr.extend(
                            build_url(msc, tq, msi, var, th) for th in th_arr
                        )

    print("Amount of url:", len(url_arr))
    return url_arr

In [18]:
def take_dataframe(url):
    """Download an output CSV and return its answer columns.

    Args:
        url: URL of the CSV file. Its first column is used as the index.

    Returns:
        DataFrame with the columns ``Prediction Answer``, ``Rec. Pred Answer``,
        ``Gold Answer`` and ``Properties``, with missing values replaced by
        empty strings.

    Raises:
        RuntimeError: If the server does not answer with HTTP 200. The
            previous version printed a message and then failed with
            ``UnboundLocalError`` on ``return df``.
    """
    # A timeout keeps one stalled download from hanging the whole notebook run.
    response = requests.get(url, timeout=60)

    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to download CSV (HTTP {response.status_code}): {url}"
        )

    wanted_columns = ['Prediction Answer', 'Rec. Pred Answer', 'Gold Answer', 'Properties']
    df = pd.read_csv(StringIO(response.text), index_col=0)

    # fillna returns a new frame; filling the column slice in place can
    # trigger SettingWithCopyWarning.
    return df[wanted_columns].fillna('')

In [19]:
def take_accepted_and_rejected_qa_with_nli(url):
    """Count how the NLI step treated right and wrong QA answers.

    For each row of the CSV at ``url`` the QA answer is the first element of
    the evaluated ``Rec. Pred Answer`` cell. It counts as right when it equals
    ``Gold Answer``, and as accepted by NLI when it equals
    ``Prediction Answer``.

    Args:
        url: URL of the output CSV, forwarded to ``take_dataframe``.

    Returns:
        Tuple ``(right_accepted, right_rejected, wrong_accepted,
        wrong_rejected)`` of row counts.
    """
    df = take_dataframe(url)

    right_pred_model_qa_and_accepted_by_nli_ctr = 0
    right_pred_model_qa_and_rejected_by_nli_ctr = 0
    wrong_pred_model_qa_and_accepted_by_nli_ctr = 0
    wrong_pred_model_qa_and_rejected_by_nli_ctr = 0

    # Iterate over the values themselves: df[col][i] is a label lookup that
    # only works when the CSV's index column is exactly 0..n-1.
    rows = zip(df['Gold Answer'], df['Rec. Pred Answer'], df['Prediction Answer'])

    for gold_answer, raw_candidates, nli_validated_answer in rows:

        # SECURITY(review): eval() executes whatever expression the downloaded
        # CSV contains. If the cell is always a plain Python literal, switch to
        # ast.literal_eval -- TODO confirm the cell format before replacing.
        qa_generated_answer = eval(raw_candidates)[0]

        qa_answer_is_right = gold_answer == qa_generated_answer
        accepted_by_nli = qa_generated_answer == nli_validated_answer

        if qa_answer_is_right and accepted_by_nli:
            # Right QA answer, accepted by the NLI model.
            right_pred_model_qa_and_accepted_by_nli_ctr += 1
        elif qa_answer_is_right:
            # Right QA answer, rejected by the NLI model.
            right_pred_model_qa_and_rejected_by_nli_ctr += 1
        elif accepted_by_nli:
            # Wrong QA answer, accepted by the NLI model.
            wrong_pred_model_qa_and_accepted_by_nli_ctr += 1
        else:
            # Wrong QA answer, rejected by the NLI model.
            wrong_pred_model_qa_and_rejected_by_nli_ctr += 1

    return right_pred_model_qa_and_accepted_by_nli_ctr, right_pred_model_qa_and_rejected_by_nli_ctr, \
            wrong_pred_model_qa_and_accepted_by_nli_ctr, wrong_pred_model_qa_and_rejected_by_nli_ctr

In [20]:
def extract_values_nli_validator(url, data):
    """Parse the run configuration out of a FilteringNLI URL and scrape its metrics.

    The URL must contain
    ``FilteringNLI-<msc>-<data>-TQ<n>-TS<n>-MS<n>-VA<n>-TH<x>`` where ``<msc>``
    is "indonli", "indonli_mnli" or "indonli_mnli_<data>-nli".

    Args:
        url: Result-page URL.
        data: Dataset short name; matched literally inside the URL.

    Returns:
        Tuple ``(msc, data, TQ, TS, MSI, VA, TH, EM, F1)`` with ``TQ``, ``TS``,
        ``MSI`` and ``VA`` as ints and ``TH``, ``EM`` and ``F1`` as floats, or
        ``None`` when the URL does not match the expected shape.
    """
    # Escape the dataset name so regex metacharacters in it match literally.
    dataset = re.escape(data)

    # The longest alternative comes first so "indonli" cannot shadow the longer names.
    msc_list = f"indonli_mnli_{dataset}-nli|indonli_mnli|indonli"
    pattern = re.compile(
        r'.*FilteringNLI-(' + msc_list + r')-' + dataset
        + r'-TQ(\d+)-TS(\d+)-MS(\d+)-VA(\d+)-TH([\d.]+).*'
    )
    match = pattern.match(url)

    if match is None:
        return None

    msc = match.group(1)
    TQ = int(match.group(2))
    TS = int(match.group(3))
    MSI = int(match.group(4))
    VA = int(match.group(5))
    # float() already normalises "0.00"/"0.50"; the old string comparisons
    # against the float could never be true and were dead code.
    TH = float(match.group(6))

    # Each em_and_f1 call fetches the page again: one request per metric.
    EM = em_and_f1(url, True)
    F1 = em_and_f1(url, False)

    return msc, data, TQ, TS, MSI, VA, TH, EM, F1

In [21]:
def create_table_nli_validator(url_list, url_list_df, data):
    """Build the NLI-validation results table for one dataset.

    Args:
        url_list: Metric-page URLs, baseline first (row 0 is labelled as the
            baseline).
        url_list_df: Output-CSV URLs; entry ``i`` must describe the same run
            as ``url_list[i]``.
        data: Dataset short name forwarded to ``extract_values_nli_validator``.

    Returns:
        DataFrame with columns ``MSC, data, TQ, TS, MSI, VA, TH, EM, F1`` plus
        the integer counts ``RA, RR, WA, WR`` (right/wrong QA answers that were
        accepted/rejected by NLI, in the order returned by
        ``take_accepted_and_rejected_qa_with_nli``). In row 0, ``MSC`` is
        "Baseline" and ``TQ``..``TH`` are "BL".

    Raises:
        ValueError: If a URL in ``url_list`` cannot be parsed, or if the two
            URL lists have different lengths.
    """
    metric_rows = []
    for url in tqdm(url_list, desc="Processing first URLs", unit="URL"):
        extracted_values = extract_values_nli_validator(url, data)
        if extracted_values is None:
            # A None row would make the DataFrame constructor fail obscurely.
            raise ValueError(f"URL does not look like a FilteringNLI result page: {url}")
        metric_rows.append(extracted_values)

    df = pd.DataFrame(metric_rows, columns=['MSC', 'data', 'TQ', 'TS', 'MSI', 'VA', 'TH', 'EM', 'F1'])

    count_rows = [
        take_accepted_and_rejected_qa_with_nli(url_df)
        for url_df in tqdm(url_list_df, desc="Processing second URLs", unit="URL")
    ]
    if len(count_rows) != len(df):
        raise ValueError(
            f"Got {len(df)} metric URLs but {len(count_rows)} CSV URLs; "
            f"the two lists must line up one-to-one"
        )

    # Build the count columns in one step instead of enlarging the frame row
    # by row through .loc (which creates NaN-filled float columns first).
    counts = pd.DataFrame(count_rows, columns=['RA', 'RR', 'WA', 'WR']).astype(int)
    df = df.join(counts)

    # "BL" is a string, so widen the numeric label columns explicitly instead
    # of relying on an implicit (deprecated) upcast during assignment.
    label_columns = ['TQ', 'TS', 'MSI', 'VA', 'TH']
    df = df.astype({column: object for column in label_columns})

    # Guard against .loc enlargement creating a phantom row in an empty frame.
    if not df.empty:
        df.loc[0, 'MSC'] = "Baseline"
        df.loc[0, label_columns] = "BL"

    return df

In [22]:
# IDK-MRC: scrape metrics and NLI accept/reject counts, export to Excel, then display the table.
data = "idkmrc"
url_list = take_all_url_nli_validator(data)  # metric_result.txt pages
url_list_df = take_all_url_nli_validator(data, df=True)  # output_df.csv files for the same runs, same order
# create_table_nli_validator already returns a DataFrame; the pd.DataFrame(...) wrapper is redundant.
table = pd.DataFrame(create_table_nli_validator(url_list, url_list_df, data))
# Relative filename: the workbook lands in the current working directory.
table.to_excel(f'{data}-nli-validator.xlsx', index=False)
table

Amount of url: 49
Amount of url: 49


Processing first URLs: 100%|███████████████████████████████████████████████████████████| 49/49 [00:55<00:00,  1.14s/URL]
Processing second URLs: 100%|██████████████████████████████████████████████████████████| 49/49 [00:46<00:00,  1.05URL/s]


Unnamed: 0,MSC,data,TQ,TS,MSI,VA,TH,EM,F1,RA,RR,WA,WR
0,Baseline,idkmrc,BL,BL,BL,BL,BL,75.82938,84.25128,640,0,204,0
1,indonli,idkmrc,1,4,1,1,0.0,68.48341,75.02483,567,73,157,47
2,indonli,idkmrc,1,4,2,1,0.0,54.62085,62.08946,450,190,137,67
3,indonli,idkmrc,1,4,3,1,0.0,43.95735,51.95651,361,279,133,71
4,indonli,idkmrc,2,4,1,1,0.0,75.35545,83.4219,633,7,195,9
5,indonli,idkmrc,2,4,2,1,0.0,73.93365,82.22194,621,19,193,11
6,indonli,idkmrc,2,4,3,1,0.0,72.63033,80.93617,610,30,193,11
7,indonli_mnli,idkmrc,1,4,1,1,0.0,71.32701,78.02263,589,51,153,51
8,indonli_mnli,idkmrc,1,4,2,1,0.0,61.96682,69.50875,509,131,136,68
9,indonli_mnli,idkmrc,1,4,3,1,0.0,52.60664,60.5456,431,209,132,72


In [23]:
# Show the NLI-validation table ordered by EM, highest first.
# Relies on `data` and `table` still holding the values set by the cell above.
print(f"{data} sorted by EM")
sorted_table = table.sort_values(by='EM', ascending=False)
sorted_table

idkmrc sorted by EM


Unnamed: 0,MSC,data,TQ,TS,MSI,VA,TH,EM,F1,RA,RR,WA,WR
16,indonli_mnli_idkmrc-nli,idkmrc,1,4,1,2,0.75,75.94787,84.16834,639,1,198,6
34,indonli_mnli_idkmrc-nli,idkmrc,2,4,1,2,0.75,75.94787,84.16834,639,1,198,6
13,indonli_mnli_idkmrc-nli,idkmrc,1,4,1,1,0.0,75.82938,84.16834,639,1,201,3
31,indonli_mnli_idkmrc-nli,idkmrc,2,4,1,1,0.0,75.82938,84.16834,639,1,201,3
33,indonli_mnli_idkmrc-nli,idkmrc,2,4,1,2,0.5,75.82938,84.16834,639,1,201,3
15,indonli_mnli_idkmrc-nli,idkmrc,1,4,1,2,0.5,75.82938,84.16834,639,1,201,3
14,indonli_mnli_idkmrc-nli,idkmrc,1,4,1,2,0.25,75.82938,84.16834,639,1,201,3
0,Baseline,idkmrc,BL,BL,BL,BL,BL,75.82938,84.25128,640,0,204,0
32,indonli_mnli_idkmrc-nli,idkmrc,2,4,1,2,0.25,75.82938,84.16834,639,1,201,3
10,indonli_mnli,idkmrc,2,4,1,1,0.0,75.47393,83.89583,636,4,202,2


In [24]:
# Show the NLI-validation table ordered by F1, highest first (overwrites `sorted_table`).
print(f"{data} sorted by F1")
sorted_table = table.sort_values(by='F1', ascending=False)
sorted_table

idkmrc sorted by F1


Unnamed: 0,MSC,data,TQ,TS,MSI,VA,TH,EM,F1,RA,RR,WA,WR
0,Baseline,idkmrc,BL,BL,BL,BL,BL,75.82938,84.25128,640,0,204,0
13,indonli_mnli_idkmrc-nli,idkmrc,1,4,1,1,0.0,75.82938,84.16834,639,1,201,3
31,indonli_mnli_idkmrc-nli,idkmrc,2,4,1,1,0.0,75.82938,84.16834,639,1,201,3
34,indonli_mnli_idkmrc-nli,idkmrc,2,4,1,2,0.75,75.94787,84.16834,639,1,198,6
33,indonli_mnli_idkmrc-nli,idkmrc,2,4,1,2,0.5,75.82938,84.16834,639,1,201,3
15,indonli_mnli_idkmrc-nli,idkmrc,1,4,1,2,0.5,75.82938,84.16834,639,1,201,3
14,indonli_mnli_idkmrc-nli,idkmrc,1,4,1,2,0.25,75.82938,84.16834,639,1,201,3
16,indonli_mnli_idkmrc-nli,idkmrc,1,4,1,2,0.75,75.94787,84.16834,639,1,198,6
32,indonli_mnli_idkmrc-nli,idkmrc,2,4,1,2,0.25,75.82938,84.16834,639,1,201,3
10,indonli_mnli,idkmrc,2,4,1,1,0.0,75.47393,83.89583,636,4,202,2


## TYDI-QA-ID

In [25]:
# TYDI-QA-ID: scrape metrics and NLI accept/reject counts, export to Excel, then display the table.
# Rebinds the notebook-level `data`, `url_list`, `url_list_df` and `table` from the previous dataset.
data = "tydiqaid"
url_list = take_all_url_nli_validator(data)  # metric_result.txt pages
url_list_df = take_all_url_nli_validator(data, df=True)  # output_df.csv files for the same runs, same order
# create_table_nli_validator already returns a DataFrame; the pd.DataFrame(...) wrapper is redundant.
table = pd.DataFrame(create_table_nli_validator(url_list, url_list_df, data))
# Relative filename: the workbook lands in the current working directory.
table.to_excel(f'{data}-nli-validator.xlsx', index=False)
table

Amount of url: 49
Amount of url: 49


Processing first URLs: 100%|███████████████████████████████████████████████████████████| 49/49 [00:54<00:00,  1.11s/URL]
Processing second URLs: 100%|██████████████████████████████████████████████████████████| 49/49 [00:47<00:00,  1.03URL/s]


Unnamed: 0,MSC,data,TQ,TS,MSI,VA,TH,EM,F1,RA,RR,WA,WR
0,Baseline,tydiqaid,BL,BL,BL,BL,BL,61.28655,76.47617,524,0,331,0
1,indonli,tydiqaid,1,4,1,1,0.0,35.90643,46.40946,307,217,212,119
2,indonli,tydiqaid,1,4,2,1,0.0,36.14035,48.78837,307,217,201,130
3,indonli,tydiqaid,1,4,3,1,0.0,36.25731,50.22544,307,217,201,130
4,indonli,tydiqaid,2,4,1,1,0.0,59.76608,74.66427,511,13,319,12
5,indonli,tydiqaid,2,4,2,1,0.0,60.0,75.23357,511,13,318,13
6,indonli,tydiqaid,2,4,3,1,0.0,60.11696,75.3528,511,13,318,13
7,indonli_mnli,tydiqaid,1,4,1,1,0.0,42.92398,54.29436,367,157,223,108
8,indonli_mnli,tydiqaid,1,4,2,1,0.0,43.50877,56.56735,367,157,213,118
9,indonli_mnli,tydiqaid,1,4,3,1,0.0,43.50877,57.579,367,157,212,119


In [26]:
# Show the NLI-validation table ordered by EM, highest first.
# Relies on `data` and `table` still holding the values set by the cell above.
print(f"{data} sorted by EM")
sorted_table = table.sort_values(by='EM', ascending=False)
sorted_table

tydiqaid sorted by EM


Unnamed: 0,MSC,data,TQ,TS,MSI,VA,TH,EM,F1,RA,RR,WA,WR
0,Baseline,tydiqaid,BL,BL,BL,BL,BL,61.28655,76.47617,524,0,331,0
19,indonli_mnli_tydiqaid-nli,tydiqaid,1,4,2,2,0.5,61.28655,77.30986,523,1,304,27
22,indonli_mnli_tydiqaid-nli,tydiqaid,1,4,2,3,0.5,61.28655,77.4529,523,1,305,26
25,indonli_mnli_tydiqaid-nli,tydiqaid,1,4,3,2,0.25,61.28655,77.30986,523,1,304,27
26,indonli_mnli_tydiqaid-nli,tydiqaid,1,4,3,2,0.5,61.28655,77.30986,523,1,304,27
28,indonli_mnli_tydiqaid-nli,tydiqaid,1,4,3,3,0.25,61.28655,77.33594,523,1,305,26
29,indonli_mnli_tydiqaid-nli,tydiqaid,1,4,3,3,0.5,61.28655,77.33594,523,1,305,26
35,indonli_mnli_tydiqaid-nli,tydiqaid,2,4,2,1,0.0,61.28655,77.30986,523,1,304,27
36,indonli_mnli_tydiqaid-nli,tydiqaid,2,4,2,2,0.25,61.28655,77.30986,523,1,304,27
37,indonli_mnli_tydiqaid-nli,tydiqaid,2,4,2,2,0.5,61.28655,77.30986,523,1,304,27


In [27]:
# Show the NLI-validation table ordered by F1, highest first (overwrites `sorted_table`).
print(f"{data} sorted by F1")
sorted_table = table.sort_values(by='F1', ascending=False)
sorted_table

tydiqaid sorted by F1


Unnamed: 0,MSC,data,TQ,TS,MSI,VA,TH,EM,F1,RA,RR,WA,WR
22,indonli_mnli_tydiqaid-nli,tydiqaid,1,4,2,3,0.5,61.28655,77.4529,523,1,305,26
21,indonli_mnli_tydiqaid-nli,tydiqaid,1,4,2,3,0.25,61.28655,77.4529,523,1,305,26
40,indonli_mnli_tydiqaid-nli,tydiqaid,2,4,2,3,0.5,61.28655,77.4529,523,1,305,26
39,indonli_mnli_tydiqaid-nli,tydiqaid,2,4,2,3,0.25,61.28655,77.4529,523,1,305,26
41,indonli_mnli_tydiqaid-nli,tydiqaid,2,4,2,3,0.75,61.16959,77.42606,521,3,303,28
23,indonli_mnli_tydiqaid-nli,tydiqaid,1,4,2,3,0.75,61.16959,77.42606,521,3,303,28
28,indonli_mnli_tydiqaid-nli,tydiqaid,1,4,3,3,0.25,61.28655,77.33594,523,1,305,26
47,indonli_mnli_tydiqaid-nli,tydiqaid,2,4,3,3,0.5,61.28655,77.33594,523,1,305,26
46,indonli_mnli_tydiqaid-nli,tydiqaid,2,4,3,3,0.25,61.28655,77.33594,523,1,305,26
29,indonli_mnli_tydiqaid-nli,tydiqaid,1,4,3,3,0.5,61.28655,77.33594,523,1,305,26


## SQUAD-ID

In [28]:
# SQUAD-ID: scrape metrics and NLI accept/reject counts, export to Excel, then display the table.
# Rebinds the notebook-level `data`, `url_list`, `url_list_df` and `table` from the previous dataset.
data = "squadid"
url_list = take_all_url_nli_validator(data)  # metric_result.txt pages
url_list_df = take_all_url_nli_validator(data, df=True)  # output_df.csv files (the /resolve/ URL form for squadid)
# create_table_nli_validator already returns a DataFrame; the pd.DataFrame(...) wrapper is redundant.
table = pd.DataFrame(create_table_nli_validator(url_list, url_list_df, data))
# Relative filename: the workbook lands in the current working directory.
table.to_excel(f'{data}-nli-validator.xlsx', index=False)
table

Amount of url: 49
Amount of url: 49


Processing first URLs: 100%|███████████████████████████████████████████████████████████| 49/49 [00:53<00:00,  1.09s/URL]
Processing second URLs: 100%|██████████████████████████████████████████████████████████| 49/49 [04:13<00:00,  5.18s/URL]


Unnamed: 0,MSC,data,TQ,TS,MSI,VA,TH,EM,F1,RA,RR,WA,WR
0,Baseline,squadid,BL,BL,BL,BL,BL,47.91544,67.07638,5689,0,6184,0
1,indonli,squadid,1,4,1,1,0.0,13.66967,20.70488,1615,4074,2350,3834
2,indonli,squadid,1,4,2,1,0.0,13.78759,23.24303,1615,4074,2168,4016
3,indonli,squadid,1,4,3,1,0.0,13.79601,25.48443,1615,4074,2133,4051
4,indonli,squadid,2,4,1,1,0.0,40.84056,56.91996,4847,842,5308,876
5,indonli,squadid,2,4,2,1,0.0,40.95848,57.65389,4847,842,5289,895
6,indonli,squadid,2,4,3,1,0.0,40.95848,58.03293,4847,842,5285,899
7,indonli_mnli,squadid,1,4,1,1,0.0,21.88158,31.73284,2592,3097,3157,3027
8,indonli_mnli,squadid,1,4,2,1,0.0,22.02476,34.01653,2592,3097,2996,3188
9,indonli_mnli,squadid,1,4,3,1,0.0,22.05845,35.85515,2592,3097,2964,3220


In [29]:
# Show the NLI-validation table ordered by EM, highest first.
# Relies on `data` and `table` still holding the values set by the cell above.
print(f"{data} sorted by EM")
sorted_table = table.sort_values(by='EM', ascending=False)
sorted_table

squadid sorted by EM


Unnamed: 0,MSC,data,TQ,TS,MSI,VA,TH,EM,F1,RA,RR,WA,WR
0,Baseline,squadid,BL,BL,BL,BL,BL,47.91544,67.07638,5689,0,6184,0
40,indonli_mnli_squadid-nli,squadid,2,4,2,3,0.5,47.80595,67.05412,5661,28,6032,152
22,indonli_mnli_squadid-nli,squadid,1,4,2,3,0.5,47.80595,67.05292,5661,28,6029,155
39,indonli_mnli_squadid-nli,squadid,2,4,2,3,0.25,47.80595,67.05412,5661,28,6032,152
21,indonli_mnli_squadid-nli,squadid,1,4,2,3,0.25,47.80595,67.05292,5661,28,6029,155
46,indonli_mnli_squadid-nli,squadid,2,4,3,3,0.25,47.78068,67.03771,5659,30,6019,165
29,indonli_mnli_squadid-nli,squadid,1,4,3,3,0.5,47.78068,67.04097,5659,30,6016,168
28,indonli_mnli_squadid-nli,squadid,1,4,3,3,0.25,47.78068,67.04097,5659,30,6016,168
47,indonli_mnli_squadid-nli,squadid,2,4,3,3,0.5,47.78068,67.03771,5659,30,6019,165
43,indonli_mnli_squadid-nli,squadid,2,4,3,2,0.25,47.76383,67.01314,5657,32,6016,168


In [30]:
# Show the NLI-validation table ordered by F1, highest first (overwrites `sorted_table`).
print(f"{data} sorted by F1")
sorted_table = table.sort_values(by='F1', ascending=False)
sorted_table

squadid sorted by F1


Unnamed: 0,MSC,data,TQ,TS,MSI,VA,TH,EM,F1,RA,RR,WA,WR
0,Baseline,squadid,BL,BL,BL,BL,BL,47.91544,67.07638,5689,0,6184,0
40,indonli_mnli_squadid-nli,squadid,2,4,2,3,0.5,47.80595,67.05412,5661,28,6032,152
39,indonli_mnli_squadid-nli,squadid,2,4,2,3,0.25,47.80595,67.05412,5661,28,6032,152
21,indonli_mnli_squadid-nli,squadid,1,4,2,3,0.25,47.80595,67.05292,5661,28,6029,155
22,indonli_mnli_squadid-nli,squadid,1,4,2,3,0.5,47.80595,67.05292,5661,28,6029,155
29,indonli_mnli_squadid-nli,squadid,1,4,3,3,0.5,47.78068,67.04097,5659,30,6016,168
28,indonli_mnli_squadid-nli,squadid,1,4,3,3,0.25,47.78068,67.04097,5659,30,6016,168
47,indonli_mnli_squadid-nli,squadid,2,4,3,3,0.5,47.78068,67.03771,5659,30,6019,165
46,indonli_mnli_squadid-nli,squadid,2,4,3,3,0.25,47.78068,67.03771,5659,30,6019,165
26,indonli_mnli_squadid-nli,squadid,1,4,3,2,0.5,47.76383,67.0164,5657,32,6013,171
