# Inference with NLI validation

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re

In [2]:
# URL pieces used to build the address of each model's metric file page.
# `prefix` is the common start of every model repository URL; the
# model-specific part of the repository name is inserted between the two.
prefix = "https://huggingface.co/muhammadravi251001/fine-tuned-FilteringNLI-"
# Path of the rendered metric file inside each model repository.
suffix = "/blob/main/results/evaluation/metric_result.txt"

In [3]:
def acc_and_f1(p_url, acc, id="L1"):
    """Scrape one score from a rendered file page.

    Downloads ``p_url``, locates the ``<tr>`` element whose ``id`` attribute
    equals ``id``, and extracts the decimal numbers from that row's code cell.

    Args:
        p_url: URL of the page to download.
        acc: If truthy, return the first decimal number found in the row;
            otherwise return the second one. The callers in this notebook
            treat the first number as accuracy and the second as F1
            (presumably the order used in the metric file -- confirm).
        id: ``id`` attribute of the table row to read. Defaults to ``"L1"``.
            (The name shadows the ``id`` builtin; it is kept so existing
            keyword callers keep working.)

    Returns:
        The selected number as a float rounded to 3 decimal places.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the row or its code cell is missing, or the row holds
            fewer than two decimal numbers.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    page = requests.get(p_url, timeout=30)
    # Fail loudly on 404/5xx instead of trying to parse an error page.
    page.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid the bs4 warning).
    soup = BeautifulSoup(page.content, "html.parser")

    tr = soup.find('tr', attrs={'id': id})
    if tr is None:
        raise ValueError(f"No <tr id={id!r}> found at {p_url}")

    cell = tr.find(attrs={'class': 'overflow-visible whitespace-pre px-3'})
    if cell is None:
        raise ValueError(f"Row {id!r} at {p_url} has no code cell")

    # Raw string: "\d" in a normal string literal is an invalid escape.
    scores = re.findall(r"\d+\.\d+", cell.text)
    if len(scores) < 2:
        raise ValueError(
            f"Expected at least two decimal numbers in row {id!r} at {p_url}, "
            f"found {scores}"
        )

    return round(float(scores[0] if acc else scores[1]), 3)

In [4]:
def show_table_of_acc_and_f1(data, print_latex=False):
    """Build the accuracy / F1 comparison table for one dataset.

    For each of the model variants listed below, the URL of its metric file
    page is assembled from the module-level ``prefix`` and ``suffix`` and the
    scores are scraped with ``acc_and_f1``.

    Args:
        data: Dataset identifier used in the repository names
            (the notebook uses ``"idkmrc"``, ``"tydiqaid"`` and ``"squadid"``).
        print_latex: If true, also print the table as LaTeX source.
            Defaults to ``False``, which matches the previous behavior.

    Returns:
        A DataFrame with the columns ``Model``, ``Dataset``, ``Accuracy`` and
        ``F1 Score``, one row per model variant.
    """
    # Repository-name part of the models trained with the augmented NLI data.
    aug = f"indonli_mnli_{data}-nli"

    # (display label, training-data part of the repo name, VA/TH part).
    # Keeping label and URL parts side by side means rows cannot get out of
    # sync, and the row count is derived instead of hard-coded.
    variants = [
        ("INLI_MNLI-v0 (BL)", "indonli_mnli", "VA0-TH0.0"),
        ("INLI-va1", "indonli", "VA1-TH0.0"),
        ("INLI_MNLI-v1", "indonli_mnli", "VA1-TH0.0"),
        ("INLI_MNLI_AUG-v1", aug, "VA1-TH0.0"),
        ("INLI_MNLI_AUG-v2-t0.25", aug, "VA2-TH0.25"),
        ("INLI_MNLI_AUG-v2-t0.50", aug, "VA2-TH0.5"),
        ("INLI_MNLI_AUG-v2-t0.75", aug, "VA2-TH0.75"),
        ("INLI_MNLI_AUG-v3-t0.25", aug, "VA3-TH0.25"),
        ("INLI_MNLI_AUG-v3-t0.50", aug, "VA3-TH0.5"),
        ("INLI_MNLI_AUG-v3-t0.75", aug, "VA3-TH0.75"),
    ]

    # Take accuracy and F1 from Hugging Face
    urls = [
        f"{prefix}{train}-{data}-TQ2-TS4-MS3-{va_th}{suffix}"
        for _, train, va_th in variants
    ]

    df_score_acc_f1 = pd.DataFrame(
        {
            'Model': [label for label, _, _ in variants],
            'Dataset': [data] * len(variants),
            # Each call downloads the page again: first all accuracies,
            # then all F1 scores.
            'Accuracy': [acc_and_f1(url, True) for url in urls],
            'F1 Score': [acc_and_f1(url, False) for url in urls],
        },
        columns=['Model', 'Dataset', 'Accuracy', 'F1 Score'],
    )

    # Optional LaTeX export (replaces the former commented-out code).
    if print_latex:
        print("Accuracy & F1 Score")
        print(df_score_acc_f1.to_latex(index=False))

    return df_score_acc_f1

# IDK-MRC

In [5]:
# Scrape the scores of every model variant evaluated on IDK-MRC
# (this downloads pages from Hugging Face, so it needs network access).
data = "idkmrc"
# show_table_of_acc_and_f1 already returns a DataFrame; no re-wrapping needed.
table = show_table_of_acc_and_f1(data)
table

Unnamed: 0,Model,Dataset,Accuracy,F1 Score
0,INLI_MNLI-v0 (BL),idkmrc,75.829,84.251
1,INLI-va1,idkmrc,72.63,80.936
2,INLI_MNLI-v1,idkmrc,71.445,79.92
3,INLI_MNLI_AUG-v1,idkmrc,57.82,67.152
4,INLI_MNLI_AUG-v2-t0.25,idkmrc,57.82,67.152
5,INLI_MNLI_AUG-v2-t0.50,idkmrc,56.991,66.322
6,INLI_MNLI_AUG-v2-t0.75,idkmrc,52.37,61.611
7,INLI_MNLI_AUG-v3-t0.25,idkmrc,55.924,65.256
8,INLI_MNLI_AUG-v3-t0.50,idkmrc,54.976,64.308
9,INLI_MNLI_AUG-v3-t0.75,idkmrc,48.223,57.553


In [6]:
# Rank the models by F1, best first. A stable sort keeps rows with equal
# F1 scores in their original table order, so the output is reproducible.
sorted_table = table.sort_values(by='F1 Score', ascending=False, kind='mergesort')
sorted_table

Unnamed: 0,Model,Dataset,Accuracy,F1 Score
0,INLI_MNLI-v0 (BL),idkmrc,75.829,84.251
1,INLI-va1,idkmrc,72.63,80.936
2,INLI_MNLI-v1,idkmrc,71.445,79.92
3,INLI_MNLI_AUG-v1,idkmrc,57.82,67.152
4,INLI_MNLI_AUG-v2-t0.25,idkmrc,57.82,67.152
5,INLI_MNLI_AUG-v2-t0.50,idkmrc,56.991,66.322
7,INLI_MNLI_AUG-v3-t0.25,idkmrc,55.924,65.256
8,INLI_MNLI_AUG-v3-t0.50,idkmrc,54.976,64.308
6,INLI_MNLI_AUG-v2-t0.75,idkmrc,52.37,61.611
9,INLI_MNLI_AUG-v3-t0.75,idkmrc,48.223,57.553


# TYDI-QA-ID

In [7]:
# Scrape the scores of every model variant evaluated on TyDi-QA-ID
# (this downloads pages from Hugging Face, so it needs network access).
data = "tydiqaid"
# show_table_of_acc_and_f1 already returns a DataFrame; no re-wrapping needed.
table = show_table_of_acc_and_f1(data)
table

Unnamed: 0,Model,Dataset,Accuracy,F1 Score
0,INLI_MNLI-v0 (BL),tydiqaid,61.287,76.476
1,INLI-va1,tydiqaid,60.117,75.353
2,INLI_MNLI-v1,tydiqaid,60.0,75.305
3,INLI_MNLI_AUG-v1,tydiqaid,61.287,77.31
4,INLI_MNLI_AUG-v2-t0.25,tydiqaid,61.287,77.31
5,INLI_MNLI_AUG-v2-t0.50,tydiqaid,61.287,77.31
6,INLI_MNLI_AUG-v2-t0.75,tydiqaid,60.936,76.955
7,INLI_MNLI_AUG-v3-t0.25,tydiqaid,61.287,77.336
8,INLI_MNLI_AUG-v3-t0.50,tydiqaid,61.287,77.336
9,INLI_MNLI_AUG-v3-t0.75,tydiqaid,61.053,77.198


In [8]:
# Rank the models by F1, best first. A stable sort keeps rows with equal
# F1 scores in their original table order, so the output is reproducible.
sorted_table = table.sort_values(by='F1 Score', ascending=False, kind='mergesort')
sorted_table

Unnamed: 0,Model,Dataset,Accuracy,F1 Score
7,INLI_MNLI_AUG-v3-t0.25,tydiqaid,61.287,77.336
8,INLI_MNLI_AUG-v3-t0.50,tydiqaid,61.287,77.336
3,INLI_MNLI_AUG-v1,tydiqaid,61.287,77.31
4,INLI_MNLI_AUG-v2-t0.25,tydiqaid,61.287,77.31
5,INLI_MNLI_AUG-v2-t0.50,tydiqaid,61.287,77.31
9,INLI_MNLI_AUG-v3-t0.75,tydiqaid,61.053,77.198
6,INLI_MNLI_AUG-v2-t0.75,tydiqaid,60.936,76.955
0,INLI_MNLI-v0 (BL),tydiqaid,61.287,76.476
1,INLI-va1,tydiqaid,60.117,75.353
2,INLI_MNLI-v1,tydiqaid,60.0,75.305


# SQUAD-ID

In [9]:
# Scrape the scores of every model variant evaluated on SQuAD-ID
# (this downloads pages from Hugging Face, so it needs network access).
data = "squadid"
# show_table_of_acc_and_f1 already returns a DataFrame; no re-wrapping needed.
table = show_table_of_acc_and_f1(data)
table

Unnamed: 0,Model,Dataset,Accuracy,F1 Score
0,INLI_MNLI-v0 (BL),squadid,47.915,67.076
1,INLI-va1,squadid,40.958,58.033
2,INLI_MNLI-v1,squadid,41.826,59.046
3,INLI_MNLI_AUG-v1,squadid,47.764,67.013
4,INLI_MNLI_AUG-v2-t0.25,squadid,47.764,67.013
5,INLI_MNLI_AUG-v2-t0.50,squadid,47.764,67.013
6,INLI_MNLI_AUG-v2-t0.75,squadid,47.511,66.827
7,INLI_MNLI_AUG-v3-t0.25,squadid,47.781,67.038
8,INLI_MNLI_AUG-v3-t0.50,squadid,47.781,67.038
9,INLI_MNLI_AUG-v3-t0.75,squadid,47.553,66.886


In [10]:
# Rank the models by F1, best first. A stable sort keeps rows with equal
# F1 scores in their original table order, so the output is reproducible.
sorted_table = table.sort_values(by='F1 Score', ascending=False, kind='mergesort')
sorted_table

Unnamed: 0,Model,Dataset,Accuracy,F1 Score
0,INLI_MNLI-v0 (BL),squadid,47.915,67.076
7,INLI_MNLI_AUG-v3-t0.25,squadid,47.781,67.038
8,INLI_MNLI_AUG-v3-t0.50,squadid,47.781,67.038
3,INLI_MNLI_AUG-v1,squadid,47.764,67.013
4,INLI_MNLI_AUG-v2-t0.25,squadid,47.764,67.013
5,INLI_MNLI_AUG-v2-t0.50,squadid,47.764,67.013
9,INLI_MNLI_AUG-v3-t0.75,squadid,47.553,66.886
6,INLI_MNLI_AUG-v2-t0.75,squadid,47.511,66.827
2,INLI_MNLI-v1,squadid,41.826,59.046
1,INLI-va1,squadid,40.958,58.033
