In [1]:
!nvidia-smi

Sat Sep  9 09:08:02 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.161.03   Driver Version: 470.161.03   CUDA Version: 11.5     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  On   | 00000000:06:00.0 Off |                    0 |
| N/A   39C    P0    59W / 300W |  31956MiB / 32510MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM2...  On   | 00000000:07:00.0 Off |                    0 |
| N/A   39C    P0    43W / 300W |      3MiB / 32510MiB |      0%      Default |
|       

In [2]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '6'

In [3]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
import pandas as pd
import numpy as np
import requests, string, re, collections, math
from tqdm import tqdm
from transformers import pipeline

# Download output of baseline

In [5]:
def remove_punctuation(text):
    return text.strip(string.punctuation)

def return_overlap_between_gold_and_pred(data):
    
    data['is_overlap'] = ""
    overlap_data_array = []
    
    for i in tqdm(range(len(data))):
        
        pred_answer = data['Prediction Answer'][i]
        gold_answer = data['Gold Answer'][i]
        
        if type(pred_answer) != str and math.isnan(pred_answer): pred_answer = ""
        if type(gold_answer) != str and math.isnan(gold_answer): gold_answer = ""
        
        pred_answer = remove_punctuation(str(pred_answer).lower().strip())
        gold_answer = remove_punctuation(str(gold_answer).lower().strip())
        
        pred_answer_words = set(remove_punctuation(text) for text in pred_answer.split())
        gold_answer_words = set(remove_punctuation(text) for text in gold_answer.split())
        
        if pred_answer_words.intersection(gold_answer_words):
            if pred_answer == gold_answer:
                overlap_data_array.append(False)
            else:
                overlap_data_array.append(True)
        else:
            overlap_data_array.append(False)
            
    data['is_overlap'] = overlap_data_array
    return data

In [6]:
def convert_question_and_answer_to_hypothesis(data):
    for i in tqdm(range(len(data))):
        data['Prediction Hypothesis'] = data['Question'] + ' ' + data['Prediction Answer']
        data['Gold Hypothesis'] = data['Question'] + ' ' + data['Gold Answer']
    return data

In [7]:
def normalize_text(s):
    def remove_articles(text):
        regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
        return re.sub(regex, " ", text)
    def white_space_fix(text):
        return " ".join(text.split())
    def remove_punc(text):
        exclude = set(string.punctuation)
        return "".join(ch for ch in text if ch not in exclude)
    def lower(text):
        return text.lower()
    return white_space_fix(remove_articles(remove_punc(lower(s))))

def return_acc_and_f1(data, message):
    em = 0
    f1_arr = []
    
    for i in tqdm(range(len(data))):
        
        pred = str(data['Prediction Answer'][i])
        gold = str(data['Gold Answer'][i])
        
        # for exact match
        if pred == gold:
            em += 1
        
        # for f1
        pred_tokens = normalize_text(pred).split()
        gold_tokens = normalize_text(gold).split()
        common = collections.Counter(pred_tokens) & collections.Counter(gold_tokens)
        num_same = sum(common.values()) # True positive

        if len(gold_tokens) == 0 or len(pred_tokens) == 0: 
            f1 = int(gold_tokens == pred_tokens)
            f1_arr.append(f1)
            continue

        if num_same == 0:
            f1 = 0
            f1_arr.append(f1)
            continue

        precision = 1.0 * num_same / len(pred_tokens)
        recall = 1.0 * num_same / len(gold_tokens)
        f1 = (2.0 * precision * recall) / (precision + recall)
        f1_arr.append(f1)
        
    em_final = em / len(data)
    f1_final = np.mean(f1_arr)
    
    print(message.upper())
    print("Final exact match:", round(em_final, 3))
    print("Average F1 score:", round(f1_final, 3))

In [8]:
df_squadid = pd.read_csv("output_squadid_df.csv")
df_squadid = df_squadid[["Context", "Question", "Prediction Answer", "Gold Answer"]]
df_squadid = convert_question_and_answer_to_hypothesis(df_squadid)
df_squadid = df_squadid.dropna(subset=['Gold Answer']).reset_index(drop=True)
df_squadid = return_overlap_between_gold_and_pred(df_squadid)
print(df_squadid['Prediction Answer'].isna().sum())
df_squadid

100%|████████████████████████████████████████████████████████████████████████████| 11962/11962 [01:12<00:00, 165.66it/s]
100%|██████████████████████████████████████████████████████████████████████████| 10888/10888 [00:00<00:00, 84393.46it/s]

384





Unnamed: 0,Context,Question,Prediction Answer,Gold Answer,Prediction Hypothesis,Gold Hypothesis,is_overlap
0,Bangsa Normandia (Norman: Nourmands; Prancis: ...,Di negara apa Normandia berada?,Perancis,Perancis,Di negara apa Normandia berada? Perancis,Di negara apa Normandia berada? Perancis,False
1,Bangsa Normandia (Norman: Nourmands; Prancis: ...,Kapan Normandia di Normandia?,abad ke-10 dan ke-11,-10 dan ke,Kapan Normandia di Normandia? abad ke-10 dan k...,Kapan Normandia di Normandia? -10 dan ke,True
2,Bangsa Normandia (Norman: Nourmands; Prancis: ...,Dari negara mana asal Norse?,"Denmark, Islandia dan Norwegia","Denmark, Islandia dan Norwegia","Dari negara mana asal Norse? Denmark, Islandia...","Dari negara mana asal Norse? Denmark, Islandia...",False
3,Bangsa Normandia (Norman: Nourmands; Prancis: ...,Siapa pemimpin Norse?,Rollo,Rollo,Siapa pemimpin Norse? Rollo,Siapa pemimpin Norse? Rollo,False
4,Bangsa Normandia (Norman: Nourmands; Prancis: ...,Abad berapa pertama kali Normandia mendapatkan...,abad ke-10,abad ke-10,Abad berapa pertama kali Normandia mendapatkan...,Abad berapa pertama kali Normandia mendapatkan...,False
...,...,...,...,...,...,...,...
10883,Hubungan antara kekuatan nonkonservatif makros...,Kekuatan konservatif sering dikaitkan dengan p...,panas,panas,Kekuatan konservatif sering dikaitkan dengan p...,Kekuatan konservatif sering dikaitkan dengan p...,False
10884,"Gaya pon memiliki padanan metrik, yang lebih j...",Seperti apa kekuatan kilogram yang kadang-kada...,kilopond,kilopond,Seperti apa kekuatan kilogram yang kadang-kada...,Seperti apa kekuatan kilogram yang kadang-kada...,False
10885,"Gaya pon memiliki padanan metrik, yang lebih j...",Apakah satuan massa yang sangat jarang digunak...,newton,siput,Apakah satuan massa yang sangat jarang digunak...,Apakah satuan massa yang sangat jarang digunak...,False
10886,"Gaya pon memiliki padanan metrik, yang lebih j...",Apa yang jarang menggunakan istilah satuan kek...,sthène,kip,Apa yang jarang menggunakan istilah satuan kek...,Apa yang jarang menggunakan istilah satuan kek...,False


In [9]:
return_acc_and_f1(df_squadid, "squad-id")

100%|██████████████████████████████████████████████████████████████████████████| 10888/10888 [00:00<00:00, 30054.01it/s]

SQUAD-ID
Final exact match: 0.538
Average F1 score: 0.726





In [10]:
url = "https://huggingface.co/muhammadravi251001/fine-tuned-DatasetQAS-IDK-MRC-with-xlm-roberta-large-without-ITTL-without-freeze-LR-1e-05/raw/main/results/output/output_df.csv"
response = requests.get(url)

if response.status_code == 200:
    with open("data.csv", "wb") as file:
        file.write(response.content)
    df_idkmrc = pd.read_csv("data.csv")
else: print("Download failed!")
    
df_idkmrc = df_idkmrc[["Context", "Question", "Prediction Answer", "Gold Answer"]]
df_idkmrc = convert_question_and_answer_to_hypothesis(df_idkmrc)
df_idkmrc = df_idkmrc.dropna(subset=['Gold Answer']).reset_index(drop=True)
df_idkmrc = return_overlap_between_gold_and_pred(df_idkmrc)
print(df_idkmrc['Prediction Answer'].isna().sum())
df_idkmrc

100%|████████████████████████████████████████████████████████████████████████████████| 848/848 [00:01<00:00, 709.11it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 422/422 [00:00<00:00, 58346.40it/s]

43





Unnamed: 0,Context,Question,Prediction Answer,Gold Answer,Prediction Hypothesis,Gold Hypothesis,is_overlap
0,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan Komputer mikro mulai dikembangkan?,1959,1959,Kapan Komputer mikro mulai dikembangkan? 1959,Kapan Komputer mikro mulai dikembangkan? 1959,False
1,"Patronim, atau patronimik, adalah sebuah kompo...",Apakah pengertian matronimik?,komponen dari sebuah nama pribadi yang berdasa...,Komomene sebuah nama yang berdasarkan pada nam...,Apakah pengertian matronimik? komponen dari se...,Apakah pengertian matronimik? Komomene sebuah ...,True
2,"Ir. Basuki Tjahaja Purnama, M.M. (EYD: Basuki ...",Siapakah Basuki Tjahaja Purnama?,Gubernur DKI Jakarta,Gubernur DKI Jakarta yang menjabat sejak 19 No...,Siapakah Basuki Tjahaja Purnama? Gubernur DKI ...,Siapakah Basuki Tjahaja Purnama? Gubernur DKI ...,True
3,"Ir. Basuki Tjahaja Purnama, M.M. (EYD: Basuki ...",Siapakah Gubernur DKI Jakarta yang menjabat se...,Ir. Basuki Tjahaja Purnama,Basuki Tjahaja Purnama,Siapakah Gubernur DKI Jakarta yang menjabat se...,Siapakah Gubernur DKI Jakarta yang menjabat se...,True
4,Setelah kepala Sebastianus dan Jovinus tiba di...,Kapan Raja Ataulf menikah?,Januari 414,Januari 414,Kapan Raja Ataulf menikah? Januari 414,Kapan Raja Ataulf menikah? Januari 414,False
...,...,...,...,...,...,...,...
417,Studi hubungan internasional sebagai teori sud...,Kapan Teori hubungan internasional diciptakan?,1939,1939,Kapan Teori hubungan internasional diciptakan?...,Kapan Teori hubungan internasional diciptakan?...,False
418,"Dalam melaksanakan kegiatan belajar-mengajar, ...",Berapa luas SMK Negeri 1 Cikampek?,28997m2,29095m2.,Berapa luas SMK Negeri 1 Cikampek? 28997m2,Berapa luas SMK Negeri 1 Cikampek? 29095m2.,False
419,Ikan pari manta (Manta birostris) adalah salah...,Berapakah berat Ikan pari manta yag terbesar?,3 ton,3 ton,Berapakah berat Ikan pari manta yag terbesar? ...,Berapakah berat Ikan pari manta yag terbesar? ...,False
420,Kota ini terletak di sebuah lembah sungai yang...,"Menurut Biro Sensus Amerika Serikat, berapa lu...",131.3 mil persegi,"Menurut Biro Sensus Amerika Serikat, kota ini ...","Menurut Biro Sensus Amerika Serikat, berapa lu...","Menurut Biro Sensus Amerika Serikat, berapa lu...",True


In [11]:
return_acc_and_f1(df_idkmrc, "idk-mrc")

100%|██████████████████████████████████████████████████████████████████████████████| 422/422 [00:00<00:00, 23560.37it/s]

IDK-MRC
Final exact match: 0.633
Average F1 score: 0.767





In [12]:
url = "https://huggingface.co/muhammadravi251001/fine-tuned-DatasetQAS-TYDI-QA-ID-with-xlm-roberta-large-without-ITTL-without-freeze-LR-1e-05/raw/main/results/output/output_df.csv"
response = requests.get(url)

if response.status_code == 200:
    with open("data.csv", "wb") as file:
        file.write(response.content)
    df_tydiqaid = pd.read_csv("data.csv")
else: print("Download failed!")
    
df_tydiqaid = df_tydiqaid[["Context", "Question", "Prediction Answer", "Gold Answer"]]
df_tydiqaid = convert_question_and_answer_to_hypothesis(df_tydiqaid)
df_tydiqaid = df_tydiqaid.dropna(subset=['Gold Answer']).reset_index(drop=True)
df_tydiqaid = return_overlap_between_gold_and_pred(df_tydiqaid)
print(df_tydiqaid['Prediction Answer'].isna().sum())
df_tydiqaid

100%|████████████████████████████████████████████████████████████████████████████████| 857/857 [00:01<00:00, 648.05it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 856/856 [00:00<00:00, 67740.69it/s]

21





Unnamed: 0,Context,Question,Prediction Answer,Gold Answer,Prediction Hypothesis,Gold Hypothesis,is_overlap
0,Ernest Douwes Dekker wafat dini hari tanggal 2...,dimanakah Dr. Ernest François Eugène Douwes De...,"TMP Cikutra, Bandung",28 Agustus 1950,dimanakah Dr. Ernest François Eugène Douwes De...,dimanakah Dr. Ernest François Eugène Douwes De...,False
1,"Pada tanggal 18 Februari 2008, desain Yoo Kerl...",Siapa arsitek Balai Kota Seoul?,Yoo Kerl,Yoo Kerl,Siapa arsitek Balai Kota Seoul? Yoo Kerl,Siapa arsitek Balai Kota Seoul? Yoo Kerl,False
2,Sebagai tindak lanjut Atlantic Charter tersebu...,Kapan PBB mulai terbentuk?,24 Oktober 1945,24 Oktober 1945,Kapan PBB mulai terbentuk? 24 Oktober 1945,Kapan PBB mulai terbentuk? 24 Oktober 1945,False
3,"Dia dipenjarakan di Puri Dragsholm, 75 kilomet...",Dimana James Hepburn meninggal?,"Puri Dragsholm, 75 kilometer Kopenhagen",Puri Dragsholm,Dimana James Hepburn meninggal? Puri Dragsholm...,Dimana James Hepburn meninggal? Puri Dragsholm,True
4,"Lahir di Sheffield, South Yorkshire, Vardy mem...",Dimana Jamie Richard Vardy lahir?,"Sheffield, South Yorkshire","Sheffield, South Yorkshire","Dimana Jamie Richard Vardy lahir? Sheffield, S...","Dimana Jamie Richard Vardy lahir? Sheffield, S...",False
...,...,...,...,...,...,...,...
851,Raden Patah (Jawa: code: jav promoted to code:...,Siapa raja Demak pertama?,Raden Patah,Raden Patah,Siapa raja Demak pertama? Raden Patah,Siapa raja Demak pertama? Raden Patah,False
852,Laut dalam adalah lapisan terbawah dari lautan...,Berapakah kedalaman laut yang disebut dengan l...,1828 m,1828 m,Berapakah kedalaman laut yang disebut dengan l...,Berapakah kedalaman laut yang disebut dengan l...,False
853,"Justus Heurnius (lahir di Utrecht, Belanda, 15...",Kapan Justus Heurnius lahir?,1587,1587,Kapan Justus Heurnius lahir? 1587,Kapan Justus Heurnius lahir? 1587,False
854,Frekuensi suara atau frekuensi audio yaitu get...,Apakah yang dimaksud dengan frekuensi audio?,getaran frekuensi yang terdengar oleh manusia ...,getaran frekuensi yang terdengar oleh manusia ...,Apakah yang dimaksud dengan frekuensi audio? g...,Apakah yang dimaksud dengan frekuensi audio? g...,False


In [13]:
return_acc_and_f1(df_tydiqaid, "tydi-qa-id")

100%|██████████████████████████████████████████████████████████████████████████████| 856/856 [00:00<00:00, 21598.40it/s]

TYDI-QA-ID
Final exact match: 0.666
Average F1 score: 0.784





# Count an overlapping pred answer and gold answer

In [14]:
print("SQuAD-ID")
print(df_squadid['is_overlap'].value_counts())
print()

print("IDK-MRC")
print(df_idkmrc['is_overlap'].value_counts())
print()

print("TyDI-QA-ID")
print(df_tydiqaid['is_overlap'].value_counts())

SQuAD-ID
False    7577
True     3311
Name: is_overlap, dtype: int64

IDK-MRC
False    327
True      95
Name: is_overlap, dtype: int64

TyDI-QA-ID
False    668
True     188
Name: is_overlap, dtype: int64


## Deleting row if is_overlap is True

In [15]:
df_squadid = df_squadid[df_squadid['is_overlap'] == False].reset_index(drop=True)
df_idkmrc = df_idkmrc[df_idkmrc['is_overlap'] == False].reset_index(drop=True)
df_tydiqaid = df_tydiqaid[df_tydiqaid['is_overlap'] == False].reset_index(drop=True)

# Test the hypothesis above with newest NLI model

In [16]:
tokenizer_kwargs = {'truncation': True, 'max_length': 512}
#model_nli_name = "muhammadravi251001/fine-tuned-NLI-idk-mrc-nli-keep-with-xlm-roberta-large"
model_nli_name = "muhammadravi251001/fine-tuned-IndoNLI-Augmented-with-xlm-roberta-large-LR-1e-05"
nli_model = pipeline("text-classification", model=model_nli_name, tokenizer=model_nli_name, **tokenizer_kwargs)

In [17]:
nli_model({'text': "Bambang Pamungkas seorang pemain bola asal Jakarta", 
           'text_pair': "Bambang Pamungkas berasal dari Jakarta"})

{'label': 'entailment', 'score': 0.9908077120780945}

In [18]:
nli_model({'text': "Bambang Pamungkas seorang pemain bola asal Jakarta", 
           'text_pair': "Bambang Pamungkas bukan seorang pemain bola"})

{'label': 'contradiction', 'score': 0.9923650622367859}

In [19]:
nli_model({'text': "Bambang Pamungkas seorang pemain bola asal Jakarta", 
           'text_pair': "Bambang Pamungkas berasal dari Bandung"})

{'label': 'contradiction', 'score': 0.9802265167236328}

In [20]:
def add_label(data, message, nli_model=nli_model):
    
    data["Label from Prediction Answer"] = str()
    data["Label from Gold Answer"] = str()
    
    labels_pred_answer = []
    labels_gold_answer = []
    
    for i in tqdm(range(len(data))):
        
        premise = data['Context'][i]
        pred_hypo = data['Prediction Hypothesis'][i]
        gold_hypo = data['Gold Hypothesis'][i]
        
        if type(pred_hypo) != str and math.isnan(pred_hypo): pred_hypo = ""
        if type(gold_hypo) != str and math.isnan(gold_hypo): gold_hypo = ""
        
        label_pred_answer = nli_model({'text': premise, 'text_pair': pred_hypo})['label']
        labels_pred_answer.append(label_pred_answer)
        
        label_gold_answer = nli_model({'text': premise, 'text_pair': gold_hypo})['label']
        labels_gold_answer.append(label_gold_answer)
    
    data["Label from Prediction Answer"] = labels_pred_answer
    data["Label from Gold Answer"] = labels_gold_answer
    
    total_entailment_gold = data['Label from Gold Answer'].str.count('entailment').sum()
    total_neutral_gold = data['Label from Gold Answer'].str.count('neutral').sum()
    total_contradiction_gold = data['Label from Gold Answer'].str.count('contradiction').sum()
    
    total_entailment_pred = data['Label from Prediction Answer'].str.count('entailment').sum()
    total_neutral_pred = data['Label from Prediction Answer'].str.count('neutral').sum()
    total_contradiction_pred = data['Label from Prediction Answer'].str.count('contradiction').sum()
    
    total_label_gold = len(data[data['Label from Gold Answer'] != "NULL"])
    total_label_pred = len(data[data['Label from Prediction Answer'] != "NULL"])
    
    print(f"PREDICTION {message.upper()}")
    print(f"Total entailment: {total_entailment_pred} ({round(total_entailment_pred/total_label_pred, 2) * 100} %)")
    print(f"Total neutral: {total_neutral_pred} ({round(total_neutral_pred/total_label_pred, 2) * 100} %)")
    print(f"Total contradiction: {total_contradiction_pred} ({round(total_contradiction_pred/total_label_pred, 2) * 100} %)")
    print()
    print(f"GOLD {message.upper()}")
    print(f"Total entailment: {total_entailment_gold} ({round(total_entailment_gold/total_label_gold, 2) * 100} %)")
    print(f"Total neutral: {total_neutral_gold} ({round(total_neutral_gold/total_label_gold, 2) * 100} %)")
    print(f"Total contradiction: {total_contradiction_gold} ({round(total_contradiction_gold/total_label_gold, 2) * 100} %)")
    print()
    print(f"Total data: {len(data)}")
    
    return data

In [21]:
df_idkmrc = add_label(df_idkmrc, "idk-mrc")
df_idkmrc

100%|█████████████████████████████████████████████████████████████████████████████████| 327/327 [08:51<00:00,  1.62s/it]

PREDICTION IDK-MRC
Total entailment: 182 (56.00000000000001 %)
Total neutral: 84 (26.0 %)
Total contradiction: 61 (19.0 %)

GOLD IDK-MRC
Total entailment: 197 (60.0 %)
Total neutral: 93 (28.000000000000004 %)
Total contradiction: 37 (11.0 %)

Total data: 327





Unnamed: 0,Context,Question,Prediction Answer,Gold Answer,Prediction Hypothesis,Gold Hypothesis,is_overlap,Label from Prediction Answer,Label from Gold Answer
0,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan Komputer mikro mulai dikembangkan?,1959,1959,Kapan Komputer mikro mulai dikembangkan? 1959,Kapan Komputer mikro mulai dikembangkan? 1959,False,contradiction,contradiction
1,Setelah kepala Sebastianus dan Jovinus tiba di...,Kapan Raja Ataulf menikah?,Januari 414,Januari 414,Kapan Raja Ataulf menikah? Januari 414,Kapan Raja Ataulf menikah? Januari 414,False,neutral,neutral
2,Lontong kupang atau kupang lontong adalah nama...,Apakah kuliner yang terkenal dari daerah surab...,Lontong kupang,Lontong kupang,Apakah kuliner yang terkenal dari daerah surab...,Apakah kuliner yang terkenal dari daerah surab...,False,neutral,neutral
3,Karangkancana adalah sebuah kecamatan di Kabup...,Dimanakah letak Desa Karang kancana?,"Kabupaten Kuningan, Provinsi Jawa Barat, Indon...","Kabupaten Kuningan, Provinsi Jawa Barat, Indon...",Dimanakah letak Desa Karang kancana? Kabupaten...,Dimanakah letak Desa Karang kancana? Kabupaten...,False,neutral,neutral
4,Hamzah bin Abdul-Muththalib (Arabic: حمزه بن ع...,Kenapa Hamzah bin Abdul-Muththalib dijuluki si...,karena kepahlawanannya saat membela Islam,karena kepahlawanannya saat membela Islam,Kenapa Hamzah bin Abdul-Muththalib dijuluki si...,Kenapa Hamzah bin Abdul-Muththalib dijuluki si...,False,entailment,entailment
...,...,...,...,...,...,...,...,...,...
322,"Nepal nyaris berbentuk segi empat, dengan panj...",Berapa luas kota Nepal?,147.181 km2.,147.181 km2.,Berapa luas kota Nepal? 147.181 km2.,Berapa luas kota Nepal? 147.181 km2.,False,neutral,neutral
323,Radang (English: inflammation) adalah respon d...,Apakah yang dimaksud dengan radang?,respon dari suatu organisme terhadap patogen d...,respon dari suatu organisme terhadap patogen d...,Apakah yang dimaksud dengan radang? respon dar...,Apakah yang dimaksud dengan radang? respon dar...,False,entailment,entailment
324,Studi hubungan internasional sebagai teori sud...,Kapan Teori hubungan internasional diciptakan?,1939,1939,Kapan Teori hubungan internasional diciptakan?...,Kapan Teori hubungan internasional diciptakan?...,False,entailment,entailment
325,"Dalam melaksanakan kegiatan belajar-mengajar, ...",Berapa luas SMK Negeri 1 Cikampek?,28997m2,29095m2.,Berapa luas SMK Negeri 1 Cikampek? 28997m2,Berapa luas SMK Negeri 1 Cikampek? 29095m2.,False,contradiction,contradiction


In [22]:
df_tydiqaid = add_label(df_tydiqaid, "tydi-qa-id")
df_tydiqaid

100%|█████████████████████████████████████████████████████████████████████████████████| 668/668 [16:59<00:00,  1.53s/it]

PREDICTION TYDI-QA-ID
Total entailment: 400 (60.0 %)
Total neutral: 216 (32.0 %)
Total contradiction: 52 (8.0 %)

GOLD TYDI-QA-ID
Total entailment: 408 (61.0 %)
Total neutral: 215 (32.0 %)
Total contradiction: 45 (7.000000000000001 %)

Total data: 668





Unnamed: 0,Context,Question,Prediction Answer,Gold Answer,Prediction Hypothesis,Gold Hypothesis,is_overlap,Label from Prediction Answer,Label from Gold Answer
0,Ernest Douwes Dekker wafat dini hari tanggal 2...,dimanakah Dr. Ernest François Eugène Douwes De...,"TMP Cikutra, Bandung",28 Agustus 1950,dimanakah Dr. Ernest François Eugène Douwes De...,dimanakah Dr. Ernest François Eugène Douwes De...,False,entailment,contradiction
1,"Pada tanggal 18 Februari 2008, desain Yoo Kerl...",Siapa arsitek Balai Kota Seoul?,Yoo Kerl,Yoo Kerl,Siapa arsitek Balai Kota Seoul? Yoo Kerl,Siapa arsitek Balai Kota Seoul? Yoo Kerl,False,neutral,neutral
2,Sebagai tindak lanjut Atlantic Charter tersebu...,Kapan PBB mulai terbentuk?,24 Oktober 1945,24 Oktober 1945,Kapan PBB mulai terbentuk? 24 Oktober 1945,Kapan PBB mulai terbentuk? 24 Oktober 1945,False,entailment,entailment
3,"Lahir di Sheffield, South Yorkshire, Vardy mem...",Dimana Jamie Richard Vardy lahir?,"Sheffield, South Yorkshire","Sheffield, South Yorkshire","Dimana Jamie Richard Vardy lahir? Sheffield, S...","Dimana Jamie Richard Vardy lahir? Sheffield, S...",False,neutral,neutral
4,John Fitzgerald Kennedy lahir di 83 Beals Stre...,siapakah orang tua John Fitzgerald Kennedy?,"Joseph Patrick ""Joe"" Kennedy, Sr. (1888–1969) ...","Joseph Patrick ""Joe"" Kennedy, Sr. (1888–1969) ...",siapakah orang tua John Fitzgerald Kennedy? Jo...,siapakah orang tua John Fitzgerald Kennedy? Jo...,False,entailment,entailment
...,...,...,...,...,...,...,...,...,...
663,Raden Patah (Jawa: code: jav promoted to code:...,Siapa raja Demak pertama?,Raden Patah,Raden Patah,Siapa raja Demak pertama? Raden Patah,Siapa raja Demak pertama? Raden Patah,False,entailment,entailment
664,Laut dalam adalah lapisan terbawah dari lautan...,Berapakah kedalaman laut yang disebut dengan l...,1828 m,1828 m,Berapakah kedalaman laut yang disebut dengan l...,Berapakah kedalaman laut yang disebut dengan l...,False,entailment,entailment
665,"Justus Heurnius (lahir di Utrecht, Belanda, 15...",Kapan Justus Heurnius lahir?,1587,1587,Kapan Justus Heurnius lahir? 1587,Kapan Justus Heurnius lahir? 1587,False,entailment,entailment
666,Frekuensi suara atau frekuensi audio yaitu get...,Apakah yang dimaksud dengan frekuensi audio?,getaran frekuensi yang terdengar oleh manusia ...,getaran frekuensi yang terdengar oleh manusia ...,Apakah yang dimaksud dengan frekuensi audio? g...,Apakah yang dimaksud dengan frekuensi audio? g...,False,entailment,entailment


In [23]:
#df_squadid = add_label(df_squadid, "squad-id")
#df_squadid

In [24]:
def count_by_answer_and_label(data, message):
    
    data['properties'] = str()
    properties = []
    right_answer_and_entailment = 0
    right_answer_and_not_entailment = 0
    wrong_answer_and_entailment = 0
    wrong_answer_and_not_entailment = 0
    
    for i in tqdm(range(len(data))):
        
        pred_answer = data['Prediction Answer'][i]
        gold_answer = data['Gold Answer'][i]
        
        label_from_pred_answer = data['Label from Prediction Answer'][i]
        
        # For right answer and entailment label
        if pred_answer == gold_answer and label_from_pred_answer == 'entailment':
            right_answer_and_entailment += 1
            properties.append("Right answer and entailment label")
        
        # For right answer but not-entailment label
        elif pred_answer == gold_answer and label_from_pred_answer != 'entailment':
            right_answer_and_not_entailment += 1
            properties.append("Right answer and not-entailment label")
        
        # For wrong answer but entailment label
        elif pred_answer != gold_answer and label_from_pred_answer == 'entailment':
            wrong_answer_and_entailment += 1
            properties.append("Wrong answer and entailment label")
        
        # For wrong answer and not-entailment label
        elif pred_answer != gold_answer and label_from_pred_answer != 'entailment':
            wrong_answer_and_not_entailment += 1
            properties.append("Wrong answer and not-entailment label")
    
    data['properties'] = properties
    
    total_right_answer = right_answer_and_entailment + right_answer_and_not_entailment
    total_wrong_answer = wrong_answer_and_entailment + wrong_answer_and_not_entailment
    
    print(message.upper())
    if total_right_answer != 0:
        print(f"Right answer and Prediction hypothesis entailment label: {right_answer_and_entailment} ({round(right_answer_and_entailment/total_right_answer, 2) * 100}) %")
        print(f"Right answer and Prediction hypothesis not-entailment label: {right_answer_and_not_entailment} ({round(right_answer_and_not_entailment/total_right_answer, 2) * 100}) %")
    else:
        print(f"Right answer and Prediction hypothesis entailment label: 0 (0) %")
        print(f"Right answer and Prediction hypothesis not-entailment label: 0 (0) %")
    
    if total_wrong_answer != 0:
        print(f"Wrong answer and Prediction hypothesis entailment label: {wrong_answer_and_entailment} ({round(wrong_answer_and_entailment/total_wrong_answer, 2) * 100}) %")
        print(f"Wrong answer and Prediction hypothesis not-entailment label: {wrong_answer_and_not_entailment} ({round(wrong_answer_and_not_entailment/total_wrong_answer, 2) * 100}) %")
    else:
        print(f"Wrong answer and Prediction hypothesis entailment label: 0 (0) %")
        print(f"Wrong answer and Prediction hypothesis not-entailment label: 0 (0) %")
    
    return data

In [25]:
df_idkmrc = count_by_answer_and_label(df_idkmrc, "idk-mrc")
df_idkmrc

100%|██████████████████████████████████████████████████████████████████████████████| 327/327 [00:00<00:00, 90256.48it/s]

IDK-MRC
Right answer and Prediction hypothesis entailment label: 178 (67.0) %
Right answer and Prediction hypothesis not-entailment label: 89 (33.0) %
Wrong answer and Prediction hypothesis entailment label: 4 (7.000000000000001) %
Wrong answer and Prediction hypothesis not-entailment label: 56 (93.0) %





Unnamed: 0,Context,Question,Prediction Answer,Gold Answer,Prediction Hypothesis,Gold Hypothesis,is_overlap,Label from Prediction Answer,Label from Gold Answer,properties
0,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan Komputer mikro mulai dikembangkan?,1959,1959,Kapan Komputer mikro mulai dikembangkan? 1959,Kapan Komputer mikro mulai dikembangkan? 1959,False,contradiction,contradiction,Right answer and not-entailment label
1,Setelah kepala Sebastianus dan Jovinus tiba di...,Kapan Raja Ataulf menikah?,Januari 414,Januari 414,Kapan Raja Ataulf menikah? Januari 414,Kapan Raja Ataulf menikah? Januari 414,False,neutral,neutral,Right answer and not-entailment label
2,Lontong kupang atau kupang lontong adalah nama...,Apakah kuliner yang terkenal dari daerah surab...,Lontong kupang,Lontong kupang,Apakah kuliner yang terkenal dari daerah surab...,Apakah kuliner yang terkenal dari daerah surab...,False,neutral,neutral,Right answer and not-entailment label
3,Karangkancana adalah sebuah kecamatan di Kabup...,Dimanakah letak Desa Karang kancana?,"Kabupaten Kuningan, Provinsi Jawa Barat, Indon...","Kabupaten Kuningan, Provinsi Jawa Barat, Indon...",Dimanakah letak Desa Karang kancana? Kabupaten...,Dimanakah letak Desa Karang kancana? Kabupaten...,False,neutral,neutral,Right answer and not-entailment label
4,Hamzah bin Abdul-Muththalib (Arabic: حمزه بن ع...,Kenapa Hamzah bin Abdul-Muththalib dijuluki si...,karena kepahlawanannya saat membela Islam,karena kepahlawanannya saat membela Islam,Kenapa Hamzah bin Abdul-Muththalib dijuluki si...,Kenapa Hamzah bin Abdul-Muththalib dijuluki si...,False,entailment,entailment,Right answer and entailment label
...,...,...,...,...,...,...,...,...,...,...
322,"Nepal nyaris berbentuk segi empat, dengan panj...",Berapa luas kota Nepal?,147.181 km2.,147.181 km2.,Berapa luas kota Nepal? 147.181 km2.,Berapa luas kota Nepal? 147.181 km2.,False,neutral,neutral,Right answer and not-entailment label
323,Radang (English: inflammation) adalah respon d...,Apakah yang dimaksud dengan radang?,respon dari suatu organisme terhadap patogen d...,respon dari suatu organisme terhadap patogen d...,Apakah yang dimaksud dengan radang? respon dar...,Apakah yang dimaksud dengan radang? respon dar...,False,entailment,entailment,Right answer and entailment label
324,Studi hubungan internasional sebagai teori sud...,Kapan Teori hubungan internasional diciptakan?,1939,1939,Kapan Teori hubungan internasional diciptakan?...,Kapan Teori hubungan internasional diciptakan?...,False,entailment,entailment,Right answer and entailment label
325,"Dalam melaksanakan kegiatan belajar-mengajar, ...",Berapa luas SMK Negeri 1 Cikampek?,28997m2,29095m2.,Berapa luas SMK Negeri 1 Cikampek? 28997m2,Berapa luas SMK Negeri 1 Cikampek? 29095m2.,False,contradiction,contradiction,Wrong answer and not-entailment label


In [26]:
df_tydiqaid = count_by_answer_and_label(df_tydiqaid, "tydi-qa-id")
df_tydiqaid

100%|██████████████████████████████████████████████████████████████████████████████| 668/668 [00:00<00:00, 94400.10it/s]

TYDI-QA-ID
Right answer and Prediction hypothesis entailment label: 370 (65.0) %
Right answer and Prediction hypothesis not-entailment label: 200 (35.0) %
Wrong answer and Prediction hypothesis entailment label: 30 (31.0) %
Wrong answer and Prediction hypothesis not-entailment label: 68 (69.0) %





Unnamed: 0,Context,Question,Prediction Answer,Gold Answer,Prediction Hypothesis,Gold Hypothesis,is_overlap,Label from Prediction Answer,Label from Gold Answer,properties
0,Ernest Douwes Dekker wafat dini hari tanggal 2...,dimanakah Dr. Ernest François Eugène Douwes De...,"TMP Cikutra, Bandung",28 Agustus 1950,dimanakah Dr. Ernest François Eugène Douwes De...,dimanakah Dr. Ernest François Eugène Douwes De...,False,entailment,contradiction,Wrong answer and entailment label
1,"Pada tanggal 18 Februari 2008, desain Yoo Kerl...",Siapa arsitek Balai Kota Seoul?,Yoo Kerl,Yoo Kerl,Siapa arsitek Balai Kota Seoul? Yoo Kerl,Siapa arsitek Balai Kota Seoul? Yoo Kerl,False,neutral,neutral,Right answer and not-entailment label
2,Sebagai tindak lanjut Atlantic Charter tersebu...,Kapan PBB mulai terbentuk?,24 Oktober 1945,24 Oktober 1945,Kapan PBB mulai terbentuk? 24 Oktober 1945,Kapan PBB mulai terbentuk? 24 Oktober 1945,False,entailment,entailment,Right answer and entailment label
3,"Lahir di Sheffield, South Yorkshire, Vardy mem...",Dimana Jamie Richard Vardy lahir?,"Sheffield, South Yorkshire","Sheffield, South Yorkshire","Dimana Jamie Richard Vardy lahir? Sheffield, S...","Dimana Jamie Richard Vardy lahir? Sheffield, S...",False,neutral,neutral,Right answer and not-entailment label
4,John Fitzgerald Kennedy lahir di 83 Beals Stre...,siapakah orang tua John Fitzgerald Kennedy?,"Joseph Patrick ""Joe"" Kennedy, Sr. (1888–1969) ...","Joseph Patrick ""Joe"" Kennedy, Sr. (1888–1969) ...",siapakah orang tua John Fitzgerald Kennedy? Jo...,siapakah orang tua John Fitzgerald Kennedy? Jo...,False,entailment,entailment,Right answer and entailment label
...,...,...,...,...,...,...,...,...,...,...
663,Raden Patah (Jawa: code: jav promoted to code:...,Siapa raja Demak pertama?,Raden Patah,Raden Patah,Siapa raja Demak pertama? Raden Patah,Siapa raja Demak pertama? Raden Patah,False,entailment,entailment,Right answer and entailment label
664,Laut dalam adalah lapisan terbawah dari lautan...,Berapakah kedalaman laut yang disebut dengan l...,1828 m,1828 m,Berapakah kedalaman laut yang disebut dengan l...,Berapakah kedalaman laut yang disebut dengan l...,False,entailment,entailment,Right answer and entailment label
665,"Justus Heurnius (lahir di Utrecht, Belanda, 15...",Kapan Justus Heurnius lahir?,1587,1587,Kapan Justus Heurnius lahir? 1587,Kapan Justus Heurnius lahir? 1587,False,entailment,entailment,Right answer and entailment label
666,Frekuensi suara atau frekuensi audio yaitu get...,Apakah yang dimaksud dengan frekuensi audio?,getaran frekuensi yang terdengar oleh manusia ...,getaran frekuensi yang terdengar oleh manusia ...,Apakah yang dimaksud dengan frekuensi audio? g...,Apakah yang dimaksud dengan frekuensi audio? g...,False,entailment,entailment,Right answer and entailment label


In [27]:
#df_squadid = count_by_answer_and_label(df_squadid, "squad-id")
#df_squadid