In [1]:
!nvidia-smi

Fri Sep  8 13:52:04 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.161.03   Driver Version: 470.161.03   CUDA Version: 11.5     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  On   | 00000000:06:00.0 Off |                    0 |
| N/A   36C    P0    58W / 300W |  31956MiB / 32510MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM2...  On   | 00000000:07:00.0 Off |                    0 |
| N/A   44C    P0    98W / 300W |  17452MiB / 32510MiB |     27%      Default |
|       

In [2]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '6'

In [3]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
import pandas as pd
import numpy as np
import requests, string, re, collections, math
from tqdm import tqdm
from transformers import pipeline

# Download output of baseline

In [5]:
def remove_punctuation(text):
    return text.strip(string.punctuation)

def return_overlap_between_gold_and_pred(data):
    
    data['is_overlap'] = ""
    overlap_data_array = []
    
    for i in tqdm(range(len(data))):
        
        pred_answer = data['Prediction Answer'][i]
        gold_answer = data['Gold Answer'][i]
        
        if type(pred_answer) != str and math.isnan(pred_answer): pred_answer = ""
        if type(gold_answer) != str and math.isnan(gold_answer): gold_answer = ""
        
        pred_answer = remove_punctuation(str(pred_answer).lower().strip())
        gold_answer = remove_punctuation(str(gold_answer).lower().strip())
        
        pred_answer_words = set(remove_punctuation(text) for text in pred_answer.split())
        gold_answer_words = set(remove_punctuation(text) for text in gold_answer.split())
        
        if pred_answer_words.intersection(gold_answer_words):
            overlap_data_array.append(True)
        else:
            overlap_data_array.append(False)
            
    data['is_overlap'] = overlap_data_array
    return data

In [6]:
def convert_question_and_answer_to_hypothesis(data):
    for i in tqdm(range(len(data))):
        data['Prediction Hypothesis'] = data['Question'] + ' ' + data['Prediction Answer']
        data['Gold Hypothesis'] = data['Question'] + ' ' + data['Gold Answer']
    return data

In [7]:
def normalize_text(s):
    def remove_articles(text):
        regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
        return re.sub(regex, " ", text)
    def white_space_fix(text):
        return " ".join(text.split())
    def remove_punc(text):
        exclude = set(string.punctuation)
        return "".join(ch for ch in text if ch not in exclude)
    def lower(text):
        return text.lower()
    return white_space_fix(remove_articles(remove_punc(lower(s))))

def return_acc_and_f1(data, message):
    em = 0
    f1_arr = []
    
    for i in tqdm(range(len(data))):
        
        pred = str(data['Prediction Answer'][i])
        gold = str(data['Gold Answer'][i])
        
        # for exact match
        if pred == gold:
            em += 1
        
        # for f1
        pred_tokens = normalize_text(pred).split()
        gold_tokens = normalize_text(gold).split()
        common = collections.Counter(pred_tokens) & collections.Counter(gold_tokens)
        num_same = sum(common.values()) # True positive

        if len(gold_tokens) == 0 or len(pred_tokens) == 0: 
            f1 = int(gold_tokens == pred_tokens)
            f1_arr.append(f1)
            continue

        if num_same == 0:
            f1 = 0
            f1_arr.append(f1)
            continue

        precision = 1.0 * num_same / len(pred_tokens)
        recall = 1.0 * num_same / len(gold_tokens)
        f1 = (2.0 * precision * recall) / (precision + recall)
        f1_arr.append(f1)
        
    em_final = em / len(data)
    f1_final = np.mean(f1_arr)
    
    print(message.upper())
    print("Final exact match:", round(em_final, 3))
    print("Average F1 score:", round(f1_final, 3))

In [8]:
df_squadid = pd.read_csv("output_squadid_df.csv")
df_squadid = df_squadid[["Context", "Question", "Prediction Answer", "Gold Answer"]]
df_squadid = convert_question_and_answer_to_hypothesis(df_squadid)
df_squadid = df_squadid.dropna(subset=['Gold Answer']).reset_index(drop=True)
df_squadid = return_overlap_between_gold_and_pred(df_squadid)
print(df_squadid['Prediction Answer'].isna().sum())
df_squadid

100%|████████████████████████████████████████████████████████████████████████████| 11962/11962 [01:20<00:00, 149.36it/s]
100%|██████████████████████████████████████████████████████████████████████████| 10888/10888 [00:00<00:00, 70744.76it/s]

384





Unnamed: 0,Context,Question,Prediction Answer,Gold Answer,Prediction Hypothesis,Gold Hypothesis,is_overlap
0,Bangsa Normandia (Norman: Nourmands; Prancis: ...,Di negara apa Normandia berada?,Perancis,Perancis,Di negara apa Normandia berada? Perancis,Di negara apa Normandia berada? Perancis,True
1,Bangsa Normandia (Norman: Nourmands; Prancis: ...,Kapan Normandia di Normandia?,abad ke-10 dan ke-11,-10 dan ke,Kapan Normandia di Normandia? abad ke-10 dan k...,Kapan Normandia di Normandia? -10 dan ke,True
2,Bangsa Normandia (Norman: Nourmands; Prancis: ...,Dari negara mana asal Norse?,"Denmark, Islandia dan Norwegia","Denmark, Islandia dan Norwegia","Dari negara mana asal Norse? Denmark, Islandia...","Dari negara mana asal Norse? Denmark, Islandia...",True
3,Bangsa Normandia (Norman: Nourmands; Prancis: ...,Siapa pemimpin Norse?,Rollo,Rollo,Siapa pemimpin Norse? Rollo,Siapa pemimpin Norse? Rollo,True
4,Bangsa Normandia (Norman: Nourmands; Prancis: ...,Abad berapa pertama kali Normandia mendapatkan...,abad ke-10,abad ke-10,Abad berapa pertama kali Normandia mendapatkan...,Abad berapa pertama kali Normandia mendapatkan...,True
...,...,...,...,...,...,...,...
10883,Hubungan antara kekuatan nonkonservatif makros...,Kekuatan konservatif sering dikaitkan dengan p...,panas,panas,Kekuatan konservatif sering dikaitkan dengan p...,Kekuatan konservatif sering dikaitkan dengan p...,True
10884,"Gaya pon memiliki padanan metrik, yang lebih j...",Seperti apa kekuatan kilogram yang kadang-kada...,kilopond,kilopond,Seperti apa kekuatan kilogram yang kadang-kada...,Seperti apa kekuatan kilogram yang kadang-kada...,True
10885,"Gaya pon memiliki padanan metrik, yang lebih j...",Apakah satuan massa yang sangat jarang digunak...,newton,siput,Apakah satuan massa yang sangat jarang digunak...,Apakah satuan massa yang sangat jarang digunak...,False
10886,"Gaya pon memiliki padanan metrik, yang lebih j...",Apa yang jarang menggunakan istilah satuan kek...,sthène,kip,Apa yang jarang menggunakan istilah satuan kek...,Apa yang jarang menggunakan istilah satuan kek...,False


In [9]:
return_acc_and_f1(df_squadid, "squad-id")

100%|██████████████████████████████████████████████████████████████████████████| 10888/10888 [00:00<00:00, 24999.51it/s]

SQUAD-ID
Final exact match: 0.538
Average F1 score: 0.726





In [10]:
url = "https://huggingface.co/muhammadravi251001/fine-tuned-DatasetQAS-IDK-MRC-with-xlm-roberta-large-without-ITTL-without-freeze-LR-1e-05/raw/main/results/output/output_df.csv"
response = requests.get(url)

if response.status_code == 200:
    with open("data.csv", "wb") as file:
        file.write(response.content)
    df_idkmrc = pd.read_csv("data.csv")
else: print("Download failed!")
    
df_idkmrc = df_idkmrc[["Context", "Question", "Prediction Answer", "Gold Answer"]]
df_idkmrc = convert_question_and_answer_to_hypothesis(df_idkmrc)
df_idkmrc = df_idkmrc.dropna(subset=['Gold Answer']).reset_index(drop=True)
df_idkmrc = return_overlap_between_gold_and_pred(df_idkmrc)
print(df_idkmrc['Prediction Answer'].isna().sum())
df_idkmrc

100%|████████████████████████████████████████████████████████████████████████████████| 848/848 [00:01<00:00, 647.69it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 422/422 [00:00<00:00, 35530.68it/s]

43





Unnamed: 0,Context,Question,Prediction Answer,Gold Answer,Prediction Hypothesis,Gold Hypothesis,is_overlap
0,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan Komputer mikro mulai dikembangkan?,1959,1959,Kapan Komputer mikro mulai dikembangkan? 1959,Kapan Komputer mikro mulai dikembangkan? 1959,True
1,"Patronim, atau patronimik, adalah sebuah kompo...",Apakah pengertian matronimik?,komponen dari sebuah nama pribadi yang berdasa...,Komomene sebuah nama yang berdasarkan pada nam...,Apakah pengertian matronimik? komponen dari se...,Apakah pengertian matronimik? Komomene sebuah ...,True
2,"Ir. Basuki Tjahaja Purnama, M.M. (EYD: Basuki ...",Siapakah Basuki Tjahaja Purnama?,Gubernur DKI Jakarta,Gubernur DKI Jakarta yang menjabat sejak 19 No...,Siapakah Basuki Tjahaja Purnama? Gubernur DKI ...,Siapakah Basuki Tjahaja Purnama? Gubernur DKI ...,True
3,"Ir. Basuki Tjahaja Purnama, M.M. (EYD: Basuki ...",Siapakah Gubernur DKI Jakarta yang menjabat se...,Ir. Basuki Tjahaja Purnama,Basuki Tjahaja Purnama,Siapakah Gubernur DKI Jakarta yang menjabat se...,Siapakah Gubernur DKI Jakarta yang menjabat se...,True
4,Setelah kepala Sebastianus dan Jovinus tiba di...,Kapan Raja Ataulf menikah?,Januari 414,Januari 414,Kapan Raja Ataulf menikah? Januari 414,Kapan Raja Ataulf menikah? Januari 414,True
...,...,...,...,...,...,...,...
417,Studi hubungan internasional sebagai teori sud...,Kapan Teori hubungan internasional diciptakan?,1939,1939,Kapan Teori hubungan internasional diciptakan?...,Kapan Teori hubungan internasional diciptakan?...,True
418,"Dalam melaksanakan kegiatan belajar-mengajar, ...",Berapa luas SMK Negeri 1 Cikampek?,28997m2,29095m2.,Berapa luas SMK Negeri 1 Cikampek? 28997m2,Berapa luas SMK Negeri 1 Cikampek? 29095m2.,False
419,Ikan pari manta (Manta birostris) adalah salah...,Berapakah berat Ikan pari manta yag terbesar?,3 ton,3 ton,Berapakah berat Ikan pari manta yag terbesar? ...,Berapakah berat Ikan pari manta yag terbesar? ...,True
420,Kota ini terletak di sebuah lembah sungai yang...,"Menurut Biro Sensus Amerika Serikat, berapa lu...",131.3 mil persegi,"Menurut Biro Sensus Amerika Serikat, kota ini ...","Menurut Biro Sensus Amerika Serikat, berapa lu...","Menurut Biro Sensus Amerika Serikat, berapa lu...",True


In [11]:
return_acc_and_f1(df_idkmrc, "idk-mrc")

100%|██████████████████████████████████████████████████████████████████████████████| 422/422 [00:00<00:00, 21765.01it/s]

IDK-MRC
Final exact match: 0.633
Average F1 score: 0.767





In [12]:
url = "https://huggingface.co/muhammadravi251001/fine-tuned-DatasetQAS-TYDI-QA-ID-with-xlm-roberta-large-without-ITTL-without-freeze-LR-1e-05/raw/main/results/output/output_df.csv"
response = requests.get(url)

if response.status_code == 200:
    with open("data.csv", "wb") as file:
        file.write(response.content)
    df_tydiqaid = pd.read_csv("data.csv")
else: print("Download failed!")
    
df_tydiqaid = df_tydiqaid[["Context", "Question", "Prediction Answer", "Gold Answer"]]
df_tydiqaid = convert_question_and_answer_to_hypothesis(df_tydiqaid)
df_tydiqaid = df_tydiqaid.dropna(subset=['Gold Answer']).reset_index(drop=True)
df_tydiqaid = return_overlap_between_gold_and_pred(df_tydiqaid)
print(df_tydiqaid['Prediction Answer'].isna().sum())
df_tydiqaid

100%|████████████████████████████████████████████████████████████████████████████████| 857/857 [00:01<00:00, 596.07it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 856/856 [00:00<00:00, 61375.16it/s]

21





Unnamed: 0,Context,Question,Prediction Answer,Gold Answer,Prediction Hypothesis,Gold Hypothesis,is_overlap
0,Ernest Douwes Dekker wafat dini hari tanggal 2...,dimanakah Dr. Ernest François Eugène Douwes De...,"TMP Cikutra, Bandung",28 Agustus 1950,dimanakah Dr. Ernest François Eugène Douwes De...,dimanakah Dr. Ernest François Eugène Douwes De...,False
1,"Pada tanggal 18 Februari 2008, desain Yoo Kerl...",Siapa arsitek Balai Kota Seoul?,Yoo Kerl,Yoo Kerl,Siapa arsitek Balai Kota Seoul? Yoo Kerl,Siapa arsitek Balai Kota Seoul? Yoo Kerl,True
2,Sebagai tindak lanjut Atlantic Charter tersebu...,Kapan PBB mulai terbentuk?,24 Oktober 1945,24 Oktober 1945,Kapan PBB mulai terbentuk? 24 Oktober 1945,Kapan PBB mulai terbentuk? 24 Oktober 1945,True
3,"Dia dipenjarakan di Puri Dragsholm, 75 kilomet...",Dimana James Hepburn meninggal?,"Puri Dragsholm, 75 kilometer Kopenhagen",Puri Dragsholm,Dimana James Hepburn meninggal? Puri Dragsholm...,Dimana James Hepburn meninggal? Puri Dragsholm,True
4,"Lahir di Sheffield, South Yorkshire, Vardy mem...",Dimana Jamie Richard Vardy lahir?,"Sheffield, South Yorkshire","Sheffield, South Yorkshire","Dimana Jamie Richard Vardy lahir? Sheffield, S...","Dimana Jamie Richard Vardy lahir? Sheffield, S...",True
...,...,...,...,...,...,...,...
851,Raden Patah (Jawa: code: jav promoted to code:...,Siapa raja Demak pertama?,Raden Patah,Raden Patah,Siapa raja Demak pertama? Raden Patah,Siapa raja Demak pertama? Raden Patah,True
852,Laut dalam adalah lapisan terbawah dari lautan...,Berapakah kedalaman laut yang disebut dengan l...,1828 m,1828 m,Berapakah kedalaman laut yang disebut dengan l...,Berapakah kedalaman laut yang disebut dengan l...,True
853,"Justus Heurnius (lahir di Utrecht, Belanda, 15...",Kapan Justus Heurnius lahir?,1587,1587,Kapan Justus Heurnius lahir? 1587,Kapan Justus Heurnius lahir? 1587,True
854,Frekuensi suara atau frekuensi audio yaitu get...,Apakah yang dimaksud dengan frekuensi audio?,getaran frekuensi yang terdengar oleh manusia ...,getaran frekuensi yang terdengar oleh manusia ...,Apakah yang dimaksud dengan frekuensi audio? g...,Apakah yang dimaksud dengan frekuensi audio? g...,True


In [13]:
return_acc_and_f1(df_tydiqaid, "tydi-qa-id")

100%|██████████████████████████████████████████████████████████████████████████████| 856/856 [00:00<00:00, 20464.22it/s]

TYDI-QA-ID
Final exact match: 0.666
Average F1 score: 0.784





# Count an overlapping pred answer and gold answer

In [14]:
print("SQuAD-ID")
print(df_squadid['is_overlap'].value_counts())
print()

print("IDK-MRC")
print(df_idkmrc['is_overlap'].value_counts())
print()

print("TyDI-QA-ID")
print(df_tydiqaid['is_overlap'].value_counts())

SQuAD-ID
True     9336
False    1552
Name: is_overlap, dtype: int64

IDK-MRC
True     362
False     60
Name: is_overlap, dtype: int64

TyDI-QA-ID
True     760
False     96
Name: is_overlap, dtype: int64


## Deleting row if is_overlap is True

In [15]:
df_squadid = df_squadid[df_squadid['is_overlap'] == False].reset_index(drop=True)
df_idkmrc = df_idkmrc[df_idkmrc['is_overlap'] == False].reset_index(drop=True)
df_tydiqaid = df_tydiqaid[df_tydiqaid['is_overlap'] == False].reset_index(drop=True)

# Test the hypothesis above with newest NLI model

In [16]:
tokenizer_kwargs = {'truncation': True, 'max_length': 512}
model_nli_name = "muhammadravi251001/fine-tuned-NLI-idk-mrc-nli-keep-with-xlm-roberta-large"
#model_nli_name = "muhammadravi251001/fine-tuned-IndoNLI-Augmented-with-xlm-roberta-large-LR-1e-05"
nli_model = pipeline("text-classification", model=model_nli_name, tokenizer=model_nli_name, **tokenizer_kwargs)

In [17]:
nli_model({'text': "Bambang Pamungkas seorang pemain bola asal Jakarta", 
           'text_pair': "Bambang Pamungkas berasal dari Jakarta"})

{'label': 'entailment', 'score': 0.9963834285736084}

In [18]:
nli_model({'text': "Bambang Pamungkas seorang pemain bola asal Jakarta", 
           'text_pair': "Bambang Pamungkas bukan seorang pemain bola"})

{'label': 'entailment', 'score': 0.9935256242752075}

In [19]:
nli_model({'text': "Bambang Pamungkas seorang pemain bola asal Jakarta", 
           'text_pair': "Bambang Pamungkas berasal dari Bandung"})

{'label': 'entailment', 'score': 0.9755388498306274}

In [20]:
def add_label(data, message, nli_model=nli_model):
    
    data["Label from Prediction Answer"] = str()
    data["Label from Gold Answer"] = str()
    
    labels_pred_answer = []
    labels_gold_answer = []
    
    for i in tqdm(range(len(data))):
        
        premise = data['Context'][i]
        pred_hypo = data['Prediction Hypothesis'][i]
        gold_hypo = data['Gold Hypothesis'][i]
        
        if type(pred_hypo) != str and math.isnan(pred_hypo): pred_hypo = ""
        if type(gold_hypo) != str and math.isnan(gold_hypo): gold_hypo = ""
        
        label_pred_answer = nli_model({'text': premise, 'text_pair': pred_hypo})['label']
        labels_pred_answer.append(label_pred_answer)
        
        label_gold_answer = nli_model({'text': premise, 'text_pair': gold_hypo})['label']
        labels_gold_answer.append(label_gold_answer)
    
    data["Label from Prediction Answer"] = labels_pred_answer
    data["Label from Gold Answer"] = labels_gold_answer
    
    total_entailment_gold = data['Label from Gold Answer'].str.count('entailment').sum()
    total_neutral_gold = data['Label from Gold Answer'].str.count('neutral').sum()
    total_contradiction_gold = data['Label from Gold Answer'].str.count('contradiction').sum()
    
    total_entailment_pred = data['Label from Prediction Answer'].str.count('entailment').sum()
    total_neutral_pred = data['Label from Prediction Answer'].str.count('neutral').sum()
    total_contradiction_pred = data['Label from Prediction Answer'].str.count('contradiction').sum()
    
    total_label_gold = len(data[data['Label from Gold Answer'] != "NULL"])
    total_label_pred = len(data[data['Label from Prediction Answer'] != "NULL"])
    
    print(f"PREDICTION {message.upper()}")
    print(f"Total entailment: {total_entailment_pred} ({round(total_entailment_pred/total_label_pred, 2) * 100} %)")
    print(f"Total neutral: {total_neutral_pred} ({round(total_neutral_pred/total_label_pred, 2) * 100} %)")
    print(f"Total contradiction: {total_contradiction_pred} ({round(total_contradiction_pred/total_label_pred, 2) * 100} %)")
    print()
    print(f"GOLD {message.upper()}")
    print(f"Total entailment: {total_entailment_gold} ({round(total_entailment_gold/total_label_gold, 2) * 100} %)")
    print(f"Total neutral: {total_neutral_gold} ({round(total_neutral_gold/total_label_gold, 2) * 100} %)")
    print(f"Total contradiction: {total_contradiction_gold} ({round(total_contradiction_gold/total_label_gold, 2) * 100} %)")
    print()
    print(f"Total data: {len(data)}")
    
    return data

In [21]:
df_idkmrc = add_label(df_idkmrc, "idk-mrc")
df_idkmrc

100%|███████████████████████████████████████████████████████████████████████████████████| 60/60 [02:17<00:00,  2.29s/it]

PREDICTION IDK-MRC
Total entailment: 59 (98.0 %)
Total neutral: 0 (0.0 %)
Total contradiction: 1 (2.0 %)

GOLD IDK-MRC
Total entailment: 53 (88.0 %)
Total neutral: 0 (0.0 %)
Total contradiction: 7 (12.0 %)

Total data: 60





Unnamed: 0,Context,Question,Prediction Answer,Gold Answer,Prediction Hypothesis,Gold Hypothesis,is_overlap,Label from Prediction Answer,Label from Gold Answer
0,Para Redemptoris yang ditunjuk sebagai misiona...,Siapakah anak Bunda Maria Penolong Abadi?,,Yesus Kristus,,Siapakah anak Bunda Maria Penolong Abadi? Yesu...,False,entailment,entailment
1,Protista adalah mikroorganisme eukariota yang ...,Mengapa protista tidak dikelompokkan ke dalam ...,,"Dari sudut pandang taksonomi, pengelompokan in...",,Mengapa protista tidak dikelompokkan ke dalam ...,False,entailment,entailment
2,"Liu Ju (Chinese:劉據; 128-91 SM), secara resmi d...",Kapan Liu Ju meninggal?,,91 SM,,Kapan Liu Ju meninggal? 91 SM,False,entailment,entailment
3,New Orleans (/[invalid input: 'icon']njuː ˈɔːr...,Dimana letak Orléans?,Amerika Serikat,muara Sungai Mississippi,Dimana letak Orléans? Amerika Serikat,Dimana letak Orléans? muara Sungai Mississippi,False,entailment,entailment
4,Fadjroel Rachman lahir di Banjarmasin pada tan...,apakah pendidikan terakhir Mochamad Fadjroel R...,Magister Hukum,Pasca Sarjana,apakah pendidikan terakhir Mochamad Fadjroel R...,apakah pendidikan terakhir Mochamad Fadjroel R...,False,entailment,entailment
5,Penggunaan kata Halloween atau Hallowe'en bera...,Istilah Halloween apa yang sudah tidak ditemuk...,,All Hallows' Eve,,Istilah Halloween apa yang sudah tidak ditemuk...,False,entailment,entailment
6,"Terakhir, setelah diumumkan oleh BNPB pada 10 ...",Berapakah jumlah korban jiwa tsunami Sulawesi ...,,2.045,,Berapakah jumlah korban jiwa tsunami Sulawesi ...,False,entailment,entailment
7,"Baudouin adalah putra Eustace II, Comte Boulog...",Siapa ibu Baudouin I?,Godehilde (atau Godvera) de Toeni,Ide dari Lorraine,Siapa ibu Baudouin I? Godehilde (atau Godvera)...,Siapa ibu Baudouin I? Ide dari Lorraine,False,entailment,entailment
8,Kabupaten Takalar adalah sebuah kabupaten di p...,berapakah luas Takalar?,,"566,51km2",,"berapakah luas Takalar? 566,51km2",False,entailment,entailment
9,Kabupaten Takalar adalah sebuah kabupaten di p...,berapakah luas Kabupaten Takalar?,,"566,51km2",,"berapakah luas Kabupaten Takalar? 566,51km2",False,entailment,entailment


In [22]:
df_tydiqaid = add_label(df_tydiqaid, "tydi-qa-id")
df_tydiqaid

100%|███████████████████████████████████████████████████████████████████████████████████| 96/96 [03:15<00:00,  2.04s/it]

PREDICTION TYDI-QA-ID
Total entailment: 93 (97.0 %)
Total neutral: 0 (0.0 %)
Total contradiction: 3 (3.0 %)

GOLD TYDI-QA-ID
Total entailment: 92 (96.0 %)
Total neutral: 0 (0.0 %)
Total contradiction: 4 (4.0 %)

Total data: 96





Unnamed: 0,Context,Question,Prediction Answer,Gold Answer,Prediction Hypothesis,Gold Hypothesis,is_overlap,Label from Prediction Answer,Label from Gold Answer
0,Ernest Douwes Dekker wafat dini hari tanggal 2...,dimanakah Dr. Ernest François Eugène Douwes De...,"TMP Cikutra, Bandung",28 Agustus 1950,dimanakah Dr. Ernest François Eugène Douwes De...,dimanakah Dr. Ernest François Eugène Douwes De...,False,entailment,entailment
1,Tian Zhao muda - (Byron Mann) Protagonis keemp...,Siapa yang mengetuai Operation Just Cause?,,Amerika Serikat,,Siapa yang mengetuai Operation Just Cause? Ame...,False,entailment,entailment
2,Menendez meninjunya dan mengambil senjatanya d...,Siapa yang mengetuai Operation Just Cause?,Mason dan Woods,Amerika Serikat,Siapa yang mengetuai Operation Just Cause? Mas...,Siapa yang mengetuai Operation Just Cause? Ame...,False,entailment,entailment
3,Manuls sering kali terlihat di padang rumput s...,Dimanakah Kucing Pallas pertama kali ditemukan?,,2008,,Dimanakah Kucing Pallas pertama kali ditemukan...,False,entailment,contradiction
4,"Penjajahan Utsmaniyah di Yunani, disebut juga ...",Kapan Dinasti Utsmaniyah mulai menguasai Yunani?,1360-an,abad ke-15,Kapan Dinasti Utsmaniyah mulai menguasai Yunan...,Kapan Dinasti Utsmaniyah mulai menguasai Yunan...,False,entailment,entailment
...,...,...,...,...,...,...,...,...,...
91,"Nuh membuat bahtera di padang pasir, ketika Tu...",apakah kelebihan/mukjizat nabi Yunus menurut I...,ketampanan luar biasa dan mampu mentakwilkan m...,bisa hidup di dalam perut ikan nun selama tiga...,apakah kelebihan/mukjizat nabi Yunus menurut I...,apakah kelebihan/mukjizat nabi Yunus menurut I...,False,entailment,entailment
92,"Kesultanan Utsmaniyah, nama resmi Daulat/Negar...",Kapan Utsmani berdiri?,1299,bawah,Kapan Utsmani berdiri? 1299,Kapan Utsmani berdiri? bawah,False,entailment,contradiction
93,Abstergo Industries adalah waralaba konglomera...,siapakah karakter antagonis di Assassin's Creed?,Abstergo Industries,Ksatria Templar,siapakah karakter antagonis di Assassin's Cree...,siapakah karakter antagonis di Assassin's Cree...,False,entailment,entailment
94,"Hikayat Genji(源氏物語,Genji Monogatari) atau Kisa...",Kapan Hikayat Genji ditulis?,1001,pertengahan zaman Heian,Kapan Hikayat Genji ditulis? 1001,Kapan Hikayat Genji ditulis? pertengahan zaman...,False,entailment,entailment


In [23]:
#df_squadid = add_label(df_squadid, "squad-id")
#df_squadid

In [24]:
def count_by_answer_and_label(data, message):
    
    data['properties'] = str()
    properties = []
    right_answer_and_entailment = 0
    right_answer_and_not_entailment = 0
    wrong_answer_and_entailment = 0
    wrong_answer_and_not_entailment = 0
    
    for i in tqdm(range(len(data))):
        
        pred_answer = data['Prediction Answer'][i]
        gold_answer = data['Gold Answer'][i]
        
        label_from_pred_answer = data['Label from Prediction Answer'][i]
        
        # For right answer and entailment label
        if pred_answer == gold_answer and label_from_pred_answer == 'entailment':
            right_answer_and_entailment += 1
            properties.append("Right answer and entailment label")
        
        # For right answer but not-entailment label
        elif pred_answer == gold_answer and label_from_pred_answer != 'entailment':
            right_answer_and_not_entailment += 1
            properties.append("Right answer and not-entailment label")
        
        # For wrong answer but entailment label
        elif pred_answer != gold_answer and label_from_pred_answer == 'entailment':
            wrong_answer_and_entailment += 1
            properties.append("Wrong answer and entailment label")
        
        # For wrong answer and not-entailment label
        elif pred_answer != gold_answer and label_from_pred_answer != 'entailment':
            wrong_answer_and_not_entailment += 1
            properties.append("Wrong answer and not-entailment label")
    
    data['properties'] = properties
    
    total_right_answer = right_answer_and_entailment + right_answer_and_not_entailment
    total_wrong_answer = wrong_answer_and_entailment + wrong_answer_and_not_entailment
    
    print(message.upper())
    if total_right_answer != 0:
        print(f"Right answer and Prediction hypothesis entailment label: {right_answer_and_entailment} ({round(right_answer_and_entailment/total_right_answer, 2) * 100}) %")
        print(f"Right answer and Prediction hypothesis not-entailment label: {right_answer_and_not_entailment} ({round(right_answer_and_not_entailment/total_right_answer, 2) * 100}) %")
    else:
        print(f"Right answer and Prediction hypothesis entailment label: 0 (0) %")
        print(f"Right answer and Prediction hypothesis not-entailment label: 0 (0) %")
    
    if total_wrong_answer != 0:
        print(f"Wrong answer and Prediction hypothesis entailment label: {wrong_answer_and_entailment} ({round(wrong_answer_and_entailment/total_wrong_answer, 2) * 100}) %")
        print(f"Wrong answer and Prediction hypothesis not-entailment label: {wrong_answer_and_not_entailment} ({round(wrong_answer_and_not_entailment/total_wrong_answer, 2) * 100}) %")
    else:
        print(f"Wrong answer and Prediction hypothesis entailment label: 0 (0) %")
        print(f"Wrong answer and Prediction hypothesis not-entailment label: 0 (0) %")
    
    return data

In [25]:
df_idkmrc = count_by_answer_and_label(df_idkmrc, "idk-mrc")
df_idkmrc

100%|████████████████████████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 48517.11it/s]

IDK-MRC
Right answer and Prediction hypothesis entailment label: 0 (0) %
Right answer and Prediction hypothesis not-entailment label: 0 (0) %
Wrong answer and Prediction hypothesis entailment label: 59 (98.0) %
Wrong answer and Prediction hypothesis not-entailment label: 1 (2.0) %





Unnamed: 0,Context,Question,Prediction Answer,Gold Answer,Prediction Hypothesis,Gold Hypothesis,is_overlap,Label from Prediction Answer,Label from Gold Answer,properties
0,Para Redemptoris yang ditunjuk sebagai misiona...,Siapakah anak Bunda Maria Penolong Abadi?,,Yesus Kristus,,Siapakah anak Bunda Maria Penolong Abadi? Yesu...,False,entailment,entailment,Wrong answer and entailment label
1,Protista adalah mikroorganisme eukariota yang ...,Mengapa protista tidak dikelompokkan ke dalam ...,,"Dari sudut pandang taksonomi, pengelompokan in...",,Mengapa protista tidak dikelompokkan ke dalam ...,False,entailment,entailment,Wrong answer and entailment label
2,"Liu Ju (Chinese:劉據; 128-91 SM), secara resmi d...",Kapan Liu Ju meninggal?,,91 SM,,Kapan Liu Ju meninggal? 91 SM,False,entailment,entailment,Wrong answer and entailment label
3,New Orleans (/[invalid input: 'icon']njuː ˈɔːr...,Dimana letak Orléans?,Amerika Serikat,muara Sungai Mississippi,Dimana letak Orléans? Amerika Serikat,Dimana letak Orléans? muara Sungai Mississippi,False,entailment,entailment,Wrong answer and entailment label
4,Fadjroel Rachman lahir di Banjarmasin pada tan...,apakah pendidikan terakhir Mochamad Fadjroel R...,Magister Hukum,Pasca Sarjana,apakah pendidikan terakhir Mochamad Fadjroel R...,apakah pendidikan terakhir Mochamad Fadjroel R...,False,entailment,entailment,Wrong answer and entailment label
5,Penggunaan kata Halloween atau Hallowe'en bera...,Istilah Halloween apa yang sudah tidak ditemuk...,,All Hallows' Eve,,Istilah Halloween apa yang sudah tidak ditemuk...,False,entailment,entailment,Wrong answer and entailment label
6,"Terakhir, setelah diumumkan oleh BNPB pada 10 ...",Berapakah jumlah korban jiwa tsunami Sulawesi ...,,2.045,,Berapakah jumlah korban jiwa tsunami Sulawesi ...,False,entailment,entailment,Wrong answer and entailment label
7,"Baudouin adalah putra Eustace II, Comte Boulog...",Siapa ibu Baudouin I?,Godehilde (atau Godvera) de Toeni,Ide dari Lorraine,Siapa ibu Baudouin I? Godehilde (atau Godvera)...,Siapa ibu Baudouin I? Ide dari Lorraine,False,entailment,entailment,Wrong answer and entailment label
8,Kabupaten Takalar adalah sebuah kabupaten di p...,berapakah luas Takalar?,,"566,51km2",,"berapakah luas Takalar? 566,51km2",False,entailment,entailment,Wrong answer and entailment label
9,Kabupaten Takalar adalah sebuah kabupaten di p...,berapakah luas Kabupaten Takalar?,,"566,51km2",,"berapakah luas Kabupaten Takalar? 566,51km2",False,entailment,entailment,Wrong answer and entailment label


In [26]:
df_tydiqaid = count_by_answer_and_label(df_tydiqaid, "tydi-qa-id")
df_tydiqaid

100%|████████████████████████████████████████████████████████████████████████████████| 96/96 [00:00<00:00, 55668.90it/s]

TYDI-QA-ID
Right answer and Prediction hypothesis entailment label: 0 (0) %
Right answer and Prediction hypothesis not-entailment label: 0 (0) %
Wrong answer and Prediction hypothesis entailment label: 93 (97.0) %
Wrong answer and Prediction hypothesis not-entailment label: 3 (3.0) %





Unnamed: 0,Context,Question,Prediction Answer,Gold Answer,Prediction Hypothesis,Gold Hypothesis,is_overlap,Label from Prediction Answer,Label from Gold Answer,properties
0,Ernest Douwes Dekker wafat dini hari tanggal 2...,dimanakah Dr. Ernest François Eugène Douwes De...,"TMP Cikutra, Bandung",28 Agustus 1950,dimanakah Dr. Ernest François Eugène Douwes De...,dimanakah Dr. Ernest François Eugène Douwes De...,False,entailment,entailment,Wrong answer and entailment label
1,Tian Zhao muda - (Byron Mann) Protagonis keemp...,Siapa yang mengetuai Operation Just Cause?,,Amerika Serikat,,Siapa yang mengetuai Operation Just Cause? Ame...,False,entailment,entailment,Wrong answer and entailment label
2,Menendez meninjunya dan mengambil senjatanya d...,Siapa yang mengetuai Operation Just Cause?,Mason dan Woods,Amerika Serikat,Siapa yang mengetuai Operation Just Cause? Mas...,Siapa yang mengetuai Operation Just Cause? Ame...,False,entailment,entailment,Wrong answer and entailment label
3,Manuls sering kali terlihat di padang rumput s...,Dimanakah Kucing Pallas pertama kali ditemukan?,,2008,,Dimanakah Kucing Pallas pertama kali ditemukan...,False,entailment,contradiction,Wrong answer and entailment label
4,"Penjajahan Utsmaniyah di Yunani, disebut juga ...",Kapan Dinasti Utsmaniyah mulai menguasai Yunani?,1360-an,abad ke-15,Kapan Dinasti Utsmaniyah mulai menguasai Yunan...,Kapan Dinasti Utsmaniyah mulai menguasai Yunan...,False,entailment,entailment,Wrong answer and entailment label
...,...,...,...,...,...,...,...,...,...,...
91,"Nuh membuat bahtera di padang pasir, ketika Tu...",apakah kelebihan/mukjizat nabi Yunus menurut I...,ketampanan luar biasa dan mampu mentakwilkan m...,bisa hidup di dalam perut ikan nun selama tiga...,apakah kelebihan/mukjizat nabi Yunus menurut I...,apakah kelebihan/mukjizat nabi Yunus menurut I...,False,entailment,entailment,Wrong answer and entailment label
92,"Kesultanan Utsmaniyah, nama resmi Daulat/Negar...",Kapan Utsmani berdiri?,1299,bawah,Kapan Utsmani berdiri? 1299,Kapan Utsmani berdiri? bawah,False,entailment,contradiction,Wrong answer and entailment label
93,Abstergo Industries adalah waralaba konglomera...,siapakah karakter antagonis di Assassin's Creed?,Abstergo Industries,Ksatria Templar,siapakah karakter antagonis di Assassin's Cree...,siapakah karakter antagonis di Assassin's Cree...,False,entailment,entailment,Wrong answer and entailment label
94,"Hikayat Genji(源氏物語,Genji Monogatari) atau Kisa...",Kapan Hikayat Genji ditulis?,1001,pertengahan zaman Heian,Kapan Hikayat Genji ditulis? 1001,Kapan Hikayat Genji ditulis? pertengahan zaman...,False,entailment,entailment,Wrong answer and entailment label


In [27]:
#df_squadid = count_by_answer_and_label(df_squadid, "squad-id")
#df_squadid