In [1]:
!nvidia-smi

Sun Nov  5 03:09:16 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.161.03   Driver Version: 470.161.03   CUDA Version: 11.5     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  On   | 00000000:06:00.0 Off |                    0 |
| N/A   36C    P0    62W / 300W |      3MiB / 32510MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM2...  On   | 00000000:07:00.0 Off |                    0 |
| N/A   44C    P0   108W / 300W |  17094MiB / 32510MiB |     29%      Default |
|       

In [2]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ['TRANSFORMERS_NO_ADVISORY_WARNINGS'] = 'true'

import transformers
import evaluate
import torch
import operator
import re
import sys
import collections
import string
import contextlib

import numpy as np
import pandas as pd
import torch.nn as nn

from multiprocessing import cpu_count
from nusacrowd import NusantaraConfigHelper
from datetime import datetime
from tqdm import tqdm
from deep_translator import GoogleTranslator
from huggingface_hub import HfApi, create_repo

from datasets import (
    load_dataset, 
    Dataset,
    DatasetDict
)
from transformers import (
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
    BertForQuestionAnswering,
    AutoTokenizer,
    EarlyStoppingCallback,
    AutoModelForQuestionAnswering,
    pipeline
)

In [3]:
# Pick the compute device once: CUDA when a GPU is usable, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
# --- Experiment configuration -------------------------------------------------
# Base checkpoint name; further down it is only used to build the results/repo name.
MODEL_NAME = 'xlm-roberta-large'
# Default `type_qas` of create_df_with_final_answer_by_variation
# (which classifier labels count as an acceptable answer).
TYPE_QAS = "entailment_only"
# Strategy name handed to smoothing() when a question and an answer are merged
# into a hypothesis.
TYPE_SMOOTHING = "just_concat_answer_and_question"
# Passed as `top_k` to the QA pipeline: number of ranked candidate answers per question.
MAXIMUM_SEARCH_ITER = 3
# NOTE(review): presumably the intended `variation` / `threshold` arguments of
# create_df_with_final_answer_by_variation — confirm.
VARIATION = 3
THRESHOLD = 0.5
# Checkpoint loaded by the text-classification pipeline (nlp_sc).
MODEL_SC_NAME = "muhammadravi251001/fine-tuned-NLI-indonli-with-xlm-roberta-large"
# Checkpoint loaded by the question-answering pipeline (nlp_qa).
MODEL_QA_NAME = "muhammadravi251001/fine-tuned-DatasetQAS-IDK-MRC-with-xlm-roberta-large-without-ITTL-without-freeze-LR-1e-05"

# Same string as the namespace prefix of the two fine-tuned checkpoints above.
USER = "muhammadravi251001"   
# Checkpoints loaded by the two text2text-generation pipelines (nlp_tg_ind / nlp_tg_eng).
MODEL_TG_IND_NAME = "Wikidepia/IndoT5-base-paraphrase"
MODEL_TG_ENG_NAME = "humarin/chatgpt_paraphraser_on_T5_base"
# Checkpoint loaded by the NER pipeline (nlp_ner), used to tag gold answers.
MODEL_NER_NAME = "ageng-anugrah/indobert-large-p2-finetuned-ner"
# Tokenisation / training hyper-parameters.
# NOTE(review): these look carried over from a training notebook — confirm which
# of them this evaluation notebook still needs.
MAX_LENGTH = 512
STRIDE = 128
LOGGING_STEPS = 50
WARMUP_RATIO = 0.0
WEIGHT_DECAY = 0.0
EVAL_STEPS_RATIO = 0.5
SAMPLE = sys.maxsize

In [5]:
# Load the IDK-MRC dataset through the NusaCrowd config helper.
conhelps = NusantaraConfigHelper()
data_qas_id = conhelps.filtered(lambda x: 'idk_mrc' in x.dataset_name)[0].load_dataset()

df_train = pd.DataFrame(data_qas_id['train'])
df_validation = pd.DataFrame(data_qas_id['validation'])
df_test = pd.DataFrame(data_qas_id['test'])

cols = ['context', 'question', 'answer']


def _flatten_qas(split_df, columns=cols):
    """Turn one split (one row per context) into one row per question.

    Each input row carries a `context` string and a `qas` list; every element
    of `qas` has a `question` and an `answers` list. Only the first answer is
    kept. Questions without any answer get an empty text and a (0, 0) span.

    Rows are collected in a plain list and converted once, instead of growing
    a DataFrame with `DataFrame.append` (quadratic, and removed in pandas 2.0).

    Returns a DataFrame with the columns `context`, `question`, `answer`,
    where `answer` is a dict with `text`, `answer_start` and `answer_end`.
    """
    records = []
    for context, qas in tqdm(zip(split_df['context'], split_df['qas']), total=len(split_df)):
        for qa in qas:
            answers = qa['answers']
            if len(answers) != 0:
                first = answers[0]
                answer = {"text": first['text'],
                          "answer_start": first['answer_start'],
                          # End offset = start offset + length of the answer text.
                          "answer_end": first['answer_start'] + len(first['text'])}
            else:
                answer = {"text": str(), "answer_start": 0, "answer_end": 0}
            records.append({'context': context, 'question': qa['question'], 'answer': answer})
    # Passing `columns` keeps the expected schema even when a split is empty.
    return pd.DataFrame(records, columns=columns)


new_df_train = _flatten_qas(df_train)
new_df_val = _flatten_qas(df_validation)
new_df_test = _flatten_qas(df_test)

train_dataset = Dataset.from_dict(new_df_train)
validation_dataset = Dataset.from_dict(new_df_val)
test_dataset = Dataset.from_dict(new_df_test)

data_qas = DatasetDict({"train": train_dataset, "validation": validation_dataset, "test": test_dataset})



  0%|          | 0/3 [00:00<?, ?it/s]

100%|██████████████████████████████████████████████████████████████████████████████| 3659/3659 [00:16<00:00, 221.65it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 358/358 [00:01<00:00, 289.04it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 378/378 [00:01<00:00, 268.65it/s]


In [6]:
# Materialise the flattened test split as a DataFrame; the later cells work on it.
test_data = pd.DataFrame(data_qas['test'])
test_data

Unnamed: 0,context,question,answer
0,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan Komputer mikro mulai dikembangkan ?,"{'answer_end': 198, 'answer_start': 194, 'text..."
1,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan komputer mikro mulai ditinggalkan?,"{'answer_end': 0, 'answer_start': 0, 'text': ''}"
2,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan Komputer mikro mulai dikembangkan Amerik...,"{'answer_end': 0, 'answer_start': 0, 'text': ''}"
3,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan Komputer makro mulai dikembangkan?,"{'answer_end': 0, 'answer_start': 0, 'text': ''}"
4,"Patronim, atau patronimik, adalah sebuah kompo...",Apakah pengertian matronimik?,"{'answer_end': 224, 'answer_start': 153, 'text..."
...,...,...,...
839,Umur dewasa dari drummer ganda adalah sekitar ...,Bagaimana cara jangkrik betina hidup?,"{'answer_end': 0, 'answer_start': 0, 'text': ''}"
840,Kota ini terletak di sebuah lembah sungai yang...,"Menurut Biro Sensus Amerika Serikat, berapa lu...","{'answer_end': 555, 'answer_start': 466, 'text..."
841,Kota ini terletak di sebuah lembah sungai yang...,"Menurut Biro Sensus Amerika Serikat, berapa vo...","{'answer_end': 0, 'answer_start': 0, 'text': ''}"
842,"Tanpa beasiswa, Ogilvy tidak bisa kuliah di Fe...",Apa alasan Ogilvy tidak bisa kuliah di Fettes ...,"{'answer_end': 147, 'answer_start': 16, 'text'..."


In [7]:
# Truncation settings forwarded to the pipelines (and again to nlp_sc at call time).
tokenizer_kwargs = {'truncation': True, 'max_length': MAX_LENGTH}

# transformers pipelines take a CUDA device ordinal, or -1 for CPU.
# torch.cuda.current_device() raises when no GPU is usable, so guard it.
pipeline_device = torch.cuda.current_device() if torch.cuda.is_available() else -1

# Extractive QA model that proposes candidate answers.
nlp_qa = pipeline(task="question-answering", model=MODEL_QA_NAME, tokenizer=MODEL_QA_NAME,
                  device=pipeline_device, **tokenizer_kwargs)

# NER tagger used to derive the answer type of gold answers.
nlp_ner = pipeline(task="ner", model=MODEL_NER_NAME, tokenizer=MODEL_NER_NAME,
                   device=pipeline_device)

# Sequence classifier that labels (context, hypothesis) pairs.
nlp_sc = pipeline(task="text-classification", model=MODEL_SC_NAME, tokenizer=MODEL_SC_NAME,
                  device=pipeline_device, **tokenizer_kwargs)

# Text-to-text generators built from the two paraphrase checkpoints.
nlp_tg_ind = pipeline(task="text2text-generation", model=MODEL_TG_IND_NAME, tokenizer=MODEL_TG_IND_NAME,
                      device=pipeline_device, **tokenizer_kwargs)

nlp_tg_eng = pipeline(task="text2text-generation", model=MODEL_TG_ENG_NAME, tokenizer=MODEL_TG_ENG_NAME,
                      device=pipeline_device, **tokenizer_kwargs)

In [8]:
def retrieve_answer_text(data):
    """Replace every answer dict in ``data['answer']`` with its ``'text'`` value.

    The column is rewritten in one assignment rather than cell by cell through
    chained indexing, which pandas does not guarantee to write back to the
    frame. Values that are not dicts are left untouched, so calling the
    function again on an already converted frame is harmless.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with an ``answer`` column holding dicts that contain ``'text'``.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with the ``answer`` column updated in place.
    """
    data['answer'] = data['answer'].map(
        lambda answer: answer['text'] if isinstance(answer, dict) else answer
    )
    return data

# Swap each answer dict in test_data for its plain 'text' string.
test_data = retrieve_answer_text(test_data)

In [9]:
def assign_answer_types(answer, nlp=nlp_ner):
    """Return the distinct entity types the NER pipeline finds in ``answer``.

    An empty answer, or an answer in which nothing is tagged, yields
    ``["NULL"]``. Otherwise each predicted ``entity`` tag has its first two
    characters dropped (presumably a ``B-`` / ``I-`` style prefix) and the
    de-duplicated tags are returned as a list in no particular order.
    """
    if answer == str():
        return ["NULL"]

    # One label per tagged token, with the two-character prefix removed.
    labels = [token['entity'][2:] for token in nlp(answer)]

    if not labels:
        return ["NULL"]

    return list(set(labels))

In [10]:
def smoothing(question, answer, type):
    """Merge a question and an answer into a single hypothesis string.

    Parameters
    ----------
    question, answer : str
        Pieces to combine.
    type : str
        Strategy name. Only ``'just_concat_answer_and_question'`` is
        supported: the question and the answer joined by one space.

    Returns
    -------
    str
        The hypothesis with surrounding whitespace stripped (so an empty
        answer gives back just the question).

    Raises
    ------
    ValueError
        If ``type`` is not a supported strategy.
    """
    if type == 'just_concat_answer_and_question':
        hypothesis = f"{question} {answer}"
    else:
        # Fail with a clear message instead of hitting an unbound local below.
        raise ValueError(f"Unsupported smoothing type: {type!r}")
    return hypothesis.strip()

In [11]:
def retrieve_answer_text_from_list(data):
    """Extract the ``'answer'`` strings from QA prediction results.

    Parameters
    ----------
    data : dict or sequence of dict
        One prediction, or a sequence of predictions, each with an
        ``'answer'`` key. A single dict is treated as a one-element list.

    Returns
    -------
    list
        The ``'answer'`` values in input order.
    """
    if isinstance(data, dict):
        data = [data]
    return [prediction['answer'] for prediction in data]

# Smoke test: ask the QA pipeline for its 3 best answers on a sample passage
# and show just the answer strings.
x = nlp_qa(context="Dalam melaksanakan kegiatan belajar-mengajar, SMK Negeri 1 Cikampek menempati 3 buah kampus yang saling terpisah satu sama lain. Seluruh kegiatan pembelajaran teori dan kegiatan produktif dilaksanakan di Kampus Utama SMK Negeri 1 Cikampek di Jalan Raya Sukamanah Desa Cikampek Barat, Cikampek, Karawang. Terkecuali untuk pembelajaran bagi peserta program keahlian Agribisnis Tanaman Pangan dan Holtikultura yang dilaksanakan di Kampus Pertanian yang berlokasi di Jalan Raya Parakan-Cikampek Desa Pucung, Kotabaru, Karawang. Selain dua kampus yang berupa bangunan milik sekolah, tempat kegiatan belajar-mengajar untuk kelas XIII Program Keahlian Teknik Otomasi Industri yang ditempatkan di beberapa perusahaan yang menerapkan teknik otomasi. Kampus terbesar yang dimiliki oleh SMK Negeri 1 Cikampek adalah Kampus Utama, yang memiliki fasilitas berupa 6 buah bengkel[7] dan 3 buah laboratoriumanalisis kimia dan mikrobiologi.[11] Luas Kampus Utama SMK Negeri 1 Cikampek kini adalah 28997m2, dan jika di tambah dengan luas Kampus Agribisnis, maka luas keseluruhan kampus milik SMK Negeri 1 Cikampek adalah 29095m2.[16]", question="Berapa luas SMK Negeri 1 Cikampek?", top_k = 3)
y = retrieve_answer_text_from_list(x)
y

['28997m2,', '29095m2.[16]', '28997m2,']

In [12]:
def create_df_with_prediction(df):
    """Run the QA and classification pipelines over every row of ``df``.

    For each row the QA pipeline is asked for ``MAXIMUM_SEARCH_ITER`` ranked
    candidate answers. Each candidate is merged with the question into a
    hypothesis via ``smoothing`` and labelled by the classifier against the
    context. The gold answer is also turned into a hypothesis and tagged
    with its answer types.

    Rows are walked positionally, so the function does not rely on ``df``
    having a default 0..n-1 index.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``context``, ``question`` and ``answer`` columns, where
        ``answer`` is the gold answer text.

    Returns
    -------
    pandas.DataFrame
        One row per input row with the columns ``Context``, ``Question``,
        ``Rec. Pred Answer``, ``Rec. Pred Hypothesis``, ``Rec. Pred Label``
        (three parallel per-row lists), ``Gold Answer``, ``Gold Hypothesis``,
        ``Gold Answer Type``, plus ``Reasoning Type`` and ``Properties``
        filled with ``'-'``.
    """

    def retrieve_answer_text_from_list(arr):
        # nlp_qa may hand back a single prediction dict rather than a list;
        # normalise to a list before extracting the answer strings.
        if not isinstance(arr, list):
            arr = [arr]
        return [candidate['answer'] for candidate in arr]

    def smoothing_from_list(pred_answer_arr, question):
        # One hypothesis per candidate answer.
        return [smoothing(question, answer, TYPE_SMOOTHING) for answer in pred_answer_arr]

    def retrieve_label_from_list(pred_hypothesis_arr, context):
        # Classify each (context, hypothesis) pair; one classifier result per hypothesis.
        return [nlp_sc({'text': context, 'text_pair': hypothesis}, **tokenizer_kwargs)
                for hypothesis in pred_hypothesis_arr]

    pred_answer_array = []
    pred_hypothesis_array = []
    pred_label_array = []

    gold_hypothesis_array = []
    answer_types_array = []

    rows = zip(df['context'], df['question'], df['answer'])
    for context, question, gold_answer in tqdm(rows, total=len(df)):

        answer_types_array.append(assign_answer_types(answer=gold_answer))
        gold_hypothesis_array.append(smoothing(question, gold_answer, TYPE_SMOOTHING))

        pred_answer = retrieve_answer_text_from_list(nlp_qa(context=context,
                                                            question=question,
                                                            top_k=MAXIMUM_SEARCH_ITER))

        pred_hypothesis = smoothing_from_list(pred_answer, question)
        pred_label = retrieve_label_from_list(pred_hypothesis, context)

        pred_answer_array.append(pred_answer)
        pred_hypothesis_array.append(pred_hypothesis)
        pred_label_array.append(pred_label)

    return pd.DataFrame({
        'Context': df['context'],
        'Question': df['question'],

        'Rec. Pred Answer': pred_answer_array,
        'Rec. Pred Hypothesis': pred_hypothesis_array,
        'Rec. Pred Label': pred_label_array,

        'Gold Answer': df['answer'],
        'Gold Hypothesis': gold_hypothesis_array,

        'Gold Answer Type': answer_types_array,
        'Reasoning Type': '-',

        'Properties': '-',
    })

In [13]:
# Collect the ranked QA candidates and their classifier labels for every test row.
qas_df = create_df_with_prediction(test_data)
qas_df

  0%|                                                                                           | 0/844 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|█████████████████████████████████████████████████████████████████████████████████| 844/844 [01:16<00:00, 11.10it/s]


Unnamed: 0,Context,Question,Rec. Pred Answer,Rec. Pred Hypothesis,Rec. Pred Label,Gold Answer,Gold Hypothesis,Gold Answer Type,Reasoning Type,Properties
0,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan Komputer mikro mulai dikembangkan ?,"[1959, 1959 serta microprosesor yang pertama k...",[Kapan Komputer mikro mulai dikembangkan ? 195...,"[{'label': 'entailment', 'score': 0.7981643676...",1959,Kapan Komputer mikro mulai dikembangkan ? 1959,[NULL],-,-
1,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan komputer mikro mulai ditinggalkan?,"[Ada, Ada beberapa inovasi dari GAN, Ada beber...","[Kapan komputer mikro mulai ditinggalkan? Ada,...","[{'label': 'neutral', 'score': 0.8639260530471...",,Kapan komputer mikro mulai ditinggalkan?,[NULL],-,-
2,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan Komputer mikro mulai dikembangkan Amerik...,"[Ada, Ada beberapa inovasi dari GAN, Ada beber...",[Kapan Komputer mikro mulai dikembangkan Ameri...,"[{'label': 'neutral', 'score': 0.6503812074661...",,Kapan Komputer mikro mulai dikembangkan Amerik...,[NULL],-,-
3,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan Komputer makro mulai dikembangkan?,"[Ada, Ada beberapa inovasi dari GAN, 1959]","[Kapan Komputer makro mulai dikembangkan? Ada,...","[{'label': 'neutral', 'score': 0.6094996333122...",,Kapan Komputer makro mulai dikembangkan?,[NULL],-,-
4,"Patronim, atau patronimik, adalah sebuah kompo...",Apakah pengertian matronimik?,[Komomene sebuah nama yang berdasarkan pada na...,[Apakah pengertian matronimik? Komomene sebuah...,"[{'label': 'entailment', 'score': 0.9266768693...",Komomene sebuah nama yang berdasarkan pada nam...,Apakah pengertian matronimik? Komomene sebuah ...,[NULL],-,-
...,...,...,...,...,...,...,...,...,...,...
839,Umur dewasa dari drummer ganda adalah sekitar ...,Bagaimana cara jangkrik betina hidup?,"[bereproduksi, dan hanya memakan getah pohon h...",[Bagaimana cara jangkrik betina hidup? berepro...,"[{'label': 'entailment', 'score': 0.4965691864...",,Bagaimana cara jangkrik betina hidup?,[NULL],-,-
840,Kota ini terletak di sebuah lembah sungai yang...,"Menurut Biro Sensus Amerika Serikat, berapa lu...","[131.3 mil persegi, 131.3 mil persegi (340.0 k...","[Menurut Biro Sensus Amerika Serikat, berapa l...","[{'label': 'entailment', 'score': 0.9700118303...","Menurut Biro Sensus Amerika Serikat, kota ini ...","Menurut Biro Sensus Amerika Serikat, berapa lu...",[PLACE],-,-
841,Kota ini terletak di sebuah lembah sungai yang...,"Menurut Biro Sensus Amerika Serikat, berapa vo...","[Kota, Kota ini terletak di sebuah lembah sung...","[Menurut Biro Sensus Amerika Serikat, berapa v...","[{'label': 'entailment', 'score': 0.5316853523...",,"Menurut Biro Sensus Amerika Serikat, berapa vo...",[NULL],-,-
842,"Tanpa beasiswa, Ogilvy tidak bisa kuliah di Fe...",Apa alasan Ogilvy tidak bisa kuliah di Fettes ...,[bisnis ayahnya terkena dampak depresi perteng...,[Apa alasan Ogilvy tidak bisa kuliah di Fettes...,"[{'label': 'entailment', 'score': 0.9075763821...",Ogilvy tidak bisa kuliah di Fettes atau Oxford...,Apa alasan Ogilvy tidak bisa kuliah di Fettes ...,"[PLACE, PERSON]",-,-


In [14]:
def create_df_with_final_answer_by_variation(df, type_qas=TYPE_QAS, variation=1, threshold=None):
    """Pick one final answer per row from the ranked candidate answers.

    Every row of ``df`` carries parallel lists of candidate answers
    (``Rec. Pred Answer``) and classifier results (``Rec. Pred Label``, dicts
    with ``'label'`` and ``'score'``). The candidates are scanned in order
    and the first acceptable one becomes the prediction.

    Parameters
    ----------
    df : pandas.DataFrame
        Output of ``create_df_with_prediction``.
    type_qas : str
        ``'entailment_only'`` accepts candidates labelled ``entailment``;
        ``'entailment_or_neutral'`` also accepts ``neutral``.
    variation : int
        * 1 - first candidate with an accepted label, no score check; blank
          answer if there is none.
        * 2 - as 1, but the score must also be ``>= threshold``.
        * 3 - as 2, but when nothing qualifies fall back to the
          highest-scoring ``entailment`` candidate, else the highest-scoring
          ``neutral`` one, else the lowest-scoring ``contradiction`` one.
    threshold : float or None
        Minimum classifier score; required for variations 2 and 3 and not
        used by variation 1.

    Returns
    -------
    pandas.DataFrame
        The input columns plus ``Prediction Answer``, ``Prediction Hypothesis``
        and ``Prediction Label``; ``Properties`` records how each answer was
        chosen. Blank predictions use ``""`` for both answer and label.

    Raises
    ------
    ValueError
        For an unknown ``type_qas`` or ``variation``, a threshold given to
        variation 1, or a missing threshold for variations 2 and 3.
    """
    accepted_labels_by_type = {
        'entailment_only': ('entailment',),
        'entailment_or_neutral': ('entailment', 'neutral'),
    }
    if type_qas not in accepted_labels_by_type:
        raise ValueError(f"Unsupported type_qas: {type_qas!r}")
    accepted_labels = accepted_labels_by_type[type_qas]

    # Validate the variation/threshold pairing up front so the per-row loop
    # can never reach the end with its result variables unassigned.
    if variation == 1:
        if threshold:
            raise ValueError("variation 1 does not use a threshold; pass threshold=None")
        threshold = None
    elif variation in (2, 3):
        if threshold is None:
            raise ValueError(f"variation {variation} requires a threshold")
    else:
        raise ValueError(f"Unsupported variation: {variation!r}")

    def search_final_answer_by_variation(label):
        # Index of the first candidate with an accepted label (and, when a
        # threshold is active, a high enough score); None if nothing qualifies.
        for idx, prediction in enumerate(label):
            if prediction['label'] not in accepted_labels:
                continue
            if threshold is None or prediction['score'] >= threshold:
                return idx
        return None

    def take_highest_prob(label):
        # Fallback ranking: best entailment, then best neutral, then the
        # least confident contradiction. None when no label matches.
        for wanted, choose in (('entailment', max), ('neutral', max), ('contradiction', min)):
            scored = [(idx, item['score']) for idx, item in enumerate(label)
                      if item['label'] == wanted]
            if scored:
                return choose(scored, key=lambda pair: pair[1])[0]
        return None

    pred_answer_array = []
    pred_hypothesis_array = []
    pred_label_array = []
    properties_array = []

    rows = zip(df['Question'], df['Rec. Pred Answer'], df['Rec. Pred Label'])
    for question, candidates, labels in rows:

        index = search_final_answer_by_variation(labels)

        if index is not None:
            properties = f"Answer saved from variation {variation} with founded index"
        elif variation == 3:
            index = take_highest_prob(labels)
            if index is not None:
                properties = "Highest probability answer saved from variation 3 without founded index"
            else:
                # No candidate at all (or only unknown labels): nothing to fall back to.
                properties = "(Blank) answer saved from variation 3 without any candidate"
        else:
            properties = f"(Blank) answer saved from variation {variation} without founded index"

        if index is not None:
            pred_answer = candidates[index]
            pred_label = labels[index]
        else:
            pred_answer = ""
            pred_label = ""

        pred_hypothesis = smoothing(question, pred_answer, TYPE_SMOOTHING)

        pred_answer_array.append(pred_answer)
        pred_hypothesis_array.append(pred_hypothesis)
        pred_label_array.append(pred_label)
        properties_array.append(properties)

    return pd.DataFrame({
        'Context': df['Context'],
        'Question': df['Question'],

        'Prediction Answer': pred_answer_array,
        'Prediction Hypothesis': pred_hypothesis_array,
        'Prediction Label': pred_label_array,

        'Rec. Pred Answer': df['Rec. Pred Answer'],
        'Rec. Pred Hypothesis': df['Rec. Pred Hypothesis'],
        'Rec. Pred Label': df['Rec. Pred Label'],

        'Gold Answer': df['Gold Answer'],
        'Gold Hypothesis': df['Gold Hypothesis'],

        'Gold Answer Type': df['Gold Answer Type'],
        'Reasoning Type': df['Reasoning Type'],

        'Properties': properties_array,
    })

In [15]:
# Select the final answer per row using the selection settings from the
# configuration cell (VARIATION / THRESHOLD) instead of repeating the literals.
qas_df = create_df_with_final_answer_by_variation(qas_df, variation=VARIATION, threshold=THRESHOLD)
qas_df

Unnamed: 0,Context,Question,Prediction Answer,Prediction Hypothesis,Prediction Label,Rec. Pred Answer,Rec. Pred Hypothesis,Rec. Pred Label,Gold Answer,Gold Hypothesis,Gold Answer Type,Reasoning Type,Properties
0,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan Komputer mikro mulai dikembangkan ?,1959,Kapan Komputer mikro mulai dikembangkan ? 1959,"{'label': 'entailment', 'score': 0.79816436767...","[1959, 1959 serta microprosesor yang pertama k...",[Kapan Komputer mikro mulai dikembangkan ? 195...,"[{'label': 'entailment', 'score': 0.7981643676...",1959,Kapan Komputer mikro mulai dikembangkan ? 1959,[NULL],-,Answer saved from variation 3 with founded index
1,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan komputer mikro mulai ditinggalkan?,Ada beberapa inovasi dari GAN,Kapan komputer mikro mulai ditinggalkan? Ada b...,"{'label': 'entailment', 'score': 0.62238711118...","[Ada, Ada beberapa inovasi dari GAN, Ada beber...","[Kapan komputer mikro mulai ditinggalkan? Ada,...","[{'label': 'neutral', 'score': 0.8639260530471...",,Kapan komputer mikro mulai ditinggalkan?,[NULL],-,Answer saved from variation 3 with founded index
2,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan Komputer mikro mulai dikembangkan Amerik...,Ada beberapa inovasi dari GAN,Kapan Komputer mikro mulai dikembangkan Amerik...,"{'label': 'entailment', 'score': 0.88881957530...","[Ada, Ada beberapa inovasi dari GAN, Ada beber...",[Kapan Komputer mikro mulai dikembangkan Ameri...,"[{'label': 'neutral', 'score': 0.6503812074661...",,Kapan Komputer mikro mulai dikembangkan Amerik...,[NULL],-,Answer saved from variation 3 with founded index
3,Ada beberapa inovasi dari GAN yang sedikit nga...,Kapan Komputer makro mulai dikembangkan?,Ada beberapa inovasi dari GAN,Kapan Komputer makro mulai dikembangkan? Ada b...,"{'label': 'entailment', 'score': 0.58991396427...","[Ada, Ada beberapa inovasi dari GAN, 1959]","[Kapan Komputer makro mulai dikembangkan? Ada,...","[{'label': 'neutral', 'score': 0.6094996333122...",,Kapan Komputer makro mulai dikembangkan?,[NULL],-,Answer saved from variation 3 with founded index
4,"Patronim, atau patronimik, adalah sebuah kompo...",Apakah pengertian matronimik?,Komomene sebuah nama yang berdasarkan pada nam...,Apakah pengertian matronimik? Komomene sebuah ...,"{'label': 'entailment', 'score': 0.92667686939...",[Komomene sebuah nama yang berdasarkan pada na...,[Apakah pengertian matronimik? Komomene sebuah...,"[{'label': 'entailment', 'score': 0.9266768693...",Komomene sebuah nama yang berdasarkan pada nam...,Apakah pengertian matronimik? Komomene sebuah ...,[NULL],-,Answer saved from variation 3 with founded index
...,...,...,...,...,...,...,...,...,...,...,...,...,...
839,Umur dewasa dari drummer ganda adalah sekitar ...,Bagaimana cara jangkrik betina hidup?,"Selama ini, mereka bereproduksi, dan hanya mem...",Bagaimana cara jangkrik betina hidup? Selama i...,"{'label': 'entailment', 'score': 0.78881722688...","[bereproduksi, dan hanya memakan getah pohon h...",[Bagaimana cara jangkrik betina hidup? berepro...,"[{'label': 'entailment', 'score': 0.4965691864...",,Bagaimana cara jangkrik betina hidup?,[NULL],-,Answer saved from variation 3 with founded index
840,Kota ini terletak di sebuah lembah sungai yang...,"Menurut Biro Sensus Amerika Serikat, berapa lu...",131.3 mil persegi,"Menurut Biro Sensus Amerika Serikat, berapa lu...","{'label': 'entailment', 'score': 0.97001183032...","[131.3 mil persegi, 131.3 mil persegi (340.0 k...","[Menurut Biro Sensus Amerika Serikat, berapa l...","[{'label': 'entailment', 'score': 0.9700118303...","Menurut Biro Sensus Amerika Serikat, kota ini ...","Menurut Biro Sensus Amerika Serikat, berapa lu...",[PLACE],-,Answer saved from variation 3 with founded index
841,Kota ini terletak di sebuah lembah sungai yang...,"Menurut Biro Sensus Amerika Serikat, berapa vo...",Kota,"Menurut Biro Sensus Amerika Serikat, berapa vo...","{'label': 'entailment', 'score': 0.53168535232...","[Kota, Kota ini terletak di sebuah lembah sung...","[Menurut Biro Sensus Amerika Serikat, berapa v...","[{'label': 'entailment', 'score': 0.5316853523...",,"Menurut Biro Sensus Amerika Serikat, berapa vo...",[NULL],-,Answer saved from variation 3 with founded index
842,"Tanpa beasiswa, Ogilvy tidak bisa kuliah di Fe...",Apa alasan Ogilvy tidak bisa kuliah di Fettes ...,bisnis ayahnya terkena dampak depresi pertenga...,Apa alasan Ogilvy tidak bisa kuliah di Fettes ...,"{'label': 'entailment', 'score': 0.90757638216...",[bisnis ayahnya terkena dampak depresi perteng...,[Apa alasan Ogilvy tidak bisa kuliah di Fettes...,"[{'label': 'entailment', 'score': 0.9075763821...",Ogilvy tidak bisa kuliah di Fettes atau Oxford...,Apa alasan Ogilvy tidak bisa kuliah di Fettes ...,"[PLACE, PERSON]",-,Answer saved from variation 3 with founded index


In [17]:
def normalize_text(s):
    """Canonicalise an answer string before token comparison.

    Steps, in order: lowercase, drop ASCII punctuation, replace the English
    articles "a", "an" and "the" with a space, and collapse runs of
    whitespace into single spaces.
    """
    lowered = s.lower()

    punctuation = set(string.punctuation)
    without_punct = "".join(ch for ch in lowered if ch not in punctuation)

    without_articles = re.sub(r"\b(a|an|the)\b", " ", without_punct, flags=re.UNICODE)

    return " ".join(without_articles.split())

def compute_f1(pred, gold):
    """Token-level F1 between a predicted and a gold answer.

    Both strings go through ``normalize_text`` and are split on whitespace.
    If either side has no tokens the result is 1 when both are empty and 0
    otherwise. With no shared token the result is 0.
    """
    # Predicted tokens = true positives + false positives (precision side).
    pred_tokens = normalize_text(pred).split()
    # Gold tokens = true positives + false negatives (recall side).
    gold_tokens = normalize_text(gold).split()

    if not gold_tokens or not pred_tokens:
        return int(gold_tokens == pred_tokens)

    # Multiset intersection counts each shared token as often as it occurs on both sides.
    shared = collections.Counter(pred_tokens) & collections.Counter(gold_tokens)
    overlap = sum(shared.values())  # true positives
    if overlap == 0:
        return 0

    precision = 1.0 * overlap / len(pred_tokens)
    recall = 1.0 * overlap / len(gold_tokens)
    return (2.0 * precision * recall) / (precision + recall)

def compute_metrics_from_df(df):
    """Compute exact-match and mean token F1 (both in percent) over ``df``.

    Exact match compares the raw ``Prediction Answer`` and ``Gold Answer``
    strings with ``==`` (no normalisation); F1 is ``compute_f1`` averaged
    over all rows. Rows are paired positionally, so a filtered frame with a
    non-default index works as well.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``Prediction Answer`` and ``Gold Answer`` columns.

    Returns
    -------
    dict
        ``{'exact_match': float, 'f1': float}``.

    Raises
    ------
    ValueError
        If ``df`` has no rows (the metrics would be undefined).
    """
    denominator = len(df)
    if denominator == 0:
        raise ValueError("Cannot compute metrics on an empty DataFrame")

    total_correct = 0
    f1_array = []

    for pred_answer, gold_answer in zip(df["Prediction Answer"], df["Gold Answer"]):
        if pred_answer == gold_answer:
            total_correct += 1
        f1_array.append(compute_f1(pred=pred_answer, gold=gold_answer))

    exact_match = (total_correct / denominator) * 100.0
    final_f1 = np.mean(f1_array) * 100.0

    return {'exact_match': exact_match, 'f1': final_f1}

In [21]:
# Score the final predictions. The metrics need the frame with the
# 'Prediction Answer' / 'Gold Answer' columns, i.e. qas_df.
metric_result = compute_metrics_from_df(qas_df)

In [None]:
# Timestamp with ":", " " and "." replaced so it is safe inside a path.
TIME_NOW = str(datetime.now()).replace(":", "-").replace(" ", "_").replace(".", "_")

# DATA_NAME and FREEZE are not assigned in any visible cell of this notebook,
# so a fresh kernel would stop here with a NameError. Keep a value that is
# already defined; otherwise fall back to defaults.
# NOTE(review): the defaults are inferred from MODEL_QA_NAME
# ("...DatasetQAS-IDK-MRC-...-without-freeze...") — confirm.
DATA_NAME = globals().get('DATA_NAME', 'IDK-MRC')
FREEZE = globals().get('FREEZE', False)

# Use only the part after the last "/" of a hub id ("user/model" -> "model").
new_name = MODEL_NAME.rsplit('/', 1)[-1]
NAME = f'QAS-{DATA_NAME}-with-{new_name}'

# Suffixes describing the run: "-ITTL" when a classifier checkpoint is
# configured, "-freeze" when FREEZE is set.
if MODEL_SC_NAME is not None:
    NAME = f'{NAME}-ITTL'

if FREEZE:
    NAME = f'{NAME}-freeze'

# Per-run output layout under ./results/.
QA = f'./results/{NAME}-{TIME_NOW}'
CHECKPOINT_DIR = f'{QA}/checkpoint/'
MODEL_DIR = f'{QA}/model/'
OUTPUT_DIR = f'{QA}/output/'
METRIC_RESULT_DIR = f'{QA}/metric-result/'
# Repository name, cut to at most 96 characters.
REPO_NAME = f'fine-tuned-{NAME}'[:96]

In [24]:
# Make sure the metric directory exists, then store the metrics dict as text.
metric_dir = os.path.dirname(METRIC_RESULT_DIR)
os.makedirs(metric_dir, exist_ok=True)

# The context manager closes the file on exit.
with open(f'{METRIC_RESULT_DIR}/metric_result.txt', "w") as f:
    f.write(str(metric_result))

{'exact_match': 9.715639810426541, 'f1': 33.64000126978179}


In [None]:
# to_csv does not create missing directories, and no earlier visible cell
# creates OUTPUT_DIR, so create it before writing.
os.makedirs(OUTPUT_DIR, exist_ok=True)
qas_df.to_csv(f'{OUTPUT_DIR}/output_df.csv')