### Evaluation of DeBERTa_4_90 on the HEALTHVER_test dataset

In [None]:
import numpy as np
import pandas as pd
import time

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, recall_score, precision_score, f1_score

import datasets
from datasets import Dataset
from transformers import AutoTokenizer
from transformers import DataCollatorWithPadding
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
from transformers import EarlyStoppingCallback, IntervalStrategy

In [1]:
# Download the HEALTHVER test split into the folder containing this notebook
# before running this cell; the path below is relative to the working directory.

df_testiranje = pd.read_csv('./healthver_test.csv')
df_testiranje

FileNotFoundError: [Errno 2] No such file or directory: './healthver_test.csv'

In [32]:
# Encode the string labels as integer class ids:
#   'Neutral' -> 0, 'Supports' -> 1, anything else -> 2.
# Values that are already 0/1/2 are passed through unchanged, so re-running this
# cell no longer rewrites every (already encoded) label to 2.
def _encode_label(value):
    """Return the integer class id for one raw `label` cell."""
    already_encoded = (
        isinstance(value, (int, np.integer))
        and not isinstance(value, bool)
        and value in (0, 1, 2)
    )
    if already_encoded:
        return int(value)
    if value == 'Neutral':
        return 0
    if value == 'Supports':
        return 1
    # Same catch-all as before: every remaining value is treated as class 2.
    return 2

df_testiranje['label'] = df_testiranje['label'].apply(_encode_label)
df_testiranje

Unnamed: 0,id,evidence,claim,label,topic_ip,question
0,12813,"In this study, we collected blood from COVID-1...","For most patients, COVID-19 begins and ends in...",0,3,will SARS-CoV2 infected people develop immunity?
1,11044,"However, wearing N95 respirators can prevent ...",N95 masks are better than clothe masks,1,18,what are the best masks for preventing infecti...
2,1590,Lack of personal protective equipment was cite...,Unexpected Cause of Death in Younger COVID-19 ...,2,4,how do people die from the coronavirus?
3,7720,This in vitro study demonstrated that irradiat...,Ultraviolet lamps kill the COVID-19 virus.,1,73,Does UV light help in preventing covid-19?
4,10528,The most common coronaviruses may well survive...,the virus can stay on surfaces long enough to ...,1,16,touching a contaminated surface will not make ...
...,...,...,...,...,...,...
1818,13566,Thus both qualitative and quantitative feature...,"The immune system, noticing the infection, fla...",0,3,will SARS-CoV2 infected people develop immunity?
1819,12749,"In this study, we collected blood from COVID-1...","Vitamins C and D boost our immune systems, aid...",0,3,will SARS-CoV2 infected people develop immunity?
1820,12586,Distinguishing pre-existing and de novo antibo...,a popular treatment to tamp down the immune sy...,0,3,will SARS-CoV2 infected people develop immunity?
1821,7773,Data indicate CVIR demonstrates significant tr...,"Covid-19 is infecting quite a few people, many...",0,75,Can smoking cannabis (weed) help in preventing...


In [33]:
# Build one input string per row: CLS + claim + SEP + evidence + SEP, using the
# special-token strings exposed by `tokenizer`.
# NOTE(review): `tokenizer` is not created in any cell visible here - confirm it is
# defined before this cell on a fresh kernel.
cls_tok = tokenizer.cls_token
sep_tok = tokenizer.sep_token
claim_part = cls_tok + df_testiranje['claim'] + sep_tok
evidence_part = df_testiranje['evidence'] + sep_tok
df_testiranje['text'] = claim_part + evidence_part

# Only `text` is kept from the raw text columns; the rest are dropped in place.
unused_columns = ['claim', 'evidence', 'topic_ip', 'question']
df_testiranje.drop(columns=unused_columns, inplace=True)
df_testiranje

Unnamed: 0,id,label,text
0,12813,0,"[CLS]For most patients, COVID-19 begins and en..."
1,11044,1,[CLS]N95 masks are better than clothe masks[SE...
2,1590,2,[CLS]Unexpected Cause of Death in Younger COVI...
3,7720,1,[CLS]Ultraviolet lamps kill the COVID-19 virus...
4,10528,1,[CLS]the virus can stay on surfaces long enoug...
...,...,...,...
1818,13566,0,"[CLS]The immune system, noticing the infection..."
1819,12749,0,[CLS]Vitamins C and D boost our immune systems...
1820,12586,0,[CLS]a popular treatment to tamp down the immu...
1821,7773,0,"[CLS]Covid-19 is infecting quite a few people,..."


In [34]:
# Convert the pandas frame into a `datasets.Dataset` for the steps below.
Dataset_test = Dataset.from_pandas(df_testiranje)

In [35]:
# Wrap the test set in a DatasetDict under the 'test' split key.
test_splits = {'test': Dataset_test}
test_dataset_dict = datasets.DatasetDict(test_splits)

In [36]:
# Apply `preprocess_function` to every split in batches.
# NOTE(review): `preprocess_function` is not defined in any cell visible here -
# confirm it exists before this cell on a fresh kernel (presumably it tokenizes
# the `text` column; verify).
test_tokenized = test_dataset_dict.map(preprocess_function, batched=True)

  0%|          | 0/2 [00:00<?, ?ba/s]

In [37]:
# Display the mapped DatasetDict to check its columns and row count.
test_tokenized

DatasetDict({
    test: Dataset({
        features: ['id', 'label', 'text', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1823
    })
})

In [None]:
# Load the 3-label sequence-classification checkpoint stored under
# ./DeBERTa_early4_90_new and wrap it in a Trainer (default arguments) so that
# `predict` can be used for inference.
# Note: despite its name, `newtrainer` holds the model, not a Trainer.
newtrainer = AutoModelForSequenceClassification.from_pretrained(
    './DeBERTa_early4_90_new',
    num_labels=3,
)
trainer = Trainer(model=newtrainer)

In [38]:
# Run inference over the 'test' split; element 0 of the result is consumed below
# as the per-row class scores.
predictions = trainer.predict(test_tokenized['test'])

In [39]:
# Predicted class id per test row = index of the largest score in that row.
new_classifications = [np.argmax(row_scores) for row_scores in predictions[0]]

In [40]:
# Per-class precision / recall / F1 on the test split.
# Class ids follow the label encoding applied to the test frame:
# 0 = 'Neutral', 1 = 'Supports', 2 = every other label.
# `labels` is passed explicitly so each display name stays tied to its class id
# even if a class is absent from both the gold labels and the predictions
# (without it, the report fails because the name count no longer matches).
class_ids = [0, 1, 2]
target_names = ['NO_EVIDENCE', 'SUPPORT', 'CONTRADICT']
print(classification_report(
    test_tokenized["test"]['label'],
    new_classifications,
    labels=class_ids,
    target_names=target_names,
))

              precision    recall  f1-score   support

 NO_EVIDENCE       0.47      0.88      0.61       727
     SUPPORT       0.67      0.29      0.40       671
  CONTRADICT       0.69      0.27      0.39       425

    accuracy                           0.52      1823
   macro avg       0.61      0.48      0.47      1823
weighted avg       0.59      0.52      0.48      1823



In [41]:
# Confusion matrix of gold labels versus predicted class ids on the test split.
confusion_matrix(
    y_true=test_tokenized["test"]['label'],
    y_pred=new_classifications,
)

array([[643,  64,  20],
       [445, 195,  31],
       [278,  33, 114]])