#### Importing packages

In [None]:
import argparse
import json
import jsonlines

import numpy as np
import pandas as pd
import pickle
# import pickle5 as pickle
import random
import torch

from collections import Counter
from datetime import datetime

from nltk.corpus import stopwords
from nltk import download
from nltk.tokenize import sent_tokenize

from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.schedulers import ASHAScheduler
from sentence_transformers import SentenceTransformer

from sklearn.metrics.pairwise import cosine_similarity, linear_kernel
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

from tqdm import tqdm

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel, AutoModelForSequenceClassification
from transformers import AlbertTokenizer, AlbertForSequenceClassification, RobertaConfig
from transformers import BertForSequenceClassification, Trainer, TrainingArguments, AdamW, BertTokenizer, BertPreTrainedModel, BertModel,  RobertaForSequenceClassification, RobertaTokenizer
from transformers import TapasForSequenceClassification, TapasTokenizer, TapasConfig 
from transformers.modeling_outputs import SequenceClassifierOutput


## Overview

1. <strong>Set dataset files in dictionary 'dataset_dict' </strong>


2. <strong>Evaluate hyperparameter selection using function main_evaluation. </strong>


3. <strong>Predict using function main_prediction. </strong> 

Set the following variables first:


In [None]:
# Set the paths to the train, eval and test set files for each table fact-checking dataset below.
# load_dataset(name) looks up the keys "<name>_train", "<name>_eval" and "<name>_test" in this
# dictionary and reads "pubhealthtab" files with jsonlines, "feverous" files with pickle and
# every other dataset with json.load.

# Sources of the other datasets:
# Infotabs: https://github.com/infotabs/infotabs
# TabFact: https://github.com/wenhuchen/Table-Fact-Checking
# FEVEROUS: https://github.com/Raldir/FEVEROUS

dataset_dict = {
    "pubhealthtab_train": "",
    "pubhealthtab_eval": "",
    "pubhealthtab_test": "",
    "feverous_train": "",
    "feverous_eval": "",
    "feverous_test": "",
    "infotabs_train": "",
    "infotabs_eval": "",
    "infotabs_test": "",
    "tabfact_train": "",
    "tabfact_eval": "",
    "tabfact_test": ""
}


-------------

In [None]:
class MYDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs tokenizer output with integer class labels.

    `encodings` is a mapping from feature name to a per-sample sequence and
    `labels` is a sequence with one label per sample; both are indexed by the
    sample position.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The number of samples is defined by the label list.
        return len(self.labels)

    def __getitem__(self, idx):
        # Convert every feature of the requested sample into a tensor.
        sample = {}
        for feature_name, feature_values in self.encodings.items():
            sample[feature_name] = torch.tensor(feature_values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample
    

In [None]:
# Dataset 

class T5Dataset(torch.utils.data.Dataset):
    """Dataset that tokenizes (table, claim) pairs on the fly.

    Each element of `data` must provide the keys "table", "claim" and "label".
    The tokenizer is called with `table=` / `queries=` keyword arguments, i.e. it
    has to be a table-aware tokenizer.
    """

    def __init__(self, data, tokenizer):
        self.data = data
        self.tokenizer = tokenizer

    def __getitem__(self, idx):
        item = self.data[idx]

        try:
            table = create_table_df(item["table"]).astype(str)
        except Exception as e:
            # Report which sample is broken, then propagate: continuing would only
            # fail a few lines later with an unrelated "table is not defined" error.
            print(f"Error for index {idx}: {e}")
            raise

        encoding = self.tokenizer(table=table,
                                  queries=[item["claim"]],
                                  padding="max_length",
                                  truncation=True,
                                  return_tensors="pt"
        )
        # remove the batch dimension which the tokenizer adds by default
        encoding = {key: val.squeeze(0) for key, val in encoding.items()}

        # class ids: 1 = SUPPORTS, 2 = REFUTES, 0 = any other verdict
        if item["label"] == "SUPPORTS":
            label = torch.tensor([1])
        elif item["label"] == "REFUTES":
            label = torch.tensor([2])
        else:
            label = torch.tensor([0])

        encoding['labels'] = label
        return encoding

    def __len__(self):
        return len(self.data)


In [None]:
# Dataset 

class TableDataset(torch.utils.data.Dataset):
    """Dataset that tokenizes (table, claim) pairs on the fly for a table model.

    Each element of `data` must provide the keys "table", "claim" and "label".
    The tokenizer is called with `table=` / `queries=` keyword arguments (the
    notebook constructs this dataset with a TapasTokenizer).
    """

    def __init__(self, data, tokenizer):
        self.data = data
        self.tokenizer = tokenizer

    def __getitem__(self, idx):
        item = self.data[idx]

        try:
            table = create_table_df(item["table"]).astype(str)
        except Exception as e:
            # Report which sample is broken, then propagate: continuing would only
            # fail a few lines later with an unrelated "table is not defined" error.
            print(f"Error for index {idx}: {e}")
            raise

        encoding = self.tokenizer(table=table,
                                  queries=[item["claim"]],
                                  padding="max_length",
                                  truncation=True,
                                  return_tensors="pt"
        )
        # remove the batch dimension which the tokenizer adds by default
        encoding = {key: val.squeeze(0) for key, val in encoding.items()}

        # three-way verdict: 1 = SUPPORTS, 2 = REFUTES, 0 = any other verdict
        if item["label"] == "SUPPORTS":
            label = torch.tensor([1])
        elif item["label"] == "REFUTES":
            label = torch.tensor([2])
        else:
            label = torch.tensor([0])

        encoding['labels'] = label
        return encoding

    def __len__(self):
        return len(self.data)


In [None]:
# Maps the short model names used throughout this notebook to the checkpoint identifier that is
# passed to the corresponding `from_pretrained` call.
model_dict = {
    "bert": "bert-base-uncased",
    "roberta": 'ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli',
#     "roberta": 'roberta-base',  # alternative RoBERTa checkpoint, currently disabled
    "albert": "albert-base-v2",
    "albert_pretrained_nli": "ynie/albert-xxlarge-v2-snli_mnli_fever_anli_R1_R2_R3-nli",
    "biobert": "gsarti/biobert-nli",
    "bluebert": "adamlin/NCBI_BERT_pubmed_mimic_uncased_large_transformers",
    "clinicalbert": "emilyalsentzer/Bio_ClinicalBERT",
    "tapas": "google/tapas-base-finetuned-tabfact"
}

# Fixed fine-tuning hyperparameters per model name. main_prediction reads the four keys
# "training_epochs", "batch_size", "learning_rate" and "weight_decay" for the model it trains.
# NOTE(review): there is no "tapas" entry here although model_dict defines a "tapas" model.
model_hyperparameter_dict = {
    "bert": {
        "training_epochs": 5, 
        "batch_size": 4, 
        "learning_rate": 1e-5,
        "weight_decay": 0.001
    },
    "roberta": { 
        "training_epochs": 4, 
        "batch_size": 8, 
        "learning_rate": 1e-5,
        "weight_decay": 0.01
    },
    "albert": {
        "training_epochs": 5, 
        "batch_size": 16, 
        "learning_rate": 1e-5,
        "weight_decay": 0.001
    },
    "albert_pretrained_nli": {
        "training_epochs": 5, 
        "batch_size": 8, 
        "learning_rate": 1e-5,
        "weight_decay": 0.001
    },
    "biobert": {
        "training_epochs": 5, 
        "batch_size": 4, 
        "learning_rate": 1e-5,
        "weight_decay": 0.001    
    },
    "bluebert": {
        "training_epochs": 5, 
        "batch_size": 8, 
        "learning_rate": 1e-5,
        "weight_decay": 0.001
    },
    "clinicalbert": {
        "training_epochs": 4, 
        "batch_size": 4, 
        "learning_rate": 1e-5,
        "weight_decay": 0.01
    }
}

def _read_split_file(path: str, dataset_name: str, split: str):
    """Read one dataset split from `path` using the on-disk format of `dataset_name`."""
    if not path:
        # An unset entry in dataset_dict would otherwise surface as an opaque open("") failure.
        raise FileNotFoundError(
            f"No file configured for '{dataset_name}_{split}' in dataset_dict."
        )

    if dataset_name == "pubhealthtab":
        # JSON-lines file: one sample per line
        with jsonlines.open(path) as reader:
            return list(reader)

    if dataset_name == "feverous":
        # SECURITY: pickle.load executes arbitrary code embedded in the file;
        # only load FEVEROUS pickles that come from a trusted source.
        with open(path, "rb") as file:
            return pickle.load(file)

    # every other dataset is stored as a single JSON document
    with open(path, encoding="utf-8") as file:
        return json.load(file)


def load_dataset(dataset_name: str):
    """
    Loads the train, eval and test split of one dataset.

    The file paths are taken from the module-level `dataset_dict` under the keys
    "<dataset_name>_train", "<dataset_name>_eval" and "<dataset_name>_test".

    Parameters:
    dataset_name (str): dataset prefix used in dataset_dict, e.g. "pubhealthtab"

    Returns:
    tuple: (trainset, evalset, testset)

    Raises:
    KeyError: if dataset_dict has no entries for `dataset_name`
    FileNotFoundError: if a path is empty or the file does not exist
    """
    splits = {}
    for split in ("train", "eval", "test"):
        path = dataset_dict[dataset_name + "_" + split]
        splits[split] = _read_split_file(path, dataset_name, split)

    trainset, evalset, testset = splits["train"], splits["eval"], splits["test"]

    print(f"Dataset split into {len(trainset)} training samples, {len(evalset)} eval samples and {len(testset)} test samples.")
    return trainset, evalset, testset
    

In [None]:
def get_table_caption(table: dict) -> str:
    """
    Extracts caption text from table if one exists

    Parameters:
    table (dict): one table consisting of caption, header and rows

    Returns:
    str: the caption, or an empty string when the table has no "caption" key
         or the caption is empty / None
    """

    # .get() keeps tables without a "caption" key from raising KeyError
    return table.get("caption") or ""


def get_table_header(table: dict) -> str:
    """
    Extracts header text from table if one exists

    Parameters:
    table (dict): one table consisting of caption, header and rows

    Returns:
    str: horizontal header cells, a single space, then vertical header cells
         (each group joined by spaces; a missing or empty group contributes "")
    """
    horizontal = table.get("header_horizontal") or []
    vertical = table.get("header_vertical") or []

    # The horizontal header is dropped when every cell is empty; str() mirrors how
    # row entries are stringified elsewhere so non-string header cells do not crash.
    horizontal_text = " ".join(str(cell) for cell in horizontal) if any(horizontal) else ""
    vertical_text = " ".join(str(cell) for cell in vertical)

    return horizontal_text + " " + vertical_text


In [None]:
def _row_sentence(row, i_row: int, column_headers, key_val: bool = False) -> str:
    """Verbalises one row for the template ("In row i column j ... is v") or key-value layout."""
    # Header names are only used when there is exactly one per cell of this row.
    use_headers = bool(column_headers) and len(column_headers) == len(row)

    sent = f"row_{i_row}: " if key_val else f"In row {i_row} "
    for i_col, col in enumerate(row):
        if key_val:
            name = column_headers[i_col] if use_headers else f"column_{i_col}"
            sent += f"{name}:{col}, "
        elif use_headers:
            sent += f"column {i_col} ({column_headers[i_col]}) is {col}, "
        else:
            sent += f"column {i_col} is {col}, "
    return sent + ". "


def _t5_rewrite(text: str, model, tokenizer, tokenizer_max_length: int, **generate_kwargs) -> str:
    """Runs `text` through the seq2seq model and returns the first decoded generation."""
    input_ids = tokenizer(text, truncation=True, max_length=tokenizer_max_length,
                          return_tensors="pt").input_ids
    generated_ids = model.generate(input_ids, **generate_kwargs)
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True,
                            clean_up_tokenization_spaces=True)


def get_table_text(claim: str, table: dict, method: str, shuffle: bool, model, tokenizer,
                   shuffle_type: str = "row") -> str:
    """
    Extracts text from table returns table in the desired representation 

    Parameters:
    claim (str): claim belonging to the table (currently unused)
    table (dict): one table consisting of caption, header and rows
    method (str): "concatenation", "template", "t5_concat", "t5_temp" or "key_val"
    shuffle (bool): whether the table content is shuffled before it is verbalised
    model: seq2seq model, only used by the "t5_concat" / "t5_temp" methods
    tokenizer: tokenizer of `model`, only used by the "t5_concat" / "t5_temp" methods
    shuffle_type (str): "row" shuffles the order of rows (default); any other value
                        shuffles the cells inside every row

    Returns: 
    str: "<caption> <header> <content>"; an empty string for an unknown method

    """
    # The global RNG is re-seeded on every call, so each call applies the same
    # shuffle sequence and later `random` calls of the caller start from this state.
    random.seed(4)

    caption = get_table_caption(table)
    header = get_table_header(table)
    column_headers = table.get("header_horizontal")
    # Copy every row so that shuffling never modifies the caller's table.
    rows = [list(row) for row in table["rows"]]

    if shuffle:
        if shuffle_type == "row":
            random.shuffle(rows)
        else:
            for row in rows:
                random.shuffle(row)

    if method == "concatenation":
        content = " ".join(" ".join(str(entry) for entry in row) for row in rows).strip()

    elif method == "template":
        content = "".join(_row_sentence(row, i_row, column_headers)
                          for i_row, row in enumerate(rows))

    elif method == "t5_concat":
        rewritten_rows = []
        for row in rows:
            # str() keeps numeric / None cells from breaking the join
            row_content = " ".join(str(entry) for entry in row)
            # the character count (+5) is used as token budget for both input and output
            budget = len(row_content) + 5
            rewritten_rows.append(_t5_rewrite(row_content, model, tokenizer, budget,
                                              max_length=budget))
        content = " ".join(rewritten_rows).strip()

    elif method == "t5_temp":
        rewritten_rows = [
            _t5_rewrite(_row_sentence(row, i_row, column_headers), model, tokenizer, 512)
            for i_row, row in enumerate(rows)
        ]
        # generated sentences carry no trailing blank, so separate them explicitly
        content = " ".join(rewritten_rows)

    elif method == "key_val":
        content = "".join(_row_sentence(row, i_row, column_headers, key_val=True)
                          for i_row, row in enumerate(rows))

    else:
        # unknown representation: keep returning an empty text
        return ""

    return " ".join([caption, header, content])


In [None]:
def process_data(claim_verdict_list):
    """Splits (text, verdict, ...) records into a list of texts and a list of class ids.

    Only the first two positions of every record are read. Verdicts are mapped as
    'NOT ENOUGH INFO' -> 0, 'SUPPORTS' -> 1, 'REFUTES' -> 2; any other verdict
    raises KeyError.
    """
    verdict_ids = {'NOT ENOUGH INFO': 0, 'SUPPORTS': 1, 'REFUTES': 2}

    texts = []
    label_ids = []
    for record in claim_verdict_list:
        texts.append(record[0])
        label_ids.append(verdict_ids[record[1]])

    return texts, label_ids


In [None]:
def create_table_df(table_dict):
    """Builds a DataFrame from a table dictionary without modifying the dictionary.

    Parameters:
    table_dict (dict): table with "rows" (list of cell lists) and
                       "header_horizontal" (list of column names, may be empty/None)

    Returns:
    pd.DataFrame: one row per table row; rows and the header are left-padded with
                  empty strings so that all of them share the width of the widest one
    """
    rows = table_dict["rows"]
    column_names = list(table_dict["header_horizontal"] or [])

    # widest row or header decides the frame width (also works for a table without rows)
    df_width = max([len(column_names)] + [len(row) for row in rows])

    # Pad on the left with "" using new lists, so the caller's rows keep their length.
    column_names = [""] * (df_width - len(column_names)) + column_names
    data = [[""] * (df_width - len(row)) + list(row) for row in rows]

    return pd.DataFrame(data, columns=column_names)
    

In [None]:
def compute_metrics(pred):
    """Computes classification metrics for a Trainer prediction and dumps two CSV files.

    Parameters:
    pred: prediction object exposing `label_ids` and `predictions` (class scores)

    Returns:
    dict: accuracy, micro/macro precision, recall and F1, and the per-class
          classification report under 'class_rep'

    Side effects:
    writes "confusion_matrix.csv" and "result_matrix.csv" into the working directory
    """
    class_names = ['NOT ENOUGH INFO', 'SUPPORTS', 'REFUTES']
    class_ids = list(range(len(class_names)))

    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='micro')
    precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(labels, preds, average='macro')
    acc = accuracy_score(labels, preds)

    # Passing the class ids explicitly keeps report and matrix at three classes even
    # when one class does not occur in this batch of labels / predictions.
    class_rep = classification_report(labels, preds, labels=class_ids,
                                      target_names=class_names, output_dict=True)
    conf_matrix = confusion_matrix(labels, preds, labels=class_ids)
    pd.DataFrame(conf_matrix).to_csv("confusion_matrix.csv")

    # Claims and tables are only available through an optional module-level `testset`.
    # It is used when it exists and lines up with the predictions; otherwise only
    # labels and predictions are written (e.g. when predicting on the eval split).
    samples = globals().get("testset")
    if samples is not None and len(samples) == len(preds):
        verdict_ids = {"SUPPORTS": 1, "REFUTES": 2}  # every other verdict -> 0
        records = [
            {"claim": entry["claim"], "table": entry["table"],
             "label": verdict_ids.get(entry["label"], 0), "prediction": prediction}
            for entry, prediction in zip(samples, preds)
        ]
    else:
        records = [{"label": label, "prediction": prediction}
                   for label, prediction in zip(labels, preds)]

    # Build the frame once instead of appending row by row.
    result_matrix = pd.DataFrame(records, columns=["claim", "table", "label", "prediction"])
    result_matrix.to_csv("result_matrix.csv")

    return {
        'accuracy': acc,
        'f1_micro': f1,
        'precision_micro': precision,
        'recall_micro': recall,
        'f1_macro': f1_macro,
        'precision_macro': precision_macro,
        'recall_macro': recall_macro,
        'class_rep': class_rep
    }

def model_init():
    """Creates a fresh 3-class RoBERTa sequence classifier from the configured checkpoint."""
    checkpoint = model_dict["roberta"]
    classifier = RobertaForSequenceClassification.from_pretrained(
        checkpoint,
        num_labels=3,
        return_dict=True,
    )
    return classifier



In [None]:
def model_trainer(model, train_dataset, training_epochs, batch_size, learning_rate, weight_decay):
    """Builds a Hugging Face Trainer for `model` with the given hyperparameters.

    Parameters:
    model: sequence classification model to fine-tune
    train_dataset: dataset used by `trainer.train()`
    training_epochs: number of training epochs
    batch_size: per-device batch size for both training and evaluation
    learning_rate: initial learning rate
    weight_decay: weight decay passed to the optimizer

    Returns:
    Trainer: not yet trained; checkpoints are disabled and results go to './results'
    """
    # Only one TrainingArguments object is needed; no evaluation happens during training,
    # predictions are requested explicitly by the caller afterwards.
    training_args = TrainingArguments(
        output_dir='./results',
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=training_epochs,
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        save_strategy="no",
        disable_tqdm=True
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        compute_metrics=compute_metrics
    )

    return trainer


In [None]:
def prepare_input(annotation, t5_tokenizer, t5_model, representation: str, shuffle: str, model: str):
    """Builds the "<claim> <separator> <table text>" input string for one sample.

    `shuffle` selects an optional perturbation: "claim" shuffles the claim's words,
    "table" asks get_table_text to shuffle the table, anything else leaves both
    untouched. The separator is ' </s> ' when `model` equals "roberta" and
    ' [SEP] ' otherwise.
    """
    original_claim = annotation["claim"]

    # first segment: the claim, optionally with its words in random order
    if shuffle == "claim":
        words = original_claim.split(" ")
        random.shuffle(words)
        first_segment = " ".join(words)
    else:
        first_segment = original_claim

    # second segment: the verbalised table (shuffled only in "table" mode)
    table_segment = get_table_text(
        original_claim,
        annotation["table"],
        method=representation,
        shuffle=(shuffle == "table"),
        model=t5_model,
        tokenizer=t5_tokenizer,
    )

    separator = ' </s> ' if model == "roberta" else ' [SEP] '
    return separator.join([first_segment, table_segment])
        


In [None]:
def main_prediction(t5_model, t5_tokenizer, dataset: str, representation: str, model: str, 
                    df: pd.DataFrame, shuffle = ""): 
    """Fine-tunes the requested model(s) on the train split and scores them on the test split.

    Parameters:
    t5_model, t5_tokenizer: table-to-text model and tokenizer handed to prepare_input
    dataset (str): dataset name understood by load_dataset
    representation (str): table representation passed to prepare_input
    model (str): key of model_dict; an empty value runs every model in model_dict
    df (pd.DataFrame): results frame; one row is appended per trained model
    shuffle (str): "", "claim" or "table" (see prepare_input)

    Returns:
    pd.DataFrame: `df` with the new result rows (it is also written to a CSV file
                  named fc_PREDICTION_<dataset>_<representation>_<model>.csv)
    """
    # load dataset (the eval split is not needed for the final train/test run)
    trainset, evalset, testset = load_dataset(dataset) 

    # execute predictions for a single model or for all configured models
    models_list = [model] if model else list(model_dict.keys())

    for model_name in models_list: 
        model_path = model_dict[model_name]

        if model_name not in model_hyperparameter_dict:
            # e.g. "tapas" has a checkpoint but no tuned hyperparameters
            print(f"Skipping model '{model_name}': no entry in model_hyperparameter_dict.")
            continue

        hyperparameters = model_hyperparameter_dict[model_name]
        e = hyperparameters["training_epochs"]
        b = hyperparameters["batch_size"]
        l = hyperparameters["learning_rate"]
        w = hyperparameters["weight_decay"]

        # load model and tokenizer; the network gets its own name so that the
        # `model` argument (a string) is never overwritten
        if model_name == "roberta": 
            tokenizer = RobertaTokenizer.from_pretrained(model_path)
            classifier = RobertaForSequenceClassification.from_pretrained(model_path, num_labels=3, return_dict=True)
        elif model_name == "albert" or model_name == "albert_pretrained_nli":
            tokenizer = AlbertTokenizer.from_pretrained(model_path)
            classifier = AlbertForSequenceClassification.from_pretrained(model_path, num_labels=3, return_dict=True)
        elif model_name == "tapas":
            tokenizer = TapasTokenizer.from_pretrained(model_path)
            config = TapasConfig.from_pretrained(model_path)
            config.num_labels = 3
            # Load the pretrained weights instead of a randomly initialised network; only
            # the classification head is re-initialised because its size changes to 3 labels.
            classifier = TapasForSequenceClassification.from_pretrained(
                model_path, config=config, ignore_mismatched_sizes=True)
        else: 
            tokenizer = BertTokenizer.from_pretrained(model_path)
            classifier = BertForSequenceClassification.from_pretrained(model_path, num_labels=3, return_dict=True)

        # preparing data for training
        # convert tables into desired representation 
        if model_name != "tapas":
            print("start preparing trainset.")
            # model_name (not the loaded network) selects the separator token
            input_train = [(prepare_input(entry, t5_tokenizer, t5_model, representation, shuffle, model_name), 
                            entry["label"]) for entry in trainset]

            print("start preparing testset.")
            input_test = [(prepare_input(entry, t5_tokenizer, t5_model, representation, shuffle, model_name), 
                           entry["label"]) for entry in testset]

            text_train, labels_train = process_data(input_train)
            text_test, labels_test = process_data(input_test)

            text_train_tok = tokenizer(text_train, padding=True, truncation=True, max_length=512)
            train_dataset = MYDataset(text_train_tok, labels_train)

            text_test_tok = tokenizer(text_test, padding=True, truncation=True, max_length=512)
            test_dataset = MYDataset(text_test_tok, labels_test)

        else:
            train_dataset = TableDataset(trainset, tokenizer)
            test_dataset = TableDataset(testset, tokenizer)

        print(f"Running training with following parameters: model={model_name}, representation={representation}, epoch={e}, batch size={b}, learning rate={l}, weight decay={w}.")

        # train model 
        trainer = model_trainer(classifier, train_dataset, training_epochs=e, batch_size=b, 
                                learning_rate=l, weight_decay=w)
        trainer.train() # training 

        # test
        predictions = trainer.predict(test_dataset) # testing

        print(f"Prediction is: {predictions.metrics['test_f1_macro']}.")

        df.loc[len(df)] = [dataset, model_name, e, b, l, w, 
                           predictions.metrics['test_f1_micro'], 
                           predictions.metrics['test_f1_macro'], 
                           predictions.metrics['test_class_rep']['SUPPORTS']['f1-score'], 
                           predictions.metrics['test_class_rep']['REFUTES']['f1-score'], 
                           predictions.metrics['test_class_rep']['NOT ENOUGH INFO']['f1-score'], 
                           predictions.metrics['test_loss'],
                           predictions.metrics['test_accuracy'], predictions.metrics]

        print(f"Saving results into file: fc_PREDICTION_{dataset}_{representation}_{model_name}.csv")
        df.to_csv(f'./fc_PREDICTION_{dataset}_{representation}_{model_name}.csv')

    return df 


In [None]:
def main_evaluation(t5_model, t5_tokenizer, dataset: str, representation: str, model: str, 
                    df: pd.DataFrame, shuffle = ""): 
    """Grid-searches training hyperparameters and scores every setting on the eval split.

    Parameters:
    t5_model, t5_tokenizer: table-to-text model and tokenizer handed to prepare_input
    dataset (str): dataset name understood by load_dataset
    representation (str): table representation passed to prepare_input
    model (str): key of model_dict; an empty value evaluates every model in model_dict
    df (pd.DataFrame): results frame; one row is appended per hyperparameter setting
    shuffle (str): "", "claim" or "table" (see prepare_input)

    Returns:
    pd.DataFrame: `df` with the new result rows (it is also written after every run to
                  fc_experiment_<dataset>_<representation>_<model>.csv)
    """
    from itertools import product

    # load dataset (the test split is not touched during hyperparameter selection)
    trainset, evalset, testset = load_dataset(dataset) 

    models_list = [model] if model else list(model_dict.keys())

    # hyperparameter grid
    batch_sizes = [4, 8, 16, 32]
    training_epochs = [10, 15, 20, 30]
    learning_rates = [1e-3, 1e-5, 1e-7]
    weight_decays = [0.01, 0.001, 0.0001]

    for model_name in models_list: 
        model_path = model_dict[model_name]

        if model_name == "roberta": 
            tokenizer = RobertaTokenizer.from_pretrained(model_path)
        elif model_name == "albert" or model_name == "albert_pretrained_nli":
            tokenizer = AlbertTokenizer.from_pretrained(model_path)
        elif model_name == "tapas":
            tokenizer = TapasTokenizer.from_pretrained(model_path)
        else: 
            tokenizer = BertTokenizer.from_pretrained(model_path)

        print("Preparing data for training.")
        if model_name == "tapas":
            train_dataset = TableDataset(trainset, tokenizer)
            eval_dataset = TableDataset(evalset, tokenizer)
        else:
            # convert tables into desired representation 
            print("Preparing data with desired representation.")

            def encode_split(samples):
                # Arguments follow prepare_input's parameter order; model_name picks
                # the separator token that matches the tokenizer of this model.
                pairs = [(prepare_input(entry, t5_tokenizer, t5_model, representation, shuffle, model_name), 
                          entry["label"]) for entry in samples]
                texts, labels = process_data(pairs)
                encodings = tokenizer(texts, padding=True, truncation=True, max_length=512)
                return MYDataset(encodings, labels)

            train_dataset = encode_split(trainset)
            eval_dataset = encode_split(evalset)

        # same iteration order as nested loops over epochs, batch sizes, learning rates, weight decays
        for e, b, l, w in product(training_epochs, batch_sizes, learning_rates, weight_decays):
            print(f"Running training with following parameters: representation={representation}, epoch={e}, batch size={b}, learning rate={l}, weight decay={w}.")

            # a fresh network is loaded for every setting so runs do not influence each other
            if model_name == "roberta": 
                print("Loading RoBERTa model.")
                classifier = RobertaForSequenceClassification.from_pretrained(model_path, num_labels=3, return_dict=True)
            elif model_name == "albert" or model_name == "albert_pretrained_nli":
                classifier = AlbertForSequenceClassification.from_pretrained(model_path, num_labels=3, return_dict=True)
            elif model_name == "tapas":
                config = TapasConfig.from_pretrained(model_path)
                config.num_labels = 3
                # Load the pretrained weights instead of a randomly initialised network; only
                # the classification head is re-initialised because its size changes to 3 labels.
                classifier = TapasForSequenceClassification.from_pretrained(
                    model_path, config=config, ignore_mismatched_sizes=True)
            else: 
                classifier = BertForSequenceClassification.from_pretrained(model_path, num_labels=3, return_dict=True)

            # train model 
            trainer = model_trainer(classifier, train_dataset, training_epochs=e, batch_size=b, 
                                    learning_rate=l, weight_decay=w)
            trainer.train() # training 

            # validate 
            predictions = trainer.predict(eval_dataset)

            df.loc[len(df)] = [dataset, model_name, e, b, l, w, 
                               predictions.metrics['test_f1_micro'], 
                               predictions.metrics['test_f1_macro'], 
                               predictions.metrics['test_class_rep']['SUPPORTS']['f1-score'], 
                               predictions.metrics['test_class_rep']['REFUTES']['f1-score'], 
                               predictions.metrics['test_class_rep']['NOT ENOUGH INFO']['f1-score'], 
                               predictions.metrics['test_loss'],
                               predictions.metrics['test_accuracy'], predictions.metrics]

            print(f"Saving results into file: fc_experiment_{dataset}_{representation}_{model_name}.csv")
            df.to_csv(f'./fc_experiment_{dataset}_{representation}_{model_name}.csv')

    # returned only after every requested model has been evaluated
    return df 
    

In [None]:
test_run = True

# load global variables
# The table-to-text T5 checkpoint is passed to main_evaluation / main_prediction below.
# The status message is printed first because loading the checkpoint can take a while.
print("Loading T5 model.")
t5_tokenizer = AutoTokenizer.from_pretrained("Narrativa/t5-base-finetuned-totto-table-to-text")
t5_model = AutoModelForSeq2SeqLM.from_pretrained("Narrativa/t5-base-finetuned-totto-table-to-text")


### Evaluation

In [None]:
# EVALUATION FOR HYPERPARAMETER TUNING 
# Calls main_evaluation for one dataset / representation / model combination. main_evaluation
# trains once per combination of its built-in epoch, batch size, learning rate and weight decay
# lists and appends one result row per run.

if __name__ == "__main__": 
    # empty results frame; main_evaluation fills the columns in exactly this order
    results_df = pd.DataFrame(columns=["dataset", "model", "epochs", "batch_size", "learning_rate", "weight_decay", 
                                       "f1_micro", "f1_macro", "f1_SUPPORTS", "f1_REFUTES", "f1_NEI", 
                                       "loss", "accuracy", "metrics"])
    data = "pubhealthtab" # set dataset name, e.g. infotabs, pubhealthtab, ... 
    rep = "concatenation" # set representation type
    mod = "roberta" # set model name
    
    # `shuffle` is left at its default (""), i.e. claims and tables are not shuffled
    results_df_updated = main_evaluation(t5_model, t5_tokenizer, dataset=data, representation=rep, 
                                         model=mod, df=results_df)


### Prediction

In [None]:
# PREDICTION 
# Calls main_prediction, which trains the selected model with the fixed values from
# model_hyperparameter_dict and reports metrics on the test split.

if __name__ == "__main__": 
    # empty results frame; main_prediction fills the columns in exactly this order
    results_df = pd.DataFrame(columns=["dataset", "model", "epochs", "batch_size", "learning_rate", "weight_decay", 
                                       "f1_micro", "f1_macro", "f1_SUPPORTS", "f1_REFUTES", "f1_NEI",
                                       "loss", "accuracy", "metrics"])
    # an empty model name makes main_prediction iterate over every key of model_dict
    data = "pubhealthtab" # set dataset name, e.g. infotabs, pubhealthtab, ... 
    rep = "concatenation" # set representation type
    mod = "roberta" # set model name
    
    # `shuffle` is left at its default (""), i.e. claims and tables are not shuffled
    results_df_updated = main_prediction(t5_model, t5_tokenizer, dataset=data, representation=rep, 
                                         model=mod, df=results_df)
    