In [15]:
#from shutil import copyfile

# copy file into the working directory (make sure it has .py suffix)
#copyfile(src = "../input/proxy-training-s2s/Proxy_training_S2S.py", dst = "../working/Proxy_training_S2S.py")

# import all functions
#from Proxy_training_S2S import ProxyTraining, DataLoad
#!pip install openpyxl
#!pip install torch

In [16]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
import os
from transformers import get_scheduler
from tqdm.auto import tqdm
# Importing the T5 modules from huggingface/transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import ProphetNetTokenizer, ProphetNetForConditionalGeneration
from datasets import load_metric
from sklearn.model_selection import KFold
import copy
import time 

# rich: for a better display on terminal
from rich.table import Column, Table
from rich import box
from rich.console import Console

In [17]:
class DataLoad:

    """

    Class to read in the BABE Dataset for the ProxyTask

    Uses the final labels SG1 for mbic and final labels SG2 for babe and combines them

    """

    @staticmethod

    def read_babe():


        df = pd.read_excel("../input/babe-media-bias-annotations-by-experts/data/final_labels_SG2.xlsx")

        lst = []

        for index, row in df.iterrows():

            if row['label_bias'] == "No agreement":

                pass

            else:

                sub_dict = {'text': 'biased or non-biased:' + row['text']}

                if row['label_bias'] == "Biased":

                    sub_dict['label'] = "Biased"

                elif row['label_bias'] == "Non-biased":

                    sub_dict['label'] = "Non-biased"

                lst.append(sub_dict)

        return pd.DataFrame(lst)

In [18]:
# Setting up the device for GPU usage
from torch import cuda
device = 'cuda:1' if cuda.is_available() else 'cpu'

In [19]:
class YourDataSetClass(Dataset):
    """
    Creating a custom dataset for reading the dataset and
    loading it into the dataloader to pass it to the
    neural network for finetuning the model

    """

    def __init__(
        self, dataframe, tokenizer, source_len, target_len, source_text, target_text
    ):
        """
        Initializes a Dataset class

        Args:
            dataframe (pandas.DataFrame): Input dataframe
            tokenizer (transformers.tokenizer): Transformers tokenizer
            source_len (int): Max length of source text
            target_len (int): Max length of target text
            source_text (str): column name of source text
            target_text (str): column name of target text
        """
        self.tokenizer = tokenizer
        self.data = dataframe
        self.source_len = source_len
        self.summ_len = target_len
        self.target_text = self.data[target_text]
        self.source_text = self.data[source_text]

    def __len__(self):
        """returns the length of dataframe"""

        return len(self.target_text)

    def __getitem__(self, index):
        """return the input ids, attention masks and target ids"""

        source_text = str(self.source_text[index])
        target_text = str(self.target_text[index])

        # cleaning data so as to ensure data is in string type
        source_text = " ".join(source_text.split())
        target_text = " ".join(target_text.split())

        source = self.tokenizer.batch_encode_plus(
            [source_text],
            max_length=self.source_len,
            pad_to_max_length=True,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
        )
        target = self.tokenizer.batch_encode_plus(
            [target_text],
            max_length=self.summ_len,
            pad_to_max_length=True,
            truncation=True,
            padding="max_length",
            return_tensors="pt",
        )

        source_ids = source["input_ids"].squeeze()
        source_mask = source["attention_mask"].squeeze()
        target_ids = target["input_ids"].squeeze()
        target_mask = target["attention_mask"].squeeze()

        return {
            "source_ids": source_ids.to(dtype=torch.long),
            "source_mask": source_mask.to(dtype=torch.long),
            "target_ids": target_ids.to(dtype=torch.long),
            "target_ids_y": target_ids.to(dtype=torch.long),
        }


In [20]:
def train(epoch, tokenizer, model, device, loader, optimizer):

    """
    Function to be called for training with the parameters passed from main function

    """

    model.train()
    for _, data in enumerate(loader, 0):
        #print(data)
        y = data["target_ids"].to(device, dtype=torch.long)
        y_ids = y[:, :-1].contiguous()
        #print(y_ids)
        lm_labels = y[:, 1:].clone().detach()
        lm_labels[y[:, 1:] == tokenizer.pad_token_id] = -100
        #print(lm_labels.shape)
        #print(lm_labels)
        ids = data["source_ids"].to(device, dtype=torch.long)
        mask = data["source_mask"].to(device, dtype=torch.long)
        #print(ids, mask, y_ids, lm_labels)
        outputs = model(
            input_ids=ids,
            attention_mask=mask,
            labels=y
        )
        loss = outputs[0]

        #if _ % 10 == 0:
         #   training_logger.add_row(str(epoch), str(_), str(loss))
          #  console.print(training_logger)
        lr_scheduler.step()
        progress_bar.update(1)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [21]:
def validate(epoch, tokenizer, model, device, loader):

    """
    Function to evaluate model for predictions

    """
    model.eval()
    predictions = []
    actuals = []
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            #print(data)
            y = data['target_ids'].to(device, dtype = torch.long)
            ids = data['source_ids'].to(device, dtype = torch.long)
            mask = data['source_mask'].to(device, dtype = torch.long)
            lm_labels = y[:,1:].clone().detach()
            lm_labels[y[:, 1:] == tokenizer.pad_token_id] = -100
            # y_biased, y_nonbiased = [ 2106,     9,  3843,     1,     0], [ 5388,    18, 15500,  3843,     1] #T5
            y_biased, y_nonbiased = [25352,   102,     0,     0,     0], [ 2512,  1011, 25352,   102,     0] # ProphetNet
            prediction_lst, probs_biased, probs_nonbiased = [], [], []
            for i in range(len(ids)):
                output = model(input_ids=ids[i].unsqueeze(0),
                    attention_mask=mask[i].unsqueeze(0),
                    labels=y[i].unsqueeze(0))
                probs = F.softmax(output[1], dim=1)
                for j in range(len(probs[0])):
                    probs_biased.append(probs[0][j][y_biased[j]])
                    probs_nonbiased.append(probs[0][j][y_nonbiased[j]])
                probs_b = sum(probs_biased)/len(probs_biased)
                probs_nb = sum(probs_nonbiased)/len(probs_nonbiased)
                if probs_b > probs_nb:
                    prediction_lst.append(1)
                else:
                    prediction_lst.append(0)
            # target = [tokenizer.decode(t, skip_special_tokens=True, clean_up_tokenization_spaces=True)for t in y]
            #if _%10==0:
             #   console.print(f'Completed {_}')

            predictions.extend(prediction_lst)
            actuals.extend(y)
        return predictions, actuals


In [22]:
def ModelTrainer(
    dataframe, source_text, target_text, model_params
):

    """
    Model trainer

    """

    # Set random seeds and deterministic pytorch for reproducibility
    torch.manual_seed(model_params["SEED"])  # pytorch random seed
    np.random.seed(model_params["SEED"])  # numpy random seed
    torch.backends.cudnn.deterministic = True
    print(f"""[Model]: Loading {model_params["MODEL"]}...\n""")
    transformer_model = ProphetNetForConditionalGeneration.from_pretrained(model_params["MODEL"], use_cache = False)
    dataframe = dataframe[[source_text, target_text]]

    # tokenzier for encoding the text
    tokenizer = ProphetNetTokenizer.from_pretrained(model_params["MODEL"])
    splits = KFold(n_splits=5, shuffle=True, random_state=42)  # k-fold k=5 as in the BABE paper
    overall_f1, results = [], [] # list for all f1 scores of the folds
    for fold, (train_ids, val_ids) in enumerate(splits.split(np.arange(len(dataframe)))):

        # Defining the model. We are using a base model and added a Language model layer on top for generation of Summary.
        # Further this model is sent to device (GPU/TPU) for using the hardware.
        
           
        model = copy.deepcopy(transformer_model)
        model.to(device)
        train_dataset = pd.DataFrame(dataframe.iloc[train_ids]).reset_index(drop=True)
        val_dataset = pd.DataFrame(dataframe.iloc[val_ids]).reset_index(drop=True)
    

        print(f"FULL Dataset: {dataframe.shape}")
        print(f"TRAIN Dataset: {train_dataset.shape}")
        print(f"TEST Dataset: {val_dataset.shape}\n")

        # Creating the Training and Validation dataset for further creation of Dataloader
        training_set = YourDataSetClass(
            train_dataset,
            tokenizer,
            model_params["MAX_SOURCE_TEXT_LENGTH"],
            model_params["MAX_TARGET_TEXT_LENGTH"],
            source_text,
            target_text,
        )
        val_set = YourDataSetClass(
            val_dataset,
            tokenizer,
            model_params["MAX_SOURCE_TEXT_LENGTH"],
            model_params["MAX_TARGET_TEXT_LENGTH"],
            source_text,
            target_text,
        )

        # Defining the parameters for creation of dataloaders
        train_params = {
            "batch_size": model_params["TRAIN_BATCH_SIZE"],
            "shuffle": True,
            "num_workers": 0,
        }

        val_params = {
            "batch_size": model_params["VALID_BATCH_SIZE"],
            "shuffle": False,
            "num_workers": 0,
        }

        # Creation of Dataloaders for testing and validation. This will be used down for training and validation stage for the model.
        training_loader = DataLoader(training_set, **train_params)
        val_loader = DataLoader(val_set, **val_params)

        # Defining the optimizer that will be used to tune the weights of the network in the training session.
        optimizer = torch.optim.Adam(
            params=model.parameters(), lr=model_params["LEARNING_RATE"]
        )

        # Training loop
        print(f"[Initiating Fine Tuning]...\n")
        num_training_steps = model_params["TRAIN_EPOCHS"] * len(training_loader)  # for the progress bar
        global lr_scheduler 
        lr_scheduler = get_scheduler(
            "linear",
            optimizer=optimizer,
            num_warmup_steps=0,
            num_training_steps=num_training_steps
        )
        global progress_bar 
        progress_bar = tqdm(range(num_training_steps))
        
        test_f1_lst = []

        for epoch in range(model_params["TRAIN_EPOCHS"]):
            #TRAINING
            print(f"Epoch {epoch}")
            train(epoch, tokenizer, model, device, training_loader, optimizer)

            #EVALUATION
            predictions, actuals = validate(epoch, tokenizer, model, device, val_loader)
            #final_df = pd.DataFrame({"Generated Text": predictions, "Actual Text": actuals})
            #print(predictions, actuals)
            target_transformed = []
            #print(actuals)
            for i in actuals:
                if i.tolist() ==  [ 2512,  1011, 25352,   102,     0]:
                    target_transformed.append(0)
                elif i.tolist() == [25352,   102,     0,     0,     0]:
                    target_transformed.append(1)
                else:
                    target_transformed.append(2)  # only a workaround for now
                    
            #print(predictions, target_transformed)
            metric_f1 = load_metric("f1")
            metric_acc = load_metric("accuracy")
            metric_f1.add_batch(predictions=predictions, references=target_transformed)
            metric_acc.add_batch(predictions=predictions, references=target_transformed)

            test_f1, test_acc = metric_f1.compute(average='weighted'), metric_acc.compute()
            print(f"Fold: {fold} Epoch {epoch}: Avg test acc: {test_acc}, Avg test f1: {test_f1}")
            results.append(f"Fold: {fold} Epoch {epoch}: Avg test acc: {test_acc}, Avg test f1: {test_f1}")
            test_f1_lst.append(test_f1['f1'])
            
        overall_f1.append(max(test_f1_lst))
        
    avg_f1 = sum(overall_f1) / len(overall_f1)
    results.append(avg_f1)
    print(results)
    with open(str(time.time())+model_params["MODEL"]+'results.txt', 'w') as f:
            f.write(str(results))
    print(f'Overall avg f1: {avg_f1}')
    return avg_f1

In [23]:
model_params_t5 = {
    "MODEL": "t5-base",  # model_type: t5-base/t5-large
    "TRAIN_BATCH_SIZE": 4,  # training batch size
    "VALID_BATCH_SIZE": 4,  # validation batch size
    "TRAIN_EPOCHS": 4,  # number of training epochs
    "VAL_EPOCHS": 1,  # number of validation epochs
    "LEARNING_RATE": 5e-5,  # learning rate
    "MAX_SOURCE_TEXT_LENGTH": 512,  # max length of source text
    "MAX_TARGET_TEXT_LENGTH": 5,  # max length of target text
    "SEED": 42,  # set seed for reproducibility
}

In [24]:
model_params_prophetnet = {
    "MODEL": "microsoft/prophetnet-large-uncased",  # model_type
    "TRAIN_BATCH_SIZE": 8,  # training batch size
    "VALID_BATCH_SIZE": 8,  # validation batch size
    "TRAIN_EPOCHS": 4,  # number of training epochs
    "VAL_EPOCHS": 1,  # number of validation epochs
    "LEARNING_RATE": 5e-5,  # learning rate
    "MAX_SOURCE_TEXT_LENGTH": 512,  # max length of source text
    "MAX_TARGET_TEXT_LENGTH": 5,  # max length of target text
    "SEED": 42,  # set seed for reproducibility
}

In [25]:
df = DataLoad.read_babe()
ModelTrainer(
    dataframe=df,
    source_text="text",
    target_text="label",
    model_params=model_params_prophetnet,
)

[Model]: Loading microsoft/prophetnet-large-uncased...



Downloading: 100%|██████████| 1.40k/1.40k [00:00<00:00, 1.92MB/s]
Downloading: 100%|██████████| 1.57G/1.57G [00:20<00:00, 74.6MB/s]
Downloading: 100%|██████████| 232k/232k [00:00<00:00, 534kB/s] 
Downloading: 100%|██████████| 90.0/90.0 [00:00<00:00, 149kB/s]
Downloading: 100%|██████████| 141/141 [00:00<00:00, 230kB/s]


FULL Dataset: (3673, 2)
TRAIN Dataset: (2938, 2)
TEST Dataset: (735, 2)

[Initiating Fine Tuning]...



100%|██████████| 2940/2940 [31:06<00:00,  1.58it/s]
  0%|          | 1/1472 [00:00<03:50,  6.38it/s]

Epoch 0


 25%|██▌       | 369/1472 [03:49<2:58:30,  9.71s/it]

Fold: 0 Epoch 0: Avg test acc: {'accuracy': 0.636734693877551}, Avg test f1: {'f1': 0.577809672385237}
Epoch 1


 50%|█████     | 737/1472 [07:37<1:57:03,  9.56s/it]

Fold: 0 Epoch 1: Avg test acc: {'accuracy': 0.636734693877551}, Avg test f1: {'f1': 0.577809672385237}
Epoch 2


 75%|███████▌  | 1105/1472 [11:26<1:00:11,  9.84s/it]

Fold: 0 Epoch 2: Avg test acc: {'accuracy': 0.636734693877551}, Avg test f1: {'f1': 0.577809672385237}
Epoch 3


100%|██████████| 1472/1472 [14:44<00:00,  1.93it/s]  

Fold: 0 Epoch 3: Avg test acc: {'accuracy': 0.636734693877551}, Avg test f1: {'f1': 0.577809672385237}
FULL Dataset: (3673, 2)
TRAIN Dataset: (2938, 2)
TEST Dataset: (735, 2)

[Initiating Fine Tuning]...




100%|██████████| 1472/1472 [15:15<00:00,  1.61it/s]

  0%|          | 1/1472 [00:00<03:14,  7.55it/s][A

Epoch 0



  0%|          | 2/1472 [00:00<09:13,  2.66it/s][A
  0%|          | 3/1472 [00:01<11:01,  2.22it/s][A
  0%|          | 4/1472 [00:01<11:53,  2.06it/s][A
  0%|          | 5/1472 [00:02<12:20,  1.98it/s][A
  0%|          | 6/1472 [00:02<12:36,  1.94it/s][A
  0%|          | 7/1472 [00:03<12:47,  1.91it/s][A
  1%|          | 8/1472 [00:03<12:53,  1.89it/s][A
  1%|          | 9/1472 [00:04<12:59,  1.88it/s][A
  1%|          | 10/1472 [00:04<13:01,  1.87it/s][A
  1%|          | 11/1472 [00:05<13:03,  1.87it/s][A
  1%|          | 12/1472 [00:06<13:03,  1.86it/s][A
  1%|          | 13/1472 [00:06<13:04,  1.86it/s][A
  1%|          | 14/1472 [00:07<13:03,  1.86it/s][A
  1%|          | 15/1472 [00:07<13:03,  1.86it/s][A
  1%|          | 16/1472 [00:08<13:03,  1.86it/s][A
  1%|          | 17/1472 [00:08<13:03,  1.86it/s][A
  1%|          | 18/1472 [00:09<13:02,  1.86it/s][A
  1%|▏         | 19/1472 [00:09<13:02,  1.86it/s][A
  1%|▏         | 20/1472 [00:10<13:01,  1.86it/s][A


 11%|█         | 155/1472 [01:23<11:49,  1.86it/s][A
 11%|█         | 156/1472 [01:23<11:48,  1.86it/s][A
 11%|█         | 157/1472 [01:24<11:48,  1.86it/s][A
 11%|█         | 158/1472 [01:24<11:48,  1.85it/s][A
 11%|█         | 159/1472 [01:25<11:47,  1.86it/s][A
 11%|█         | 160/1472 [01:25<11:47,  1.86it/s][A
 11%|█         | 161/1472 [01:26<11:46,  1.85it/s][A
 11%|█         | 162/1472 [01:26<11:46,  1.85it/s][A
 11%|█         | 163/1472 [01:27<11:45,  1.86it/s][A
 11%|█         | 164/1472 [01:27<11:44,  1.86it/s][A
 11%|█         | 165/1472 [01:28<11:43,  1.86it/s][A
 11%|█▏        | 166/1472 [01:29<11:43,  1.86it/s][A
 11%|█▏        | 167/1472 [01:29<11:42,  1.86it/s][A
 11%|█▏        | 168/1472 [01:30<11:42,  1.86it/s][A
 11%|█▏        | 169/1472 [01:30<11:41,  1.86it/s][A
 12%|█▏        | 170/1472 [01:31<11:41,  1.86it/s][A
 12%|█▏        | 171/1472 [01:31<11:40,  1.86it/s][A
 12%|█▏        | 172/1472 [01:32<11:40,  1.86it/s][A
 12%|█▏        | 173/1472 [0

 21%|██        | 306/1472 [02:44<10:28,  1.86it/s][A
 21%|██        | 307/1472 [02:44<10:27,  1.86it/s][A
 21%|██        | 308/1472 [02:45<10:27,  1.86it/s][A
 21%|██        | 309/1472 [02:46<10:26,  1.86it/s][A
 21%|██        | 310/1472 [02:46<10:26,  1.86it/s][A
 21%|██        | 311/1472 [02:47<10:25,  1.86it/s][A
 21%|██        | 312/1472 [02:47<10:25,  1.86it/s][A
 21%|██▏       | 313/1472 [02:48<10:24,  1.86it/s][A
 21%|██▏       | 314/1472 [02:48<10:24,  1.86it/s][A
 21%|██▏       | 315/1472 [02:49<10:23,  1.86it/s][A
 21%|██▏       | 316/1472 [02:49<10:22,  1.86it/s][A
 22%|██▏       | 317/1472 [02:50<10:22,  1.86it/s][A
 22%|██▏       | 318/1472 [02:50<10:21,  1.86it/s][A
 22%|██▏       | 319/1472 [02:51<10:21,  1.86it/s][A
 22%|██▏       | 320/1472 [02:51<10:20,  1.86it/s][A
 22%|██▏       | 321/1472 [02:52<10:19,  1.86it/s][A
 22%|██▏       | 322/1472 [02:53<10:19,  1.86it/s][A
 22%|██▏       | 323/1472 [02:53<10:18,  1.86it/s][A
 22%|██▏       | 324/1472 [0

Fold: 1 Epoch 0: Avg test acc: {'accuracy': 0.6136054421768707}, Avg test f1: {'f1': 0.5492760451944125}
Epoch 1



 25%|██▌       | 370/1472 [03:49<2:06:18,  6.88s/it][A
 25%|██▌       | 371/1472 [03:49<1:31:18,  4.98s/it][A
 25%|██▌       | 372/1472 [03:50<1:06:48,  3.64s/it][A
 25%|██▌       | 373/1472 [03:50<49:41,  2.71s/it]  [A
 25%|██▌       | 374/1472 [03:51<37:42,  2.06s/it][A
 25%|██▌       | 375/1472 [03:51<29:20,  1.60s/it][A
 26%|██▌       | 376/1472 [03:52<23:28,  1.28s/it][A
 26%|██▌       | 377/1472 [03:52<19:21,  1.06s/it][A
 26%|██▌       | 378/1472 [03:53<16:29,  1.11it/s][A
 26%|██▌       | 379/1472 [03:53<14:28,  1.26it/s][A
 26%|██▌       | 380/1472 [03:54<13:04,  1.39it/s][A
 26%|██▌       | 381/1472 [03:55<12:05,  1.50it/s][A
 26%|██▌       | 382/1472 [03:55<11:23,  1.59it/s][A
 26%|██▌       | 383/1472 [03:56<10:53,  1.67it/s][A
 26%|██▌       | 384/1472 [03:56<10:32,  1.72it/s][A
 26%|██▌       | 385/1472 [03:57<10:18,  1.76it/s][A
 26%|██▌       | 386/1472 [03:57<10:07,  1.79it/s][A
 26%|██▋       | 387/1472 [03:58<10:00,  1.81it/s][A
 26%|██▋       | 38

 35%|███▌      | 521/1472 [05:10<08:30,  1.86it/s][A
 35%|███▌      | 522/1472 [05:10<08:29,  1.86it/s][A
 36%|███▌      | 523/1472 [05:11<08:28,  1.86it/s][A
 36%|███▌      | 524/1472 [05:11<08:28,  1.86it/s][A
 36%|███▌      | 525/1472 [05:12<08:28,  1.86it/s][A
 36%|███▌      | 526/1472 [05:13<08:27,  1.86it/s][A
 36%|███▌      | 527/1472 [05:13<08:26,  1.86it/s][A
 36%|███▌      | 528/1472 [05:14<08:26,  1.86it/s][A
 36%|███▌      | 529/1472 [05:14<08:25,  1.86it/s][A
 36%|███▌      | 530/1472 [05:15<08:25,  1.86it/s][A
 36%|███▌      | 531/1472 [05:15<08:24,  1.86it/s][A
 36%|███▌      | 532/1472 [05:16<08:24,  1.86it/s][A
 36%|███▌      | 533/1472 [05:16<08:24,  1.86it/s][A
 36%|███▋      | 534/1472 [05:17<08:23,  1.86it/s][A
 36%|███▋      | 535/1472 [05:17<08:22,  1.86it/s][A
 36%|███▋      | 536/1472 [05:18<08:22,  1.86it/s][A
 36%|███▋      | 537/1472 [05:18<08:21,  1.86it/s][A
 37%|███▋      | 538/1472 [05:19<08:21,  1.86it/s][A
 37%|███▋      | 539/1472 [0

 46%|████▌     | 672/1472 [06:31<07:09,  1.86it/s][A
 46%|████▌     | 673/1472 [06:31<07:08,  1.86it/s][A
 46%|████▌     | 674/1472 [06:32<07:08,  1.86it/s][A
 46%|████▌     | 675/1472 [06:32<07:07,  1.86it/s][A
 46%|████▌     | 676/1472 [06:33<07:07,  1.86it/s][A
 46%|████▌     | 677/1472 [06:34<07:06,  1.86it/s][A
 46%|████▌     | 678/1472 [06:34<07:05,  1.86it/s][A
 46%|████▌     | 679/1472 [06:35<07:05,  1.86it/s][A
 46%|████▌     | 680/1472 [06:35<07:04,  1.86it/s][A
 46%|████▋     | 681/1472 [06:36<07:04,  1.86it/s][A
 46%|████▋     | 682/1472 [06:36<07:03,  1.86it/s][A
 46%|████▋     | 683/1472 [06:37<07:03,  1.86it/s][A
 46%|████▋     | 684/1472 [06:37<07:02,  1.86it/s][A
 47%|████▋     | 685/1472 [06:38<07:09,  1.83it/s][A
 47%|████▋     | 686/1472 [06:38<07:08,  1.84it/s][A
 47%|████▋     | 687/1472 [06:39<07:10,  1.83it/s][A
 47%|████▋     | 688/1472 [06:39<07:07,  1.83it/s][A
 47%|████▋     | 689/1472 [06:40<07:04,  1.84it/s][A
 47%|████▋     | 690/1472 [0

Fold: 1 Epoch 1: Avg test acc: {'accuracy': 0.6136054421768707}, Avg test f1: {'f1': 0.5492760451944125}
Epoch 2



 50%|█████     | 738/1472 [07:36<1:23:54,  6.86s/it][A
 50%|█████     | 739/1472 [07:37<1:00:37,  4.96s/it][A
 50%|█████     | 740/1472 [07:37<44:20,  3.63s/it]  [A
 50%|█████     | 741/1472 [07:38<32:57,  2.70s/it][A
 50%|█████     | 742/1472 [07:39<24:59,  2.05s/it][A
 50%|█████     | 743/1472 [07:39<19:25,  1.60s/it][A
 51%|█████     | 744/1472 [07:40<15:31,  1.28s/it][A
 51%|█████     | 745/1472 [07:40<12:48,  1.06s/it][A
 51%|█████     | 746/1472 [07:41<10:53,  1.11it/s][A
 51%|█████     | 747/1472 [07:41<09:33,  1.26it/s][A
 51%|█████     | 748/1472 [07:42<08:37,  1.40it/s][A
 51%|█████     | 749/1472 [07:42<07:57,  1.51it/s][A
 51%|█████     | 750/1472 [07:43<07:30,  1.60it/s][A
 51%|█████     | 751/1472 [07:43<07:10,  1.67it/s][A
 51%|█████     | 752/1472 [07:44<06:57,  1.73it/s][A
 51%|█████     | 753/1472 [07:44<06:47,  1.77it/s][A
 51%|█████     | 754/1472 [07:45<06:40,  1.79it/s][A
 51%|█████▏    | 755/1472 [07:45<06:34,  1.82it/s][A
 51%|█████▏    | 756/

 60%|██████    | 889/1472 [08:57<05:12,  1.86it/s][A
 60%|██████    | 890/1472 [08:58<05:12,  1.86it/s][A
 61%|██████    | 891/1472 [08:58<05:11,  1.86it/s][A
 61%|██████    | 892/1472 [08:59<05:11,  1.86it/s][A
 61%|██████    | 893/1472 [09:00<05:10,  1.86it/s][A
 61%|██████    | 894/1472 [09:00<05:10,  1.86it/s][A
 61%|██████    | 895/1472 [09:01<05:09,  1.86it/s][A
 61%|██████    | 896/1472 [09:01<05:08,  1.87it/s][A
 61%|██████    | 897/1472 [09:02<05:08,  1.87it/s][A
 61%|██████    | 898/1472 [09:02<05:07,  1.86it/s][A
 61%|██████    | 899/1472 [09:03<05:07,  1.86it/s][A
 61%|██████    | 900/1472 [09:03<05:06,  1.86it/s][A
 61%|██████    | 901/1472 [09:04<05:06,  1.86it/s][A
 61%|██████▏   | 902/1472 [09:04<05:05,  1.86it/s][A
 61%|██████▏   | 903/1472 [09:05<05:05,  1.86it/s][A
 61%|██████▏   | 904/1472 [09:05<05:04,  1.86it/s][A
 61%|██████▏   | 905/1472 [09:06<05:04,  1.86it/s][A
 62%|██████▏   | 906/1472 [09:06<05:03,  1.86it/s][A
 62%|██████▏   | 907/1472 [0

 71%|███████   | 1039/1472 [10:18<03:52,  1.86it/s][A
 71%|███████   | 1040/1472 [10:18<03:51,  1.86it/s][A
 71%|███████   | 1041/1472 [10:19<03:51,  1.86it/s][A
 71%|███████   | 1042/1472 [10:19<03:50,  1.86it/s][A
 71%|███████   | 1043/1472 [10:20<03:50,  1.86it/s][A
 71%|███████   | 1044/1472 [10:21<03:49,  1.86it/s][A
 71%|███████   | 1045/1472 [10:21<03:49,  1.86it/s][A
 71%|███████   | 1046/1472 [10:22<03:48,  1.86it/s][A
 71%|███████   | 1047/1472 [10:22<03:48,  1.86it/s][A
 71%|███████   | 1048/1472 [10:23<03:47,  1.86it/s][A
 71%|███████▏  | 1049/1472 [10:23<03:46,  1.86it/s][A
 71%|███████▏  | 1050/1472 [10:24<03:46,  1.86it/s][A
 71%|███████▏  | 1051/1472 [10:24<03:45,  1.86it/s][A
 71%|███████▏  | 1052/1472 [10:25<03:45,  1.86it/s][A
 72%|███████▏  | 1053/1472 [10:25<03:44,  1.86it/s][A
 72%|███████▏  | 1054/1472 [10:26<03:44,  1.86it/s][A
 72%|███████▏  | 1055/1472 [10:26<03:43,  1.86it/s][A
 72%|███████▏  | 1056/1472 [10:27<03:43,  1.86it/s][A
 72%|█████

Fold: 1 Epoch 2: Avg test acc: {'accuracy': 0.6136054421768707}, Avg test f1: {'f1': 0.5492760451944125}
Epoch 3



 75%|███████▌  | 1106/1472 [11:25<43:09,  7.07s/it]  [A
 75%|███████▌  | 1107/1472 [11:25<31:06,  5.11s/it][A
 75%|███████▌  | 1108/1472 [11:26<22:41,  3.74s/it][A
 75%|███████▌  | 1109/1472 [11:27<16:48,  2.78s/it][A
 75%|███████▌  | 1110/1472 [11:27<12:42,  2.11s/it][A
 75%|███████▌  | 1111/1472 [11:28<09:50,  1.63s/it][A
 76%|███████▌  | 1112/1472 [11:28<07:49,  1.30s/it][A
 76%|███████▌  | 1113/1472 [11:29<06:25,  1.07s/it][A
 76%|███████▌  | 1114/1472 [11:29<05:26,  1.10it/s][A
 76%|███████▌  | 1115/1472 [11:30<04:45,  1.25it/s][A
 76%|███████▌  | 1116/1472 [11:30<04:16,  1.39it/s][A
 76%|███████▌  | 1117/1472 [11:31<03:56,  1.50it/s][A
 76%|███████▌  | 1118/1472 [11:31<03:41,  1.60it/s][A
 76%|███████▌  | 1119/1472 [11:32<03:31,  1.67it/s][A
 76%|███████▌  | 1120/1472 [11:32<03:24,  1.72it/s][A
 76%|███████▌  | 1121/1472 [11:33<03:19,  1.76it/s][A
 76%|███████▌  | 1122/1472 [11:33<03:15,  1.79it/s][A
 76%|███████▋  | 1123/1472 [11:34<03:12,  1.81it/s][A
 76%|██

 85%|████████▌ | 1254/1472 [12:44<01:56,  1.87it/s][A
 85%|████████▌ | 1255/1472 [12:45<01:56,  1.87it/s][A
 85%|████████▌ | 1256/1472 [12:45<01:55,  1.87it/s][A
 85%|████████▌ | 1257/1472 [12:46<01:55,  1.86it/s][A
 85%|████████▌ | 1258/1472 [12:46<01:54,  1.87it/s][A
 86%|████████▌ | 1259/1472 [12:47<01:54,  1.86it/s][A
 86%|████████▌ | 1260/1472 [12:47<01:53,  1.87it/s][A
 86%|████████▌ | 1261/1472 [12:48<01:53,  1.86it/s][A
 86%|████████▌ | 1262/1472 [12:49<01:52,  1.87it/s][A
 86%|████████▌ | 1263/1472 [12:49<01:52,  1.86it/s][A
 86%|████████▌ | 1264/1472 [12:50<01:51,  1.86it/s][A
 86%|████████▌ | 1265/1472 [12:50<01:51,  1.86it/s][A
 86%|████████▌ | 1266/1472 [12:51<01:50,  1.86it/s][A
 86%|████████▌ | 1267/1472 [12:51<01:49,  1.86it/s][A
 86%|████████▌ | 1268/1472 [12:52<01:49,  1.86it/s][A
 86%|████████▌ | 1269/1472 [12:52<01:48,  1.87it/s][A
 86%|████████▋ | 1270/1472 [12:53<01:48,  1.87it/s][A
 86%|████████▋ | 1271/1472 [12:53<01:47,  1.87it/s][A
 86%|█████

 95%|█████████▌| 1402/1472 [14:04<00:37,  1.87it/s][A
 95%|█████████▌| 1403/1472 [14:04<00:36,  1.87it/s][A
 95%|█████████▌| 1404/1472 [14:05<00:36,  1.86it/s][A
 95%|█████████▌| 1405/1472 [14:05<00:35,  1.86it/s][A
 96%|█████████▌| 1406/1472 [14:06<00:35,  1.86it/s][A
 96%|█████████▌| 1407/1472 [14:06<00:34,  1.86it/s][A
 96%|█████████▌| 1408/1472 [14:07<00:34,  1.87it/s][A
 96%|█████████▌| 1409/1472 [14:07<00:33,  1.87it/s][A
 96%|█████████▌| 1410/1472 [14:08<00:33,  1.87it/s][A
 96%|█████████▌| 1411/1472 [14:08<00:32,  1.87it/s][A
 96%|█████████▌| 1412/1472 [14:09<00:32,  1.87it/s][A
 96%|█████████▌| 1413/1472 [14:09<00:31,  1.87it/s][A
 96%|█████████▌| 1414/1472 [14:10<00:31,  1.87it/s][A
 96%|█████████▌| 1415/1472 [14:11<00:30,  1.87it/s][A
 96%|█████████▌| 1416/1472 [14:11<00:29,  1.87it/s][A
 96%|█████████▋| 1417/1472 [14:12<00:29,  1.87it/s][A
 96%|█████████▋| 1418/1472 [14:12<00:28,  1.87it/s][A
 96%|█████████▋| 1419/1472 [14:13<00:28,  1.87it/s][A
 96%|█████

Fold: 1 Epoch 3: Avg test acc: {'accuracy': 0.6258503401360545}, Avg test f1: {'f1': 0.5681211346968096}
FULL Dataset: (3673, 2)
TRAIN Dataset: (2938, 2)
TEST Dataset: (735, 2)

[Initiating Fine Tuning]...



100%|██████████| 1472/1472 [15:15<00:00,  1.61it/s]
  0%|          | 1/1472 [00:00<03:27,  7.10it/s]

Epoch 0


 25%|██▌       | 369/1472 [03:55<3:39:52, 11.96s/it]

Fold: 2 Epoch 0: Avg test acc: {'accuracy': 0.6517006802721088}, Avg test f1: {'f1': 0.5977181310133804}
Epoch 1


 50%|█████     | 737/1472 [07:48<2:18:10, 11.28s/it]

Fold: 2 Epoch 1: Avg test acc: {'accuracy': 0.6517006802721088}, Avg test f1: {'f1': 0.5977181310133804}
Epoch 2


 75%|███████▌  | 1105/1472 [11:42<1:10:15, 11.49s/it]

Fold: 2 Epoch 2: Avg test acc: {'accuracy': 0.6517006802721088}, Avg test f1: {'f1': 0.5986550768630322}
Epoch 3


100%|██████████| 1472/1472 [14:59<00:00,  1.95it/s]  

Fold: 2 Epoch 3: Avg test acc: {'accuracy': 0.636734693877551}, Avg test f1: {'f1': 0.5748372040750218}
FULL Dataset: (3673, 2)
TRAIN Dataset: (2939, 2)
TEST Dataset: (734, 2)

[Initiating Fine Tuning]...




100%|██████████| 1472/1472 [15:30<00:00,  1.58it/s]

  0%|          | 1/1472 [00:00<03:20,  7.35it/s][A

Epoch 0



  0%|          | 2/1472 [00:00<09:15,  2.65it/s][A
  0%|          | 3/1472 [00:01<11:01,  2.22it/s][A
  0%|          | 4/1472 [00:01<11:51,  2.06it/s][A
  0%|          | 5/1472 [00:02<12:18,  1.99it/s][A
  0%|          | 6/1472 [00:02<12:34,  1.94it/s][A
  0%|          | 7/1472 [00:03<12:43,  1.92it/s][A
  1%|          | 8/1472 [00:03<12:49,  1.90it/s][A
  1%|          | 9/1472 [00:04<12:53,  1.89it/s][A
  1%|          | 10/1472 [00:04<12:56,  1.88it/s][A
  1%|          | 11/1472 [00:05<12:58,  1.88it/s][A
  1%|          | 12/1472 [00:06<12:59,  1.87it/s][A
  1%|          | 13/1472 [00:06<13:00,  1.87it/s][A
  1%|          | 14/1472 [00:07<13:00,  1.87it/s][A
  1%|          | 15/1472 [00:07<13:00,  1.87it/s][A
  1%|          | 16/1472 [00:08<13:00,  1.87it/s][A
  1%|          | 17/1472 [00:08<13:00,  1.87it/s][A
  1%|          | 18/1472 [00:09<12:59,  1.87it/s][A
  1%|▏         | 19/1472 [00:09<12:59,  1.87it/s][A
  1%|▏         | 20/1472 [00:10<12:58,  1.87it/s][A


 11%|█         | 155/1472 [01:22<11:46,  1.86it/s][A
 11%|█         | 156/1472 [01:23<11:45,  1.86it/s][A
 11%|█         | 157/1472 [01:23<11:44,  1.87it/s][A
 11%|█         | 158/1472 [01:24<11:44,  1.86it/s][A
 11%|█         | 159/1472 [01:24<11:44,  1.86it/s][A
 11%|█         | 160/1472 [01:25<11:44,  1.86it/s][A
 11%|█         | 161/1472 [01:25<11:43,  1.86it/s][A
 11%|█         | 162/1472 [01:26<11:42,  1.86it/s][A
 11%|█         | 163/1472 [01:27<11:42,  1.86it/s][A
 11%|█         | 164/1472 [01:27<11:41,  1.86it/s][A
 11%|█         | 165/1472 [01:28<11:41,  1.86it/s][A
 11%|█▏        | 166/1472 [01:28<11:40,  1.86it/s][A
 11%|█▏        | 167/1472 [01:29<11:39,  1.86it/s][A
 11%|█▏        | 168/1472 [01:29<11:39,  1.87it/s][A
 11%|█▏        | 169/1472 [01:30<11:38,  1.86it/s][A
 12%|█▏        | 170/1472 [01:30<11:38,  1.86it/s][A
 12%|█▏        | 171/1472 [01:31<11:37,  1.87it/s][A
 12%|█▏        | 172/1472 [01:31<11:36,  1.87it/s][A
 12%|█▏        | 173/1472 [0

 21%|██        | 306/1472 [02:43<10:25,  1.86it/s][A
 21%|██        | 307/1472 [02:44<10:24,  1.86it/s][A
 21%|██        | 308/1472 [02:44<10:24,  1.86it/s][A
 21%|██        | 309/1472 [02:45<10:23,  1.86it/s][A
 21%|██        | 310/1472 [02:45<10:22,  1.87it/s][A
 21%|██        | 311/1472 [02:46<10:22,  1.87it/s][A
 21%|██        | 312/1472 [02:46<10:21,  1.87it/s][A
 21%|██▏       | 313/1472 [02:47<10:21,  1.87it/s][A
 21%|██▏       | 314/1472 [02:48<10:20,  1.86it/s][A
 21%|██▏       | 315/1472 [02:48<10:20,  1.86it/s][A
 21%|██▏       | 316/1472 [02:49<10:19,  1.86it/s][A
 22%|██▏       | 317/1472 [02:49<10:19,  1.87it/s][A
 22%|██▏       | 318/1472 [02:50<10:19,  1.86it/s][A
 22%|██▏       | 319/1472 [02:50<10:18,  1.86it/s][A
 22%|██▏       | 320/1472 [02:51<10:18,  1.86it/s][A
 22%|██▏       | 321/1472 [02:51<10:17,  1.86it/s][A
 22%|██▏       | 322/1472 [02:52<10:16,  1.86it/s][A
 22%|██▏       | 323/1472 [02:52<10:16,  1.87it/s][A
 22%|██▏       | 324/1472 [0

Fold: 3 Epoch 0: Avg test acc: {'accuracy': 0.6648501362397821}, Avg test f1: {'f1': 0.619353134639243}
Epoch 1



 25%|██▌       | 370/1472 [03:49<2:12:22,  7.21s/it][A
 25%|██▌       | 371/1472 [03:50<1:35:32,  5.21s/it][A
 25%|██▌       | 372/1472 [03:50<1:09:45,  3.81s/it][A
 25%|██▌       | 373/1472 [03:51<51:44,  2.82s/it]  [A
 25%|██▌       | 374/1472 [03:51<39:07,  2.14s/it][A
 25%|██▌       | 375/1472 [03:52<30:18,  1.66s/it][A
 26%|██▌       | 376/1472 [03:53<24:08,  1.32s/it][A
 26%|██▌       | 377/1472 [03:53<19:49,  1.09s/it][A
 26%|██▌       | 378/1472 [03:54<16:47,  1.09it/s][A
 26%|██▌       | 379/1472 [03:54<14:41,  1.24it/s][A
 26%|██▌       | 380/1472 [03:55<13:12,  1.38it/s][A
 26%|██▌       | 381/1472 [03:55<12:09,  1.50it/s][A
 26%|██▌       | 382/1472 [03:56<11:26,  1.59it/s][A
 26%|██▌       | 383/1472 [03:56<10:55,  1.66it/s][A
 26%|██▌       | 384/1472 [03:57<10:33,  1.72it/s][A
 26%|██▌       | 385/1472 [03:57<10:18,  1.76it/s][A
 26%|██▌       | 386/1472 [03:58<10:07,  1.79it/s][A
 26%|██▋       | 387/1472 [03:58<09:59,  1.81it/s][A
 26%|██▋       | 38

 35%|███▌      | 521/1472 [05:10<08:30,  1.86it/s][A
 35%|███▌      | 522/1472 [05:11<08:29,  1.86it/s][A
 36%|███▌      | 523/1472 [05:11<08:28,  1.86it/s][A
 36%|███▌      | 524/1472 [05:12<08:28,  1.86it/s][A
 36%|███▌      | 525/1472 [05:12<08:28,  1.86it/s][A
 36%|███▌      | 526/1472 [05:13<08:27,  1.86it/s][A
 36%|███▌      | 527/1472 [05:14<08:27,  1.86it/s][A
 36%|███▌      | 528/1472 [05:14<08:26,  1.86it/s][A
 36%|███▌      | 529/1472 [05:15<08:25,  1.86it/s][A
 36%|███▌      | 530/1472 [05:15<08:25,  1.86it/s][A
 36%|███▌      | 531/1472 [05:16<08:24,  1.86it/s][A
 36%|███▌      | 532/1472 [05:16<08:24,  1.86it/s][A
 36%|███▌      | 533/1472 [05:17<08:23,  1.86it/s][A
 36%|███▋      | 534/1472 [05:17<08:23,  1.86it/s][A
 36%|███▋      | 535/1472 [05:18<08:22,  1.86it/s][A
 36%|███▋      | 536/1472 [05:18<08:22,  1.86it/s][A
 36%|███▋      | 537/1472 [05:19<08:21,  1.86it/s][A
 37%|███▋      | 538/1472 [05:19<08:21,  1.86it/s][A
 37%|███▋      | 539/1472 [0

 46%|████▌     | 672/1472 [06:31<07:09,  1.86it/s][A
 46%|████▌     | 673/1472 [06:32<07:09,  1.86it/s][A
 46%|████▌     | 674/1472 [06:32<07:08,  1.86it/s][A
 46%|████▌     | 675/1472 [06:33<07:07,  1.86it/s][A
 46%|████▌     | 676/1472 [06:33<07:07,  1.86it/s][A
 46%|████▌     | 677/1472 [06:34<07:06,  1.86it/s][A
 46%|████▌     | 678/1472 [06:35<07:05,  1.86it/s][A
 46%|████▌     | 679/1472 [06:35<07:05,  1.86it/s][A
 46%|████▌     | 680/1472 [06:36<07:04,  1.86it/s][A
 46%|████▋     | 681/1472 [06:36<07:04,  1.86it/s][A
 46%|████▋     | 682/1472 [06:37<07:04,  1.86it/s][A
 46%|████▋     | 683/1472 [06:37<07:03,  1.86it/s][A
 46%|████▋     | 684/1472 [06:38<07:03,  1.86it/s][A
 47%|████▋     | 685/1472 [06:38<07:02,  1.86it/s][A
 47%|████▋     | 686/1472 [06:39<07:02,  1.86it/s][A
 47%|████▋     | 687/1472 [06:39<07:01,  1.86it/s][A
 47%|████▋     | 688/1472 [06:40<07:00,  1.86it/s][A
 47%|████▋     | 689/1472 [06:40<07:00,  1.86it/s][A
 47%|████▋     | 690/1472 [0

Fold: 3 Epoch 1: Avg test acc: {'accuracy': 0.6648501362397821}, Avg test f1: {'f1': 0.619353134639243}
Epoch 2



 50%|█████     | 738/1472 [07:37<1:23:51,  6.85s/it][A
 50%|█████     | 739/1472 [07:37<1:00:35,  4.96s/it][A
 50%|█████     | 740/1472 [07:38<44:18,  3.63s/it]  [A
 50%|█████     | 741/1472 [07:38<32:56,  2.70s/it][A
 50%|█████     | 742/1472 [07:39<24:59,  2.05s/it][A
 50%|█████     | 743/1472 [07:40<19:25,  1.60s/it][A
 51%|█████     | 744/1472 [07:40<15:31,  1.28s/it][A
 51%|█████     | 745/1472 [07:41<12:48,  1.06s/it][A
 51%|█████     | 746/1472 [07:41<10:54,  1.11it/s][A
 51%|█████     | 747/1472 [07:42<09:33,  1.26it/s][A
 51%|█████     | 748/1472 [07:42<08:37,  1.40it/s][A
 51%|█████     | 749/1472 [07:43<07:58,  1.51it/s][A
 51%|█████     | 750/1472 [07:43<07:30,  1.60it/s][A
 51%|█████     | 751/1472 [07:44<07:11,  1.67it/s][A
 51%|█████     | 752/1472 [07:44<06:57,  1.72it/s][A
 51%|█████     | 753/1472 [07:45<06:47,  1.76it/s][A
 51%|█████     | 754/1472 [07:45<06:40,  1.79it/s][A
 51%|█████▏    | 755/1472 [07:46<06:35,  1.81it/s][A
 51%|█████▏    | 756/

 60%|██████    | 889/1472 [08:58<05:13,  1.86it/s][A
 60%|██████    | 890/1472 [08:58<05:12,  1.86it/s][A
 61%|██████    | 891/1472 [08:59<05:11,  1.86it/s][A
 61%|██████    | 892/1472 [08:59<05:11,  1.86it/s][A
 61%|██████    | 893/1472 [09:00<05:10,  1.86it/s][A
 61%|██████    | 894/1472 [09:01<05:10,  1.86it/s][A
 61%|██████    | 895/1472 [09:01<05:09,  1.86it/s][A
 61%|██████    | 896/1472 [09:02<05:09,  1.86it/s][A
 61%|██████    | 897/1472 [09:02<05:08,  1.86it/s][A
 61%|██████    | 898/1472 [09:03<05:08,  1.86it/s][A
 61%|██████    | 899/1472 [09:03<05:07,  1.86it/s][A
 61%|██████    | 900/1472 [09:04<05:07,  1.86it/s][A
 61%|██████    | 901/1472 [09:04<05:06,  1.86it/s][A
 61%|██████▏   | 902/1472 [09:05<05:06,  1.86it/s][A
 61%|██████▏   | 903/1472 [09:05<05:05,  1.86it/s][A
 61%|██████▏   | 904/1472 [09:06<05:04,  1.86it/s][A
 61%|██████▏   | 905/1472 [09:06<05:04,  1.86it/s][A
 62%|██████▏   | 906/1472 [09:07<05:03,  1.86it/s][A
 62%|██████▏   | 907/1472 [0

 71%|███████   | 1039/1472 [10:18<03:54,  1.85it/s][A
 71%|███████   | 1040/1472 [10:19<03:53,  1.85it/s][A
 71%|███████   | 1041/1472 [10:19<03:52,  1.86it/s][A
 71%|███████   | 1042/1472 [10:20<03:51,  1.86it/s][A
 71%|███████   | 1043/1472 [10:21<03:50,  1.86it/s][A
 71%|███████   | 1044/1472 [10:21<03:49,  1.86it/s][A
 71%|███████   | 1045/1472 [10:22<03:49,  1.86it/s][A
 71%|███████   | 1046/1472 [10:22<03:48,  1.86it/s][A
 71%|███████   | 1047/1472 [10:23<03:47,  1.86it/s][A
 71%|███████   | 1048/1472 [10:23<03:47,  1.86it/s][A
 71%|███████▏  | 1049/1472 [10:24<03:47,  1.86it/s][A
 71%|███████▏  | 1050/1472 [10:24<03:46,  1.86it/s][A
 71%|███████▏  | 1051/1472 [10:25<03:45,  1.86it/s][A
 71%|███████▏  | 1052/1472 [10:25<03:45,  1.86it/s][A
 72%|███████▏  | 1053/1472 [10:26<03:44,  1.86it/s][A
 72%|███████▏  | 1054/1472 [10:26<03:44,  1.86it/s][A
 72%|███████▏  | 1055/1472 [10:27<03:43,  1.86it/s][A
 72%|███████▏  | 1056/1472 [10:28<03:43,  1.86it/s][A
 72%|█████

Fold: 3 Epoch 2: Avg test acc: {'accuracy': 0.6648501362397821}, Avg test f1: {'f1': 0.619353134639243}
Epoch 3



 75%|███████▌  | 1106/1472 [11:30<49:36,  8.13s/it]  [A
 75%|███████▌  | 1107/1472 [11:31<35:36,  5.85s/it][A
 75%|███████▌  | 1108/1472 [11:32<25:49,  4.26s/it][A
 75%|███████▌  | 1109/1472 [11:32<19:00,  3.14s/it][A
 75%|███████▌  | 1110/1472 [11:33<14:14,  2.36s/it][A
 75%|███████▌  | 1111/1472 [11:33<10:54,  1.81s/it][A
 76%|███████▌  | 1112/1472 [11:34<08:34,  1.43s/it][A
 76%|███████▌  | 1113/1472 [11:34<06:57,  1.16s/it][A
 76%|███████▌  | 1114/1472 [11:35<05:48,  1.03it/s][A
 76%|███████▌  | 1115/1472 [11:35<05:00,  1.19it/s][A
 76%|███████▌  | 1116/1472 [11:36<04:27,  1.33it/s][A
 76%|███████▌  | 1117/1472 [11:36<04:03,  1.46it/s][A
 76%|███████▌  | 1118/1472 [11:37<03:47,  1.56it/s][A
 76%|███████▌  | 1119/1472 [11:37<03:35,  1.64it/s][A
 76%|███████▌  | 1120/1472 [11:38<03:26,  1.70it/s][A
 76%|███████▌  | 1121/1472 [11:39<03:20,  1.75it/s][A
 76%|███████▌  | 1122/1472 [11:39<03:16,  1.78it/s][A
 76%|███████▋  | 1123/1472 [11:40<03:13,  1.81it/s][A
 76%|██

 85%|████████▌ | 1254/1472 [12:50<01:56,  1.87it/s][A
 85%|████████▌ | 1255/1472 [12:50<01:56,  1.87it/s][A
 85%|████████▌ | 1256/1472 [12:51<01:55,  1.87it/s][A
 85%|████████▌ | 1257/1472 [12:51<01:55,  1.87it/s][A
 85%|████████▌ | 1258/1472 [12:52<01:54,  1.87it/s][A
 86%|████████▌ | 1259/1472 [12:52<01:54,  1.87it/s][A
 86%|████████▌ | 1260/1472 [12:53<01:53,  1.87it/s][A
 86%|████████▌ | 1261/1472 [12:54<01:53,  1.87it/s][A
 86%|████████▌ | 1262/1472 [12:54<01:52,  1.87it/s][A
 86%|████████▌ | 1263/1472 [12:55<01:52,  1.87it/s][A
 86%|████████▌ | 1264/1472 [12:55<01:51,  1.86it/s][A
 86%|████████▌ | 1265/1472 [12:56<01:51,  1.86it/s][A
 86%|████████▌ | 1266/1472 [12:56<01:50,  1.86it/s][A
 86%|████████▌ | 1267/1472 [12:57<01:49,  1.86it/s][A
 86%|████████▌ | 1268/1472 [12:57<01:49,  1.86it/s][A
 86%|████████▌ | 1269/1472 [12:58<01:48,  1.86it/s][A
 86%|████████▋ | 1270/1472 [12:58<01:48,  1.86it/s][A
 86%|████████▋ | 1271/1472 [12:59<01:47,  1.86it/s][A
 86%|█████

 95%|█████████▌| 1402/1472 [14:09<00:37,  1.86it/s][A
 95%|█████████▌| 1403/1472 [14:10<00:36,  1.87it/s][A
 95%|█████████▌| 1404/1472 [14:10<00:36,  1.87it/s][A
 95%|█████████▌| 1405/1472 [14:11<00:35,  1.87it/s][A
 96%|█████████▌| 1406/1472 [14:11<00:35,  1.87it/s][A
 96%|█████████▌| 1407/1472 [14:12<00:34,  1.86it/s][A
 96%|█████████▌| 1408/1472 [14:12<00:34,  1.86it/s][A
 96%|█████████▌| 1409/1472 [14:13<00:33,  1.86it/s][A
 96%|█████████▌| 1410/1472 [14:13<00:33,  1.86it/s][A
 96%|█████████▌| 1411/1472 [14:14<00:32,  1.86it/s][A
 96%|█████████▌| 1412/1472 [14:15<00:32,  1.86it/s][A
 96%|█████████▌| 1413/1472 [14:15<00:31,  1.87it/s][A
 96%|█████████▌| 1414/1472 [14:16<00:31,  1.87it/s][A
 96%|█████████▌| 1415/1472 [14:16<00:30,  1.87it/s][A
 96%|█████████▌| 1416/1472 [14:17<00:30,  1.87it/s][A
 96%|█████████▋| 1417/1472 [14:17<00:29,  1.86it/s][A
 96%|█████████▋| 1418/1472 [14:18<00:28,  1.86it/s][A
 96%|█████████▋| 1419/1472 [14:18<00:28,  1.86it/s][A
 96%|█████

Fold: 3 Epoch 3: Avg test acc: {'accuracy': 0.6648501362397821}, Avg test f1: {'f1': 0.619353134639243}
FULL Dataset: (3673, 2)
TRAIN Dataset: (2939, 2)
TEST Dataset: (734, 2)

[Initiating Fine Tuning]...



100%|██████████| 1472/1472 [15:20<00:00,  1.60it/s]
  0%|          | 1/1472 [00:00<03:48,  6.43it/s]

Epoch 0


 25%|██▌       | 369/1472 [03:47<2:56:14,  9.59s/it]

Fold: 4 Epoch 0: Avg test acc: {'accuracy': 0.6416893732970027}, Avg test f1: {'f1': 0.5883898923062777}
Epoch 1


 50%|█████     | 737/1472 [07:34<1:56:01,  9.47s/it]

Fold: 4 Epoch 1: Avg test acc: {'accuracy': 0.6471389645776566}, Avg test f1: {'f1': 0.597343047683996}
Epoch 2


 75%|███████▌  | 1105/1472 [11:25<1:03:49, 10.43s/it]

Fold: 4 Epoch 2: Avg test acc: {'accuracy': 0.6471389645776566}, Avg test f1: {'f1': 0.597343047683996}
Epoch 3


100%|██████████| 1472/1472 [14:42<00:00,  1.93it/s]  

Fold: 4 Epoch 3: Avg test acc: {'accuracy': 0.6471389645776566}, Avg test f1: {'f1': 0.597343047683996}
["Fold: 0 Epoch 0: Avg test acc: {'accuracy': 0.636734693877551}, Avg test f1: {'f1': 0.577809672385237}", "Fold: 0 Epoch 1: Avg test acc: {'accuracy': 0.636734693877551}, Avg test f1: {'f1': 0.577809672385237}", "Fold: 0 Epoch 2: Avg test acc: {'accuracy': 0.636734693877551}, Avg test f1: {'f1': 0.577809672385237}", "Fold: 0 Epoch 3: Avg test acc: {'accuracy': 0.636734693877551}, Avg test f1: {'f1': 0.577809672385237}", "Fold: 1 Epoch 0: Avg test acc: {'accuracy': 0.6136054421768707}, Avg test f1: {'f1': 0.5492760451944125}", "Fold: 1 Epoch 1: Avg test acc: {'accuracy': 0.6136054421768707}, Avg test f1: {'f1': 0.5492760451944125}", "Fold: 1 Epoch 2: Avg test acc: {'accuracy': 0.6136054421768707}, Avg test f1: {'f1': 0.5492760451944125}", "Fold: 1 Epoch 3: Avg test acc: {'accuracy': 0.6258503401360545}, Avg test f1: {'f1': 0.5681211346968096}", "Fold: 2 Epoch 0: Avg test acc: {'accur

FileNotFoundError: [Errno 2] No such file or directory: '1665198240.282313microsoft/prophetnet-large-uncasedresults.txt'