### Reference: https://mccormickml.com/2019/07/22/BERT-fine-tuning/

# Manual training and evaluation

### Train

In [None]:
from train import train_model_on_train_data


# Configuration for the manual training run.
TRAIN_DATA_PATH = "../data/train.csv"
MODEL_NAME = 'microsoft/deberta-base'
BATCH_SIZE, NUM_EPOCHS = 16, 1

# The helper's two return values are bound to `model` and `training_stats`;
# both names are reused by the cells further down.
model, training_stats = train_model_on_train_data(
    TRAIN_DATA_PATH,
    MODEL_NAME,
    BATCH_SIZE,
    NUM_EPOCHS,
)

# Last expression of the cell, so the notebook renders the statistics.
training_stats

### Plot learning curve

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Plot the per-epoch training and validation loss held in `training_stats`.
df_training = pd.DataFrame(training_stats)

# Use the recorded epoch numbers as x coordinates when the statistics carry
# them; otherwise number the rows 1..N. Either way the tick positions are
# derived from the data instead of being hard-coded, so they always line up
# with the plotted points regardless of how many epochs were run.
if "epoch" in df_training.columns:
    epochs = df_training["epoch"].tolist()
else:
    epochs = list(range(1, len(df_training) + 1))

plt.figure(figsize=(12, 6))
sns.set(style='darkgrid', font_scale=1.5)

plt.plot(epochs, df_training['training_loss'], 'b-o', label="Training")
plt.plot(epochs, df_training['validation_loss'], 'g-o', label="Validation")

plt.title("Training & Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.xticks(epochs)

plt.show()

### Evaluate test data

In [None]:
from evaluate import evaluate_on_test_data


# Held-out data used for the final evaluation.
TEST_DATA_PATH = "../data/eval.csv"

# Evaluate the `model` produced by the training cell, reusing its
# MODEL_NAME and BATCH_SIZE settings.
testing_stats = evaluate_on_test_data(
    model,
    TEST_DATA_PATH,
    MODEL_NAME,
    BATCH_SIZE,
)

# Last expression of the cell, so the notebook renders the statistics.
testing_stats

# Improving Model F1 Score

### Load results_dictionary

In [None]:
import pandas as pd

# Rebuild `results_dictionary` (column name -> list of values) from the CSV
# of previously recorded runs, so that new runs are appended to the old ones.
results_dictionary = {
    "model_name": [],
    "pipeline": [],
    "training_loss": [],
    "validation_loss": [],
    "validation_f1": [],
    "test_loss": [],
    "test_f1": [],
    "augmented": [],
}

old_df = pd.read_csv("../results/04-DeBERTa-base many configurations/2nd_models_comparison.csv")

# Copy every stored row into the per-column lists. A column that is not one
# of the keys above still raises KeyError, as before.
for record in old_df.to_dict(orient="records"):
    for column, value in record.items():
        results_dictionary[column].append(value)

# A CSV written without one of the expected columns (for example one saved
# while the dictionary had no "augmented" key) would leave that list empty
# and make the DataFrame construction below fail on unequal lengths; pad
# such columns with None so every list has one entry per stored row.
for column, values in results_dictionary.items():
    if column not in old_df.columns:
        values.extend([None] * len(old_df))

pd.set_option('display.max_colwidth', None)
results_df = pd.DataFrame(results_dictionary)
results_df.head(40)

In [None]:
# Start from an empty results table (column name -> list of values).
# "augmented" is included because the training cell appends to it for every
# run; without the key that append raises KeyError.
results_dictionary = {
    "model_name": [],
    "pipeline": [],
    "training_loss": [],
    "validation_loss": [],
    "validation_f1": [],
    "test_loss": [],
    "test_f1": [],
    "augmented": [],
}

### Start training

In [None]:
import pandas as pd
import torch
import numpy as np
from datasets import load_metric

from data_preparation import _load_dataset, _prepare_data, _create_dataloaders, _create_tensors
from helper_functions import get_device
from model_preparation import Model, set_seed


# Train, validate and test one model per (model name, preprocessing pipeline)
# combination and append one row per combination to `results_dictionary`.

# Parameters
TRAIN_DATA_PATH = "../data/train.csv"
TEST_DATA_PATH = "../data/eval.csv"
BATCH_SIZE = 16
NUM_EPOCHS = 1
SEED = 42

set_seed(SEED)

device = get_device()

# The augmented training file is fed to the tokenizer as loaded; any other
# training file first goes through the cleaning pipeline.
is_augmented = TRAIN_DATA_PATH == "../data/train_aug.csv"

# Other model names that were tried in this loop:
# 'microsoft/deberta-v2-xlarge'
# "bert-base-cased"
# "bert-base-uncased"
for MODEL_NAME in ['microsoft/deberta-base']:
    for pipeline in [['hyperlinks', 'mentions', 'hashtags', 'retweet', 'repetitions', 'emojis', 'smileys', 'spaces']
                    ]:

        # ---- Data -------------------------------------------------------
        df = _load_dataset(TRAIN_DATA_PATH)
        if not is_augmented:
            df = _prepare_data(df, pipeline)

        input_ids, attention_masks, labels = _create_tensors(df, MODEL_NAME)
        train_dataloader, validation_dataloader = _create_dataloaders(
            input_ids, attention_masks, labels, BATCH_SIZE, create_validation_set=True)

        # The test set always goes through the pipeline.
        df = _load_dataset(TEST_DATA_PATH)
        df = _prepare_data(df, pipeline)
        input_ids, attention_masks, labels = _create_tensors(df, MODEL_NAME)
        test_dataloader = _create_dataloaders(
            input_ids, attention_masks, labels, BATCH_SIZE, create_validation_set=False)

        # ---- Model ------------------------------------------------------
        model_class = Model(MODEL_NAME, NUM_EPOCHS, len(train_dataloader))
        model, optimizer, lr_scheduler = model_class.get_model_optimizer_scheduler()
        model = model.to(device)

        # Values of the last completed epoch; they stay at 0 if training
        # fails before the first epoch finishes.
        training_loss = 0
        val_loss = 0
        val_f1 = 0

        # ---- Training and validation -------------------------------------
        training_stats = []
        try:
            for epoch in range(NUM_EPOCHS):
                print(f"EPOCH {epoch+1}/{NUM_EPOCHS}\n")
                model.train()
                total_train_loss = 0

                for step, batch in enumerate(train_dataloader):
                    model.zero_grad()
                    parameters = {
                        "input_ids": batch[0].to(device),
                        "attention_mask": batch[1].to(device),
                        "labels": batch[2].to(device),
                    }
                    outputs = model(**parameters)

                    loss = outputs.loss
                    total_train_loss += loss.item()
                    loss.backward()

                    # Clip the gradient norm to 1.0 before the update.
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

                    optimizer.step()
                    lr_scheduler.step()
                    optimizer.zero_grad()

                    if step % 100 == 0 and step != 0:
                        print(f"BATCH {step}/{len(train_dataloader)}:\tTraining loss({loss.item()})")

                epoch_stats = {
                    "epoch": epoch+1,
                    "training_loss": total_train_loss/len(train_dataloader),
                }
                training_stats.append(epoch_stats)

                total_val_loss = 0
                metric = load_metric("f1")

                model.eval()
                for batch in validation_dataloader:
                    parameters = {
                        "input_ids": batch[0].to(device),
                        "attention_mask": batch[1].to(device),
                        "labels": batch[2].to(device),
                    }
                    with torch.no_grad():
                        outputs = model(**parameters)

                    logits = outputs.logits
                    loss = outputs.loss
                    total_val_loss += loss.item()

                    predictions = torch.argmax(logits, dim=-1)
                    metric.add_batch(predictions=predictions, references=parameters["labels"])

                epoch_stats["validation_loss"] = total_val_loss/len(validation_dataloader)
                epoch_stats["validation_f1_score"] = metric.compute()

                print(f"\nAvg training loss:    {epoch_stats['training_loss']}")
                print(f"Avg validation loss:  {epoch_stats['validation_loss']}")
                print(f"F1 validation score:  {epoch_stats['validation_f1_score']}\n")

                training_loss = epoch_stats['training_loss']
                val_loss = epoch_stats['validation_loss']
                val_f1 = epoch_stats['validation_f1_score']

        except RuntimeError as e:
            # NOTE(review): a RuntimeError during training is only printed;
            # the test evaluation below still runs on whatever state the
            # model is in, and the row is recorded with the values above.
            print(e)

        # ---- Test evaluation ---------------------------------------------
        testing_stats = []

        try:
            total_test_loss = 0
            metric = load_metric("f1")

            model.eval()

            for batch in test_dataloader:
                parameters = {
                    "input_ids": batch[0].to(device),
                    "attention_mask": batch[1].to(device),
                    "labels": batch[2].to(device),
                }
                with torch.no_grad():
                    outputs = model(**parameters)

                logits = outputs.logits
                loss = outputs.loss
                total_test_loss += loss.item()

                predictions = torch.argmax(logits, dim=-1)
                metric.add_batch(predictions=predictions, references=parameters["labels"])

            testing_stats.append({
                "test_loss": total_test_loss/len(test_dataloader),
                "test_f1_score": metric.compute()
            })

            print(f"\nAvg test loss:  {testing_stats[0]['test_loss']}")
            print(f"F1 test score:  {testing_stats[0]['test_f1_score']}\n")

            # `results_dictionary` may have been created without an
            # "augmented" column; create it (padded to the current number of
            # rows) before appending so that the row is never left
            # half-written by a KeyError.
            if "augmented" not in results_dictionary:
                results_dictionary["augmented"] = [None] * len(results_dictionary["model_name"])

            results_dictionary["model_name"].append(MODEL_NAME)
            results_dictionary["pipeline"].append(str(pipeline))
            results_dictionary["training_loss"].append(training_loss)
            results_dictionary["validation_loss"].append(val_loss)
            results_dictionary["validation_f1"].append(val_f1)
            results_dictionary["test_loss"].append(testing_stats[0]['test_loss'])
            results_dictionary["test_f1"].append(testing_stats[0]['test_f1_score'])
            results_dictionary["augmented"].append("yes" if is_augmented else "no")

        except RuntimeError as e:
            print(e)

In [None]:
# Persist every recorded run, then preview the table in the notebook.
results_df = pd.DataFrame(results_dictionary)
results_df.to_csv(
    "../results/04-DeBERTa-base many configurations/2nd_models_comparison.csv",
    index=False,
)

# Lift the column-width limit so long cell values are displayed in full.
pd.set_option('display.max_colwidth', None)
results_df.head(40)

# Check error cases

In [None]:
from train import train_model_on_train_data


# Configuration of the model whose misclassifications are inspected below.
TRAIN_DATA_PATH = "../data/train.csv"
MODEL_NAME = 'microsoft/deberta-base'
BATCH_SIZE, NUM_EPOCHS = 16, 1

# The helper's two return values are bound to `model` and `training_stats`;
# `model` is the one evaluated by the following cells.
model, training_stats = train_model_on_train_data(
    TRAIN_DATA_PATH,
    MODEL_NAME,
    BATCH_SIZE,
    NUM_EPOCHS,
)

In [None]:
import pandas as pd

from model_preparation import set_seed
from data_preparation import _load_dataset, _prepare_data, _create_dataloaders, _create_tensors


# Build the evaluation dataloader and, alongside it, a table that keeps the
# raw tweet next to its preprocessed text and label for later inspection.
data_path = "../data/eval.csv"
model_name = 'microsoft/deberta-base'
batch_size = 16
create_validation_set = False
SEED = 42
pipeline = ['hyperlinks', 'mentions', 'hashtags', 'retweet', 'repetitions', 'emojis', 'smileys', 'spaces']

set_seed(SEED)

df = _load_dataset(data_path)

# Copy the raw tweets before preprocessing is applied to the frame.
original_tweets = df["tweet"].copy()
df = _prepare_data(df, pipeline)

# Columns: raw "tweet", preprocessed "text", gold "label".
final_df = pd.concat([original_tweets, df[["text", "label"]]], axis=1)

input_ids, attention_masks, labels = _create_tensors(df, model_name)
dataloaders = _create_dataloaders(
    input_ids,
    attention_masks,
    labels,
    batch_size,
    create_validation_set,
)

In [None]:
import torch
import numpy as np
from datasets import load_metric

from helper_functions import get_device


# Evaluate `model` on the prepared dataloader, attach the predicted label to
# every row of `final_df` and display the rows the model got wrong.
device = get_device()
model = model.to(device)

test_dataloader = dataloaders

testing_stats = []

# Per-batch prediction arrays; concatenated once after the loop.
pred_batches = []
# Row offset of the current batch inside `input_ids`, used to confirm that
# the dataloader yields rows in the same order as `final_df`.
offset = 0

try:
    total_test_loss = 0
    metric = load_metric("f1")

    model.eval()

    for batch in test_dataloader:

        parameters = {
            "input_ids": batch[0].to(device),
            "attention_mask": batch[1].to(device),
            "labels": batch[2].to(device),
        }
        with torch.no_grad():
            outputs = model(**parameters)

        logits = outputs.logits
        loss = outputs.loss
        total_test_loss += loss.item()

        predictions = torch.argmax(logits, dim=-1)
        metric.add_batch(predictions=predictions, references=parameters["labels"])

        # Compare against the slice this batch should correspond to. The
        # slice width follows the actual batch length rather than a fixed
        # constant, so it is correct for any batch size and for a shorter
        # final batch.
        batch_len = parameters["input_ids"].shape[0]
        expected_ids = input_ids[offset:offset + batch_len]
        if expected_ids.equal(parameters["input_ids"].cpu()):
            pred_batches.append(predictions.cpu().numpy())
        offset += batch_len

    testing_stats.append({
        "test_loss": total_test_loss/len(test_dataloader),
        "test_f1_score": metric.compute()
    })

    print(f"\nAvg test loss:  {testing_stats[0]['test_loss']}")
    print(f"F1 test score:  {testing_stats[0]['test_f1_score']}\n")

except RuntimeError as e:
    print(e)

pred_list = np.concatenate(pred_batches) if pred_batches else np.array([])

# Predictions can only be attached row by row when every batch was evaluated
# and matched its expected position.
if len(pred_list) != len(final_df):
    raise ValueError(
        f"Collected {len(pred_list)} predictions for {len(final_df)} rows; "
        "the dataloader must yield batches in dataset order and the "
        "evaluation must finish without errors."
    )

final_df["prediction"] = pred_list
final_df["match"] = final_df.apply(lambda row: "" if row["label"] == row["prediction"] else "NO", axis=1)

# Show the full text of each misclassified row.
pd.set_option('display.max_colwidth', None)
final_df[final_df["match"]=="NO"].head(40)