In [None]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Kept at its original position relative to the imports below so the set of
# import-time warnings that gets silenced does not change.
warnings.filterwarnings('ignore')

import torch
from torch import nn
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from torch.utils.data import Dataset, DataLoader

from transformers import AutoTokenizer

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

# Project-local star imports stay last, as before, so any names they export
# keep taking precedence over the explicit imports above.
from datasets.Dataset import *
from models.electra_baseline import *
from train.train import *
from utils.utils import *

In [None]:
# Load the augmented dataset and attach an integer id to every distinct
# prompt; ids follow the order in which `unique()` returns the prompts.
df = pd.read_csv('/data/New Augmented_Dataset.csv')
unique_prompts = df['prompt'].unique()
category_mapping = dict(zip(unique_prompts, range(len(unique_prompts))))
df['prompt_id'] = df['prompt'].map(category_mapping)
df

In [None]:
# Rows whose prompt_id is in 0..99 are held out of training; the held-out
# rows are then divided 50/50 (row-level, seeded shuffle) into dev and test.
held_out_mask = np.isin(df['prompt_id'], np.arange(100))
train_df = df[~held_out_mask].reset_index(drop=True)
held_out_df = df[held_out_mask]
dev_df, test_df = train_test_split(held_out_df, test_size=0.5, shuffle=True, random_state=42)
dev_df = dev_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)
train_df.shape, dev_df.shape, test_df.shape

In [None]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

In [None]:
# Tokenizer matching the pretrained ELECTRA-small discriminator checkpoint.
pretrained_name = 'google/electra-small-discriminator'
tokenizer = AutoTokenizer.from_pretrained(pretrained_name)

In [None]:
# Wrap each split in the project's CustomDataset, sharing one tokenizer.
# Construction order is train, dev, test.
train_dataset, valid_dataset, test_dataset = (
    CustomDataset(split_df, tokenizer)
    for split_df in (train_df, dev_df, test_df)
)

In [None]:
# --- Run configuration -------------------------------------------------------
batch_size = 16
# NOTE(review): max_len is not referenced by any visible cell (CustomDataset is
# built without it) — confirm whether it should be passed to the dataset.
max_len = 512
epochs = 20
learning_rate = 2e-5
experiment_name = "electra_baseline"
results_path = 'checkpoints/results_{}.csv'.format(experiment_name)

# Create the output directory up front so a missing folder is detected before
# training starts rather than when the history is written afterwards.
os.makedirs(os.path.dirname(results_path), exist_ok=True)

# --- Data loaders ------------------------------------------------------------
# Only the training loader shuffles; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size,
                          num_workers=4, shuffle=True, pin_memory=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size,
                          num_workers=4, shuffle=False, pin_memory=True)

# --- Model, optimiser, loss, scheduler ---------------------------------------
set_seed(42)
model = ELECTRA()
model = model.to(device)
optimizer = AdamW(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

total_steps = len(train_loader) * epochs
# First restart period is 10% of the total step count. T_0 must be a positive
# integer, so clamp to 1 for very short runs where the product truncates to 0.
# NOTE(review): this presumes train_model steps the scheduler once per batch —
# confirm against train_model.
restart_period = max(1, int(total_steps * 0.1))
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=restart_period)

# --- Train -------------------------------------------------------------------
# Pass the `epochs` variable (not a literal) so the loop length always agrees
# with the value used to compute `total_steps` above.
history = train_model(model, criterion, optimizer, scheduler, train_loader, valid_loader, device, experiment_name, epochs=epochs)

# --- Persist the training history --------------------------------------------
results = pd.DataFrame(history)
results.to_csv(results_path, index=False)

In [None]:
# Rebuild the architecture and restore the saved weights for final testing.
best_model = ELECTRA()
# NOTE(review): this is an absolute path whose folder name does not match
# `experiment_name` or the relative 'checkpoints/' directory the training cell
# writes its results to — confirm it points at the checkpoint from this run.
checkpoint_path = '/checkpoints/electra-w-topic-regression-baseline/best_electra_model1.pth'
# map_location='cpu' lets a checkpoint saved from a GPU run be restored on a
# CPU-only machine; the model is moved to `device` right after loading.
best_model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
best_model = best_model.to(device)
# Put the model in inference mode before scoring. evaluate_model may already
# do this; calling it here is harmless either way.
best_model.eval()

test_loader = DataLoader(test_dataset, batch_size=10,
                          num_workers=4, shuffle=False, pin_memory=False)
criterion = nn.MSELoss()
maes, qwks, loss = evaluate_model(best_model, test_loader, criterion, device)
# Average the returned MAE and QWK values for display.
np.mean(maes), np.mean(qwks)