In [1]:
import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModel
from pytorch_metric_learning import miners, losses
from torch.optim import AdamW
from tqdm import tqdm
from dataloader import get_MELD_dataloader
import pandas as pd
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run on the first CUDA GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [3]:
# Load the tokenizer and the model from the same Hugging Face checkpoint,
# then move the model onto the selected device.
CHECKPOINT = "SamLowe/roberta-base-go_emotions"
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
model = AutoModel.from_pretrained(CHECKPOINT)
model.to(device)

# MELD train split; the tokenizer is handed to the project-local loader factory.
train_dataloader = get_MELD_dataloader(
    "D:/MELD/MELD.Raw/MELD.Raw/train_sent_emo.csv",
    tokenizer=tokenizer,
    train=True,
)

# MELD dev split, built by the same factory with train=False.
dev_dataloader = get_MELD_dataloader(
    "D:/MELD/MELD.Raw/MELD.Raw/dev_sent_emo.csv",
    tokenizer=tokenizer,
    train=False,
)

Some weights of RobertaModel were not initialized from the model checkpoint at SamLowe/roberta-base-go_emotions and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
# Optimizer over every model parameter, plus the pair miner and the
# multi-similarity loss used by the train/validation helpers.
optimizer = AdamW(model.parameters(), lr=5e-5)
miner = miners.MultiSimilarityMiner()
criterion = losses.MultiSimilarityLoss(alpha=1, beta=60, base=0.5)

In [5]:
def train(dataloader, loss_func):
    """Run one optimization pass over ``dataloader`` and return the mean batch loss.

    Relies on the notebook-level ``model``, ``optimizer``, ``miner`` and ``device``.

    Args:
        dataloader: Sized iterable of batches; each batch is indexed with the keys
            ``'input_ids'``, ``'attention_mask'`` and ``'labels'``.
        loss_func: Callable invoked as ``loss_func(embeddings, labels, hard_pairs)``.

    Returns:
        ``np.mean`` of the per-batch loss values collected during the pass.
    """
    model.train()
    batch_losses = []
    pbar = tqdm(dataloader, total=len(dataloader), position=0, leave=True)
    for batch in pbar:
        optimizer.zero_grad()

        # Move the three batch tensors onto the same device as the model.
        ids, mask, targets = (
            batch[key].to(device) for key in ('input_ids', 'attention_mask', 'labels')
        )

        # One embedding per sequence: mean of the last hidden state over dim 1.
        # NOTE(review): the mean is not weighted by the attention mask, so any
        # padded positions contribute too -- confirm this is intended.
        embeddings = model(ids, attention_mask=mask).last_hidden_state.mean(dim=1)

        # Mine pairs for this batch and score them with the supplied loss.
        mined_pairs = miner(embeddings, targets)
        loss = loss_func(embeddings, targets, mined_pairs)

        loss.backward()
        optimizer.step()

        current = loss.item()
        batch_losses.append(current)
        pbar.set_description(f"[Train] Avg Loss: {np.mean(batch_losses):.4f}, Loss: {current:.4f}")

    return np.mean(batch_losses)

def validation(dataloader,loss_func):
    """Evaluate the model on ``dataloader`` without gradients and return the mean batch loss.

    Relies on the notebook-level ``model``, ``miner`` and ``device``.

    Args:
        dataloader: Sized iterable of batches; each batch is indexed with the keys
            ``'input_ids'``, ``'attention_mask'`` and ``'labels'``.
        loss_func: Callable invoked as ``loss_func(embeddings, labels, hard_pairs)``.

    Returns:
        ``np.mean`` of the per-batch loss values.
    """
    model.eval()
    batch_losses = []
    progress_bar = tqdm(dataloader, total=len(dataloader), position=0, leave=True, desc="Evaluating")
    with torch.no_grad():
        # Iterate the tqdm wrapper around the `dataloader` argument. Looping over
        # the global `dev_dataloader` ignored the argument and left the bar at 0/N.
        for batch in progress_bar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            # One embedding per sequence: mean of the last hidden state over dim 1.
            embeddings = outputs.last_hidden_state.mean(dim=1)

            # Same mining + loss as in training so the two losses are comparable.
            hard_pairs = miner(embeddings, labels)

            loss = loss_func(embeddings, labels, hard_pairs)

            loss_value = loss.item()
            batch_losses.append(loss_value)
            avg_loss = np.mean(batch_losses)
            progress_bar.set_description(f"[Val] Avg Loss: {avg_loss:.4f}, Loss: {loss_value:.4f}")

    return np.mean(batch_losses)

In [6]:
# Training loop with validation: train for a fixed number of epochs and keep
# the weights from the epoch with the lowest validation loss.
num_epochs = 3

# Lowest validation loss seen so far; inf guarantees the first epoch is saved.
best_val_loss = float('inf')

for epoch in range(num_epochs):
    train_loss = train(train_dataloader, loss_func = criterion)
    print(f'Train Loss :{train_loss}')

    # Validation Phase
    dev_loss = validation(dev_dataloader, loss_func = criterion)
    # Label this line as validation; it previously printed as "Train Loss" too.
    print(f'Validation Loss :{dev_loss}')
    # Save the model if the validation loss has improved
    # (the same file is overwritten, so only the best checkpoint is kept).
    if dev_loss < best_val_loss:
        print(f"Validation loss improved from {best_val_loss} to {dev_loss}. Saving model to model_best.pth")
        best_val_loss = dev_loss
        torch.save(model.state_dict(), "model_best.pth")

[Train] Avg Loss: 1.5170, Loss: 0.5743: 100%|██████████| 625/625 [02:30<00:00,  4.15it/s]


Train Loss :1.51699901638031


[Val] Avg Loss: 1.5471, Loss: 0.0000:   0%|          | 0/70 [00:05<?, ?it/s]


Train Loss :1.547099541766303
Validation loss improved from inf to 1.547099541766303. Saving model to model_best.pth


[Train] Avg Loss: 1.4921, Loss: 0.6548: 100%|██████████| 625/625 [02:33<00:00,  4.08it/s]


Train Loss :1.4920808530807494


[Val] Avg Loss: 1.5442, Loss: 0.0000:   0%|          | 0/70 [00:06<?, ?it/s]


Train Loss :1.5441982090473174
Validation loss improved from 1.547099541766303 to 1.5441982090473174. Saving model to model_best.pth


[Train] Avg Loss: 1.5675, Loss: 1.5675:   0%|          | 1/625 [00:00<06:00,  1.73it/s]