In [1]:
import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModel
from pytorch_metric_learning import miners, losses
from torch.optim import AdamW
from tqdm import tqdm
from dataloader import get_MELD_dataloader
import pandas as pd
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained("SamLowe/roberta-base-go_emotions")
model = AutoModel.from_pretrained("SamLowe/roberta-base-go_emotions")
model.to(device)

train_dataloader = get_MELD_dataloader(
    "D:/MELD/MELD.Raw/MELD.Raw/train_sent_emo.csv",
    tokenizer=tokenizer,
    train = True
    )

dev_dataloader = get_MELD_dataloader(
    "D:/MELD/MELD.Raw/MELD.Raw/dev_sent_emo.csv",
    tokenizer=tokenizer,
    train = False
    )

Some weights of RobertaModel were not initialized from the model checkpoint at SamLowe/roberta-base-go_emotions and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
miner = miners.MultiSimilarityMiner()
criterion = losses.MultiSimilarityLoss(1,60,0.5)

In [5]:
def train(dataloader,loss_func):
    model.train()
    losses = []
    progress_bar = tqdm(dataloader, total=len(dataloader), position=0, leave=True)
    for batch in progress_bar:
        optimizer.zero_grad()
        
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        
        outputs = model(input_ids, attention_mask=attention_mask)
        embeddings = outputs.last_hidden_state.mean(dim=1)
        
        hard_pairs = miner(embeddings, labels)
        
        loss = loss_func(embeddings, labels, hard_pairs)
        
        loss.backward()
        optimizer.step()

        losses.append(loss.item())
        avg_loss = np.mean(losses) if losses else 0.0
        progress_bar.set_description(f"[Train] Avg Loss: {avg_loss:.4f}, Loss: {loss.item():.4f}")
        
    return np.mean(losses)

def validation(dataloader,loss_func):
    model.eval()
    losses = []
    progress_bar = tqdm(dataloader, total=len(dataloader), position=0, leave=True, desc="Evaluating")
    with torch.no_grad():
        for batch in progress_bar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            
            outputs = model(input_ids, attention_mask=attention_mask)
            embeddings = outputs.last_hidden_state.mean(dim=1)
            
            hard_pairs = miner(embeddings, labels)
            
            loss = loss_func(embeddings, labels, hard_pairs)
            
            losses.append(loss.item())
            avg_loss = np.mean(losses) if losses else 0.0
            progress_bar.set_description(f"[Val] Avg Loss: {avg_loss:.4f}, Loss: {loss.item():.4f}")
        
    return np.mean(losses)

In [6]:
# Training loop with validation
num_epochs = 10

best_val_loss = float('inf')

for epoch in range(num_epochs):
    train_loss = train(train_dataloader, loss_func = criterion)
    print(f'Train Loss :{train_loss}')
    
    # Validation Phase
    dev_loss = validation(dev_dataloader, loss_func = criterion)
    print(f'Validation Loss :{dev_loss}')
    # Save the model if the validation loss has improved
    if dev_loss  < best_val_loss:
        print(f"Validation loss improved from {best_val_loss} to {dev_loss}. Saving model to model_best.pth")
        best_val_loss = dev_loss 
        torch.save(model.state_dict(), "model_best.pth")

[Train] Avg Loss: 2.1227, Loss: 0.9416: 100%|██████████| 313/313 [04:10<00:00,  1.25it/s]


Train Loss :2.122682854580803


[Val] Avg Loss: 1.5484, Loss: 0.0000:   0%|          | 0/70 [00:04<?, ?it/s]


Train Loss :1.5484079403536661
Validation loss improved from inf to 1.5484079403536661. Saving model to model_best.pth


[Train] Avg Loss: 2.1066, Loss: 2.0298:  35%|███▍      | 108/313 [02:29<04:19,  1.27s/it]