In [None]:
# Stage 3: Multi-Task RoBERTa Model Training
# ============================================

# Install transformers and training libraries
!pip install transformers datasets torch scikit-learn accelerate evaluate

In [3]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import (
    RobertaTokenizer, 
    RobertaModel, 
    get_linear_schedule_with_warmup
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import json
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
  from .autonotebook import tqdm as notebook_tqdm  

In [4]:
from torch.optim import AdamW # New/Standard way


In [5]:
# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"üñ•Ô∏è  Using device: {device}")

üñ•Ô∏è  Using device: cpu


In [6]:
# ===========================
# LOAD PROCESSED DATA
# ===========================
print("\n" + "="*60)
print("LOADING PROCESSED DATA")
print("="*60)

sentiment_df = pd.read_csv('data/processed/sentiment_processed.csv')
intent_df = pd.read_csv('data/processed/intent_processed.csv')

print(f"‚úÖ Loaded {len(sentiment_df)} sentiment samples")
print(f"‚úÖ Loaded {len(intent_df)} intent samples")



LOADING PROCESSED DATA
‚úÖ Loaded 10000 sentiment samples
‚úÖ Loaded 138 intent samples


In [7]:
# ===========================
# PREPARE LABELS
# ===========================
print("\n" + "="*60)
print("ENCODING LABELS")
print("="*60)

# Encode sentiment labels
sentiment_encoder = LabelEncoder()
sentiment_df['sentiment_encoded'] = sentiment_encoder.fit_transform(sentiment_df['sentiment'])
sentiment_classes = sentiment_encoder.classes_
print(f"Sentiment classes: {sentiment_classes}")



ENCODING LABELS
Sentiment classes: ['negative' 'positive']


In [8]:
# Encode intent labels
intent_encoder = LabelEncoder()
intent_df['intent_encoded'] = intent_encoder.fit_transform(intent_df['intent'])
intent_classes = intent_encoder.classes_
print(f"Intent classes ({len(intent_classes)}): {intent_classes[:10]}...")  # Show first 10

# Save encoders
encoders = {
    'sentiment_classes': sentiment_classes.tolist(),
    'intent_classes': intent_classes.tolist()
}
with open('models/label_encoders.json', 'w') as f:
    json.dump(encoders, f, indent=2)

Intent classes (22): ['Clever' 'CourtesyGoodBye' 'CourtesyGreeting' 'CourtesyGreetingResponse'
 'CurrentHumanQuery' 'GoodBye' 'Gossip' 'Greeting' 'GreetingResponse'
 'Jokes']...


In [9]:
# ===========================
# TRAIN/VAL/TEST SPLIT
# ===========================
print("\n" + "="*60)
print("SPLITTING DATA")
print("="*60)

# CRITICAL: Shrink sentiment data to 1000 rows so CPU training finishes today
sentiment_df = sentiment_df.sample(n=1000, random_state=42)
# Split sentiment data (70% train, 15% val, 15% test)
sent_train, sent_temp = train_test_split(sentiment_df, test_size=0.3, random_state=42, stratify=sentiment_df['sentiment_encoded'])
sent_val, sent_test = train_test_split(sent_temp, test_size=0.5, random_state=42, stratify=sent_temp['sentiment_encoded'])

print(f"Sentiment - Train: {len(sent_train)}, Val: {len(sent_val)}, Test: {len(sent_test)}")

# Split intent data
intent_train, intent_temp = train_test_split(
    intent_df, 
    test_size=0.3, 
    random_state=42, 
    stratify=intent_df['intent_encoded']
)

# Second split: Split temp (30%) into Val (15%) and Test (15%)
# REMOVE stratify here to avoid the ValueError
intent_val, intent_test = train_test_split(
    intent_temp, 
    test_size=0.5, 
    random_state=42
)
print(f"‚úÖ Intent Split Success!")
print(f"Train: {len(intent_train)}, Val: {len(intent_val)}, Test: {len(intent_test)}")


SPLITTING DATA
Sentiment - Train: 700, Val: 150, Test: 150
‚úÖ Intent Split Success!
Train: 96, Val: 21, Test: 21


In [10]:
# ===========================
# CUSTOM DATASET CLASS
# ===========================

class MultiTaskDataset(Dataset):
    """Dataset for multi-task learning"""
    
    def __init__(self, sentiment_data, intent_data, tokenizer, max_length=64):
        self.tokenizer = tokenizer
        self.max_length = max_length
        
        # Combine both datasets
        self.samples = []
        
        # Add sentiment samples
        for _, row in sentiment_data.iterrows():
            self.samples.append({
                'text': row['text_cleaned'],
                'sentiment_label': row['sentiment_encoded'],
                'intent_label': -1,  # No intent label
                'task': 'sentiment'
            })
        
        # Add intent samples
        for _, row in intent_data.iterrows():
            self.samples.append({
                'text': row['text_cleaned'],
                'sentiment_label': -1,  # No sentiment label
                'intent_label': row['intent_encoded'],
                'task': 'intent'
            })
    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        sample = self.samples[idx]
        
        # Tokenize
        encoding = self.tokenizer(
            sample['text'],
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'sentiment_label': torch.tensor(sample['sentiment_label'], dtype=torch.long),
            'intent_label': torch.tensor(sample['intent_label'], dtype=torch.long),
            'task': sample['task']
        }


In [11]:
# ===========================
# MULTI-TASK MODEL ARCHITECTURE
# ===========================

class MultiTaskRoBERTa(nn.Module):
    """
    Multi-task RoBERTa model with two classification heads:
    - Sentiment classification (2 classes: positive/negative)
    - Intent classification (N classes)
    """
    
    def __init__(self, num_sentiment_classes, num_intent_classes, dropout=0.3):
        super(MultiTaskRoBERTa, self).__init__()
        
        # Shared RoBERTa base
        self.roberta = RobertaModel.from_pretrained('roberta-base')
        self.dropout = nn.Dropout(dropout)
        
        # Sentiment classification head
        self.sentiment_classifier = nn.Linear(self.roberta.config.hidden_size, num_sentiment_classes)
        
        # Intent classification head
        self.intent_classifier = nn.Linear(self.roberta.config.hidden_size, num_intent_classes)
    
    def forward(self, input_ids, attention_mask):
        # Get RoBERTa outputs
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        
        # Use [CLS] token representation
        pooled_output = outputs.last_hidden_state[:, 0, :]  # [batch_size, hidden_size]
        pooled_output = self.dropout(pooled_output)
        
        # Get logits from both heads
        sentiment_logits = self.sentiment_classifier(pooled_output)
        intent_logits = self.intent_classifier(pooled_output)
        
        return sentiment_logits, intent_logits


In [12]:
import os

In [13]:
# ===========================
# INITIALIZE MODEL & TOKENIZER
# ===========================
print("\n" + "="*60)
print("INITIALIZING MODEL")
print("="*60)

model_name = 'roberta-base'

# Set a much longer timeout for slow connections
os.environ['HTTpx_TIMEOUT'] = '600' 

print("‚è≥ Attempting to download/load RoBERTa (this may take a few mins)...")

try:
    # 1. Try loading normally with a forced download if needed
    tokenizer = RobertaTokenizer.from_pretrained(model_name, resume_download=True)
    base_model = RobertaModel.from_pretrained(model_name, resume_download=True)
    print("‚úÖ Successfully downloaded from Hugging Face!")
except Exception as e:
    print(f"‚ö†Ô∏è Connection still failing: {e}")
    print("üîÑ Checking if a local copy exists...")
    # 2. Fallback: try loading only from local cache
    tokenizer = RobertaTokenizer.from_pretrained(model_name, local_files_only=True)
    base_model = RobertaModel.from_pretrained(model_name, local_files_only=True)
    print("‚úÖ Found local cache!")


INITIALIZING MODEL
‚è≥ Attempting to download/load RoBERTa (this may take a few mins)...




‚ö†Ô∏è Connection still failing: RobertaModel.__init__() got an unexpected keyword argument 'resume_download'
üîÑ Checking if a local copy exists...


Loading weights: 100%|‚ñà‚ñà‚ñà‚ñà| 197/197 [00:01<00:00, 159.17it/s, Materializing param=encoder.layer.11.output.dense.weight]
RobertaModel LOAD REPORT from: roberta-base
Key                             | Status     | 
--------------------------------+------------+-
lm_head.dense.bias              | UNEXPECTED | 
roberta.embeddings.position_ids | UNEXPECTED | 
lm_head.dense.weight            | UNEXPECTED | 
lm_head.bias                    | UNEXPECTED | 
lm_head.layer_norm.weight       | UNEXPECTED | 
lm_head.layer_norm.bias         | UNEXPECTED | 
pooler.dense.bias               | MISSING    | 
pooler.dense.weight             | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


‚úÖ Found local cache!


In [14]:
# ===========================
# CREATE DATALOADERS
# ===========================
print("\n" + "="*60)
print("CREATING DATALOADERS")
print("="*60)

train_dataset = MultiTaskDataset(sent_train, intent_train, tokenizer)
val_dataset = MultiTaskDataset(sent_val, intent_val, tokenizer)
test_dataset = MultiTaskDataset(sent_test, intent_test, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)
test_loader = DataLoader(test_dataset, batch_size=16)

print(f"‚úÖ Train batches: {len(train_loader)}")
print(f"‚úÖ Val batches: {len(val_loader)}")
print(f"‚úÖ Test batches: {len(test_loader)}")

# ===========================


CREATING DATALOADERS
‚úÖ Train batches: 50
‚úÖ Val batches: 11
‚úÖ Test batches: 11


In [15]:
# ===========================
# TRAINING SETUP
# ===========================
# ============================================================
# TRAINING SETUP & CONFIGURATION
# ============================================================
print("\n" + "="*60)
print("STAGE 3: INITIALIZING TRAINING")
print("="*60)

# 1. Configuration
EPOCHS = 3 
LEARNING_RATE = 2e-5
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 2. Build the Multi-Task Model
# We use 'base_model' which was the RoBERTa we loaded from cache earlier
model = MultiTaskRoBERTa(
    num_sentiment_classes=len(sentiment_classes),
    num_intent_classes=len(intent_classes)
)
model.roberta = base_model  # Inject the pre-trained weights
model.to(device)

# 3. Loss Functions
sentiment_criterion = nn.CrossEntropyLoss()
intent_criterion = nn.CrossEntropyLoss(ignore_index=-1) # Skip the 'empty' labels

# 4. Optimizer (Imported from torch.optim to avoid library version errors)
from torch.optim import AdamW
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)

# 5. Scheduler (Crucial for Transformers)
from transformers import get_linear_schedule_with_warmup
total_steps = len(train_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(0.1 * total_steps),
    num_training_steps=total_steps
)

print(f"Epochs: {EPOCHS}")
print(f"Learning rate: {LEARNING_RATE}")
print(f"Total training steps: {total_steps}")



STAGE 3: INITIALIZING TRAINING


Loading weights: 100%|‚ñà‚ñà‚ñà‚ñà| 197/197 [00:01<00:00, 167.85it/s, Materializing param=encoder.layer.11.output.dense.weight]
RobertaModel LOAD REPORT from: roberta-base
Key                             | Status     | 
--------------------------------+------------+-
lm_head.dense.bias              | UNEXPECTED | 
roberta.embeddings.position_ids | UNEXPECTED | 
lm_head.dense.weight            | UNEXPECTED | 
lm_head.bias                    | UNEXPECTED | 
lm_head.layer_norm.weight       | UNEXPECTED | 
lm_head.layer_norm.bias         | UNEXPECTED | 
pooler.dense.bias               | MISSING    | 
pooler.dense.weight             | MISSING    | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.


Epochs: 3
Learning rate: 2e-05
Total training steps: 150


In [16]:
# ===========================
# CORRECTED TRAINING & EVAL FUNCTIONS
# ===========================
def train_epoch(model, dataloader, optimizer, scheduler):
    model.train()
    total_loss, s_correct, s_total, i_correct, i_total = 0, 0, 0, 0, 0
    
    for batch in tqdm(dataloader, desc="Training"):
        input_ids, mask = batch['input_ids'].to(device), batch['attention_mask'].to(device)
        s_labels, i_labels = batch['sentiment_label'].to(device), batch['intent_label'].to(device)
        
        optimizer.zero_grad()
        s_logits, i_logits = model(input_ids, mask)
        
        # MASKING: Only calculate loss for valid labels (!= -1)
        sent_mask, int_mask = s_labels != -1, i_labels != -1
        loss = 0
        if sent_mask.any():
            loss += sentiment_criterion(s_logits[sent_mask], s_labels[sent_mask])
            s_correct += (torch.argmax(s_logits[sent_mask], dim=1) == s_labels[sent_mask]).sum().item()
            s_total += sent_mask.sum().item()
        if int_mask.any():
            loss += intent_criterion(i_logits[int_mask], i_labels[int_mask])
            i_correct += (torch.argmax(i_logits[int_mask], dim=1) == i_labels[int_mask]).sum().item()
            i_total += int_mask.sum().item()
        
        if isinstance(loss, torch.Tensor):
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            scheduler.step()
            total_loss += loss.item()
            
    return total_loss/len(dataloader), s_correct/s_total, i_correct/i_total

def evaluate(model, dataloader):
    model.eval()
    total_loss, s_correct, s_total, i_correct, i_total = 0, 0, 0, 0, 0
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            input_ids, mask = batch['input_ids'].to(device), batch['attention_mask'].to(device)
            s_labels, i_labels = batch['sentiment_label'].to(device), batch['intent_label'].to(device)
            s_logits, i_logits = model(input_ids, mask)
            
            sent_mask, int_mask = s_labels != -1, i_labels != -1
            loss = 0
            if sent_mask.any():
                loss += sentiment_criterion(s_logits[sent_mask], s_labels[sent_mask])
                s_correct += (torch.argmax(s_logits[sent_mask], dim=1) == s_labels[sent_mask]).sum().item()
                s_total += sent_mask.sum().item()
            if int_mask.any():
                loss += intent_criterion(i_logits[int_mask], i_labels[int_mask])
                i_correct += (torch.argmax(i_logits[int_mask], dim=1) == i_labels[int_mask]).sum().item()
                i_total += int_mask.sum().item()
            total_loss += loss.item() if isinstance(loss, torch.Tensor) else 0
            
    return total_loss/len(dataloader), s_correct/(s_total or 1), i_correct/(i_total or 1)

In [17]:

# Training history
history = {
    'train_loss': [],
    'val_loss': [],
    'train_sentiment_acc': [],
    'val_sentiment_acc': [],
    'train_intent_acc': [],
    'val_intent_acc': []
}

In [18]:
history

{'train_loss': [],
 'val_loss': [],
 'train_sentiment_acc': [],
 'val_sentiment_acc': [],
 'train_intent_acc': [],
 'val_intent_acc': []}

In [19]:
# Re-declaring variables to ensure they are in memory
EPOCHS = 3 
LEARNING_RATE = 2e-5

# Ensure the history dictionary exists
if 'history' not in locals():
    history = {
        'train_loss': [], 'val_loss': [],
        'train_sentiment_acc': [], 'val_sentiment_acc': [],
        'train_intent_acc': [], 'val_intent_acc': []
    }

print(f"‚úÖ Configuration re-loaded. Ready to train for {EPOCHS} epochs.")

‚úÖ Configuration re-loaded. Ready to train for 3 epochs.


In [20]:
# Train the model
for epoch in range(EPOCHS):
    print(f"\nüìä Epoch {epoch+1}/{EPOCHS}")
    
    train_loss, train_sent_acc, train_intent_acc = train_epoch(model, train_loader, optimizer, scheduler)
    val_loss, val_sent_acc, val_intent_acc = evaluate(model, val_loader)
    
    history['train_loss'].append(train_loss)
    history['val_loss'].append(val_loss)
    history['train_sentiment_acc'].append(train_sent_acc)
    history['val_sentiment_acc'].append(val_sent_acc)
    history['train_intent_acc'].append(train_intent_acc)
    history['val_intent_acc'].append(val_intent_acc)
    
    print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
    print(f"Train Sentiment Acc: {train_sent_acc:.4f} | Val Sentiment Acc: {val_sent_acc:.4f}")
    print(f"Train Intent Acc: {train_intent_acc:.4f} | Val Intent Acc: {val_intent_acc:.4f}")



üìä Epoch 1/3


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [13:38<00:00, 16.37s/it]
Evaluating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 11/11 [00:45<00:00,  4.17s/it]


Train Loss: 3.1983 | Val Loss: 1.0917
Train Sentiment Acc: 0.5514 | Val Sentiment Acc: 0.6600
Train Intent Acc: 0.0833 | Val Intent Acc: 0.1429

üìä Epoch 2/3


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [12:21<00:00, 14.82s/it]
Evaluating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 11/11 [00:37<00:00,  3.40s/it]


Train Loss: 3.1687 | Val Loss: 0.8566
Train Sentiment Acc: 0.8300 | Val Sentiment Acc: 0.8333
Train Intent Acc: 0.1146 | Val Intent Acc: 0.2857

üìä Epoch 3/3


Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [11:32<00:00, 13.84s/it]
Evaluating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 11/11 [00:41<00:00,  3.79s/it]

Train Loss: 2.5846 | Val Loss: 0.7639
Train Sentiment Acc: 0.9071 | Val Sentiment Acc: 0.8400
Train Intent Acc: 0.3750 | Val Intent Acc: 0.5238





In [21]:
#===========================
# FINAL EVALUATION ON TEST SET
# ===========================
print("\n" + "="*60)
print("FINAL EVALUATION ON TEST SET")
print("="*60)

test_loss, test_sent_acc, test_intent_acc = evaluate(model, test_loader)
print(f"\nüéØ Test Results:")
print(f"Loss: {test_loss:.4f}")
print(f"Sentiment Accuracy: {test_sent_acc:.4f}")
print(f"Intent Accuracy: {test_intent_acc:.4f}")



FINAL EVALUATION ON TEST SET


Evaluating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 11/11 [00:33<00:00,  3.07s/it]


üéØ Test Results:
Loss: 0.7777
Sentiment Accuracy: 0.8467
Intent Accuracy: 0.3810





In [23]:
# ===========================
# SAVE MODEL (FINAL VERSION)
# ===========================
print("\n" + "="*60)
print("SAVING MODEL")
print("="*60)

import os
import json

# This path includes the subfolder
model_save_path = 'models/multitask_roberta'

# This ensures the specific subfolder exists
os.makedirs(model_save_path, exist_ok=True)

# 1. Save weights and metadata
torch.save({
    'model_state_dict': model.state_dict(),
    'sentiment_classes': sentiment_classes.tolist(),
    'intent_classes': intent_classes.tolist(),
    'history': history
}, f'{model_save_path}/model.pth')

# 2. Save tokenizer (crucial for Streamlit)
tokenizer.save_pretrained(model_save_path)

# 3. Save classes as JSON (makes Streamlit loading much faster)
classes_data = {
    'sentiment_classes': sentiment_classes.tolist(),
    'intent_classes': intent_classes.tolist()
}
with open(f'{model_save_path}/classes.json', 'w') as f:
    json.dump(classes_data, f)

print(f"‚úÖ Successfully saved to {model_save_path}")


SAVING MODEL
‚úÖ Successfully saved to models/multitask_roberta
