# Axis classification: multi-task RoBERTa classifier for the emotional intensity and political spectrum of speeches

In [None]:
# imports

import json
import os
import glob
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from torch import nn
from transformers import RobertaTokenizer, RobertaModel, get_linear_schedule_with_warmup
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
from collections import Counter
from datetime import datetime

In [None]:
# Exploratory summary of the labelled speech files: number of speeches per
# file, plus the overall and per-file distribution of both ratings.

# Directory containing speech files
data_dir = '.'

# Get list of files in directory, filtering out directories
# NOTE(review): every regular file in data_dir is treated as a label file;
# confirm the directory holds nothing but the JSON label files.
files = [f for f in os.listdir(data_dir) if os.path.isfile(os.path.join(data_dir, f))]

# Initialize lists to store data
speech_counts = []
emotional_intensity = []
political_spectrum = []

# Iterate over files
for file in files:
    # os.listdir returns bare names, so resolve them against data_dir
    # (otherwise this only works when data_dir is the working directory)
    with open(os.path.join(data_dir, file), 'r') as f:
        data = json.load(f)

    # Get speech count
    speech_counts.append(len(data))

    # Extract ratings
    emotional_intensity.extend([value['emotional_intensity'] for value in data.values()])
    political_spectrum.extend([value['political_spectrum'] for value in data.values()])

data = {
    'File': files,
    'Speech Count': speech_counts
}

# Display total distribution of ratings (relative frequencies)
total_distribution = {
    'Emotional Intensity': pd.Series(emotional_intensity).value_counts(normalize=True),
    'Political Spectrum': pd.Series(political_spectrum).value_counts(normalize=True)
}

print('Total Distribution of Ratings:')
print(pd.DataFrame(total_distribution))

# make sure we sort by file name so we get files in increasing order (they contain numbers)
print('\nTotal number of speeches by File:')
df = pd.DataFrame(data).sort_values('File')
print(df)

# get the total number of speeches across all files
total_speeches = df['Speech Count'].sum()
print(f'\nTotal number of speeches: {total_speeches}')

# now, get the distribution of ratings by file
for file in files:
    with open(os.path.join(data_dir, file), 'r') as f:
        data = json.load(f)

    emotional_intensity = [value['emotional_intensity'] for value in data.values()]
    political_spectrum = [value['political_spectrum'] for value in data.values()]

    distribution = {
        'Emotional Intensity': pd.Series(emotional_intensity).value_counts(normalize=True),
        'Political Spectrum': pd.Series(political_spectrum).value_counts(normalize=True)
    }

    print(f'\nDistribution of Ratings for {file}:')
    print(pd.DataFrame(distribution))

In [None]:
def load_all_data(file_pattern='speeches_*_gpt_axis_labels.json'):
    """
    Load and combine all data files

    Matching files are merged in sorted path order so that the combined
    dict (and anything that depends on its ordering, such as a seeded
    train/validation split) is identical on every run. When the same key
    appears in more than one file, the entry from the file that sorts
    last wins.

    Args:
        file_pattern (str): Glob pattern of the JSON files to load

    Returns:
        dict: Combined data (empty if no file matches the pattern)
    """
    all_data = {}
    # glob.glob gives no ordering guarantee, so sort for reproducibility
    for file_path in sorted(glob.glob(file_pattern)):
        with open(file_path, 'r') as f:
            all_data.update(json.load(f))
    return all_data

def calculate_class_weights(data, num_classes=5):
    """
    Calculate class weights for both tasks

    Each label observed in ``data`` gets the weight
    ``total / (count * num_classes)``, so rarer labels receive larger
    weights. Labels that never occur in ``data`` get no entry.

    Args:
        data (dict): Data dictionary; every value must provide the
            'emotional_intensity' and 'political_spectrum' keys
        num_classes (int): Number of classes per task used to scale the
            weights (defaults to 5)

    Returns:
        tuple: (emotional_weights, political_weights), each a dict that
            maps a label to its weight
    """
    emotional_counts = Counter(item['emotional_intensity'] for item in data.values())
    political_counts = Counter(item['political_spectrum'] for item in data.values())

    total = len(data)

    def balanced_weights(counts):
        # Same formula for both tasks: inverse frequency scaled by class count
        return {
            label: total / (count * num_classes)
            for label, count in counts.items()
        }

    return balanced_weights(emotional_counts), balanced_weights(political_counts)

def create_balanced_sampler(data):
    """
    Build a weighted sampler that counteracts label imbalance on both tasks

    Every sample is weighted by the geometric mean of the inverse
    frequencies of its emotional-intensity and political-spectrum labels.

    Args:
        data (dict): Data dictionary

    Returns:
        WeightedRandomSampler: Sampler drawing len(data) samples with replacement
    """
    records = list(data.values())

    # Label frequencies per task
    emotional_counts = Counter(record['emotional_intensity'] for record in records)
    political_counts = Counter(record['political_spectrum'] for record in records)

    # One weight per sample, in the iteration order of the data dictionary
    sample_weights = []
    for record in records:
        inv_emotional = 1 / emotional_counts[record['emotional_intensity']]
        inv_political = 1 / political_counts[record['political_spectrum']]
        # sqrt of the product == geometric mean of the two inverse frequencies
        sample_weights.append(np.sqrt(inv_emotional * inv_political))

    return WeightedRandomSampler(
        weights=sample_weights,
        num_samples=len(sample_weights),
        replacement=True
    )

class SpeechDataset(Dataset):
    """
    Torch dataset that tokenizes speeches and exposes both rating labels

    Args:
        speeches (dict): Maps a speech id to a dict providing the 'speech',
            'emotional_intensity' and 'political_spectrum' keys
        tokenizer: Callable tokenizer; when None, the 'roberta-base'
            RobertaTokenizer is loaded
        max_length (int): Length every speech is padded/truncated to
    """

    def __init__(self, speeches, tokenizer=None, max_length=512):
        self.speeches = speeches
        self.speech_ids = list(speeches.keys())
        # Compare against None explicitly instead of relying on the
        # truthiness of the tokenizer object
        if tokenizer is None:
            tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of speeches."""
        return len(self.speech_ids)

    def __getitem__(self, idx):
        """
        Tokenize the idx-th speech and return it with its labels

        Returns:
            dict: 'input_ids', 'attention_mask', the two labels (each stored
                as rating - 1) and the 'speech_id'
        """
        speech_id = self.speech_ids[idx]
        speech_data = self.speeches[speech_id]

        encoding = self.tokenizer(
            speech_data['speech'],
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        item = {
            # Drop only the leading batch dim; a bare squeeze() would also
            # collapse the sequence dim when it has length 1
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'emotional_intensity': torch.tensor(speech_data['emotional_intensity'] - 1),
            'political_spectrum': torch.tensor(speech_data['political_spectrum'] - 1),
            'speech_id': speech_id
        }

        return item

def plot_class_distributions(data, save_path='distribution_plots.png'):
    """
    Draw side-by-side count plots of the labels of both tasks

    Args:
        data (dict): Data dictionary
        save_path (str): Where the figure is written

    Returns:
        None
    """
    records = list(data.values())

    # (labels, title, x-axis label) for the left and right panel
    panels = (
        (
            [record['emotional_intensity'] for record in records],
            'Emotional Intensity Distribution',
            'Intensity Level',
        ),
        (
            [record['political_spectrum'] for record in records],
            'Political Spectrum Distribution',
            'Political Position',
        ),
    )

    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    for axis, (labels, title, x_label) in zip(axes, panels):
        sns.countplot(x=labels, ax=axis)
        axis.set_title(title)
        axis.set_xlabel(x_label)
        axis.set_ylabel('Count')

    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()

def plot_confusion_matrices(true_emotional, pred_emotional, true_political, pred_political, save_path='confusion_matrices.png'):
    """
    Draw the confusion matrices of both tasks next to each other

    Args:
        true_emotional (list): True emotional intensity labels
        pred_emotional (list): Predicted emotional intensity labels
        true_political (list): True political spectrum labels
        pred_political (list): Predicted political spectrum labels
        save_path (str): Where the figure is written

    Returns:
        None
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    # (true labels, predicted labels, panel title), left panel first
    tasks = (
        (true_emotional, pred_emotional, 'Emotional Intensity Confusion Matrix'),
        (true_political, pred_political, 'Political Spectrum Confusion Matrix'),
    )

    for axis, (y_true, y_pred, title) in zip(axes, tasks):
        matrix = confusion_matrix(y_true, y_pred)
        sns.heatmap(matrix, annot=True, fmt='d', ax=axis)
        axis.set_title(title)
        axis.set_xlabel('Predicted')
        axis.set_ylabel('True')

    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()

In [None]:
class PoliticalSpeechClassifier(nn.Module):
    """
    Multi-task classifier: a RoBERTa encoder feeding a shared MLP and two
    task-specific heads (emotional intensity and political spectrum)

    Args:
        num_classes (int): Output size of each head
        dropout_rate (float): Dropout probability used in every MLP block
    """

    def __init__(self, num_classes=5, dropout_rate=0.2):
        super().__init__()
        self.roberta = RobertaModel.from_pretrained('roberta-base')

        # Mark the last 8 encoder layers as trainable.
        # NOTE(review): nothing in this class freezes any layer beforehand, so
        # this may only re-assert the default -- confirm whether the earlier
        # layers were meant to be frozen.
        for encoder_layer in self.roberta.encoder.layer[-8:]:
            encoder_layer.requires_grad_(True)

        encoder_width = self.roberta.config.hidden_size

        def make_head():
            # 512 -> 256 -> num_classes; both tasks use the same layout
            return nn.Sequential(
                nn.Linear(512, 256),
                nn.LayerNorm(256),
                nn.ReLU(),
                nn.Dropout(dropout_rate),
                nn.Linear(256, num_classes)
            )

        # Trunk shared by both tasks: encoder_width -> 1024 -> 512
        self.shared_features = nn.Sequential(
            nn.Linear(encoder_width, 1024),
            nn.LayerNorm(1024),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(1024, 512),
            nn.LayerNorm(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate)
        )

        # One head per task (created in this order: emotional, then political)
        self.emotional_classifier = make_head()
        self.political_classifier = make_head()

    def forward(self, input_ids, attention_mask):
        """
        Encode the batch and return the logits of both heads

        Returns:
            tuple: (emotional_logits, political_logits)
        """
        hidden_states = self.roberta(
            input_ids=input_ids,
            attention_mask=attention_mask
        ).last_hidden_state

        # Masked mean pooling over the sequence dimension: padded positions
        # (mask == 0) contribute nothing to the sum or to the divisor
        mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
        masked_sum = torch.sum(hidden_states * mask, 1)
        # Clamp so an all-zero mask cannot cause a division by zero
        token_count = torch.clamp(mask.sum(1), min=1e-9)
        pooled = masked_sum / token_count

        trunk_output = self.shared_features(pooled)

        return (
            self.emotional_classifier(trunk_output),
            self.political_classifier(trunk_output),
        )

class FocalLoss(nn.Module):
    """
    Focal loss: cross-entropy scaled by alpha * (1 - p_t) ** gamma, averaged
    over the batch

    Args:
        alpha: Constant factor applied to every sample's loss
        gamma: Exponent of the (1 - p_t) modulating term
    """

    def __init__(self, alpha=1, gamma=2):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        """Return the mean focal loss of logits `inputs` against `targets`."""
        per_sample_ce = nn.functional.cross_entropy(inputs, targets, reduction='none')
        # exp(-CE) recovers the probability assigned to the target class
        target_prob = torch.exp(-per_sample_ce)
        modulating = (1 - target_prob) ** self.gamma
        return (self.alpha * modulating * per_sample_ce).mean()

class MetricTracker:
    """Per-epoch history of losses and accuracies plus the best value of each."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the history and reset every best value."""
        # Per-epoch history
        self.train_losses, self.val_losses = [], []
        self.emotional_accuracies, self.political_accuracies = [], []
        # Best values so far: lowest loss, highest accuracies
        self.best_val_loss = float('inf')
        self.best_emotional_acc = 0
        self.best_political_acc = 0

    def update(self, train_loss, val_loss, emotional_acc, political_acc):
        """Record one epoch and refresh the best values."""
        history = (
            (self.train_losses, train_loss),
            (self.val_losses, val_loss),
            (self.emotional_accuracies, emotional_acc),
            (self.political_accuracies, political_acc),
        )
        for series, value in history:
            series.append(value)

        # min/max keep the current best unless the new value strictly beats it
        self.best_val_loss = min(self.best_val_loss, val_loss)
        self.best_emotional_acc = max(self.best_emotional_acc, emotional_acc)
        self.best_political_acc = max(self.best_political_acc, political_acc)

def train_model(model, train_loader, val_loader, num_epochs=10, device='cuda'):
    """
    Train the multi-task classifier and keep the weights with the lowest
    validation loss

    Each epoch runs one pass over train_loader (focal loss on both heads,
    summed), then evaluates on val_loader, prints the metrics, writes a
    confusion-matrix plot, and saves a checkpoint every 3 epochs and after
    the last epoch.

    Args:
        model (nn.Module): Model returning (emotional_logits, political_logits)
        train_loader (DataLoader): Training batches
        val_loader (DataLoader): Validation batches
        num_epochs (int): Number of epochs to train
        device: Device the model and the batches are moved to

    Returns:
        tuple: (best_model_state, metric_tracker). best_model_state is a dict
            with the keys 'model_state_dict', 'epoch', 'val_loss',
            'emotional_accuracy', 'political_accuracy' and 'timestamp'.
    """
    model = model.to(device)

    def snapshot_weights():
        # state_dict() returns references to the live parameters, which keep
        # changing as training continues; clone them (onto the CPU) so the
        # stored weights stay frozen at this point in time.
        return {
            name: tensor.detach().cpu().clone()
            for name, tensor in model.state_dict().items()
        }

    # Initialize focal loss for both
    emotional_criterion = FocalLoss(gamma=2)
    political_criterion = FocalLoss(gamma=2)

    # Optimizer with different learning rates:
    # a small one for the encoder, a larger one for the new layers
    optimizer_grouped_parameters = [
        {
            'params': [p for n, p in model.named_parameters() if 'roberta' in n],
            'lr': 1e-5
        },
        {
            'params': [p for n, p in model.named_parameters() if 'roberta' not in n],
            'lr': 3e-4
        }
    ]

    optimizer = torch.optim.AdamW(optimizer_grouped_parameters, weight_decay=0.01)

    # Learning rate scheduler with warmup (first 10% of all steps)
    num_training_steps = len(train_loader) * num_epochs
    num_warmup_steps = num_training_steps // 10
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )

    metric_tracker = MetricTracker()
    # Fallback in case no epoch ever improves on the initial loss (e.g. NaN)
    best_model_state = {
        'model_state_dict': snapshot_weights(),
        'epoch': 0,
        'val_loss': float('inf'),
        'emotional_accuracy': 0,
        'political_accuracy': 0,
        'timestamp': datetime.now().strftime("%Y%m%d_%H%M%S")
    }

    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        train_emotional_correct = 0
        train_political_correct = 0
        train_total = 0

        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}')

        for batch in progress_bar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            emotional_labels = batch['emotional_intensity'].to(device)
            political_labels = batch['political_spectrum'].to(device)

            optimizer.zero_grad()
            emotional_logits, political_logits = model(input_ids, attention_mask)

            # Calculate losses; both tasks contribute equally
            emotional_loss = emotional_criterion(emotional_logits, emotional_labels)
            political_loss = political_criterion(political_logits, political_labels)
            total_loss = emotional_loss + political_loss

            total_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            scheduler.step()

            # Update metrics
            total_train_loss += total_loss.item()

            _, emotional_preds = torch.max(emotional_logits, 1)
            _, political_preds = torch.max(political_logits, 1)

            train_emotional_correct += (emotional_preds == emotional_labels).sum().item()
            train_political_correct += (political_preds == political_labels).sum().item()
            train_total += emotional_labels.size(0)

            # Update progress bar with running accuracies
            progress_bar.set_postfix({
                'loss': total_loss.item(),
                'e_acc': 100 * train_emotional_correct / train_total,
                'p_acc': 100 * train_political_correct / train_total
            })

        # Validation phase
        model.eval()
        total_val_loss = 0
        val_emotional_correct = 0
        val_political_correct = 0
        val_total = 0

        val_emotional_preds = []
        val_emotional_true = []
        val_political_preds = []
        val_political_true = []

        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                emotional_labels = batch['emotional_intensity'].to(device)
                political_labels = batch['political_spectrum'].to(device)

                emotional_logits, political_logits = model(input_ids, attention_mask)

                emotional_loss = emotional_criterion(emotional_logits, emotional_labels)
                political_loss = political_criterion(political_logits, political_labels)
                val_loss = emotional_loss + political_loss

                total_val_loss += val_loss.item()

                _, emotional_preds = torch.max(emotional_logits, 1)
                _, political_preds = torch.max(political_logits, 1)

                val_emotional_correct += (emotional_preds == emotional_labels).sum().item()
                val_political_correct += (political_preds == political_labels).sum().item()
                val_total += emotional_labels.size(0)

                # Store predictions and true labels for confusion matrix
                val_emotional_preds.extend(emotional_preds.cpu().numpy())
                val_emotional_true.extend(emotional_labels.cpu().numpy())
                val_political_preds.extend(political_preds.cpu().numpy())
                val_political_true.extend(political_labels.cpu().numpy())

        # Calculate epoch metrics
        avg_train_loss = total_train_loss / len(train_loader)
        avg_val_loss = total_val_loss / len(val_loader)
        emotional_accuracy = 100 * val_emotional_correct / val_total
        political_accuracy = 100 * val_political_correct / val_total

        # Decide whether this epoch is the best one BEFORE updating the
        # tracker: update() lowers best_val_loss to this epoch's value, after
        # which a strict "<" comparison could never succeed.
        is_best = avg_val_loss < metric_tracker.best_val_loss

        # Update metric tracker
        metric_tracker.update(avg_train_loss, avg_val_loss, emotional_accuracy, political_accuracy)

        # Save best model
        if is_best:
            best_model_state = {
                'model_state_dict': snapshot_weights(),
                'epoch': epoch,
                'val_loss': avg_val_loss,
                'emotional_accuracy': emotional_accuracy,
                'political_accuracy': political_accuracy,
                'timestamp': datetime.now().strftime("%Y%m%d_%H%M%S")
            }

        # Save checkpoint every 3 epochs and at the end
        if (epoch + 1) % 3 == 0 or epoch == num_epochs - 1:
            checkpoint_dir = save_checkpoint(
                best_model_state,
                metric_tracker,
                epoch + 1
            )
            print(f"\nCheckpoint saved at: {checkpoint_dir}")

        print(f'\nEpoch {epoch+1}/{num_epochs}:')
        print(f'Average Train Loss: {avg_train_loss:.4f}')
        print(f'Average Val Loss: {avg_val_loss:.4f}')
        print(f'Emotional Accuracy: {emotional_accuracy:.2f}%')
        print(f'Political Accuracy: {political_accuracy:.2f}%')

        # Plot confusion matrices for this epoch
        plot_confusion_matrices(
            val_emotional_true, val_emotional_preds,
            val_political_true, val_political_preds,
            save_path=f'confusion_matrices_epoch_{epoch+1}.png'
        )

    return best_model_state, metric_tracker

In [None]:
def save_checkpoint(model_state, metric_tracker, epoch, base_path='model_checkpoints'):
    """
    Write the model state and the metric history into a fresh directory

    The directory is named "<base_path>_epoch<epoch>_<timestamp>" and
    receives two files: model.pt and metrics.json.

    Args:
        model_state (dict): Model state dictionary
        metric_tracker (MetricTracker): Metric tracker object
        epoch (int): Current epoch
        base_path (str): Prefix of the checkpoint directory

    Returns:
        str: Path to saved checkpoint
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    save_dir = f"{base_path}_epoch{epoch}_{stamp}"
    os.makedirs(save_dir, exist_ok=True)

    # Model state
    torch.save(model_state, f"{save_dir}/model.pt")

    # Metric history and best values, copied attribute by attribute
    tracked_fields = (
        'train_losses',
        'val_losses',
        'emotional_accuracies',
        'political_accuracies',
        'best_val_loss',
        'best_emotional_acc',
        'best_political_acc',
    )
    metrics_dict = {field: getattr(metric_tracker, field) for field in tracked_fields}

    with open(f"{save_dir}/metrics.json", 'w') as f:
        json.dump(metrics_dict, f, indent=4)

    return save_dir

Using device: cpu


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/10: 100%|██████████| 5/5 [02:18<00:00, 27.76s/it, loss=3.24]


Validation Loss: 3.1975
Emotional Accuracy: 15.00%
Political Accuracy: 55.00%


Epoch 2/10: 100%|██████████| 5/5 [02:09<00:00, 25.85s/it, loss=3.2] 


Validation Loss: 3.1929
Emotional Accuracy: 15.00%
Political Accuracy: 55.00%


Epoch 3/10: 100%|██████████| 5/5 [02:13<00:00, 26.69s/it, loss=3.19]


Validation Loss: 3.1831
Emotional Accuracy: 15.00%
Political Accuracy: 55.00%


Epoch 4/10: 100%|██████████| 5/5 [02:16<00:00, 27.39s/it, loss=3.17]


Validation Loss: 3.1669
Emotional Accuracy: 15.00%
Political Accuracy: 55.00%


Epoch 5/10: 100%|██████████| 5/5 [02:13<00:00, 26.65s/it, loss=3.17]


Validation Loss: 3.1351
Emotional Accuracy: 30.00%
Political Accuracy: 55.00%


Epoch 6/10: 100%|██████████| 5/5 [02:05<00:00, 25.07s/it, loss=3.07]


Validation Loss: 3.0512
Emotional Accuracy: 30.00%
Political Accuracy: 55.00%


Epoch 7/10: 100%|██████████| 5/5 [02:09<00:00, 26.00s/it, loss=2.94]


Validation Loss: 2.9830
Emotional Accuracy: 30.00%
Political Accuracy: 55.00%


Epoch 8/10: 100%|██████████| 5/5 [02:05<00:00, 25.10s/it, loss=2.68]


Validation Loss: 2.9271
Emotional Accuracy: 30.00%
Political Accuracy: 55.00%


Epoch 9/10: 100%|██████████| 5/5 [02:12<00:00, 26.54s/it, loss=2.88]


Validation Loss: 2.8738
Emotional Accuracy: 30.00%
Political Accuracy: 55.00%


Epoch 10/10: 100%|██████████| 5/5 [02:05<00:00, 25.14s/it, loss=2.63]


Validation Loss: 2.8214
Emotional Accuracy: 30.00%
Political Accuracy: 55.00%


Generating predictions: 100%|██████████| 6/6 [00:17<00:00,  2.89s/it]


In [None]:
def plot_training_history(metric_tracker, save_path='training_history.png'):
    """
    Plot the loss curves and the validation accuracies per epoch

    Args:
        metric_tracker (MetricTracker): Metric tracker object
        save_path (str): Where the figure is written

    Returns:
        None
    """
    fig, (loss_ax, acc_ax) = plt.subplots(2, 1, figsize=(12, 10))

    # Epochs are numbered from 1 on the x-axis
    epochs = range(1, len(metric_tracker.train_losses) + 1)

    # Top panel: losses
    loss_curves = (
        (metric_tracker.train_losses, 'b-', 'Training Loss'),
        (metric_tracker.val_losses, 'r-', 'Validation Loss'),
    )
    for values, fmt, label in loss_curves:
        loss_ax.plot(epochs, values, fmt, label=label)
    loss_ax.set_title('Training and Validation Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()
    loss_ax.grid(True)

    # Bottom panel: accuracies
    # NOTE(review): in matplotlib format strings 'p' is a marker code, not a
    # colour -- confirm that 'p-' is the intended style for this curve.
    accuracy_curves = (
        (metric_tracker.emotional_accuracies, 'g-', 'Emotional Accuracy'),
        (metric_tracker.political_accuracies, 'p-', 'Political Accuracy'),
    )
    for values, fmt, label in accuracy_curves:
        acc_ax.plot(epochs, values, fmt, label=label)
    acc_ax.set_title('Validation Accuracies')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy (%)')
    acc_ax.legend()
    acc_ax.grid(True)

    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()

def save_model_artifacts(model_state, metric_tracker, base_path='model_artifacts'):
    """
    Write the final model state and metric history to disk

    The target directory is "<base_path>_<timestamp>", with the timestamp
    taken from model_state['timestamp']; it receives model.pt and
    metrics.json.

    Args:
        model_state (dict): Model state dictionary (must contain 'timestamp')
        metric_tracker (MetricTracker): Metric tracker object
        base_path (str): Prefix of the artifact directory

    Returns:
        str: Path to saved model artifacts
    """
    save_dir = f"{base_path}_{model_state['timestamp']}"
    os.makedirs(save_dir, exist_ok=True)

    # Model state
    torch.save(model_state, f"{save_dir}/model.pt")

    # Metric history and best values, copied attribute by attribute
    tracked_fields = (
        'train_losses',
        'val_losses',
        'emotional_accuracies',
        'political_accuracies',
        'best_val_loss',
        'best_emotional_acc',
        'best_political_acc',
    )
    metrics_dict = {field: getattr(metric_tracker, field) for field in tracked_fields}

    with open(f"{save_dir}/metrics.json", 'w') as f:
        json.dump(metrics_dict, f, indent=4)

    return save_dir

def load_model_for_inference(model_path):
    """
    Rebuild the classifier from a saved checkpoint, ready for prediction

    Args:
        model_path (str): Path to saved model

    Returns:
        PoliticalSpeechClassifier: Loaded model in eval mode, on CUDA when
            available and on the CPU otherwise
    """
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    # The checkpoint is the dict written by torch.save; only the weights
    # stored under 'model_state_dict' are used here
    checkpoint = torch.load(model_path, map_location=device)

    classifier = PoliticalSpeechClassifier()
    classifier.load_state_dict(checkpoint['model_state_dict'])

    # Both to() and eval() return the module itself
    return classifier.to(device).eval()

def predict_speech(model, tokenizer, speech_text):
    """
    Classify a single speech on both axes

    Args:
        model (PoliticalSpeechClassifier): Trained model
        tokenizer (RobertaTokenizer): Tokenizer
        speech_text (str): Speech text

    Returns:
        dict: 'emotional_intensity' and 'political_spectrum' (argmax index + 1)
            together with 'emotional_confidence' and 'political_confidence'
            (softmax probability of the predicted class)
    """
    # Run on whatever device the model's parameters live on
    device = next(model.parameters()).device

    encoded = tokenizer(
        speech_text,
        max_length=512,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )
    input_ids = encoded['input_ids'].to(device)
    attention_mask = encoded['attention_mask'].to(device)

    def top_class(logits):
        # Most likely class (shifted by one) and its probability
        probs = torch.softmax(logits, dim=1)
        rating = torch.argmax(probs, dim=1).item() + 1
        return rating, probs[0][rating - 1].item()

    with torch.no_grad():
        emotional_logits, political_logits = model(input_ids, attention_mask)
        emotional_pred, emotional_confidence = top_class(emotional_logits)
        political_pred, political_confidence = top_class(political_logits)

    return {
        'emotional_intensity': emotional_pred,
        'emotional_confidence': emotional_confidence,
        'political_spectrum': political_pred,
        'political_confidence': political_confidence
    }

def main():
    """
    Run the full pipeline: load the data, train the classifier, save the
    artifacts and print the prediction for one example speech
    """
    # Fixed seeds for reproducibility
    seed = 42
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Prefer the GPU when one is available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    # Load all data
    print("Loading data...")
    all_data = load_all_data()
    print(f"Loaded {len(all_data)} speeches")

    # Class distributions before any balancing
    plot_class_distributions(all_data)

    # 85/15 train/validation split over the (id, record) pairs
    train_items, val_items = train_test_split(
        list(all_data.items()),
        test_size=0.15,
        random_state=42
    )
    train_data, val_data = dict(train_items), dict(val_items)

    # Datasets share a single tokenizer instance
    print("Creating datasets...")
    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    train_dataset = SpeechDataset(train_data, tokenizer=tokenizer)
    val_dataset = SpeechDataset(val_data, tokenizer=tokenizer)

    # Only the training data is re-balanced
    train_sampler = create_balanced_sampler(train_data)

    # Settings common to both loaders
    loader_options = {'batch_size': 32, 'num_workers': 4}
    train_loader = DataLoader(train_dataset, sampler=train_sampler, **loader_options)
    val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

    # Initialize and train model
    print("Training model...")
    model = PoliticalSpeechClassifier()
    best_model_state, metric_tracker = train_model(
        model,
        train_loader,
        val_loader,
        num_epochs=10,
        device=device
    )

    # Loss and accuracy curves
    plot_training_history(metric_tracker)

    # Persist the best state together with the metrics
    save_dir = save_model_artifacts(best_model_state, metric_tracker)
    print(f"Model artifacts saved to: {save_dir}")

    # Round-trip check: reload the saved model and classify one example
    example_speech = (
        "Mr. Speaker, I rise today to express my strong support for this crucial "
        "legislation that will help working families across our great nation..."
    )

    model = load_model_for_inference(f"{save_dir}/model.pt")
    predictions = predict_speech(model, tokenizer, example_speech)

    print("\nExample Prediction:")
    emotional_line = (
        f"Emotional Intensity: {predictions['emotional_intensity']} "
        f"(Confidence: {predictions['emotional_confidence']:.2%})"
    )
    political_line = (
        f"Political Spectrum: {predictions['political_spectrum']} "
        f"(Confidence: {predictions['political_confidence']:.2%})"
    )
    print(emotional_line)
    print(political_line)

In [None]:
# Run the full training pipeline only when this file is executed directly
# (or as a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm


Using device: cpu


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/8:   2%|▏         | 1/49 [01:18<1:02:58, 78.72s/it, loss=3.39, emotional_acc=3.12, political_acc=21.9]

: 