<a href="https://colab.research.google.com/github/nivash-glitch/tamil_sentiment_analysis/blob/main/Analysis_0_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import files
uploaded = files.upload()

Saving filtered_sentiment_data.csv to filtered_sentiment_data.csv


In [2]:
import pandas as pd
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
print(f"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU only'}")

Using device: cpu
GPU: CPU only


In [None]:
df = pd.read_csv('/content/filtered_sentiment_data.csv')

print(f"Dataset loaded successfully!")
print(f"Original dataset shape: {df.shape}")
print(f"Original columns: {df.columns.tolist()}")
print("\nFirst few rows:")
print(df.head())


Dataset loaded successfully!
Original dataset shape: (9919, 2)
Original columns: ['text_preprocessed', 'sentiment']

First few rows:
                                   text_preprocessed sentiment
0  ஒரு நாளும், ஒரு நாளும், ஒரு நாளும், ஒரு நாளும்...  Positive
1  பன்னுவாங்காவைப் போலவே படத்தின் டிரைலரில் அஜித்...  Negative
2  மன்காதா பில்போஸ் வார்ப்புரு ரேனையம் கலான்த்து ...  Positive
3  அதன் ரோக்கிங்.. தேசார் நாலா இருகு.. ஆனால் படம்...  Positive
4                யூ சூரியா வஸ்திரம் ஒரு பார் டென்டர்  Negative


In [None]:
import torch
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
print(f"GPU Name: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'No GPU'}")

Using device: cuda
GPU Name: Tesla T4


In [None]:
possible_text_cols = ['tweet', 'text', 'content', 'message', 'tamil_tweet', 'tamil_text', 'Tamil tweets','text_preprocessed']
possible_label_cols = ['sentiment', 'label', 'class', 'category', 'Sentiment']

text_col = None
label_col = None

# Find text column
for col in df.columns:
    if any(keyword.lower() in col.lower() for keyword in ['tweet', 'text', 'content', 'message', 'tamil']):
        text_col = col
        print(f"Detected text column: '{col}'")
        break

# Find label column
for col in df.columns:
    if any(keyword.lower() in col.lower() for keyword in ['sentiment', 'label', 'class', 'category']):
        label_col = col
        print(f"Detected label column: '{col}'")
        break

if not text_col or not label_col:
    print("Could not auto-detect columns. Available columns:")
    for i, col in enumerate(df.columns):
        print(f"  {i}: {col}")
    print("\nPlease manually specify in the next cell.")

Detected text column: 'text_preprocessed'
Detected label column: 'sentiment'


In [None]:
if text_col and label_col:
    df_renamed = df.rename(columns={text_col: 'text', label_col: 'label'})
    print(f"Renamed '{text_col}' → 'text'")
    print(f"Renamed '{label_col}' → 'label'")
else:
    # Manual column specification - adjust indices as needed
    print("Manual column assignment (adjust if needed):")
    df_renamed = df.copy()
    df_renamed.columns = ['text', 'label'] + list(df.columns[2:]) if len(df.columns) > 2 else ['text', 'label']
    print(f"Assigned columns: {df_renamed.columns.tolist()}")

print(f"\nRenamed dataset shape: {df_renamed.shape}")
print(f"New columns: {df_renamed.columns.tolist()}")

Renamed 'text_preprocessed' → 'text'
Renamed 'sentiment' → 'label'

Renamed dataset shape: (9919, 2)
New columns: ['text', 'label']


In [None]:
print(" DATA EXPLORATION:")
print("=" * 40)
print(f"Text column sample:")
print(df_renamed['text'].head())
print(f"\nLabel column unique values:")
print(df_renamed['label'].value_counts())
print(f"\nSample text lengths:")
text_lengths = df_renamed['text'].str.len()
print(f"Min: {text_lengths.min()}, Max: {text_lengths.max()}, Mean: {text_lengths.mean():.1f}")

 DATA EXPLORATION:
Text column sample:
0    ஒரு நாளும், ஒரு நாளும், ஒரு நாளும், ஒரு நாளும்...
1    பன்னுவாங்காவைப் போலவே படத்தின் டிரைலரில் அஜித்...
2    மன்காதா பில்போஸ் வார்ப்புரு ரேனையம் கலான்த்து ...
3    அதன் ரோக்கிங்.. தேசார் நாலா இருகு.. ஆனால் படம்...
4                  யூ சூரியா வஸ்திரம் ஒரு பார் டென்டர்
Name: text, dtype: object

Label column unique values:
label
Negative    5163
Positive    4756
Name: count, dtype: int64

Sample text lengths:
Min: 5, Max: 941, Mean: 63.5


In [None]:
print(f"Dataset before cleaning: {len(df_renamed)}")

df_clean = df_renamed.dropna(subset=['text', 'label'])
print(f"After removing NaN: {len(df_clean)}")

df_clean = df_clean[df_clean['text'].str.strip() != '']
df_clean = df_clean[df_clean['label'].astype(str).str.strip() != '']
print(f"After removing empty strings: {len(df_clean)}")

df_clean = df_clean.reset_index(drop=True)

print(f"Final cleaned dataset: {len(df_clean)} samples")

Dataset before cleaning: 9919
After removing NaN: 9919
After removing empty strings: 9919
Final cleaned dataset: 9919 samples


In [None]:
print("LABEL MAPPING:")
print("=" * 30)
print("Current unique labels:")
print(df_clean['label'].value_counts())

label_mapping = {
    'positive': 1, 'Positive': 1, 'POSITIVE': 1, 'pos': 1, 'POS': 1,
    '1': 1, 1: 1, 'good': 1, 'Good': 1, 'GOOD': 1,
    'happy': 1, 'Happy': 1, 'joy': 1, 'love': 1,

    'negative': 0, 'Negative': 0, 'NEGATIVE': 0, 'neg': 0, 'NEG': 0,
    '0': 0, 0: 0, 'bad': 0, 'Bad': 0, 'BAD': 0,
    'sad': 0, 'Sad': 0, 'hate': 0, 'anger': 0
}

df_clean['binary_label'] = df_clean['label'].map(label_mapping)

unmapped = df_clean[df_clean['binary_label'].isna()]
if len(unmapped) > 0:
    print(f" Unmapped labels found: {unmapped['label'].unique()}")
    print("Assigning to negative (0) by default...")

df_clean['binary_label'] = df_clean['binary_label'].fillna(0).astype(int)

print(f"\n Binary label distribution:")
print(f"Negative (0): {sum(df_clean['binary_label'] == 0)}")
print(f"Positive (1): {sum(df_clean['binary_label'] == 1)}")

LABEL MAPPING:
Current unique labels:
label
Negative    5163
Positive    4756
Name: count, dtype: int64

 Binary label distribution:
Negative (0): 5163
Positive (1): 4756


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Use the GPU device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_name = "xlm-roberta-base"
print(f"Loading {model_name}...")

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
print("Tokenizer loaded")

# Load model for sequence classification
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,  # binary sentiment
    problem_type="single_label_classification"
)

# Move model to GPU
model = model.to(device)
model.eval()

print(f"Model ready on {device}")
print(f"Model parameters: {model.num_parameters():,}")


Loading xlm-roberta-base...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Tokenizer loaded
Model ready on cuda
Model parameters: 278,045,186


In [None]:
class TamilSentimentDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        if isinstance(idx, (list, np.ndarray)):
            # Handle batch indexing
            texts = [str(self.texts[i]) for i in idx]
            labels = [self.labels[i] for i in idx]

            encoding = self.tokenizer(
                texts,
                truncation=True,
                padding='max_length',
                max_length=self.max_length,
                return_tensors='pt'
            )

            return {
                'input_ids': encoding['input_ids'],
                'attention_mask': encoding['attention_mask'],
                'labels': torch.tensor(labels, dtype=torch.long)
            }
        else:
            # Handle single item indexing
            text = str(self.texts[idx])
            label = self.labels[idx]

            encoding = self.tokenizer(
                text,
                truncation=True,
                padding='max_length',
                max_length=self.max_length,
                return_tensors='pt'
            )

            return {
                'input_ids': encoding['input_ids'].flatten(),
                'attention_mask': encoding['attention_mask'].flatten(),
                'labels': torch.tensor(label, dtype=torch.long)
            }

print("Dataset class defined")

Dataset class defined


In [None]:

from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup


In [None]:
import torch
from torch.utils.data import DataLoader
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

# Define hyperparameters
batch_size = 16
epochs = 5

# Create DataLoader
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=torch.cuda.is_available()
)

# Optimizer
optimizer = AdamW(model.parameters(), lr=3e-5, weight_decay=0.01)

# Total training steps = batches_per_epoch * epochs
total_steps = len(dataloader) * epochs

# Scheduler
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(0.1 * total_steps),
    num_training_steps=total_steps
)
print(epochs)

5


In [None]:
import torch
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup
from tqdm import tqdm

# ============================================
# TRAINING CONFIGURATION
# ============================================

batch_size = 16
epochs = 3
learning_rate = 2e-5
weight_decay = 0.01
warmup_ratio = 0.1
gradient_accumulation_steps = 2

# Calculate class weights
class_counts = df_clean['binary_label'].value_counts().sort_index().values
total_samples = len(df_clean)
class_weights = torch.tensor([total_samples / (2 * count) for count in class_counts],
                             dtype=torch.float).to(device)

print("=" * 60)
print("TRAINING CONFIGURATION")
print("=" * 60)
print(f"Batch size: {batch_size}")
print(f"Gradient accumulation: {gradient_accumulation_steps}")
print(f"Effective batch size: {batch_size * gradient_accumulation_steps}")
print(f"Epochs: {epochs}")
print(f"Learning rate: {learning_rate}")
print(f"Class weights: {class_weights.cpu().numpy()}")
print(f"Total training samples: {len(train_texts)}")

# ============================================
# CREATE DATALOADER
# ============================================

train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True
)

# ============================================
# OPTIMIZER AND SCHEDULER
# ============================================

optimizer = AdamW(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
    eps=1e-8
)

total_steps = len(train_dataloader) * epochs // gradient_accumulation_steps
warmup_steps = int(warmup_ratio * total_steps)

scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_steps
)

print(f"Total training steps: {total_steps}")
print(f"Warmup steps: {warmup_steps}")

# ============================================
# TRAINING LOOP
# ============================================

print("\n" + "=" * 60)
print("STARTING TRAINING")
print("=" * 60)

model.train()

for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    print("-" * 60)

    total_loss = 0
    progress_bar = tqdm(train_dataloader, desc="Training")
    optimizer.zero_grad()

    for step, batch in enumerate(progress_bar):
        # Move batch to device
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Forward pass
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )

        # Apply class weights to loss
        loss = outputs.loss
        weighted_loss = loss * class_weights[labels].mean()
        weighted_loss = weighted_loss / gradient_accumulation_steps

        # Backward pass
        weighted_loss.backward()

        # Update weights every N steps
        if (step + 1) % gradient_accumulation_steps == 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        total_loss += loss.item()

        # Update progress bar
        progress_bar.set_postfix({
            'loss': f'{loss.item():.4f}',
            'lr': f'{scheduler.get_last_lr()[0]:.2e}'
        })

    avg_loss = total_loss / len(train_dataloader)
    print(f"Average Loss: {avg_loss:.4f}")

print("\n" + "=" * 60)
print("TRAINING COMPLETED!")
print("=" * 60)

# Save trained model
output_dir = "tamil_sentiment_xlm_roberta"
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"Model saved to: {output_dir}")

# Now run your inference code...
print("\nStarting inference on Tamil texts...")
print("=" * 50)


TRAINING CONFIGURATION
Batch size: 16
Gradient accumulation: 2
Effective batch size: 32
Epochs: 3
Learning rate: 2e-05
Class weights: [0.96058494 1.042788  ]
Total training samples: 7935
Total training steps: 744
Warmup steps: 74

STARTING TRAINING

Epoch 1/3
------------------------------------------------------------


Training: 100%|██████████| 496/496 [12:29<00:00,  1.51s/it, loss=0.6855, lr=1.48e-05]


Average Loss: 0.6871

Epoch 2/3
------------------------------------------------------------


Training: 100%|██████████| 496/496 [12:28<00:00,  1.51s/it, loss=0.5006, lr=7.40e-06]


Average Loss: 0.6352

Epoch 3/3
------------------------------------------------------------


Training: 100%|██████████| 496/496 [12:28<00:00,  1.51s/it, loss=0.5383, lr=0.00e+00]


Average Loss: 0.5768

TRAINING COMPLETED!
Model saved to: tamil_sentiment_xlm_roberta

Starting inference on Tamil texts...


In [None]:
# ============================================
# EVALUATION METRICS AND PREDICTIONS
# ============================================

from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, classification_report, confusion_matrix)
import numpy as np

print("\nStarting inference on Tamil texts...")
print("=" * 60)

# Prepare full dataset for inference
full_dataset = TamilSentimentDataset(
    df_clean['text'].tolist(),
    df_clean['binary_label'].tolist(),
    tokenizer
)

full_dataloader = DataLoader(
    full_dataset,
    batch_size=batch_size * 2,  # Larger batch for inference
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

# Run inference
model.eval()
all_predictions = []
all_probabilities = []
all_true_labels = []

with torch.no_grad():
    for batch in tqdm(full_dataloader, desc="Processing batches"):
        # Move batch to device
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels']

        # Forward pass
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Get probabilities and predictions
        probabilities = torch.nn.functional.softmax(logits, dim=-1)
        predictions = torch.argmax(logits, dim=-1)

        # Store results
        all_predictions.extend(predictions.cpu().numpy())
        all_probabilities.extend(probabilities.cpu().numpy())
        all_true_labels.extend(labels.numpy())

print("Inference completed!")
print(f"Processed {len(all_predictions)} samples\n")

# ============================================
# CALCULATE EVALUATION METRICS
# ============================================

all_predictions = np.array(all_predictions)
all_probabilities = np.array(all_probabilities)
all_true_labels = np.array(all_true_labels)

# Core metrics
accuracy = accuracy_score(all_true_labels, all_predictions)
precision = precision_score(all_true_labels, all_predictions, average='weighted')
recall = recall_score(all_true_labels, all_predictions, average='weighted')
f1 = f1_score(all_true_labels, all_predictions, average='weighted')

# Macro metrics (for imbalanced data)
macro_precision = precision_score(all_true_labels, all_predictions, average='macro')
macro_recall = recall_score(all_true_labels, all_predictions, average='macro')
macro_f1 = f1_score(all_true_labels, all_predictions, average='macro')

# Per-class metrics
precision_per_class = precision_score(all_true_labels, all_predictions, average=None)
recall_per_class = recall_score(all_true_labels, all_predictions, average=None)
f1_per_class = f1_score(all_true_labels, all_predictions, average=None)

print("=" * 60)
print("EVALUATION METRICS SUMMARY")
print("=" * 60)
print(f"\n{'Metric':<25} {'Value':>10}")
print("-" * 60)
print(f"{'Accuracy':<25} {accuracy:>10.3f}")
print(f"{'Weighted Precision':<25} {precision:>10.3f}")
print(f"{'Weighted Recall':<25} {recall:>10.3f}")
print(f"{'Weighted F1-Score':<25} {f1:>10.3f}")
print(f"\n{'Macro Precision':<25} {macro_precision:>10.3f}")
print(f"{'Macro Recall':<25} {macro_recall:>10.3f}")
print(f"{'Macro F1-Score':<25} {macro_f1:>10.3f}")

print("\n" + "=" * 60)
print("PER-CLASS METRICS")
print("=" * 60)
print(f"\n{'Class':<15} {'Precision':>12} {'Recall':>12} {'F1-Score':>12}")
print("-" * 60)
print(f"{'Negative':<15} {precision_per_class[0]:>12.3f} {recall_per_class[0]:>12.3f} {f1_per_class[0]:>12.3f}")
print(f"{'Positive':<15} {precision_per_class[1]:>12.3f} {recall_per_class[1]:>12.3f} {f1_per_class[1]:>12.3f}")

# ============================================
# DETAILED CLASSIFICATION REPORT
# ============================================

print("\n" + "=" * 60)
print("CLASSIFICATION REPORT")
print("=" * 60)
print()
print(classification_report(
    all_true_labels,
    all_predictions,
    target_names=['Negative', 'Positive'],
    digits=3
))

# ============================================
# CONFUSION MATRIX
# ============================================

cm = confusion_matrix(all_true_labels, all_predictions)
print("=" * 60)
print("CONFUSION MATRIX")
print("=" * 60)
print(f"\n{'':>15} {'Predicted Neg':>15} {'Predicted Pos':>15}")
print("-" * 60)
print(f"{'Actual Neg':<15} {cm[0][0]:>15} {cm[0][1]:>15}")
print(f"{'Actual Pos':<15} {cm[1][0]:>15} {cm[1][1]:>15}")

# ============================================
# PREDICTION DISTRIBUTION
# ============================================

neg_count = np.sum(all_predictions == 0)
pos_count = np.sum(all_predictions == 1)
avg_confidence = np.max(all_probabilities, axis=1).mean()

print("\n" + "=" * 60)
print("PREDICTION DISTRIBUTION")
print("=" * 60)
print(f"Total samples: {len(all_predictions)}")
print(f"Predicted Negative: {neg_count} ({neg_count/len(all_predictions)*100:.1f}%)")
print(f"Predicted Positive: {pos_count} ({pos_count/len(all_predictions)*100:.1f}%)")
print(f"Average confidence: {avg_confidence:.3f}")

# ============================================
# SAMPLE PREDICTIONS
# ============================================

print("\n" + "=" * 60)
print("SAMPLE PREDICTIONS")
print("=" * 60)

# Show 10 sample predictions
sample_indices = np.random.choice(len(all_predictions), min(10, len(all_predictions)), replace=False)
label_map = {0: 'Negative', 1: 'Positive'}

for i, idx in enumerate(sample_indices, 1):
    text = df_clean.iloc[idx]['text']
    true_label = all_true_labels[idx]
    pred_label = all_predictions[idx]
    neg_prob = all_probabilities[idx][0]
    pos_prob = all_probabilities[idx][1]

    print(f"\nSample {i}:")
    print(f"Text: {text[:80]}{'...' if len(text) > 80 else ''}")
    print(f"True: {label_map[true_label]} | Predicted: {label_map[pred_label]}")
    print(f"Confidence: {max(neg_prob, pos_prob):.3f}")
    print(f"Pos: {pos_prob:.3f} | Neg: {neg_prob:.3f}")
    print("-" * 60)

# ============================================
# ERROR ANALYSIS
# ============================================

# Find misclassified samples
misclassified_indices = np.where(all_predictions != all_true_labels)[0]
misclassification_rate = len(misclassified_indices) / len(all_predictions)

print("\n" + "=" * 60)
print("ERROR ANALYSIS")
print("=" * 60)
print(f"Total misclassifications: {len(misclassified_indices)}")
print(f"Misclassification rate: {misclassification_rate:.3f} ({misclassification_rate*100:.1f}%)")

# False positives and false negatives
false_positives = np.where((all_predictions == 1) & (all_true_labels == 0))[0]
false_negatives = np.where((all_predictions == 0) & (all_true_labels == 1))[0]

print(f"\nFalse Positives: {len(false_positives)} ({len(false_positives)/len(all_predictions)*100:.1f}%)")
print(f"False Negatives: {len(false_negatives)} ({len(false_negatives)/len(all_predictions)*100:.1f}%)")

# Show worst predictions (low confidence misclassifications)
if len(misclassified_indices) > 0:
    misclassified_confidences = np.max(all_probabilities[misclassified_indices], axis=1)
    worst_indices = misclassified_indices[np.argsort(misclassified_confidences)[:5]]

    print("\n" + "-" * 60)
    print("TOP 5 MISCLASSIFICATIONS (Lowest Confidence):")
    print("-" * 60)

    for i, idx in enumerate(worst_indices, 1):
        text = df_clean.iloc[idx]['text']
        true_label = all_true_labels[idx]
        pred_label = all_predictions[idx]
        confidence = np.max(all_probabilities[idx])

        print(f"\n{i}. Text: {text[:60]}...")
        print(f"   True: {label_map[true_label]} | Predicted: {label_map[pred_label]}")
        print(f"   Confidence: {confidence:.3f}")

print("\n" + "=" * 60)
print("EVALUATION COMPLETE!")
print("=" * 60)



Starting inference on Tamil texts...


Processing batches: 100%|██████████| 310/310 [04:35<00:00,  1.12it/s]

Inference completed!
Processed 9919 samples

EVALUATION METRICS SUMMARY

Metric                         Value
------------------------------------------------------------
Accuracy                       0.739
Weighted Precision             0.739
Weighted Recall                0.739
Weighted F1-Score              0.739

Macro Precision                0.738
Macro Recall                   0.738
Macro F1-Score                 0.738

PER-CLASS METRICS

Class              Precision       Recall     F1-Score
------------------------------------------------------------
Negative               0.747        0.753        0.750
Positive               0.730        0.723        0.726

CLASSIFICATION REPORT

              precision    recall  f1-score   support

    Negative      0.747     0.753     0.750      5163
    Positive      0.730     0.723     0.726      4756

    accuracy                          0.739      9919
   macro avg      0.738     0.738     0.738      9919
weighted avg      0.739    




In [None]:
# ============================================
# EXAMPLE PREDICTIONS ON NEW TAMIL TEXTS
# ============================================

import torch
import numpy as np

def predict_sentiment(texts, model, tokenizer, device):
    """
    Predict sentiment for a list of Tamil texts

    Args:
        texts: List of Tamil text strings
        model: Trained sentiment model
        tokenizer: XLM-RoBERTa tokenizer
        device: torch device (cuda/cpu)

    Returns:
        predictions, probabilities, and labels
    """
    model.eval()

    results = []

    with torch.no_grad():
        for text in texts:
            # Tokenize
            encoding = tokenizer(
                text,
                add_special_tokens=True,
                max_length=128,
                padding='max_length',
                truncation=True,
                return_tensors='pt'
            )

            input_ids = encoding['input_ids'].to(device)
            attention_mask = encoding['attention_mask'].to(device)

            # Predict
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            # Get probabilities
            probabilities = torch.nn.functional.softmax(logits, dim=-1)
            prediction = torch.argmax(logits, dim=-1).item()

            neg_prob = probabilities[0][0].item()
            pos_prob = probabilities[0][1].item()
            confidence = max(neg_prob, pos_prob)

            sentiment = "Positive" if prediction == 1 else "Negative"

            results.append({
                'text': text,
                'sentiment': sentiment,
                'confidence': confidence,
                'positive_prob': pos_prob,
                'negative_prob': neg_prob
            })

    return results


# ============================================
# TEST EXAMPLES - TAMIL SENTIMENT TEXTS
# ============================================

# Positive Examples
positive_examples = [
    "இந்த திரைப்படம் மிகவும் அருமையாக இருந்தது",  # This movie was very good
    "இது சிறந்த உணவகம், உணவு மிகவும் சுவையாக இருந்தது",  # Great restaurant, food was delicious
    "நான் மிகவும் மகிழ்ச்சியாக இருக்கிறேன்",  # I am very happy
    "அருமையான சேவை, நன்றி",  # Excellent service, thank you
    "இந்த தொலைபேசி மிகவும் நன்றாக வேலை செய்கிறது",  # This phone works very well
    "என் வாழ்க்கையில் சிறந்த நாள்",  # Best day of my life
    "இந்த புத்தகம் படிக்க மிகவும் சுவாரஸ்யமாக இருந்தது",  # This book was very interesting to read
    "ஆசிரியர் மிகவும் நன்றாக கற்பித்தார்",  # Teacher taught very well
]

# Negative Examples
negative_examples = [
    "இந்த தொலைபேசி கேமரா நன்றாக இல்லை",  # This phone camera is not good
    "மோசமான சேவை, மிகவும் விலை அதிகம்",  # Bad service, very expensive
    "இந்த உணவு சுவையற்றது",  # This food is tasteless
    "நான் மிகவும் வருத்தமாக இருக்கிறேன்",  # I am very sad
    "இது வீணான பணம்",  # This is wasted money
    "திரைப்படம் மிகவும் மோசமாக இருந்தது",  # Movie was very bad
    "தரம் மிகவும் மோசமானது",  # Quality is very poor
    "நான் படிக்கவில்லை, தோல்வியடைந்தேன்",  # I did not study, I failed
]

# Mixed/Neutral Examples
mixed_examples = [
    "திரைப்படம் நன்றாக இருந்தது ஆனால் முடிவு சரியில்லை",  # Movie was good but ending was not right
    "உணவு சுவையாக இருந்தது ஆனால் சேவை மெதுவாக இருந்தது",  # Food was tasty but service was slow
    "சில பகுதிகள் நல்லது, சில பகுதிகள் மோசம்",  # Some parts good, some parts bad
]

# Code-Mixed Tamil-English (Tanglish) Examples
tanglish_examples = [
    "super movie, romba nalla irunthathu",  # Super movie, was very good
    "worst experience, never going back",  # Worst experience, never going back
    "food taste illa, waste of money",  # Food has no taste, waste of money
    "best hotel in Chennai, must try",  # Best hotel in Chennai, must try
]

print("=" * 80)
print("TESTING MODEL ON TAMIL SENTIMENT EXAMPLES")
print("=" * 80)

# ============================================
# TEST POSITIVE EXAMPLES
# ============================================

print("\n" + "=" * 80)
print("POSITIVE EXAMPLES")
print("=" * 80)

positive_results = predict_sentiment(positive_examples, model, tokenizer, device)

for i, result in enumerate(positive_results, 1):
    print(f"\nExample {i}:")
    print(f"Text: {result['text']}")
    print(f"Predicted: {result['sentiment']}")
    print(f"Confidence: {result['confidence']:.3f}")
    print(f"Positive: {result['positive_prob']:.3f} | Negative: {result['negative_prob']:.3f}")

    # Check if prediction is correct
    if result['sentiment'] == 'Positive':
        print("✓ CORRECT")
    else:
        print("✗ INCORRECT (Should be Positive)")
    print("-" * 80)

# Calculate accuracy
correct_positive = sum(1 for r in positive_results if r['sentiment'] == 'Positive')
positive_accuracy = correct_positive / len(positive_results)
print(f"\nPositive Examples Accuracy: {positive_accuracy:.2%} ({correct_positive}/{len(positive_results)})")

# ============================================
# TEST NEGATIVE EXAMPLES
# ============================================

print("\n" + "=" * 80)
print("NEGATIVE EXAMPLES")
print("=" * 80)

negative_results = predict_sentiment(negative_examples, model, tokenizer, device)

for i, result in enumerate(negative_results, 1):
    print(f"\nExample {i}:")
    print(f"Text: {result['text']}")
    print(f"Predicted: {result['sentiment']}")
    print(f"Confidence: {result['confidence']:.3f}")
    print(f"Positive: {result['positive_prob']:.3f} | Negative: {result['negative_prob']:.3f}")

    # Check if prediction is correct
    if result['sentiment'] == 'Negative':
        print("✓ CORRECT")
    else:
        print("✗ INCORRECT (Should be Negative)")
    print("-" * 80)

# Calculate accuracy
correct_negative = sum(1 for r in negative_results if r['sentiment'] == 'Negative')
negative_accuracy = correct_negative / len(negative_results)
print(f"\nNegative Examples Accuracy: {negative_accuracy:.2%} ({correct_negative}/{len(negative_results)})")

# ============================================
# TEST MIXED EXAMPLES
# ============================================

print("\n" + "=" * 80)
print("MIXED/NEUTRAL EXAMPLES (Expected: Variable)")
print("=" * 80)

mixed_results = predict_sentiment(mixed_examples, model, tokenizer, device)

for i, result in enumerate(mixed_results, 1):
    print(f"\nExample {i}:")
    print(f"Text: {result['text']}")
    print(f"Predicted: {result['sentiment']}")
    print(f"Confidence: {result['confidence']:.3f}")
    print(f"Positive: {result['positive_prob']:.3f} | Negative: {result['negative_prob']:.3f}")
    print("-" * 80)

# ============================================
# TEST TANGLISH (CODE-MIXED) EXAMPLES
# ============================================

print("\n" + "=" * 80)
print("TANGLISH (CODE-MIXED TAMIL-ENGLISH) EXAMPLES")
print("=" * 80)

tanglish_results = predict_sentiment(tanglish_examples, model, tokenizer, device)

for i, result in enumerate(tanglish_results, 1):
    print(f"\nExample {i}:")
    print(f"Text: {result['text']}")
    print(f"Predicted: {result['sentiment']}")
    print(f"Confidence: {result['confidence']:.3f}")
    print(f"Positive: {result['positive_prob']:.3f} | Negative: {result['negative_prob']:.3f}")
    print("-" * 80)

# ============================================
# OVERALL SUMMARY
# ============================================

print("\n" + "=" * 80)
print("OVERALL TESTING SUMMARY")
print("=" * 80)

total_correct = correct_positive + correct_negative
total_tested = len(positive_examples) + len(negative_examples)
overall_accuracy = total_correct / total_tested

print(f"\nPositive Examples: {positive_accuracy:.1%} correct")
print(f"Negative Examples: {negative_accuracy:.1%} correct")
print(f"Overall Accuracy: {overall_accuracy:.1%} ({total_correct}/{total_tested})")

# Average confidence scores
all_results = positive_results + negative_results
avg_confidence = np.mean([r['confidence'] for r in all_results])
print(f"Average Confidence: {avg_confidence:.3f}")

print("\n" + "=" * 80)


TESTING MODEL ON TAMIL SENTIMENT EXAMPLES

POSITIVE EXAMPLES

Example 1:
Text: இந்த திரைப்படம் மிகவும் அருமையாக இருந்தது
Predicted: Positive
Confidence: 0.785
Positive: 0.785 | Negative: 0.215
✓ CORRECT
--------------------------------------------------------------------------------

Example 2:
Text: இது சிறந்த உணவகம், உணவு மிகவும் சுவையாக இருந்தது
Predicted: Positive
Confidence: 0.610
Positive: 0.610 | Negative: 0.390
✓ CORRECT
--------------------------------------------------------------------------------

Example 3:
Text: நான் மிகவும் மகிழ்ச்சியாக இருக்கிறேன்
Predicted: Positive
Confidence: 0.899
Positive: 0.899 | Negative: 0.101
✓ CORRECT
--------------------------------------------------------------------------------

Example 4:
Text: அருமையான சேவை, நன்றி
Predicted: Positive
Confidence: 0.898
Positive: 0.898 | Negative: 0.102
✓ CORRECT
--------------------------------------------------------------------------------

Example 5:
Text: இந்த தொலைபேசி மிகவும் நன்றாக வேலை செய்கிறது
Pre