# RoBERTa Baseline for Sarcasm Detection
## Standard RoBERTa Model (No Incongruity Features)

This notebook implements a **baseline RoBERTa model** for sarcasm detection on the Sarcasm Headlines Dataset v2.

**Purpose**: Compare performance with the enhanced IDL-RoBERTa model

**Dataset**: News headlines from The Onion (sarcastic) and HuffPost (non-sarcastic)

## 1. Install Required Dependencies

In [None]:
!pip install -q --upgrade accelerate>=0.26.0
!pip install -q --upgrade transformers>=4.30.0
!pip install -q torch pandas numpy scikit-learn tqdm

## 2. Import Libraries

In [None]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import pandas as pd
import numpy as np
import json
from tqdm import tqdm
import torch
from torch.utils.data import Dataset
from transformers import (
    RobertaTokenizer, 
    RobertaForSequenceClassification, 
    Trainer, 
    TrainingArguments
)
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
import warnings

warnings.filterwarnings("ignore")

# Report the runtime environment so the run can be reproduced later.
print(f"PyTorch version: {torch.__version__}")
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    # Only query the device name when a CUDA device is actually present.
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 3. Load Sarcasm Headlines Dataset

Load the JSON Lines dataset (one JSON object per line) and perform a stratified 80/20 train/test split.

In [None]:
print("üìÇ Loading Sarcasm Headlines Dataset v2...")

# Location of the dataset inside the Kaggle input mount
dataset_path = '/kaggle/input/sarcasm-headlines-dataset/Sarcasm_Headlines_Dataset_v2.json'

# The file holds one JSON document per line; blank lines are skipped
with open(dataset_path, 'r', encoding='utf-8') as f:
    data_list = [json.loads(raw) for raw in map(str.strip, f) if raw]

# Build the frame, drop rows missing either field, trim headline whitespace,
# discard headlines that end up empty, and renumber the rows
df = (
    pd.DataFrame(data_list)
    .dropna(subset=['headline', 'is_sarcastic'])
    .assign(headline=lambda frame: frame['headline'].astype(str).str.strip())
    .loc[lambda frame: frame['headline'].str.len() > 0]
    .reset_index(drop=True)
)

print(f"‚úÖ Loaded {len(df)} samples")
print("\nüìä Class distribution:")
print(df['is_sarcastic'].value_counts())
print("\nüìù Sample headlines:")
display(df[['headline', 'is_sarcastic']].head(10))

# Stratified 80/20 split with a fixed seed; both parts get a fresh 0..n-1 index
train_df, test_df = (
    part.reset_index(drop=True)
    for part in train_test_split(
        df,
        test_size=0.2,
        random_state=42,
        stratify=df['is_sarcastic'],
    )
)

print(f"\n‚úÖ Train samples: {len(train_df)}")
print(f"‚úÖ Test samples: {len(test_df)}")
train_counts = train_df['is_sarcastic'].value_counts().to_dict()
test_counts = test_df['is_sarcastic'].value_counts().to_dict()
print(f"Train distribution: {train_counts}")
print(f"Test distribution: {test_counts}")

## 4. Create Dataset Class

In [None]:
class SarcasmDataset(Dataset):
    """Map-style dataset that tokenizes one headline per lookup.

    Args:
        data: Indexable collection of records; each record is read with the
            keys ``'headline'`` and ``'is_sarcastic'``.
        tokenizer: Callable invoked as
            ``tokenizer(text, max_length=..., padding=..., truncation=..., return_tensors=...)``
            whose result is indexed by ``'input_ids'`` and ``'attention_mask'``.
        max_len: Length every example is padded/truncated to (default 128).
    """

    def __init__(self, data, tokenizer, max_len=128):
        self.data, self.tokenizer, self.max_len = data, tokenizer, max_len

    def __len__(self):
        """Number of records in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the model inputs and label for record ``idx`` as a dict of tensors."""
        record = self.data[idx]

        # The tokenizer returns tensors with a leading dimension of size 1
        # (return_tensors='pt'); it is squeezed away below.
        encoded = self.tokenizer(
            record['headline'],
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )

        sample = {
            field: encoded[field].squeeze(0)
            for field in ('input_ids', 'attention_mask')
        }
        sample['labels'] = torch.tensor(record['is_sarcastic'], dtype=torch.long)
        return sample

print("‚úÖ Dataset class defined")

## 5. Prepare Datasets

In [None]:
print("üìä Preparing datasets...")

# One dict per row, which is the record format SarcasmDataset indexes into
train_data = train_df.to_dict(orient='records')
eval_data = test_df.to_dict(orient='records')

# Tokenizer matching the 'roberta-base' checkpoint
print("üì• Loading RoBERTa tokenizer...")
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Wrap both splits with the same tokenizer and sequence length
train_dataset, eval_dataset = (
    SarcasmDataset(records, tokenizer, max_len=128)
    for records in (train_data, eval_data)
)

print(f"‚úÖ Train dataset: {len(train_dataset)} samples")
print(f"‚úÖ Eval dataset: {len(eval_dataset)} samples")

## 6. Define Metrics Function

In [None]:
def compute_metrics(pred):
    """Score a batch of predictions.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions`` (per-class scores; the arg-max over the last
            axis is taken as the predicted class).

    Returns:
        dict with keys ``'accuracy'`` and ``'f1'`` (binary-averaged F1).
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)

    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'f1': f1_score(y_true, y_pred, average='binary'),
    }

print("‚úÖ Metrics function defined")

## 7. Configure Training Arguments

In [None]:
print("‚öôÔ∏è  Configuring training arguments...")

# NOTE(review): evaluation runs every epoch and the best checkpoint is chosen
# by F1 on the Trainer's eval_dataset. In this notebook that dataset is built
# from test_df, so model selection and the final report use the same split —
# consider carving a validation split out of the training data.
training_args = TrainingArguments(
    # --- reproducibility ---
    seed=42,
    # --- optimisation ---
    num_train_epochs=5,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_steps=500,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    fp16=torch.cuda.is_available(),  # mixed precision only when CUDA is available
    # --- evaluation and checkpointing ---
    eval_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    output_dir='/kaggle/working/roberta_baseline_results',
    # --- logging ---
    logging_dir='/kaggle/working/logs',
    logging_steps=50,
    report_to='none',
)

print(
    "‚úÖ Training configuration:",
    f"   Epochs: {training_args.num_train_epochs}",
    f"   Batch size: {training_args.per_device_train_batch_size}",
    f"   Learning rate: {training_args.learning_rate}",
    f"   FP16: {training_args.fp16}",
    sep="\n",
)

## 8. Load RoBERTa Model

In [None]:
print("üöÄ Loading RoBERTa model...")

# Pre-trained 'roberta-base' encoder configured for 2-label,
# single-label classification
model = RobertaForSequenceClassification.from_pretrained(
    'roberta-base',
    problem_type="single_label_classification",
    num_labels=2,
)

# Prefer the GPU when one is available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(
    p.numel() for p in filter(lambda p: p.requires_grad, model.parameters())
)

print(f"‚úÖ Model loaded on {device}")
print(f"üìä Total parameters: {total_params:,}")
print(f"üìä Trainable parameters: {trainable_params:,}")

## 9. Initialize Trainer

In [None]:
print("üîß Initializing Trainer...")

# Bundle the model, its configuration, both datasets, and the metric callback
# into a single Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

print("‚úÖ Trainer initialized successfully")

## 10. Train the Model

In [None]:
print("üöÄ Starting training...")
print("="*70)

# Fine-tune the model. The training arguments request
# load_best_model_at_end=True, so the model held by the trainer afterwards
# is the checkpoint with the best F1.
train_result = trainer.train()

# Persist the final model explicitly. Without this only the per-epoch
# checkpoint sub-directories exist, and the "Model saved to" message below
# would not be true. The tokenizer is saved alongside so that the directory
# can be reloaded with from_pretrained().
trainer.save_model(training_args.output_dir)
tokenizer.save_pretrained(training_args.output_dir)

print("\n‚úÖ Training completed!")
print(f"üíæ Model saved to: {training_args.output_dir}")

## 11. Evaluate on Test Set

In [None]:
print("=" * 70)
print("üìä EVALUATING ON TEST SET")
print("=" * 70)

# Aggregate metrics over the Trainer's eval_dataset
results = trainer.evaluate()

print("\nüìä Evaluation Metrics:")
for metric_name, metric_value in results.items():
    print(f"   {metric_name}: {metric_value:.4f}")

# Second pass over the same dataset to obtain per-example outputs
predictions = trainer.predict(eval_dataset)
true_labels = predictions.label_ids
preds = predictions.predictions.argmax(-1)  # highest-scoring class per example

report = classification_report(
    true_labels,
    preds,
    digits=4,
    target_names=['Non-Sarcastic', 'Sarcastic'],
)

print("\n" + "=" * 70)
print("üìä CLASSIFICATION REPORT")
print("=" * 70)
print(report)

## 12. Save Results and Predictions

In [None]:
# --- Plain-text summary -------------------------------------------------
results_file = '/kaggle/working/roberta_baseline_results.txt'
rule = "=" * 70

header_lines = [
    rule + "\n",
    "ROBERTA BASELINE - SARCASM DETECTION RESULTS\n",
    rule + "\n\n",
    "Model: RoBERTa-base (Standard)\n",
    "Dataset: Sarcasm Headlines Dataset v2\n",
    f"Total samples: {len(df)}\n",
    f"Train samples: {len(train_df)}\n",
    f"Test samples: {len(test_df)}\n\n",
    "Evaluation Metrics:\n",
]

with open(results_file, 'w', encoding='utf-8') as out:
    out.writelines(header_lines)
    # One line per metric, four decimal places
    out.writelines(
        f"  {metric_name}: {metric_value:.4f}\n"
        for metric_name, metric_value in results.items()
    )
    out.write("\n" + rule + "\n")
    out.write("CLASSIFICATION REPORT\n")
    out.write(rule + "\n")
    out.write(
        classification_report(
            true_labels,
            preds,
            digits=4,
            target_names=['Non-Sarcastic', 'Sarcastic'],
        )
    )

print(f"üìÑ Results saved to: {results_file}")

# --- Per-example predictions ---------------------------------------------
is_correct = true_labels == preds
predictions_csv = '/kaggle/working/roberta_baseline_predictions.csv'
predictions_df = pd.DataFrame({
    'headline': test_df['headline'].values,
    'true_label': true_labels,
    'predicted_label': preds,
    'correct': is_correct.astype(int),  # 1 when the prediction matches the label
})
predictions_df.to_csv(predictions_csv, index=False)
print(f"üìÑ Predictions saved to: {predictions_csv}")

# Fraction of examples whose predicted label equals the true label
accuracy = is_correct.mean()
print(f"\n‚úÖ Overall Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")

## 13. Display Sample Predictions

In [None]:
print("="*70)
print("üìù SAMPLE PREDICTIONS")
print("="*70)

# Draw the examples from a seeded generator so the same rows are shown on
# every run (42 matches the seed used for the split and the Trainer).
rng = np.random.default_rng(42)
# Sampling without replacement raises if more rows are requested than exist,
# so cap the sample size at the number of test rows.
n_samples = min(15, len(test_df))
sample_indices = rng.choice(len(test_df), size=n_samples, replace=False)

max_chars = 90  # headlines longer than this are shortened for display

for idx in sample_indices:
    headline = test_df.iloc[idx]['headline']
    true_label = true_labels[idx]
    pred_label = preds[idx]

    emoji = "‚úÖ" if true_label == pred_label else "‚ùå"
    true_str = "Sarcastic" if true_label == 1 else "Non-Sarcastic"
    pred_str = "Sarcastic" if pred_label == 1 else "Non-Sarcastic"

    # Append the ellipsis only when the headline was actually cut
    shown = headline if len(headline) <= max_chars else headline[:max_chars] + "..."

    print(f"\n{emoji} {shown}")
    print(f"   True: {true_str:15} | Predicted: {pred_str}")

print("\n" + "="*70)
print("üéâ BASELINE EXPERIMENT COMPLETED!")
print("="*70)

## 14. Performance Summary

### Model Comparison

**RoBERTa Baseline**: Standard RoBERTa-base model
- No additional features
- Direct classification from text
- Simpler architecture

**IDL-RoBERTa** (see the companion notebook for its results):
- Enhanced with incongruity features
- Commonsense knowledge integration
- Token-level attention mechanism

### Next Steps

Compare the results from this baseline with the IDL-RoBERTa model to measure the improvement gained from incongruity features!