# IDL-RoBERTa for Sarcasm Detection
## Enhanced Model with Commonsense Incongruity Features

This notebook implements an Incongruity-Driven Learning (IDL) approach combined with RoBERTa for sarcasm detection using the **Sarcasm Headlines Dataset v2**.

**Dataset**: News headlines from The Onion (sarcastic) and HuffPost (non-sarcastic)

## 1. Install Required Dependencies

In [None]:
# Install the helper packages imported further down (progress bars, metrics/splitting, NLTK).
!pip install tqdm
!pip install scikit-learn
!pip install nltk
# Remove any preinstalled transformers/accelerate before reinstalling them below.
!pip uninstall -y transformers accelerate
# NOTE(review): this installs the unpinned development head of transformers, so the
# library version (and possibly results) can differ between runs; consider pinning a release.
!pip install git+https://github.com/huggingface/transformers.git
!pip install accelerate
# Force one exact protobuf version (presumably to avoid a version conflict in the runtime — TODO confirm).
!pip install protobuf==4.25.3 --force-reinstall

## 2. Import Libraries and Configure Environment

In [None]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import pandas as pd
import numpy as np
from tqdm import tqdm
import nltk
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaTokenizer, RobertaPreTrainedModel, RobertaModel, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, f1_score, classification_report
from collections import defaultdict
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import warnings

warnings.filterwarnings("ignore")

## 3. Download NLTK Resources

In [None]:
# Fetch the NLTK data packages this notebook relies on: the Punkt tokenizer
# resources (used by nltk.word_tokenize below) and the VADER sentiment lexicon.
nltk_resources = ('punkt', 'punkt_tab', 'vader_lexicon')
nltk_download_status = [nltk.download(resource_name, quiet=True) for resource_name in nltk_resources]

# Keep the last download's return value as the cell's displayed output.
nltk_download_status[-1]

## 4. Load Sarcasm Headlines Dataset

Load the JSON Lines dataset (one JSON object per line) and create a stratified 80/20 train/test split.

In [None]:
import json
from sklearn.model_selection import train_test_split

print("üìÇ Loading Sarcasm Headlines Dataset v2...")

# The dataset is stored as JSON Lines: one JSON object per line of the file.
dataset_path = '/kaggle/input/sarcasm-headlines-dataset/Sarcasm_Headlines_Dataset_v2.json'

# Decode explicitly as UTF-8 instead of relying on the platform default encoding,
# and skip blank lines (e.g. a trailing newline) that json.loads would reject.
data_list = []
with open(dataset_path, 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        data_list.append(json.loads(line))

# One row per headline record.
df = pd.DataFrame(data_list)

print(f"‚úÖ Loaded {len(df)} samples")
print(f"üìä Columns: {df.columns.tolist()}")
print(f"\nüìä Class distribution:")
print(df['is_sarcastic'].value_counts())
print(f"\nüìù Sample headlines:")
print(df[['headline', 'is_sarcastic']].head(10))

# Stratified 80/20 split with a fixed seed so the split is reproducible and
# both parts keep the same sarcastic/non-sarcastic proportions.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['is_sarcastic']
)

# Give both splits a clean 0..n-1 index; later cells index test_df positionally.
train_df = train_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

print(f"\n‚úÖ Train samples: {len(train_df)}")
print(f"‚úÖ Test samples: {len(test_df)}")
print(f"Train distribution: {train_df['is_sarcastic'].value_counts().to_dict()}")
print(f"Test distribution: {test_df['is_sarcastic'].value_counts().to_dict()}")

## 5. Build Enhanced Commonsense Dictionary

In [None]:
print("\nüî® Building enhanced commonsense dictionary...")
sia = SentimentIntensityAnalyzer()

# Collect sentiment scores per word and label
word_sentiments = defaultdict(lambda: {'non_sarc': [], 'sarc': []})

for _, row in tqdm(train_df.iterrows(), total=len(train_df), desc="Analyzing training data"):
    text = row['headline']  # Changed from 'Tweet' to 'headline'
    label = row['is_sarcastic']
    
    # Get sentence-level sentiment
    sent_score = sia.polarity_scores(text)['compound']
    
    # Tokenize
    tokens = nltk.word_tokenize(text.lower())
    
    for token in tokens:
        if token.isalpha() and len(token) > 2:  # Filter meaningful words
            if label == 0:
                word_sentiments[token]['non_sarc'].append(sent_score)
            else:
                word_sentiments[token]['sarc'].append(sent_score)

# Build incongruity dictionary with stronger signals
commonsense_dict = {}
min_count = 10  # Increased for reliability
threshold = 0.15  # Higher threshold for clearer signals

for word, sents in word_sentiments.items():
    non_sarc = sents['non_sarc']
    sarc = sents['sarc']
    
    if len(non_sarc) >= min_count and len(sarc) >= min_count:
        expected_sent = np.mean(non_sarc)
        sarcastic_sent = np.mean(sarc)
        incongruity = abs(expected_sent - sarcastic_sent)
        
        if incongruity > threshold:
            commonsense_dict[word] = {
                'expected': expected_sent,
                'sarcastic': sarcastic_sent,
                'incongruity': incongruity,
                'flip': 1 if (expected_sent > 0 and sarcastic_sent < 0) or 
                             (expected_sent < 0 and sarcastic_sent > 0) else 0
            }

print(f"‚úÖ Built dictionary with {len(commonsense_dict)} incongruent words")

# Show some examples
if len(commonsense_dict) > 0:
    print("\nüìù Sample incongruent words:")
    sample_words = list(commonsense_dict.items())[:10]
    for word, info in sample_words:
        print(f"  '{word}': incongruity={info['incongruity']:.3f}, flip={info['flip']}")

## 6. Define Incongruity Feature Extraction Function

In [None]:
def extract_incongruity_features(text, max_len=32):
    """Extract token-level and sentence-level incongruity features for one text.

    The text is lower-cased, split with ``nltk.word_tokenize`` and cut to the
    first ``max_len`` word tokens. Each remaining token is looked up in the
    module-level ``commonsense_dict``.

    Args:
        text: Input string (a headline).
        max_len: Number of token positions in the output; shorter texts are
            zero-padded, longer texts are truncated.

    Returns:
        A tuple ``(token_features, sentence_features)`` of float tensors:

        * ``token_features`` has shape ``(max_len, 5)``. Each row is
          ``[incongruity, flip, expected, sarcastic, expected - sarcastic]``
          for a dictionary word, or all zeros for unknown tokens and padding.
        * ``sentence_features`` has shape ``(4,)`` and holds
          ``[mean incongruity, max incongruity, std of incongruity, flip ratio]``
          over the dictionary hits, or all zeros when there are none.

    Note:
        The flip ratio is the number of flipped words divided by the number of
        tokens actually considered (after truncation). Dividing by the
        untruncated token count would mix counts taken over different spans
        and deflate the ratio for texts longer than ``max_len``.

        NOTE(review): rows are indexed by NLTK word position, whereas the model
        concatenates them with RoBERTa subword positions — confirm whether this
        positional mismatch is intended.
    """
    feature_dim = 5

    # Everything below works on the truncated token list only.
    tokens = nltk.word_tokenize(text.lower())[:max_len]

    token_features = []
    incongruity_scores = []
    flip_count = 0

    for token in tokens:
        info = commonsense_dict.get(token)
        if info is None:
            token_features.append([0.0] * feature_dim)
            continue

        incongruity_scores.append(info['incongruity'])
        flip_count += info['flip']
        token_features.append([
            info['incongruity'],                  # magnitude of incongruity
            info['flip'],                         # sentiment flip indicator
            info['expected'],                     # mean sentiment in non-sarcastic text
            info['sarcastic'],                    # mean sentiment in sarcastic text
            info['expected'] - info['sarcastic']  # signed difference
        ])

    # Zero-pad up to the fixed number of positions.
    token_features.extend([0.0] * feature_dim for _ in range(max_len - len(token_features)))

    if incongruity_scores:
        # A non-empty score list implies at least one token, so the division is safe.
        sentence_features = [
            np.mean(incongruity_scores),
            np.max(incongruity_scores),
            np.std(incongruity_scores),
            flip_count / len(tokens),
        ]
    else:
        sentence_features = [0.0, 0.0, 0.0, 0.0]

    return (torch.tensor(token_features, dtype=torch.float),
            torch.tensor(sentence_features, dtype=torch.float))

## 7. Define IDL-RoBERTa Model Architecture

In [None]:
class IDLRobertaForSarcasm(RobertaPreTrainedModel):
    """RoBERTa encoder with incongruity-aware attention pooling for 2-class sarcasm detection.

    Per-token incongruity features are concatenated to the RoBERTa hidden
    states to score each position; the softmax-normalised scores pool the
    hidden states into one context vector. That vector is concatenated with a
    32-dimensional projection of the sentence-level incongruity features and
    passed to an MLP classifier that outputs two logits.
    """

    def __init__(self, config):
        """Build the encoder and the task-specific heads from a RoBERTa ``config``."""
        super().__init__(config)
        self.roberta = RobertaModel(config)

        self.token_feature_dim = 5  # 5 features per token
        self.sentence_feature_dim = 4  # 4 sentence-level features

        # Scores each position from [hidden state ; token incongruity features].
        self.token_attention = nn.Sequential(
            nn.Linear(config.hidden_size + self.token_feature_dim, 256),
            nn.Tanh(),
            nn.Dropout(0.1),
            nn.Linear(256, 1)
        )

        # Projects the 4 sentence-level features to a 32-dimensional vector.
        self.feature_processor = nn.Sequential(
            nn.Linear(self.sentence_feature_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(64, 32)
        )

        # Classifies [pooled context ; projected sentence features] into 2 classes.
        self.classifier = nn.Sequential(
            nn.Linear(config.hidden_size + 32, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(64, 2)
        )

        self.dropout = nn.Dropout(0.1)

        # Hugging Face convention: finish every PreTrainedModel constructor with
        # post_init() so the library can run its weight-initialisation and
        # final-setup hooks for the newly added heads.
        self.post_init()

    def forward(self, input_ids, attention_mask, token_features, sentence_features,
                labels=None):
        """Run the model on a batch.

        Args:
            input_ids: Token ids passed to RoBERTa.
            attention_mask: Mask passed to RoBERTa; positions equal to 0 are
                excluded from the attention pooling.
            token_features: Per-position incongruity features; concatenated to
                the hidden states on the last dimension, so it must match them
                on the batch and sequence dimensions and have 5 features.
            sentence_features: Sentence-level incongruity features with 4
                values per example.
            labels: Optional class indices. When given, a cross-entropy loss
                is computed.

        Returns:
            ``{'loss': loss, 'logits': logits}`` when ``labels`` is provided,
            otherwise the bare ``logits`` tensor.
        """
        # Get RoBERTa embeddings
        outputs = self.roberta(input_ids, attention_mask=attention_mask)
        sequence_output = outputs[0]  # [batch, seq_len, hidden]

        # Combine RoBERTa embeddings with token-level incongruity features
        combined = torch.cat([sequence_output, token_features], dim=-1)

        # One score per position; masked positions get -inf so that softmax
        # assigns them zero weight.
        attention_scores = self.token_attention(combined).squeeze(-1)
        attention_scores = attention_scores.masked_fill(attention_mask == 0, float('-inf'))
        attention_weights = torch.softmax(attention_scores, dim=-1)

        # Attention-weighted sum of hidden states: [batch, 1, seq] x [batch, seq, hidden].
        context = torch.bmm(attention_weights.unsqueeze(1), sequence_output).squeeze(1)
        context = self.dropout(context)

        # Process sentence-level incongruity features
        sentence_repr = self.feature_processor(sentence_features)

        # Combine context and incongruity features
        final_repr = torch.cat([context, sentence_repr], dim=-1)

        # Classification
        logits = self.classifier(final_repr)

        # Loss is only computed when labels are supplied.
        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, 2), labels.view(-1))

        return {'loss': loss, 'logits': logits} if loss is not None else logits

## 8. Create Custom Dataset Class

In [None]:
class SarcasmDataset(Dataset):
    """Map-style dataset turning headline records into model-ready tensors.

    Every record is a mapping with a ``'headline'`` string and an
    ``'is_sarcastic'`` label. Items are produced lazily on access.
    """

    def __init__(self, data, tokenizer, max_len=32):
        """Store the records, the tokenizer and the fixed sequence length."""
        self.data, self.tokenizer, self.max_len = data, tokenizer, max_len

    def __len__(self):
        """Return the number of records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the tensors for record ``idx``.

        The result holds ``input_ids`` and ``attention_mask`` from the
        tokenizer (padded/truncated to ``max_len``, batch dimension removed),
        the token- and sentence-level incongruity features, and the label as
        a long tensor under ``labels``.
        """
        record = self.data[idx]
        headline = record['headline']
        target = record['is_sarcastic']

        encoded = self.tokenizer(
            headline,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # Hand-crafted features, computed with the same length limit.
        token_feats, sentence_feats = extract_incongruity_features(headline, self.max_len)

        # The tokenizer returns tensors with a leading batch dimension of 1; drop it.
        sample = {field: encoded[field].squeeze(0) for field in ('input_ids', 'attention_mask')}
        sample['token_features'] = token_feats
        sample['sentence_features'] = sentence_feats
        sample['labels'] = torch.tensor(target, dtype=torch.long)
        return sample

## 9. Prepare Training and Evaluation Datasets

In [None]:
print("\nüìä Preparing datasets...")
train_data = train_df.to_dict('records')
eval_data = test_df.to_dict('records')

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
train_dataset = SarcasmDataset(train_data, tokenizer)
eval_dataset = SarcasmDataset(eval_data, tokenizer)

print(f"‚úÖ Train dataset: {len(train_dataset)} samples")
print(f"‚úÖ Eval dataset: {len(eval_dataset)} samples")

## 10. Define Metrics Computation Function

In [None]:
def compute_metrics(pred):
    """Compute accuracy and binary F1 from a prediction object.

    ``pred.predictions`` holds per-class scores; the predicted class is the
    argmax over the last axis. ``pred.label_ids`` holds the gold labels; if
    it is a tuple, only its first element is used.

    Returns:
        A dict with the keys ``'accuracy'`` and ``'f1'``.
    """
    gold = pred.label_ids
    gold = gold[0] if isinstance(gold, tuple) else gold
    predicted = pred.predictions.argmax(-1)

    accuracy = accuracy_score(gold, predicted)
    binary_f1 = f1_score(gold, predicted, average='binary')
    return {'accuracy': accuracy, 'f1': binary_f1}

## 11. Configure Training Arguments

In [None]:
# Mixed-precision (fp16) training needs a GPU. The notebook otherwise falls
# back to CPU when CUDA is unavailable, so only enable fp16 when a CUDA device
# is present instead of hard-coding it.
use_fp16 = torch.cuda.is_available()

training_args = TrainingArguments(
    output_dir='/kaggle/working/idl_roberta_results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='/kaggle/working/logs',
    logging_steps=50,
    # Evaluate and checkpoint once per epoch; the two strategies must match
    # for load_best_model_at_end to work.
    eval_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
    metric_for_best_model='f1',  # the 'f1' key returned by compute_metrics
    learning_rate=2e-5,
    report_to='none',
    fp16=use_fp16,
    gradient_accumulation_steps=2,  # Effective batch size: 32
    save_total_limit=2,
)

## 12. Initialize Model and Trainer

In [None]:
print("\nüöÄ Initializing IDL-RoBERTa model...")
model = IDLRobertaForSarcasm.from_pretrained('roberta-base')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

print(f"‚úÖ Model loaded on {device}")
print(f"üìä Total parameters: {sum(p.numel() for p in model.parameters()):,}")
print(f"üìä Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics
)

## 13. Train the Model

In [None]:
print("\nüöÄ Starting training...")
trainer.train()

## 14. Evaluate and Generate Classification Report

In [None]:
section_rule = "=" * 60

print("\n" + section_rule)
print("üìä FINAL EVALUATION ON TEST SET")
print(section_rule)

# Aggregate metrics on the evaluation dataset given to the Trainer.
results = trainer.evaluate()
for metric_name, metric_value in results.items():
    print(f"{metric_name}: {metric_value:.4f}")

# Per-example predictions for the detailed report; the predicted class is the
# argmax over the two logits.
predictions = trainer.predict(eval_dataset)
true_labels = predictions.label_ids
preds = predictions.predictions.argmax(-1)

print("\n" + section_rule)
print("üìä DETAILED CLASSIFICATION REPORT")
print(section_rule)
report_text = classification_report(
    true_labels,
    preds,
    target_names=['Non-Sarcastic', 'Sarcastic'],
    digits=4,
)
print(report_text)

print("\n‚úÖ Training complete!")
print(f"üíæ Best model saved to: {training_args.output_dir}")

In [None]:
# Write the dataset sizes, evaluation metrics and classification report to a text file.
results_file = '/kaggle/working/final_results.txt'
file_rule = "=" * 70

with open(results_file, 'w') as out:
    out.writelines([
        file_rule + "\n",
        "IDL-ROBERTA SARCASM DETECTION RESULTS\n",
        file_rule + "\n\n",
        f"Dataset: Sarcasm Headlines Dataset v2\n",
        f"Total samples: {len(df)}\n",
        f"Train samples: {len(train_df)}\n",
        f"Test samples: {len(test_df)}\n\n",
        "Evaluation Metrics:\n",
    ])
    out.writelines(f"  {metric_name}: {metric_value:.4f}\n"
                   for metric_name, metric_value in results.items())
    out.writelines([
        "\n" + file_rule + "\n",
        "DETAILED CLASSIFICATION REPORT\n",
        file_rule + "\n",
    ])
    out.write(classification_report(true_labels, preds,
                                    target_names=['Non-Sarcastic', 'Sarcastic'],
                                    digits=4))

print(f"\nüìÑ Results saved to: {results_file}")


def _label_name(label_value):
    """Map a class id to its display name (1 is sarcastic, anything else is not)."""
    return "Sarcastic" if label_value == 1 else "Non-Sarcastic"


# Show ten randomly chosen test headlines with their true and predicted labels.
print("\n" + file_rule)
print("üìù SAMPLE PREDICTIONS")
print(file_rule)
sample_indices = np.random.choice(len(test_df), 10, replace=False)
for position in sample_indices:
    # preds is in eval-dataset order, which matches the row order of test_df.
    test_row = test_df.iloc[position]
    headline = test_row['headline']
    true_label = test_row['is_sarcastic']
    pred_label = preds[position]

    emoji = "‚úÖ" if true_label == pred_label else "‚ùå"

    print(f"\n{emoji} Headline: {headline}")
    print(f"   True: {_label_name(true_label)} | Predicted: {_label_name(pred_label)}")

## 15. Save Results to File