# Sentiment Analysis with VADER & TextBlob

This notebook implements rule-based sentiment analysis using:
- **VADER** - Valence Aware Dictionary and sEntiment Reasoner, optimized for social media
- **TextBlob** - Lexicon-based sentiment analysis library

Performance evaluation is conducted against ground truth labels.

## 1. Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import warnings

# Sentiment Analysis libraries
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

# Model evaluation
from sklearn.metrics import (
    accuracy_score, 
    precision_score, 
    recall_score, 
    f1_score,
    confusion_matrix,
    classification_report,
    roc_auc_score,
    roc_curve
)

# NOTE: this silences every warning for the rest of the session (including any
# emitted by scikit-learn or pandas); narrow the filter when debugging.
warnings.filterwarnings('ignore')

# Register `progress_apply` on pandas objects; later cells call it on a Series.
tqdm.pandas()

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*', so the
# new name raises OSError on older installs. Use whichever darkgrid variant is
# available; if neither exists the default style is left in place.
for _style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

print('Libraries imported successfully!')

## 2. Load Preprocessed Data

Load the cleaned dataset and inspect its size, columns, and label distribution. The balanced sample is drawn from it in the next section.

In [None]:
# Read the cleaned tweets CSV into the working frame used by every later cell
tweets_csv = 'dataset/cleaned_tweets.csv'
df = pd.read_csv(tweets_csv)

# Report size and schema, then the class balance of the labelled data
print(f'Dataset loaded: {len(df):,} tweets')
print(f'\nColumns: {list(df.columns)}')
print(f'\nSentiment Distribution:')
label_counts = df['sentiment_label'].value_counts()
print(label_counts)

# Last expression: rich preview of the first rows
df.head()

## 3. Create Sample Dataset

For faster testing and comparison, we'll create a balanced sample of 100,000 tweets (50,000 negative and 50,000 positive).

In [None]:
# Target number of tweets per sentiment class (2 * SAMPLE_SIZE rows in total)
SAMPLE_SIZE = 50000  # 50k tweets per sentiment (100k total)

negative_tweets = df[df['sentiment'] == 0]
positive_tweets = df[df['sentiment'] == 1]

# DataFrame.sample(n=...) raises ValueError when n exceeds the rows available,
# so cap the per-class draw at the smaller class. With at least SAMPLE_SIZE
# rows in both classes this is exactly the original draw; otherwise the sample
# shrinks but stays balanced.
n_per_class = min(SAMPLE_SIZE, len(negative_tweets), len(positive_tweets))
if n_per_class == 0:
    raise ValueError(
        "Cannot build a balanced sample: column 'sentiment' has no rows for class 0 or class 1"
    )
if n_per_class < SAMPLE_SIZE:
    print(f'Note: only {n_per_class:,} tweets per class available (requested {SAMPLE_SIZE:,})')

# Draw per class, then shuffle so the two classes are interleaved.
# Fixed random_state values keep the sample identical across reruns.
df_sample = pd.concat([
    negative_tweets.sample(n=n_per_class, random_state=42),
    positive_tweets.sample(n=n_per_class, random_state=42)
]).sample(frac=1, random_state=42).reset_index(drop=True)

print(f'Sample created: {len(df_sample):,} tweets')
print(f'\nSample Sentiment Distribution:')
print(df_sample['sentiment_label'].value_counts())
print(f'\nBalance: {(df_sample["sentiment_label"].value_counts() / len(df_sample) * 100).round(2).to_dict()}')

## 4. VADER Sentiment Analysis

VADER is specifically designed for social media text and provides:
- Positive score
- Negative score  
- Neutral score
- Compound score (normalized to the range -1 to +1)

In [None]:
# Create a single module-level VADER analyzer. get_vader_sentiment() reads this
# global on every call, so this line must run before that function is used.
vader_analyzer = SentimentIntensityAnalyzer()

def get_vader_sentiment(text):
    """
    Score one piece of text with VADER and reduce it to a binary label.

    Parameters
    ----------
    text : str
        Text to score. Missing values (None / NaN / pd.NA) are scored as an
        empty string and any other non-string value is converted with str(),
        so a blank cell does not abort a long `apply` run.

    Returns
    -------
    tuple
        (compound, sentiment): the VADER compound score and the class,
        1 for positive or 0 for negative.
    """
    if not isinstance(text, str):
        is_missing = pd.api.types.is_scalar(text) and pd.isna(text)
        text = '' if is_missing else str(text)

    scores = vader_analyzer.polarity_scores(text)
    compound = scores['compound']

    # Classification based on compound score
    if compound >= 0.05:
        sentiment = 1  # Positive
    elif compound <= -0.05:
        sentiment = 0  # Negative
    else:
        # Inside the neutral band there is no neutral class to return, so fall
        # back to whichever of pos/neg is larger; an exact tie counts as negative.
        sentiment = 1 if scores['pos'] > scores['neg'] else 0

    return compound, sentiment

print('VADER analyzer initialized!')

### 4.1 Apply VADER to Sample Data

In [None]:
print('Analyzing sentiment with VADER...')
print('This may take a few minutes...')

# Score the original (uncleaned) tweet text; every element of the result is a
# (compound score, 0/1 label) pair returned by get_vader_sentiment
vader_results = df_sample['text_original'].progress_apply(get_vader_sentiment)

# Split the pairs into their own columns, then add a readable label column
df_sample['vader_compound'] = [compound for compound, _label in vader_results]
df_sample['vader_sentiment'] = [label for _compound, label in vader_results]
vader_label_names = {0: 'Negative', 1: 'Positive'}
df_sample['vader_label'] = df_sample['vader_sentiment'].map(vader_label_names)

print('\nVADER analysis completed!')

### 4.2 VADER Results Preview

In [None]:
# Show the first ten sampled tweets with the true label next to VADER's prediction
print('Sample VADER Results:')
print('='*100)

vader_preview_cols = ['text_original', 'sentiment_label', 'vader_compound', 'vader_label']
sample_display = df_sample[vader_preview_cols].head(10)
for row in sample_display.itertuples(index=False):
    # Flag whether the prediction agrees with the ground truth
    if row.sentiment_label == row.vader_label:
        match = 'Match'
    else:
        match = 'Mismatch'
    print(f"\n[{match}] Tweet: {row.text_original[:80]}...")
    print(f"   True: {row.sentiment_label:8s} | VADER: {row.vader_label:8s} (Score: {row.vader_compound:.3f})")

## 5. TextBlob Sentiment Analysis

TextBlob provides:
- Polarity: Float from -1 (negative) to +1 (positive)
- Subjectivity: Float from 0 (objective) to 1 (subjective)

In [None]:
def get_textblob_sentiment(text):
    """
    Score one piece of text with TextBlob and reduce it to a binary label.

    Parameters
    ----------
    text : str
        Text to score. Non-string values (e.g. NaN from a blank cell) are not
        passed to TextBlob and yield the fallback result.

    Returns
    -------
    tuple
        (polarity, sentiment): the TextBlob polarity and the class, 1 when
        polarity is strictly positive, otherwise 0. The fallback (0.0, 0) is
        returned for non-string input or when TextBlob raises.
    """
    # Handle missing / non-string cells explicitly instead of relying on
    # TextBlob raising and the handler below catching it.
    if not isinstance(text, str):
        return 0.0, 0

    try:
        polarity = TextBlob(text).sentiment.polarity
    except Exception:
        # Best-effort scoring: one bad row must not abort the whole run.
        # `Exception` rather than a bare `except:` so KeyboardInterrupt and
        # SystemExit still propagate and a long apply can be interrupted.
        return 0.0, 0

    # Classification based on polarity; exactly 0 counts as negative
    sentiment = 1 if polarity > 0 else 0
    return polarity, sentiment

print('TextBlob analyzer ready!')

### 5.1 Apply TextBlob to Sample Data

In [None]:
print('Analyzing sentiment with TextBlob...')
print('This may take a few minutes...')

# Score the original tweet text; every element of the result is a
# (polarity, 0/1 label) pair returned by get_textblob_sentiment
textblob_results = df_sample['text_original'].progress_apply(get_textblob_sentiment)

# Split the pairs into their own columns, then add a readable label column
df_sample['textblob_polarity'] = [polarity for polarity, _label in textblob_results]
df_sample['textblob_sentiment'] = [label for _polarity, label in textblob_results]
textblob_label_names = {0: 'Negative', 1: 'Positive'}
df_sample['textblob_label'] = df_sample['textblob_sentiment'].map(textblob_label_names)

print('\nTextBlob analysis completed!')

### 5.2 TextBlob Results Preview

In [None]:
# Show the first ten sampled tweets with the true label next to TextBlob's prediction
print('Sample TextBlob Results:')
print('='*100)

textblob_preview_cols = ['text_original', 'sentiment_label', 'textblob_polarity', 'textblob_label']
sample_display = df_sample[textblob_preview_cols].head(10)
for row in sample_display.itertuples(index=False):
    # Flag whether the prediction agrees with the ground truth
    if row.sentiment_label == row.textblob_label:
        match = 'Match'
    else:
        match = 'Mismatch'
    print(f"\n[{match}] Tweet: {row.text_original[:80]}...")
    print(f"   True: {row.sentiment_label:8s} | TextBlob: {row.textblob_label:8s} (Polarity: {row.textblob_polarity:.3f})")

## 6. Model Evaluation

Let's evaluate both models using various metrics.

### 6.1 VADER Performance Metrics

In [None]:
# Ground-truth classes and VADER's hard 0/1 predictions
y_true = df_sample['sentiment']
y_pred_vader = df_sample['vader_sentiment']

# Label-based metrics are computed from the 0/1 predictions ...
vader_accuracy, vader_precision, vader_recall, vader_f1 = (
    metric(y_true, y_pred_vader)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
# ... while ROC-AUC is computed from the continuous compound score
vader_roc_auc = roc_auc_score(y_true, df_sample['vader_compound'])

vader_report_lines = [
    'VADER SENTIMENT ANALYZER - PERFORMANCE METRICS',
    '='*70,
    f'\nAccuracy:  {vader_accuracy:.4f} ({vader_accuracy*100:.2f}%)',
    f'Precision: {vader_precision:.4f}',
    f'Recall:    {vader_recall:.4f}',
    f'F1-Score:  {vader_f1:.4f}',
    f'ROC-AUC:   {vader_roc_auc:.4f}',
    '\n' + '='*70,
]
print('\n'.join(vader_report_lines))

In [None]:
# Per-class precision / recall / F1 for VADER's predictions
print('\nVADER - Detailed Classification Report:')
vader_class_report = classification_report(
    y_true,
    y_pred_vader,
    target_names=['Negative', 'Positive'],
)
print(vader_class_report)

### 6.2 TextBlob Performance Metrics

In [None]:
# TextBlob's hard 0/1 predictions (y_true comes from the VADER metrics cell)
y_pred_textblob = df_sample['textblob_sentiment']

# Label-based metrics are computed from the 0/1 predictions ...
textblob_accuracy, textblob_precision, textblob_recall, textblob_f1 = (
    metric(y_true, y_pred_textblob)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)
# ... while ROC-AUC is computed from the continuous polarity score
textblob_roc_auc = roc_auc_score(y_true, df_sample['textblob_polarity'])

textblob_report_lines = [
    'TEXTBLOB SENTIMENT ANALYZER - PERFORMANCE METRICS',
    '='*70,
    f'\nAccuracy:  {textblob_accuracy:.4f} ({textblob_accuracy*100:.2f}%)',
    f'Precision: {textblob_precision:.4f}',
    f'Recall:    {textblob_recall:.4f}',
    f'F1-Score:  {textblob_f1:.4f}',
    f'ROC-AUC:   {textblob_roc_auc:.4f}',
    '\n' + '='*70,
]
print('\n'.join(textblob_report_lines))

In [None]:
# Per-class precision / recall / F1 for TextBlob's predictions
print('\nTextBlob - Detailed Classification Report:')
textblob_class_report = classification_report(
    y_true,
    y_pred_textblob,
    target_names=['Negative', 'Positive'],
)
print(textblob_class_report)

## 7. Confusion Matrices

In [None]:
# Side-by-side confusion matrices: VADER on the left, TextBlob on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

cm_vader = confusion_matrix(y_true, y_pred_vader)
cm_textblob = confusion_matrix(y_true, y_pred_textblob)

# One entry per panel: (axes, matrix, colour map, title)
cm_panels = [
    (axes[0], cm_vader, 'Blues',
     f'VADER Confusion Matrix\nAccuracy: {vader_accuracy:.2%}'),
    (axes[1], cm_textblob, 'Greens',
     f'TextBlob Confusion Matrix\nAccuracy: {textblob_accuracy:.2%}'),
]

for panel_ax, matrix, color_map, panel_title in cm_panels:
    # Rows are true classes, columns are predicted classes
    sns.heatmap(matrix, annot=True, fmt='d', cmap=color_map, ax=panel_ax,
                xticklabels=['Negative', 'Positive'],
                yticklabels=['Negative', 'Positive'])
    panel_ax.set_title(panel_title, fontsize=14, fontweight='bold')
    panel_ax.set_ylabel('True Label', fontsize=12)
    panel_ax.set_xlabel('Predicted Label', fontsize=12)

plt.tight_layout()
plt.show()

## 8. ROC Curves

In [None]:
# ROC points are computed from the continuous scores, not the 0/1 predictions
fpr_vader, tpr_vader, _ = roc_curve(y_true, df_sample['vader_compound'])
fpr_textblob, tpr_textblob, _ = roc_curve(y_true, df_sample['textblob_polarity'])

# Draw both curves on one explicit Axes, with the chance diagonal for reference
roc_fig, roc_ax = plt.subplots(figsize=(10, 6))
roc_ax.plot(fpr_vader, tpr_vader, label=f'VADER (AUC = {vader_roc_auc:.4f})', linewidth=2)
roc_ax.plot(fpr_textblob, tpr_textblob, label=f'TextBlob (AUC = {textblob_roc_auc:.4f})', linewidth=2)
roc_ax.plot([0, 1], [0, 1], 'k--', label='Random Classifier', linewidth=1)

roc_ax.set_xlabel('False Positive Rate', fontsize=12)
roc_ax.set_ylabel('True Positive Rate', fontsize=12)
roc_ax.set_title('ROC Curves - Rule-Based Sentiment Analysis', fontsize=14, fontweight='bold')
roc_ax.legend(fontsize=11)
roc_ax.grid(alpha=0.3)
roc_fig.tight_layout()
plt.show()

## 9. Model Comparison

In [None]:
# One row per model, one column per metric
comparison_columns = ['Model', 'Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC']
comparison_rows = [
    ('VADER', vader_accuracy, vader_precision, vader_recall, vader_f1, vader_roc_auc),
    ('TextBlob', textblob_accuracy, textblob_precision, textblob_recall, textblob_f1, textblob_roc_auc),
]
comparison_df = pd.DataFrame(comparison_rows, columns=comparison_columns)

divider = '='*80
print('\nMODEL COMPARISON - RULE-BASED APPROACHES')
print(divider)
print(comparison_df.to_string(index=False))
print(divider)

# The winner is the model with the highest F1-Score (the first one listed on a tie)
f1_by_model = comparison_df.set_index('Model')['F1-Score']
best_model = f1_by_model.idxmax()
print(f'\nBest Performing Model: {best_model}')

In [None]:
# Grouped bar chart: one group per metric, one bar per model
metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC']
vader_scores = [vader_accuracy, vader_precision, vader_recall, vader_f1, vader_roc_auc]
textblob_scores = [textblob_accuracy, textblob_precision, textblob_recall, textblob_f1, textblob_roc_auc]

width = 0.35
x = np.arange(len(metrics))

fig, ax = plt.subplots(figsize=(12, 6))
# Shift each model half a bar width off the tick so the pair is centred on it
bars1 = ax.bar(x - width/2, vader_scores, width, label='VADER', color='skyblue', edgecolor='black')
bars2 = ax.bar(x + width/2, textblob_scores, width, label='TextBlob', color='lightcoral', edgecolor='black')

# Write each bar's value just above its top edge
for bar in (*bars1, *bars2):
    height = bar.get_height()
    bar_center = bar.get_x() + bar.get_width()/2.
    ax.text(bar_center, height, f'{height:.3f}', ha='center', va='bottom', fontsize=9)

# Axis cosmetics
ax.set_title('Performance Comparison: VADER vs TextBlob', fontsize=14, fontweight='bold')
ax.set_xlabel('Metrics', fontsize=12)
ax.set_ylabel('Score', fontsize=12)
ax.set_xticks(x)
ax.set_xticklabels(metrics)
ax.set_ylim(0, 1)
ax.grid(axis='y', alpha=0.3)
ax.legend(fontsize=11)

plt.tight_layout()
plt.show()

## 10. Sentiment Score Distributions

In [None]:
# 2x2 grid of score histograms: rows are models, columns are the true class
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# One entry per panel:
# (grid position, true class, score column, bar colour, title, x-axis label)
histogram_panels = [
    ((0, 0), 0, 'vader_compound', 'salmon',
     'VADER Scores - True Negative Tweets', 'VADER Compound Score'),
    ((0, 1), 1, 'vader_compound', 'lightgreen',
     'VADER Scores - True Positive Tweets', 'VADER Compound Score'),
    ((1, 0), 0, 'textblob_polarity', 'salmon',
     'TextBlob Scores - True Negative Tweets', 'TextBlob Polarity Score'),
    ((1, 1), 1, 'textblob_polarity', 'lightgreen',
     'TextBlob Scores - True Positive Tweets', 'TextBlob Polarity Score'),
]

for position, true_class, score_column, bar_color, panel_title, x_label in histogram_panels:
    panel_ax = axes[position]
    # Scores of the tweets whose ground-truth class is `true_class`
    class_scores = df_sample.loc[df_sample['sentiment'] == true_class, score_column]
    class_scores.hist(bins=50, ax=panel_ax, color=bar_color, alpha=0.7, edgecolor='black')
    panel_ax.set_title(panel_title, fontsize=12, fontweight='bold')
    panel_ax.set_xlabel(x_label)
    panel_ax.set_ylabel('Frequency')
    # Reference line at a score of zero
    panel_ax.axvline(0, color='red', linestyle='--', linewidth=2, label='Threshold')
    panel_ax.legend()
    panel_ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

## 11. Error Analysis

Let's examine some misclassified examples to understand where these models struggle.

In [None]:
# Rows where VADER's 0/1 prediction disagrees with the ground truth
vader_wrong = df_sample['sentiment'].ne(df_sample['vader_sentiment'])
vader_errors = df_sample.loc[vader_wrong].copy()
print(f'VADER Misclassifications: {len(vader_errors):,} ({len(vader_errors)/len(df_sample)*100:.2f}%)')

print('\nSample VADER Misclassifications:')
print('='*100)
# Print the first ten errors with the truncated tweet, both labels and the score
vader_error_cols = ['text_original', 'sentiment_label', 'vader_label', 'vader_compound']
for row in vader_errors[vader_error_cols].head(10).itertuples(index=False):
    print(f"\nTweet: {row.text_original[:100]}...")
    print(f"True: {row.sentiment_label:8s} | Predicted: {row.vader_label:8s} | Score: {row.vader_compound:.3f}")

In [None]:
# Rows where TextBlob's 0/1 prediction disagrees with the ground truth
textblob_wrong = df_sample['sentiment'].ne(df_sample['textblob_sentiment'])
textblob_errors = df_sample.loc[textblob_wrong].copy()
print(f'TextBlob Misclassifications: {len(textblob_errors):,} ({len(textblob_errors)/len(df_sample)*100:.2f}%)')

print('\nSample TextBlob Misclassifications:')
print('='*100)
# Print the first ten errors with the truncated tweet, both labels and the score
textblob_error_cols = ['text_original', 'sentiment_label', 'textblob_label', 'textblob_polarity']
for row in textblob_errors[textblob_error_cols].head(10).itertuples(index=False):
    print(f"\nTweet: {row.text_original[:100]}...")
    print(f"True: {row.sentiment_label:8s} | Predicted: {row.textblob_label:8s} | Score: {row.textblob_polarity:.3f}")

## 12. Save Results

In [None]:
# Columns written out: the tweet, its true label, and both models' scores and predictions
prediction_columns = [
    'text_original',
    'sentiment',
    'sentiment_label',
    'vader_compound',
    'vader_sentiment',
    'vader_label',
    'textblob_polarity',
    'textblob_sentiment',
    'textblob_label',
]
predictions_out = df_sample[prediction_columns]
predictions_out.to_csv('dataset/rule_based_predictions.csv', index=False)

print('Predictions saved to: dataset/rule_based_predictions.csv')

In [None]:
# Write the per-model metric table (one row per model) without the index column
comparison_df.to_csv(
    'dataset/rule_based_performance.csv',
    index=False,
)
print('Performance metrics saved to: dataset/rule_based_performance.csv')

## 13. Summary Report

In [None]:
# Final text summary; every figure is read from variables computed in earlier cells
print('\n' + '='*80)
print('RULE-BASED SENTIMENT ANALYSIS SUMMARY')
print('='*80)

print('\nDATASET:')
print(f'  - Sample size: {len(df_sample):,} tweets')
print(f'  - Negative tweets: {(df_sample["sentiment"] == 0).sum():,}')
print(f'  - Positive tweets: {(df_sample["sentiment"] == 1).sum():,}')

print('\nMODELS EVALUATED:')
print('  1. VADER Sentiment Analyzer')
print('  2. TextBlob Sentiment Analyzer')

print('\nVADER PERFORMANCE:')
print(f'  - Accuracy:  {vader_accuracy:.4f} ({vader_accuracy*100:.2f}%)')
print(f'  - Precision: {vader_precision:.4f}')
print(f'  - Recall:    {vader_recall:.4f}')
print(f'  - F1-Score:  {vader_f1:.4f}')
print(f'  - ROC-AUC:   {vader_roc_auc:.4f}')

print('\nTEXTBLOB PERFORMANCE:')
print(f'  - Accuracy:  {textblob_accuracy:.4f} ({textblob_accuracy*100:.2f}%)')
print(f'  - Precision: {textblob_precision:.4f}')
print(f'  - Recall:    {textblob_recall:.4f}')
print(f'  - F1-Score:  {textblob_f1:.4f}')
print(f'  - ROC-AUC:   {textblob_roc_auc:.4f}')

print(f'\nBEST MODEL: {best_model}')
print(f'  - Best F1-Score: {comparison_df["F1-Score"].max():.4f}')

print('\nKEY INSIGHTS:')
# Derive the headline insight from the measured result so the summary cannot
# contradict the BEST MODEL line above when TextBlob scores higher on F1.
if best_model == 'VADER':
    print('  - VADER performs better on social media text (designed for it)')
else:
    print(f'  - {best_model} outscored VADER on F1 for this sample, even though VADER is designed for social media text')
print('  - Both models struggle with sarcasm and context')
print('  - Rule-based approaches are fast but have accuracy limitations')

print('\nOUTPUT FILES:')
print('  - dataset/rule_based_predictions.csv')
print('  - dataset/rule_based_performance.csv')

print('\nAnalysis complete!')
print('='*80)