# Setup & Dependencies

In [None]:
!python.exe -m pip install --upgrade pip
!pip3 install vaderSentiment pandas numpy matplotlib seaborn

In [None]:
import pandas as pd
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import warnings

# Ignore every warning issued through the `warnings` module from this point on.
# NOTE(review): this blanket filter also hides deprecation notices from the
# libraries imported above; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [None]:
# Load the IMDb review CSV, print the frame's (rows, columns) shape,
# and preview the first ten rows as the cell output.
df = pd.read_csv(
    '../.res/IMDb_Dataset.csv',
)
print(df.shape)
df.head(n=10)

In [None]:
# Convert labels to binary (same as logistic regression notebook):
# 'positive' -> 1, anything else -> 0.
#
# The cell is written to be safe to re-run. Once the column is numeric the
# conversion is skipped; re-applying `== 'positive'` to 0/1 values would
# otherwise silently turn every label into 0.
df = df.rename(columns={'sentiment': 'label'})
if not pd.api.types.is_numeric_dtype(df['label']):
    df['label'] = (df['label'] == 'positive').astype('int64')

In [None]:
# Initialize VADER once; the same analyzer instance is reused for every call below
analyzer = SentimentIntensityAnalyzer()


def get_vader_sentiment(text, threshold=0.0):
    """Convert a text's VADER compound score into a binary sentiment label.

    Args:
        text: Text to score; passed unchanged to ``analyzer.polarity_scores``.
        threshold: Smallest compound score that is labelled positive. The
            default of 0.0 keeps the original ``compound >= 0`` rule, so a
            score of exactly zero is labelled positive.

    Returns:
        1 if the compound score is at least ``threshold``, otherwise 0.
    """
    compound = analyzer.polarity_scores(text)['compound']
    return 1 if compound >= threshold else 0

# Score each review with VADER and store the resulting 0/1 label in a new column
df['vader_prediction'] = df['review'].map(get_vader_sentiment)

In [None]:
# Reproduce the logistic regression notebook's split (20% test, seed 42) so
# both models are scored on the same held-out rows
train_data, test_data = train_test_split(df, random_state=42, test_size=0.2)

# Ground-truth labels and VADER outputs for the held-out rows
test_labels = test_data['label']
test_predictions = test_data['vader_prediction']

# Overall accuracy first, then the per-class precision/recall/F1 table
vader_accuracy = accuracy_score(test_labels, test_predictions)
vader_report_text = classification_report(test_labels, test_predictions)
print("VADER Sentiment Analysis Accuracy:", vader_accuracy)
print("\nClassification Report:\n", vader_report_text)

In [None]:
# Grouped bar chart of per-class precision / recall / F1, in the same style
# as the logistic regression notebook
report = classification_report(test_labels, test_predictions, output_dict=True)

# The first two keys of the report dict are used as the class entries
categories = list(report)[:2]
precision, recall, f1_score = (
    [report[cat][metric] for cat in categories]
    for metric in ('precision', 'recall', 'f1-score')
)

width = 0.25
x = np.arange(len(categories))

fig, ax = plt.subplots(figsize=(8, 5))

# One bar series per metric, placed left of / on / right of each class tick
bars1, bars2, bars3 = (
    ax.bar(x + offset, values, width, label=metric_name, color=colour)
    for offset, values, metric_name, colour in (
        (-width, precision, 'Precision', 'skyblue'),
        (0, recall, 'Recall', 'orange'),
        (width, f1_score, 'F1 Score', 'green'),
    )
)

ax.set(
    xlabel='Sentiment',
    ylabel='Scores',
    title='VADER: Precision, Recall, and F1 Scores by Sentiment',
)
ax.set_xticks(x)
ax.set_xticklabels(categories)
ax.legend()

# Print each bar's value (two decimals) just above it
for container in (bars1, bars2, bars3):
    ax.bar_label(container, fmt='%.2f', padding=3)

plt.tight_layout()
plt.show()

In [None]:
# Confusion matrix of true vs. VADER-predicted labels, drawn as an annotated heatmap
conf_matrix = confusion_matrix(test_labels, test_predictions)
class_names = ['Negative', 'Positive']

plt.figure(figsize=(6, 4))
# sns.heatmap returns the Axes it drew on; label that Axes directly
heatmap_ax = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
heatmap_ax.set_title("VADER: Confusion Matrix")
heatmap_ax.set_xlabel("Predicted Label")
heatmap_ax.set_ylabel("True Label")
plt.show()