# Evaluation of Naive Bayes Model

## Objective
Evaluate the Naive Bayes classifier trained on TF-IDF features, reporting:
- Accuracy
- Precision
- Recall
- F1-score
- Confusion Matrix
- Interpretation of results

In [None]:
# Step 1: Import Libraries

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix)

import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Step 2: Load dataset and create the train/test split

# NOTE(review): the doubled "c" in "ccleaned" looks like a possible typo —
# confirm it matches the file name actually present under data/.
df = pd.read_csv("data/ccleaned_imdb_reviews.csv")

# pd.read_csv parses empty fields as NaN by default, so any empty
# `cleaned_review` entry comes back as a float NaN rather than "".
# TfidfVectorizer refuses NaN documents, so map missing reviews back to
# empty strings; this is a no-op when the column has no missing values.
X = df["cleaned_review"].fillna("")
Y = df["label"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across kernel restarts.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [None]:
# Step 3: TF-IDF encoding

# Vectorizer configuration: English stop-word list, max_features set to 5000.
tfidf = TfidfVectorizer(
    stop_words="english",
    max_features=5000,
)

# The vectorizer is fitted on the training reviews only; the test reviews
# are transformed with that already-fitted vectorizer.
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Report the shape of each encoded split.
print("TF-IDF Train shape:", X_train_tfidf.shape)
print("TF-IDF Test shape:", X_test_tfidf.shape)


In [None]:
# Step 4: Train the Naive Bayes model and predict on the test set

# fit() hands back the fitted estimator, so construction and training
# can be written as a single chained expression.
nb_model = MultinomialNB().fit(X_train_tfidf, Y_train)

# Predicted labels for the held-out TF-IDF features.
Y_pred = nb_model.predict(X_test_tfidf)

In [None]:
# Step 5: Evaluation metrics

# Accuracy on the test split, shown rounded to four decimals.
acc = accuracy_score(Y_test, Y_pred)
print("Accuracy:", round(acc, 4))

# Text report produced by classification_report for the same predictions.
report = classification_report(Y_test, Y_pred)
print("\nClassification Report:\n", report)

In [None]:
 # Step 6: Confusion Matrix
cm = confusion_matrix(Y_test, Y_pred)

# NOTE(review): tick labels are hard-coded; this assumes row/column 0 of
# `cm` is the negative class and row/column 1 the positive class — confirm
# against the values stored in the `label` column.
class_names = ["Negative", "Positive"]

# Explicit figure/axes pair instead of the implicit pyplot current figure.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix - Naive Bayes")
plt.show()
