# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# 1. Load Dataset
df = pd.read_csv("mail_l7_dataset.csv")

# 2. Preprocessing
# Missing message bodies become empty strings so the vectorizer never sees NaN.
df["Message"] = df["Message"].fillna("")

# Normalise the label text before mapping so that variants such as "Spam" or
# " ham " are still recognised. The encoding is unchanged: spam -> 0, ham -> 1.
labels = (
    df["Category"].astype(str).str.strip().str.lower().map({"spam": 0, "ham": 1})
)

# Any value that is still unmapped (missing or unexpected category) is NaN,
# which the classifiers reject at fit time. Drop those rows up front and
# restore the integer dtype that the NaN values would have forced to float.
df = (
    df.assign(Category=labels)
    .dropna(subset=["Category"])
    .astype({"Category": int})
)

X = df["Message"]
y = df["Category"]

# 3. Train-Test Split
# Stratify so both splits keep the dataset's spam/ham ratio. Stratification
# needs at least two samples of every class, so fall back to a plain random
# split when that does not hold.
stratify_labels = y if y.value_counts().min() >= 2 else None
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=stratify_labels
)

# 4. TF-IDF Vectorization
# The vectorizer is fitted on the training messages only; the held-out
# messages are then projected with that same fitted vectorizer.
vectorizer = TfidfVectorizer()
X_train_tfidf, X_test_tfidf = (
    vectorizer.fit_transform(X_train),
    vectorizer.transform(X_test),
)

# 5. Train Models
# All three classifiers are fitted on the same TF-IDF training matrix.
lr = LogisticRegression()
lr.fit(X_train_tfidf, y_train)

# Seed the forest with the same value used for the train/test split so that
# repeated runs of the script report the same metrics.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train_tfidf, y_train)

nb = MultinomialNB()
nb.fit(X_train_tfidf, y_train)

# 6. Evaluation
models = {
    "Logistic Regression": lr,
    "Random Forest": rf,
    "Naive Bayes": nb,
}

# Labels follow the encoding applied during preprocessing: spam -> 0, ham -> 1.
SPAM_LABEL = 0
HAM_LABEL = 1

print("===== MODEL PERFORMANCE =====")

for name, model in models.items():
    y_pred = model.predict(X_test_tfidf)

    print(f"\n{name}")
    print("Accuracy        :", accuracy_score(y_test, y_pred))
    # precision/recall/F1 score class 1 by default, which is ham under this
    # encoding. Pass pos_label explicitly so the figures describe how well
    # spam is detected.
    print(
        "Precision (spam):",
        precision_score(y_test, y_pred, pos_label=SPAM_LABEL),
    )
    print(
        "Recall (spam)   :",
        recall_score(y_test, y_pred, pos_label=SPAM_LABEL),
    )
    print(
        "F1-Score (spam) :",
        f1_score(y_test, y_pred, pos_label=SPAM_LABEL),
    )
    # Fix the label order so the matrix is always 2x2 and laid out the same
    # way, even if one class is absent from the test split or the predictions.
    print("Confusion Matrix (rows = actual, columns = predicted; order: spam, ham):")
    print(confusion_matrix(y_test, y_pred, labels=[SPAM_LABEL, HAM_LABEL]))

# 7. Single Message Predictions
print("\n===== SINGLE MESSAGE TEST =====")

test_messages = [
    "Free entry in a weekly competition",
    "I will call you later today",
    "Congratulations! You won a prize",
]

# Vectorize every sample message with the already-fitted vectorizer, then let
# each model classify the whole batch once.
sample_features = vectorizer.transform(test_messages)
predictions_by_model = {
    model_name: classifier.predict(sample_features)
    for model_name, classifier in models.items()
}

# Report per message, listing each model's verdict in the order of `models`.
for row, text in enumerate(test_messages):
    print("\nMessage:", text)

    for model_name, predicted in predictions_by_model.items():
        # A predicted label of 1 is reported as ham; anything else as spam.
        verdict = "Spam" if predicted[row] != 1 else "Ham"
        print(f"{model_name}: {verdict}")