# In [None]:
import numpy as np
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
import tkinter as tk
from tkinter import messagebox

# Read the labelled e-mail corpus from disk
df = pd.read_csv("enron_spam_data.csv")

# Bring every label to one canonical spelling: string, trimmed, lowercase
df['Spam/Ham'] = df['Spam/Ham'].astype(str).str.strip().str.lower()

# Encode the label numerically (spam -> 0, ham -> 1); any other value maps
# to NaN and is removed by the dropna() below
df['Category'] = df['Spam/Ham'].map(dict(spam=0, ham=1))

# Keep only the two columns used downstream, discarding rows where either
# the message or the encoded label is missing
data = df[['Message', 'Category']].dropna(axis=0, how='any')

# Text normalisation shared by training and prediction
def clean_text(text):
    """Return *text* lowercased, stripped of links and non-letters, single-spaced.

    The passes run in a fixed order: links are blanked out first (so their
    letters do not survive as stray tokens), then every character that is not
    an ASCII lowercase letter or whitespace is replaced by a space, and
    finally runs of whitespace collapse to a single space. Leading and
    trailing whitespace is removed from the result.
    """
    substitutions = (
        (r"http\S+|www\S+", " "),  # remove links
        (r"[^a-z\s]", " "),        # keep only letters
        (r"\s+", " "),             # normalize spaces
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

# Normalise every message with clean_text before any features are built
data['Message'] = data['Message'].apply(clean_text)

# Inputs are the cleaned messages, targets the numeric labels
X, Y = data['Message'], data['Category']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=3,
)

# TF-IDF vectoriser over unigrams and bigrams
feature_extraction = TfidfVectorizer(
    lowercase=True,
    stop_words='english',
    ngram_range=(1, 2),  # include unigrams + bigrams
    min_df=5,            # ignore very rare words
    max_df=0.7,          # ignore very common words
)

# The vectoriser is fitted on the training split only; the test split is
# merely transformed with what was learned there
X_train_features = feature_extraction.fit_transform(X_train)
X_test_features = feature_extraction.transform(X_test)

# Fit the Naive Bayes classifier (fit() returns the estimator itself)
model = MultinomialNB().fit(X_train_features, Y_train)

# Score the held-out split
predictions = model.predict(X_test_features)
print("Model Accuracy:", accuracy_score(Y_test, predictions))

# Single-message classification
def predict_spam(input_message):
    """Classify one raw message and return the string 'Spam' or 'Ham'.

    The message goes through the same clean_text normalisation as the
    training data and is vectorised with the already-fitted
    ``feature_extraction`` before ``model`` predicts its label.
    """
    features = feature_extraction.transform([clean_text(input_message)])
    label = model.predict(features)[0]
    # Label 0 is the spam class; anything else is reported as ham
    if label == 0:
        return 'Spam'
    return 'Ham'

# GUI
def on_predict():
    """Handle a click on the Predict button.

    Reads the text from the module-level ``entry`` widget, classifies it with
    ``predict_spam`` and shows the result in an info dialog. If the box is
    empty or contains only whitespace, a warning dialog is shown instead and
    nothing is classified.
    """
    # Strip before testing: a whitespace-only string is truthy and would
    # otherwise be classified as if it were a real message.
    input_message = entry.get().strip()
    if not input_message:
        messagebox.showwarning("Input Error", "Please enter a message.")
        return

    prediction = predict_spam(input_message)
    messagebox.showinfo("Prediction", f"The message is: {prediction}")

# Main application window
root = tk.Tk()
root.title("Spam Detection")

# Create the widgets: prompt, single-line input, and the button that
# triggers on_predict
label = tk.Label(root, text="Enter your message:")
entry = tk.Entry(root, width=50)
predict_button = tk.Button(root, text="Predict", command=on_predict)

# Stack them top to bottom; pack order determines the on-screen order
label.pack(pady=10)
entry.pack(pady=10)
predict_button.pack(pady=20)

# Hand control to Tk's event loop (blocks until the window is closed)
root.mainloop()
