# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import joblib

# -----------------------------------------
# 1. Load the dataset
# -----------------------------------------
# Read the raw mail dataset; the 'Message' and 'Category' columns are used.
df = pd.read_csv('mail_data.csv')

# A row without a label cannot be used for supervised training, and blanking
# its label to '' would make the later integer cast of the labels raise.
# Drop such rows instead of filling them.
labelled = df.dropna(subset=['Category'])
dropped_rows = len(df) - len(labelled)
if dropped_rows:
    print("Dropped rows with missing Category:", dropped_rows)

# Replace the remaining null values (e.g. empty messages) with empty string
data = labelled.where(pd.notnull(labelled), '')

# Separate input text and labels
X = data['Message']
Y = data['Category']

# -----------------------------------------
# 2. Train-Test Split
# -----------------------------------------
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=3)

# -----------------------------------------
# 3. Text → TF-IDF Vectorization
# -----------------------------------------
# Cast the labels to integers.
# NOTE(review): this cast only succeeds when 'Category' already holds numeric
# values; if the CSV stores text labels they must be encoded before this
# point — confirm against the dataset.
Y_train = Y_train.astype('int')
Y_test = Y_test.astype('int')

# Lower-case the text and drop English stop words before weighting terms.
feature_extraction = TfidfVectorizer(
    stop_words='english',
    lowercase=True,
    min_df=1,
)

# Fit the vectorizer on the training messages only, then reuse that fitted
# vectorizer to transform the held-out messages.
X_train_features = feature_extraction.fit_transform(X_train)
X_test_features = feature_extraction.transform(X_test)

# -----------------------------------------
# 4. Train the Model
# -----------------------------------------
# Fit a logistic-regression classifier (library defaults) on the TF-IDF
# features; fit() returns the fitted estimator, so it can be bound directly.
model = LogisticRegression().fit(X_train_features, Y_train)

# -----------------------------------------
# 5. Check accuracy
# -----------------------------------------
# Predict on both splits first, then score and report each one.
train_predictions = model.predict(X_train_features)
test_predictions = model.predict(X_test_features)

# Accuracy on the data the model was fitted on
train_accuracy = accuracy_score(Y_train, train_predictions)
# Accuracy on the held-out data
test_accuracy = accuracy_score(Y_test, test_predictions)

print("Training Accuracy:", train_accuracy)
print("Testing Accuracy:", test_accuracy)

# -----------------------------------------
# 6. Save the model + TF-IDF vectorizer
# -----------------------------------------
# Persist the classifier and the fitted vectorizer side by side: both are
# written to the current working directory, model first.
for artifact, filename in (
    (model, 'spam_classifier_model.joblib'),
    (feature_extraction, 'tfidf_vectorizer.joblib'),
):
    joblib.dump(artifact, filename)

print("Model and Vectorizer saved successfully!")
