In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
import joblib

In [None]:
# Read the raw CSV from the hard-coded Drive path, decoding it as latin-1.
csv_path = '/content/drive/MyDrive/spam.csv'
df = pd.read_csv(csv_path, encoding='latin-1')

In [None]:
# Preview the first rows of the freshly loaded frame to inspect its columns.
df.head()

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [None]:
# List the column labels of the loaded frame.
df.columns

Index(['v1', 'v2', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], dtype='object')

In [None]:
# Keep only the v1 and v2 columns; the remaining 'Unnamed: N' columns are discarded.
wanted_columns = ['v1', 'v2']
df = df[wanted_columns]

In [None]:
# Re-check the first rows after narrowing the frame to the v1/v2 columns.
df.head()

Unnamed: 0,v1,v2
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [None]:
# Rename the two retained columns positionally: first -> 'label', second -> 'message'.
column_names = ['label', 'message']
df.columns = column_names

In [None]:
# Drop every row that has a missing value in any column (defaults spelled out).
df = df.dropna(axis=0, how='any')

In [None]:
# Encode the text labels as integers: ham -> 0, spam -> 1.
# The dtype guard makes this cell safe to re-run: mapping an already-encoded
# column would look up 0/1 in the dict and silently turn every label into NaN.
label_codes = {'ham': 0, 'spam': 1}
if not pd.api.types.is_numeric_dtype(df['label']):
    encoded = df['label'].map(label_codes)
    # Series.map yields NaN for any value that is not a key of the dict.
    unmapped = df.loc[encoded.isna(), 'label'].unique()
    if len(unmapped) > 0:
        # Fail loudly here instead of handing NaN targets to the classifiers.
        raise ValueError(f"Unexpected label values: {list(unmapped)}")
    # assign() returns a new frame, avoiding chained-assignment warnings.
    df = df.assign(label=encoded.astype('int64'))

In [None]:
# Show how many rows carry each encoded label value.
class_counts = df['label'].value_counts()
print(class_counts)

label
0    4825
1     747
Name: count, dtype: int64


In [None]:
# Split the raw messages first, then fit TF-IDF on the training messages only.
# Fitting the vectorizer on the full corpus before splitting lets the test
# messages shape the vocabulary and IDF weights, which leaks test information
# into training and inflates the reported scores.
y = df['label']
messages_train, messages_test, y_train, y_test = train_test_split(
    df['message'], y, test_size=0.2, random_state=42
)
vectorizer = TfidfVectorizer()
# Learn the vocabulary and IDF weights from the training split only.
X_train = vectorizer.fit_transform(messages_train)
# Project the held-out messages onto the train-fitted vocabulary.
X_test = vectorizer.transform(messages_test)
# Full feature matrix, kept so any later code that refers to `X` still works.
# It comes from the train-fitted vectorizer, so it is not a substitute for a
# fresh per-fold fit if it is ever used for cross-validation.
X = vectorizer.transform(df['message'])

In [None]:
# Multinomial Naive Bayes with its default settings, trained on the training split.
nb_model = MultinomialNB()
nb_model.fit(X=X_train, y=y_train)

In [None]:
# Logistic regression allowed up to 1000 solver iterations, trained on the training split.
lr_params = {'max_iter': 1000}
lr_model = LogisticRegression(**lr_params)
lr_model.fit(X=X_train, y=y_train)

In [None]:
# Support vector classifier with a linear kernel, trained on the training split.
svm_params = {'kernel': 'linear'}
svm_model = SVC(**svm_params)
svm_model.fit(X=X_train, y=y_train)

In [None]:
def evaluate_model(model, X_test, y_test):
    """Print a confusion matrix and a classification report for ``model``.

    Parameters
    ----------
    model : object
        Fitted estimator; only its ``predict`` method is called.
    X_test : array-like
        Features passed to ``model.predict``.
    y_test : array-like
        True labels compared against the predictions.

    Returns
    -------
    None
        Both summaries are written to stdout; nothing is returned.
    """
    predicted = model.predict(X_test)
    # Confusion matrix is printed first, followed by the per-class report.
    matrix = confusion_matrix(y_test, predicted)
    print(matrix)
    report = classification_report(y_test, predicted)
    print(report)

In [None]:
# Report each fitted classifier on the same held-out split, in a fixed order.
for heading, fitted_model in (
    ("Naive Bayes:", nb_model),
    ("Logistic Regression:", lr_model),
    ("SVM:", svm_model),
):
    print(heading)
    evaluate_model(fitted_model, X_test, y_test)

Naive Bayes:
[[965   0]
 [ 42 108]]
              precision    recall  f1-score   support

           0       0.96      1.00      0.98       965
           1       1.00      0.72      0.84       150

    accuracy                           0.96      1115
   macro avg       0.98      0.86      0.91      1115
weighted avg       0.96      0.96      0.96      1115

Logistic Regression:
[[965   0]
 [ 42 108]]
              precision    recall  f1-score   support

           0       0.96      1.00      0.98       965
           1       1.00      0.72      0.84       150

    accuracy                           0.96      1115
   macro avg       0.98      0.86      0.91      1115
weighted avg       0.96      0.96      0.96      1115

SVM:
[[963   2]
 [ 21 129]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       965
           1       0.98      0.86      0.92       150

    accuracy                           0.98      1115
   macro avg       0

In [None]:
# Persist the fitted vectorizer alongside the model: the classifier only
# accepts TF-IDF features, so it cannot score raw text in a new session
# without the exact vocabulary and IDF weights it was trained with.
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')
# NOTE(review): the recorded evaluation output shows svm_model scoring higher
# than lr_model; confirm lr_model is the intended model to export.
# The model dump stays the last expression so the cell output is unchanged.
joblib.dump(lr_model, 'spam_classifier.pkl')

['spam_classifier.pkl']

In [None]:
# Reload the persisted classifier from disk.
# joblib.load unpickles the file, so only load artifacts from a trusted source.
model_path = 'spam_classifier.pkl'
loaded_model = joblib.load(model_path)

In [None]:
# Smoke-test the reloaded model on one hand-written message.
sample_message = ["You have won $60000 from rummycircle"]
# The text must go through the same fitted vectorizer before prediction.
sample_features = vectorizer.transform(sample_message)
prediction = loaded_model.predict(sample_features)
# Label 1 is spam; anything else is reported as ham.
is_spam = prediction[0] == 1
verdict = "Spam" if is_spam else "Ham"
print("Prediction:", verdict)

Prediction: Spam
