In [4]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [5]:
# Sample spam dataset (simplified version)
data = {
    'text': [
        'Congratulations! You have won a $1,000 Walmart gift card. Go to http://bit.ly/123456 to claim now.',
        'Hey, are we still meeting for lunch tomorrow?',
        'Urgent! Your account has been suspended. Click here to fix it.',
        'Can you send me the project files before EOD?',
        'You have been selected for a free cruise to the Bahamas!',
        'Let’s catch up over coffee this weekend.',
        'Win money instantly by clicking this link!',
        'Are you free for a Zoom call today at 3 PM?',
    ],
    'label': [1, 0, 1, 0, 1, 0, 1, 0]  # 1 = spam, 0 = not spam
}

df = pd.DataFrame(data)
df.head()

Unnamed: 0,text,label
0,"Congratulations! You have won a $1,000 Walmart...",1
1,"Hey, are we still meeting for lunch tomorrow?",0
2,Urgent! Your account has been suspended. Click...,1
3,Can you send me the project files before EOD?,0
4,You have been selected for a free cruise to th...,1


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

# Split data into training and testing
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.3, random_state=42)

# Convert text to numerical features using TF-IDF
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [7]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Train the model
model = MultinomialNB()
model.fit(X_train_vec, y_train)

# Make predictions
y_pred = model.predict(X_test_vec)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.3333333333333333

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.33      1.00      0.50         1

    accuracy                           0.33         3
   macro avg       0.17      0.50      0.25         3
weighted avg       0.11      0.33      0.17         3



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [9]:
import pandas as pd

# Make sure the CSV name matches exactly
df = pd.read_csv("spam.csv", encoding='latin-1')

# Show the first few rows
df.head()

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [10]:
# Keep only relevant columns
df = df[['v1', 'v2']]
df.columns = ['label', 'message']

# Convert labels to binary: spam = 1, ham = 0
df['label'] = df['label'].map({'ham': 0, 'spam': 1})

df.head()

Unnamed: 0,label,message
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."


In [11]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score

# Convert text to numerical features
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['message'])
y = df['label']

# Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = MultinomialNB()
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.979372197309417

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99       965
           1       0.92      0.93      0.92       150

    accuracy                           0.98      1115
   macro avg       0.95      0.96      0.96      1115
weighted avg       0.98      0.98      0.98      1115



In [12]:
import joblib

# Save the trained model
joblib.dump(model, "spam_classifier_model.pkl")

# Save the TF-IDF vectorizer
joblib.dump(vectorizer, "tfidf_vectorizer.pkl")

['tfidf_vectorizer.pkl']

In [13]:
import os

[p for p in os.listdir() if p.endswith(".pkl")]

['spam_classifier_model.pkl', 'tfidf_vectorizer.pkl']

In [14]:
# Load again if needed
import joblib

model = joblib.load("spam_classifier_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")

# Your message to test
sample_message = ["Congratulations! You've won a free iPhone. Click here."]
sample_vector = vectorizer.transform(sample_message)
prediction = model.predict(sample_vector)

print("Spam" if prediction[0] == 1 else "Ham")

Spam


In [15]:
import streamlit as st
import joblib

# Load model and vectorizer
model = joblib.load("spam_classifier_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")

st.title("📨 Spam Message Classifier")

msg = st.text_area("Enter your message:")

if st.button("Check"):
    if msg.strip():
        vec = vectorizer.transform([msg])
        result = model.predict(vec)
        st.success("🔴 Spam" if result[0] == 1 else "🟢 Ham")
    else:
        st.warning("Please enter a message.")

2025-06-14 17:22:24.459 
  command:

    streamlit run c:\Users\shara\AppData\Local\Programs\Python\Python313\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-06-14 17:22:24.464 Session state does not function when running a script without `streamlit run`
