In [9]:
#Step 1: Set Up Your Environment
pip install pandas scikit-learn matplotlib numpy




In [2]:
#Load and Explore the Dataset
import pandas as pd

df = pd.read_csv('spam.csv', encoding='latin-1')[['v1', 'v2']]
df.columns = ['label', 'message']
print(df.head())


  label                                            message
0   ham  Go until jurong point, crazy.. Available only ...
1   ham                      Ok lar... Joking wif u oni...
2  spam  Free entry in 2 a wkly comp to win FA Cup fina...
3   ham  U dun say so early hor... U c already then say...
4   ham  Nah I don't think he goes to usf, he lives aro...


In [3]:
#Preprocess the Text Data
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

X = df['message']
y = df['label'].map({'ham': 0, 'spam': 1})  # Convert to binary

cv = CountVectorizer()
X = cv.fit_transform(X)

# Split into train/test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
#Train a Machine Learning Model
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

model = MultinomialNB()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.97847533632287
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       965
           1       0.91      0.93      0.92       150

    accuracy                           0.98      1115
   macro avg       0.95      0.96      0.95      1115
weighted avg       0.98      0.98      0.98      1115



In [5]:
#Test with Your Own Emails
sample = ["Congratulations! You've won a free iPhone. Click here."]
sample_vec = cv.transform(sample)
print("Prediction:", model.predict(sample_vec))  # 1 for spam, 0 for ham


Prediction: [1]


In [10]:
#Test with Your Own Emails
sample_emails = [
    "Your account has been suspended due to suspicious activity. Click here to verify your details.",
    "Hey, are we still on for the meeting tomorrow at 10am?",
    "Win a brand new car! Reply now to claim your prize.",
    "Please find attached the invoice for your recent purchase.",
    "URGENT! You have won $10,000 in our lucky draw. Claim now!",
    "Let's catch up over coffee this weekend!",
    "Get cheap medicines at 90% off without prescription.",
    "Hi Mom, just wanted to check in and see how you're doing.",
    "Congratulations, you are selected for a free vacation to the Bahamas!",
    "Reminder: Your electricity bill is due next week.",
]


In [8]:
#Test with Your Own Emails
for email in sample_emails:
    sample_vec = cv.transform([email])
    prediction = model.predict(sample_vec)[0]
    label = "Spam" if prediction == 1 else "not spam"
    print(f"Email: {email}\nPrediction: {label}\n")


Email: Your account has been suspended due to suspicious activity. Click here to verify your details.
Prediction: Spam

Email: Hey, are we still on for the meeting tomorrow at 10am?
Prediction: not spam

Email: Win a brand new car! Reply now to claim your prize.
Prediction: Spam

Email: Please find attached the invoice for your recent purchase.
Prediction: Spam

Email: URGENT! You have won $10,000 in our lucky draw. Claim now!
Prediction: Spam

Email: Let's catch up over coffee this weekend!
Prediction: not spam

Email: Get cheap medicines at 90% off without prescription.
Prediction: not spam

Email: Hi Mom, just wanted to check in and see how you're doing.
Prediction: not spam

Email: Congratulations, you are selected for a free vacation to the Bahamas!
Prediction: Spam

Email: Reminder: Your electricity bill is due next week.
Prediction: Spam

