In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [17]:
# Toy corpus for the spam/ham classifier, assembled from three hand-written
# batches. Labels are positional, so batch order below must not change.

# Seed batch (8): spam and ham alternate, starting with spam.
seed_messages = [
    "Congratulations! You've won a free ticket to Bahamas. Call now!",
    "Hey, are we still meeting for dinner?",
    "URGENT! Your account has been suspended. Verify immediately.",
    "Don't forget the meeting tomorrow at 10AM.",
    "Win $1000 now!!! Click here to claim your prize.",
    "Can you send me the report by tonight?",
    "Get cheap loans now!!! No credit check!",
    "Let's catch up this weekend.",
]

# Additional spam batch (10).
spam_messages = [
    "Limited time offer! 50% off all products. Buy now!",
    "You’ve been selected for a $500 Walmart gift card. Reply YES!",
    "ALERT: Unusual login detected. Secure your account NOW!",
    "Hot singles in your area! Click to chat instantly.",
    "Your Netflix subscription expired. Update payment info here.",
    "FREE iPhone 15 Pro if you complete our survey!",
    "Warning: Your PC is infected. Download antivirus ASAP!",
    "Exclusive deal: Rolex watches for 90% off. Limited stock!",
    "Your tax refund is pending. Confirm your details to claim.",
    "Last chance: Claim your prize before midnight!",
]

# Additional ham batch (10).
ham_messages = [
    "Mom: Can you pick up milk on your way home?",
    "Team, the project deadline is extended to Friday.",
    "Thanks for your help earlier. Really appreciate it!",
    "Are you free for a quick call at 3PM today?",
    "Reminder: Dentist appointment tomorrow at 2PM.",
    "I’ll be late for dinner. Traffic is terrible.",
    "Did you see the email I sent about the budget report?",
    "Happy birthday! Let’s celebrate this weekend.",
    "The Wi-Fi password is ‘HomeNetwork123’.",
    "Can we reschedule our meeting to next Tuesday?",
]

# Column-oriented dict: one 'message' string and one 'label' string per row.
data = {
    'message': seed_messages + spam_messages + ham_messages,
    'label': (
        ['spam', 'ham'] * (len(seed_messages) // 2)
        + ['spam'] * len(spam_messages)
        + ['ham'] * len(ham_messages)
    ),
}

df = pd.DataFrame(data)

In [18]:
# Encode the text labels as integers for the classifier: ham -> 0, spam -> 1.
# The lookup falls back to the value itself, so re-running this cell on labels
# that are already encoded is a no-op. A plain .map(dict) would turn every
# label into NaN on a second run.
label_to_id={'ham':0,'spam':1}
df['label']=df['label'].map(lambda value: label_to_id.get(value,value))

# Fail here, with a clear message, rather than later inside model fitting.
if not df['label'].isin(list(label_to_id.values())).all():
    raise ValueError("df['label'] contains values other than 'ham'/'spam' (or 0/1)")

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train,X_test,y_train,y_test=train_test_split(df['message'],df['label'],test_size=0.25,random_state=42)

# Bag-of-words features: the vocabulary is learned from the training messages
# only, then the same fitted vectorizer is applied to the test messages.
vectorizer=CountVectorizer()
X_train_vec=vectorizer.fit_transform(X_train)
X_test_vec=vectorizer.transform(X_test)

In [19]:
# Train a logistic-regression classifier (default settings) on the
# bag-of-words training matrix; fit() hands back the fitted estimator.
model=LogisticRegression().fit(X_train_vec,y_train)

# Predicted labels for the held-out messages.
y_pred=model.predict(X_test_vec)


In [20]:
# Show the predicted labels for the held-out messages (0 = ham, 1 = spam).
print(y_pred)

[0 0 1 0 1 1 1]


In [25]:
# Evaluate the held-out predictions: compute every metric first, then report.
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Fraction of test messages classified correctly.
print(f"\nAccuracy: {acc:.2f}")

# Counts per (true label, predicted label) pair.
print("\nconfusion_matrix: \n", cm)

# Per-class precision / recall / F1 and support.
print("\nClassification Report: ")
print(report)


Accuracy: 0.86

confusion_matrix: 
 [[2 0]
 [1 4]]

Classification Report: 
              precision    recall  f1-score   support

           0       0.67      1.00      0.80         2
           1       1.00      0.80      0.89         5

    accuracy                           0.86         7
   macro avg       0.83      0.90      0.84         7
weighted avg       0.90      0.86      0.86         7



In [29]:
# Sanity check: classify one hand-picked message with the fitted pipeline.
# The vectorizer takes a list of documents, hence the one-element list.
new_message = ["Hot singles in your area! Click to chat instantly."]
new_message_vec = vectorizer.transform(new_message)
prediction = model.predict(new_message_vec)

# Label 1 is reported as spam; any other label is reported as ham.
if prediction[0] == 1:
    verdict = "Spam"
else:
    verdict = "Ham"
print("\nNew message prediction: ", verdict)


New message prediction:  Spam


In [32]:
# Classify a message typed in by the user.
# vectorizer.transform expects an iterable of documents, so the single input
# string is wrapped in a one-element list. Calling list() on the string would
# split it into characters and treat each character as its own document.
new_message=[input("Enter mail: ")]

# Vectorize this cell's input and predict on that same matrix, so the result
# reflects what the user typed rather than a vector left over from another cell.
new_message_vec=vectorizer.transform(new_message)
prediction=model.predict(new_message_vec)
print("\nNew message prediction: ","Spam" if prediction[0]==1 else "Ham")


New message prediction:  Ham
