In [34]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import pickle

In [35]:
data = {
    "text": [
        # --- 100 SPAM messages ---
        "Win a free iPhone now!",
        "Congratulations! You won a prize.",
        "Claim your cash reward immediately.",
        "You’ve been selected for a free gift card.",
        "Limited time offer, act now!",
        "Get rich quick with this method.",
        "Earn extra cash from home.",
        "Click here to claim your reward.",
        "Hot singles in your area waiting!",
        "Free trial, no credit card required.",
        "Winner! You have been chosen.",
        "Unlock your full potential now.",
        "This deal ends today!",
        "You are a lucky winner!",
        "Buy one, get one free!",
        "Instant loan approval guaranteed.",
        "Pre-approved credit card offer.",
        "Exclusive offer for you!",
        "Get free bitcoins instantly.",
        "Your chance to win big money!",
        "Win a trip to Dubai!",
        "Free membership registration today.",
        "Receive your gift by clicking here.",
        "This is your last chance!",
        "Win Rs. 10,000 today.",
        "Free movie tickets available!",
        "Call now to claim your offer.",
        "Lowest insurance rates guaranteed.",
        "You are selected for a scholarship.",
        "Urgent: act now to receive benefits.",
        "Flash sale only for you!",
        "Make money while you sleep!",
        "Hot offer ends tonight!",
        "Hurry! Seats are filling fast.",
        "No fee! No hassle!",
        "Complete this survey and win.",
        "Best earning opportunity!",
        "You won a free vacation.",
        "Guaranteed income daily!",
        "Limited slots left, join now.",
        "Don’t miss this chance!",
        "Your reward is waiting.",
        "Win exciting prizes now!",
        "Biggest offer of the year!",
        "Free mobile recharge!",
        "Unlock VIP access now!",
        "Get cashback instantly!",
        "Refer and earn rewards!",
        "Tap to claim your bonus!",
        "Your account has a bonus!",
        "Win free Netflix subscription!",
        "Play and win real money!",
        "Download and get ₹100!",
        "Free access to premium content!",
        "Daily rewards available!",
        "Spin the wheel to win!",
        "Exclusive crypto giveaway!",
        "Watch ads and earn money!",
        "Join the contest now!",
        "Invite friends and earn!",
        "Add funds and get bonus!",
        "Scratch to win prizes!",
        "Win 1 lakh rupees today!",
        "Cash prize waiting for you!",
        "Get paid to answer questions.",
        "Lucky draw entry confirmed.",
        "Win AirPods now!",
        "Finalist selected - claim prize!",
        "Special bonus just for you!",
        "Claim your voucher here!",
        "Cashback credited to your account!",
        "Free gold coin offer!",
        "Surprise gift inside!",
        "Lowest EMI offer now!",
        "Loan disbursed within 5 mins!",
        "Hurry, offer ending soon!",
        "Gift card enclosed!",
        "Get ₹500 instantly!",
        "Win trip to Maldives!",
        "Mega sale - up to 90% off!",
        "Recharge and win cashback.",
        "Get 6 months free subscription!",
        "Discount coupon for you!",
        "Order now & save more!",
        "Bonus unlocked for you!",
        "Make ₹2000/day easily!",
        "Apply now for instant approval!",
        "Fastest way to earn online!",
        "No investment needed!",
        "Free health checkup!"
    ],
    "label": [1] * 100  # 100 spam labels
}

# Add 100 ham messages
data["text"] += [
    "Hey, are we still meeting today?",
    "Let’s have lunch at 1 PM.",
    "Can you review my code?",
    "The report is due tomorrow.",
    "Did you watch the movie?",
    "Good morning! Have a nice day.",
    "Don’t forget the meeting at 10.",
    "Your order has been shipped.",
    "I'll call you in 10 minutes.",
    "See you at the office later.",
    "Let’s go shopping this weekend.",
    "Dinner at my place tonight?",
    "What’s the homework for tomorrow?",
    "Your resume looks great!",
    "Happy birthday! 🎉",
    "Good luck for your exams!",
    "Let’s catch up soon!",
    "I’m running a bit late.",
    "Thank you for your help!",
    "I'll email you the files.",
    "Meeting rescheduled to 3 PM.",
    "Please bring your ID proof.",
    "Can you send me the notes?",
    "Hope you’re doing well.",
    "Let’s plan the trip soon.",
    "What are you doing tomorrow?",
    "Is this the latest version?",
    "Don’t forget to water the plants.",
    "I reached the station.",
    "Congrats on your promotion!",
    "We’ll discuss in the meeting.",
    "The event is on Saturday.",
    "I'll join in 5 minutes.",
    "Team meeting at 2 PM.",
    "I appreciate your support.",
    "Shall we start the call?",
    "This looks good to me.",
    "Please find the attached file.",
    "Join the Zoom link below.",
    "All the best!",
    "Take care and be safe.",
    "No problem, I’ll handle it.",
    "Let’s do this tomorrow.",
    "I'll be offline after 6.",
    "You did a great job!",
    "Please confirm your availability.",
    "It was nice meeting you.",
    "Let me know your feedback.",
    "We are on track with the project.",
    "The deadline is next week.",
    "Can you review the document?",
    "I agree with your points.",
    "The system will be updated.",
    "Check your inbox for updates.",
    "Your package is delivered.",
    "Shall we sync up later?",
    "Do you have the file ready?",
    "I’ll call after lunch.",
    "I’ll send the invite.",
    "The link isn’t working.",
    "Yes, I’m available.",
    "Thanks again!",
    "Join the discussion thread.",
    "Let’s finalize the agenda.",
    "Working on it now.",
    "Can you resend that file?",
    "Happy to connect!",
    "It’s been a while!",
    "Lunch break at 1?",
    "Don’t worry, I’ll manage.",
    "What time is the session?",
    "I'll bring the documents.",
    "Nice to meet you!",
    "I have updated the task.",
    "Let’s collaborate soon.",
    "Catch you later!",
    "My internet is slow today.",
    "I'm traveling now.",
    "Will respond by EOD.",
    "Got your message.",
    "I’ll try to finish early.",
    "Please review the list.",
    "Let’s do it step-by-step.",
    "Reading the document now.",
    "It’s approved.",
    "Ping me if needed.",
    "Let me recheck that.",
    "I’ll let you know.",
    "Typing notes now.",
    "Almost done with the task.",
    "We can start tomorrow.",
    "Need clarification on this.",
    "Let’s ask the team.",
    "Yes, I’ll do it.",
    "This is a great idea!",
    "We’ll finalize it today.",
    "Please send a reminder.",
    "Done and dusted!",
    "Give me 5 more minutes.",
    "Following up on the mail.",
    "I have added the changes.",
    "Check the shared folder.",
    "Pushed the code to repo.",
    "We’re on the same page.",
    "Update me once done.",
    "Let’s wrap it up.",
    "All issues are resolved.",
    "Copy that!",
    "Shared the final version.",
    "Looks fine from my side."
]
data["label"] += [0] * 100  # 100 ham labels


In [36]:
df = pd.DataFrame(data)
df.head()

Unnamed: 0,text,label
0,Win a free iPhone now!,1
1,Congratulations! You won a prize.,1
2,Claim your cash reward immediately.,1
3,You’ve been selected for a free gift card.,1
4,"Limited time offer, act now!",1


In [37]:
df.tail()

Unnamed: 0,text,label
195,Let’s wrap it up.,0
196,All issues are resolved.,0
197,Copy that!,0
198,Shared the final version.,0
199,Looks fine from my side.,0


In [38]:
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['text']) # Convert text to numbers
y = df['label']

In [39]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [40]:
model = MultinomialNB()
model.fit(X_train, y_train)

In [41]:
y_pred = model.predict(X_test)

In [42]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Report:\n", classification_report(y_test, y_pred, zero_division=0))

# --- Save Model and Vectorizer ---
with open("Naivemodel.pkl", "wb") as f:
    pickle.dump(model, f)

with open("vectorizer.pkl", "wb") as f:
    pickle.dump(vectorizer, f)


new_messages = ['Win a free laptop now', 'Let’s have a meeting today']
new_features = vectorizer.transform(new_messages)
predictions = model.predict(new_features)
for msg, label in zip(new_messages, predictions):
    print(f"'{msg}' --> {'Spam' if label == 1 else 'Not Spam'}")

Accuracy: 0.8833333333333333
Report:
               precision    recall  f1-score   support

           0       0.84      0.93      0.89        29
           1       0.93      0.84      0.88        31

    accuracy                           0.88        60
   macro avg       0.89      0.88      0.88        60
weighted avg       0.89      0.88      0.88        60

'Win a free laptop now' --> Spam
'Let’s have a meeting today' --> Not Spam
