In [1]:
import pandas as pd
import numpy as np
import random

In [2]:
spam_subjects = [
    "You've won a million dollars! Claim your prize now!",
    "Get rich quick with this one simple trick",
    "Free vacation for lucky winner!",
    "Urgent: Your account is compromised",
    "Congratulations! You've been selected",
    "Win big today! Click to claim",
    "Special offer just for you",
    "Your loan approved instantly",
    "Unlock your exclusive reward now",
    "Act fast! Limited time deal"
]

spam_contents = [
    "Congratulations! You are a lucky winner. Click here to claim your prize.",
    "This is your last chance to win a free vacation. Claim now!",
    "Exclusive deal for you only. Limited time. Click to confirm.",
    "Get rich quick by joining our program. No effort needed.",
    "Your account is at risk. Verify your details immediately.",
    "Special limited offer for you. Claim your reward today.",
    "Win big money instantly. Click this link.",
    "Hurry up! Offer expires soon. Act now.",
    "We have deposited $1000 in your account. Claim before it expires.",
    "Urgent notice! Click here to secure your prize."
]


notspam_subjects = [
    "Weekly newsletter: latest updates and articles",
    "Meeting reminder for tomorrow",
    "Your monthly subscription bill",
    "Project update and deadlines",
    "Happy birthday! Wishing you the best",
    "Invitation to join our webinar",
    "Job application status update",
    "Your order has been shipped",
    "Team lunch scheduled next week",
    "Weather forecast for the week"
]

notspam_contents = [
    "Dear subscriber, here are the latest news updates and articles.",
    "Reminder: Meeting is scheduled for 10am tomorrow.",
    "Your bill for this month is ready. Please review it.",
    "Here is the update on your ongoing project and deadlines.",
    "Wishing you a happy and joyful birthday.",
    "Join us for our free webinar this Friday at 5 PM.",
    "We have reviewed your job application. Updates soon.",
    "Your package has been shipped and will arrive in 3 days.",
    "Team lunch will be held next Wednesday at 1 PM.",
    "Here is the weekly weather forecast for your city."
]


data = []
for _ in range(25):
  subj = random.choice(spam_subjects)
  cont = random.choice(spam_contents)
  data.append([subj,cont,1])

for _ in range(25):
  subj = random.choice(notspam_subjects)
  cont = random.choice(notspam_contents)
  data.append([subj,cont,0])

random.shuffle(data)
data = pd.DataFrame(data,columns=["subject","content","label"])
print(data.head())

                                 subject  \
0         Team lunch scheduled next week   
1          Meeting reminder for tomorrow   
2         Team lunch scheduled next week   
3  Congratulations! You've been selected   
4          Job application status update   

                                             content  label  
0  Your bill for this month is ready. Please revi...      0  
1  Reminder: Meeting is scheduled for 10am tomorrow.      0  
2  We have reviewed your job application. Updates...      0  
3  Get rich quick by joining our program. No effo...      1  
4  Reminder: Meeting is scheduled for 10am tomorrow.      0  


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

In [4]:
data["text"] = data["subject"] + " " + data["content"]

In [5]:
xtrain,xtest,ytrain,ytest = train_test_split(data['text'],data['label'],test_size = 0.2,random_state = 42)

In [6]:
vectorizer = TfidfVectorizer(stop_words="english")
x_train_tfidf = vectorizer.fit_transform(xtrain)
x_test_tfidf = vectorizer.transform(xtest)

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,classification_report

model = LogisticRegression()
model.fit(x_train_tfidf,ytrain)


In [8]:
ypred = model.predict(x_test_tfidf)

In [9]:
print("accuracy:",accuracy_score(ytest,ypred))

accuracy: 1.0


In [10]:
print("\nClassification Report:\n", classification_report(ytest, ypred))


Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       1.00      1.00      1.00         4

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10



In [11]:
new_subject = "Update: Your application has been accepted"
new_content = "We offer various opportunities to help students develop professional skills and gain valuable experience.Benefits - Gain practical experience, build your network, and enhance your resume.Opportunities - Learn valuable skills through various activities online and workshops.Eligibility - Open to students from all backgrounds and disciplines.Deadline: Last day today!"
new_text = new_subject + " " + new_content
new_text_tfid = vectorizer.transform([new_text])
pred = model.predict(new_text_tfid)
if pred[0] == 1:
  print("spam")
else:
  print("not spam")

not spam


In [12]:
new_subject = "Listen to New HD Pro Voices"
new_content = '''New HD Pro Voices for Personal.Dear, mohan343mk2005@gmail.com.We are thrilled to introduce the next generation of AI voices with the addition of 38 new HD Pro Voices! HD voices are trained on the newest and latest LLM technology, resulting in voice quality never seen before. What Makes HD Voices Special.Fully Customizable (Coming Soon!)
No longer limited to predefined voices, you can use a simple prompt to describe the voice you envision – from a warm, friendly narrator to a bold, energetic announcer. Our AI will create it for you. Your perfect voice is just a description away.­
Intelligent and Content-Aware Unlike traditional AI voices, HD Pro Voices are "smart". They understand your text's context and meaning, automatically adapting their delivery to match the tone and intent of your content. This means more natural, engaging audio without manual adjustments.
Multilingual Support.HD Pro Voices are truly global, offering support for 15 languages and  accents, perfect for reading aloud text, documents, and books in diverse languages.
Try HD Pro Voices Listen to HD Pro Voices Reading Styles (Coming Soon)
We're putting the finishing touches on our new reading styles feature, a NaturalReader first that will let you customize your HD Pro Voices.
Simply use a prompt to describe the voice you envision, it's that easy! From a chill surfer dude to a jolly, Santa-like reading style, the possibilities are endless, and we can't wait to share it with you all very soon.'''

new_text = new_subject + " " + new_content
new_text_tfid = vectorizer.transform([new_text])
pred = model.predict(new_text_tfid)
if pred[0] == 1:
  print("spam")
else:
  print("not spam")

spam


In [17]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [20]:
import joblib

joblib.dump(model, "/content/drive/MyDrive/Colab Notebooks/MACHINE LEARNING/Mail classification/model/email_classifier.pkl")

['/content/drive/MyDrive/Colab Notebooks/MACHINE LEARNING/Mail classification/model/email_classifier.pkl']