In [None]:
# Import every library needed for the spam email detection pipeline.
import numpy as np # importing numpy
import pandas as pd # importing pandas
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:


# Read the three spam-email datasets used by this notebook from their CSV files.
Data_frame_1st = pd.read_csv('/content/completeSpamAssassin.csv')  # first dataset: completeSpamAssassin.csv
Data_frame_2nd = pd.read_csv('/content/enronSpamSubset.csv')  # second dataset: enronSpamSubset.csv
Data_frame_3rd = pd.read_csv('/content/lingSpam.csv')  # third dataset: lingSpam.csv

# Print each frame's column index so the three schemas can be compared by eye.
for caption, frame in (
    ("Columns in Data_frame_1st:", Data_frame_1st),
    ("Columns in Data_frame_2nd:", Data_frame_2nd),
    ("Columns in Data_frame_3rd:", Data_frame_3rd),
):
    print(caption, frame.columns)

# Guarantee that every frame carries a 'Subject' column: frames that already
# have one are left untouched, the others get a column of empty strings.
dataframes = [Data_frame_1st, Data_frame_2nd, Data_frame_3rd]
for df in dataframes:
    if 'Subject' in df.columns:
        continue
    df['Subject'] = ''  # placeholder subject for datasets that ship without one

Columns in Data_frame_1st: Index(['Unnamed: 0', 'Body', 'Label'], dtype='object')
Columns in Data_frame_2nd: Index(['Unnamed: 0.1', 'Unnamed: 0', 'Body', 'Label'], dtype='object')
Columns in Data_frame_3rd: Index(['Unnamed: 0', 'Body', 'Label'], dtype='object')


In [None]:
# Stack the individual frames into one table; ignore_index=True renumbers the
# rows instead of keeping each frame's own index.
combined_dataframe = pd.concat(dataframes, ignore_index=True)

# Replace missing values (NaN) in the two text columns with empty strings.
for text_column in ('Subject', 'Body'):
    combined_dataframe[text_column] = combined_dataframe[text_column].fillna('')

In [None]:
# Join subject and body (separated by a single space) into one 'Text' column
# that is used as the classifier input.
combined_dataframe['Text'] = (
    combined_dataframe['Subject'] + " " + combined_dataframe['Body']
)

# Hold out 20% of the rows as a test set; random_state fixes the shuffle so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    combined_dataframe['Text'],
    combined_dataframe['Label'],
    test_size=0.2,
    random_state=42,
)

# Fit the bag-of-words vocabulary (English stop words removed) on the training
# text only, then encode the test text with that same fitted vocabulary.
vectorizer = CountVectorizer(stop_words='english')
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

In [None]:
# Create the multinomial Naive Bayes classifier and train it on the vectorized
# training set (fit returns the fitted estimator itself).
model = MultinomialNB().fit(X_train, y_train)

# Predict labels for the held-out test set and report how well they match.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)
print("Accuracy:", accuracy)
print("Confusion Matrix:\n", matrix)

Accuracy: 0.9614044492093272
Confusion Matrix:
 [[2255   40]
 [ 104 1332]]


In [None]:
def predict_email(subject, body):
    """Classify a single email and return a human-readable verdict.

    The subject and body are joined with a single space, transformed with the
    notebook-level fitted ``vectorizer`` and classified by the notebook-level
    trained ``model``.

    Args:
        subject: Subject line of the email. ``None`` is treated as an empty
            string.
        body: Body text of the email. ``None`` is treated as an empty string.

    Returns:
        str: 'The Provided Email is a Spam Email' when the predicted label
        equals 1, otherwise the "legit email" message.
    """
    # An email without a subject (or body) may arrive as None; plain string
    # concatenation would raise TypeError, so normalise None to ''.
    subject = "" if subject is None else subject
    body = "" if body is None else body

    # Combine subject and body into the single text the model is given.
    email_text = subject + " " + body
    email_vector = vectorizer.transform([email_text])
    prediction = model.predict(email_vector)

    if prediction[0] == 1:
        return 'The Provided Email is a Spam Email'
    return "The Provided Email is a Legit Email, It's not a spam Email"

In [None]:
# Sample message: a course announcement about a new video lecture and a quiz.
Subject_of_the_Email = "video lecture and quiz-2"
Body_of_the_Email = """
Dear students of CS 4416/5516,

On moodle, I have posted a new video lecture on 'Defense at Low Level - Memory Safety.' The corresponding slides are also available under the Lecture Slides section. Please review them carefully.

Quiz-2 will be open this Friday which will be due by October 21 (Monday). Covered topics are: Buffer Overflow, Code Injection, Other Memory Exploits, Format String Vulnerability, and Defense at Low Level.

I hope your semester-wide research project is going well.

Cheers.
Minhaz Zibran.
"""
# Classify the sample and show the verdict string.
print(predict_email(subject=Subject_of_the_Email, body=Body_of_the_Email))

The Provided Email is a Legit Email, It's not a spam Email


In [None]:
# Sample message: a short "security alert" that asks for the recipient's PIN.
Subject_of_the_Email = "Security Alert"
Body_of_the_Email = "Your password has been changed. Your phone has been hacked we need your 5 digit pin."
# Classify the sample and show the verdict string.
print(predict_email(subject=Subject_of_the_Email, body=Body_of_the_Email))

The Provided Email is a Spam Email


In [None]:
# Sample message: a course announcement about a posted video lecture.
Subject_of_the_Email = "Code Injection - video lecture posted"
Body_of_the_Email = """

Dear students of CS 4416/5516,

On moodle, I have posted a new video lecture on "Code Injection (Stack Smashing)." The corresponding slides are also made available on moodle. Please review them carefully.

Have a great weekend!

Cheers.
Minhaz Zibran.
"""
# Classify the sample and show the verdict string.
print(predict_email(subject=Subject_of_the_Email, body=Body_of_the_Email))

The Provided Email is a Legit Email, It's not a spam Email


In [None]:
# Sample message: a "you've won a gift card" note with a click-to-claim prompt.
Subject_of_the_Email = "Congratulations!"
Body_of_the_Email = "You've won a $1000 Walmart gift card. Click here to claim now."
# Classify the sample and show the verdict string.
print(predict_email(subject=Subject_of_the_Email, body=Body_of_the_Email))

The Provided Email is a Spam Email


In [None]:
# Sample message: a long promotional travel-deal email with a promo code and a
# time-limited discount.
Subject_of_the_Email = "Unlock Incredible Savings on Your Next Vacation! Exclusive Offers Just for You!"
Body_of_the_Email = """

"Dear Valued Customer,

We are excited to announce our biggest getaway deal of the season exclusively available to our loyal subscribers! This limited-time offer includes amazing discounts on some of our most popular destinations around the globe.

Here’s what you can expect with this exclusive package:

Up to 50% off our luxury suites in the Caribbean.
Complimentary meals and beverages throughout your stay.
A free guided tour of the city’s historical landmarks.
A welcome gift basket upon your arrival, filled with local delicacies and handpicked souvenirs.
But that's not all! If you book your vacation within the next 48 hours, you’ll also receive an additional 10% discount on your total booking costs. Our goal is to provide you with the most memorable and affordable travel experience possible.

To take advantage of this spectacular offer, simply click on the link below and enter the promo code 'TRAVEL2024' at checkout. Hurry, this offer expires soon, and availability is limited!

Book Your Dream Vacation Now!

If you have any questions or need further assistance, please do not hesitate to contact our customer support team at support@traveldeals.com or call us at 1-800-555-TRAVEL.

Thank you for choosing us as your travel partner. We look forward to helping you create unforgettable memories on your next vacation.

Warm regards, The Travel Deals Team"
"""
# Classify the sample and show the verdict string.
print(predict_email(subject=Subject_of_the_Email, body=Body_of_the_Email))

The Provided Email is a Spam Email
