## **Author: Manahil Khan**



### Task: *Email Spam Detection with Machine Learning*

## Importing Libraries

In [None]:
#Importing all important libraries
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Colab-only step: open the upload widget so spam.csv can be placed in the
# working directory. The google.colab package is not installed on other
# kernels, so there the upload is skipped and spam.csv is expected to be
# present next to the notebook already.
try:
    from google.colab import files
except ImportError:
    # Not running on Colab; keep `uploaded` defined for later cells.
    uploaded = {}
else:
    uploaded = files.upload()

Saving spam.csv to spam.csv


## Loading Data

In [None]:
# Read the raw corpus from the working directory, decoding it as latin-1.
dataset = pd.read_csv('spam.csv', encoding='latin-1')

# Column 'v1' is used as the class label and column 'v2' as the message text.
labels, emails = dataset['v1'], dataset['v2']

## Preprocessing Data

In [None]:
# Stop-word sets already loaded from NLTK, keyed by language name, so the
# corpus is read once instead of once per message.
_STOP_WORDS_CACHE = {}


def _load_stop_words(language='english'):
    """Return the NLTK stop-word set for `language`, loading it at most once."""
    if language not in _STOP_WORDS_CACHE:
        try:
            words = stopwords.words(language)
        except LookupError:
            # Fresh environment: the stopwords corpus has not been downloaded
            # yet. Fetch it once and retry.
            nltk.download('stopwords', quiet=True)
            words = stopwords.words(language)
        _STOP_WORDS_CACHE[language] = frozenset(words)
    return _STOP_WORDS_CACHE[language]


def preprocess_text(text, stop_words=None):
    """Normalise a raw message into a space-separated string of kept tokens.

    Steps: replace every character that is not an ASCII letter with a space,
    lower-case the result, split on whitespace, drop stop words, and re-join
    the remaining tokens with single spaces.

    Parameters
    ----------
    text : str
        Raw message text.
    stop_words : collection of str, optional
        Lower-case words to remove. When omitted, the NLTK English stop-word
        list is used (loaded once and cached).

    Returns
    -------
    str
        The cleaned text; an empty string if no token survives.
    """
    if stop_words is None:
        stop_words = _load_stop_words('english')

    # Digits and punctuation become spaces, so "you've" splits into "you", "ve".
    text = re.sub('[^a-zA-Z]', ' ', text)
    text = text.lower()

    tokens = [word for word in text.split() if word not in stop_words]

    return ' '.join(tokens)

preprocessed_emails = emails.apply(preprocess_text)

vectorizer = TfidfVectorizer()
tfidf_vectors = vectorizer.fit_transform(preprocessed_emails)

## Splitting Data

In [None]:
X_train, X_test, y_train, y_test = train_test_split(tfidf_vectors, labels, test_size=0.2, random_state=42)

## Selecting the Model

In [None]:
# Multinomial Naive Bayes classifier created with its default settings.
classifier = MultinomialNB()

## Fitting the Model

In [None]:
# Train the classifier on the training split (features X_train, labels y_train).
classifier.fit(X_train, y_train)

## Evaluating the Model

In [None]:
# Predict labels for the held-out split and score them against y_test.
predictions = classifier.predict(X_test)

accuracy = accuracy_score(y_test, predictions)

# Precision, recall and F1 are all reported for the 'spam' class.
precision, recall, f1 = (
    score_fn(y_test, predictions, pos_label='spam')
    for score_fn in (precision_score, recall_score, f1_score)
)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

Accuracy: 0.9668161434977578
Precision: 1.0
Recall: 0.7533333333333333
F1 Score: 0.8593155893536121


## Prediction using Example

In [None]:
# Classify one hand-written message with the fitted vectorizer and classifier.
new_sentence = "Congratulations! You've won a free vacation. Claim your prize now!"

# Apply the same cleaning used on the training text, then encode the message
# with the already-fitted vectorizer (transform only, no refit).
preprocessed_sentence = preprocess_text(new_sentence)
new_sentence_vector = vectorizer.transform([preprocessed_sentence])

# predict() returns one label per input row; a single row was passed in.
prediction = classifier.predict(new_sentence_vector)

if prediction[0] != 'spam':
    print("The given sentence is predicted as ham.")
else:
    print("The given sentence is predicted as spam.")


The given sentence is predicted as spam.


## **THANK YOU**

