# 📧 Email Spam Detection using Machine Learning
---
**Objective:** Automatically classify emails as spam or not spam using ML.

**Dataset:** SMS Spam Collection Dataset (`spam.csv`); its labeled SMS messages stand in for emails in this notebook

**Algorithms:** Multinomial Naive Bayes and a linear SVM (`LinearSVC`), both trained on TF-IDF features

In [None]:
# Import libraries
import pandas as pd
import string
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, precision_score, recall_score
import nltk
from nltk.corpus import stopwords
# Run once if the NLTK stopwords corpus is not available locally:
# nltk.download('stopwords')

## Step 1: Load Dataset

In [None]:
# Read the latin-1 encoded CSV, keep only the label column (v1) and the
# message column (v2), and give both descriptive names.
df = (
    pd.read_csv('spam.csv', encoding='latin-1')
    .loc[:, ['v1', 'v2']]
    .rename(columns={'v1': 'label', 'v2': 'text'})
)
df.head()

## Step 2: Preprocessing

In [None]:
# Built once at definition time rather than on every call.
_DIGIT_RUN = re.compile(r'\d+')
_STRIP_PUNCTUATION = str.maketrans('', '', string.punctuation)
_STOP_WORD_CACHE = {}


def _english_stop_words():
    """Return NLTK's English stop words as a frozenset, reading the corpus only once."""
    if 'english' not in _STOP_WORD_CACHE:
        _STOP_WORD_CACHE['english'] = frozenset(stopwords.words('english'))
    return _STOP_WORD_CACHE['english']


def preprocess_text(text, stop_words=None):
    """Normalize one message so it can be vectorized.

    Steps, in order: lowercase, remove runs of digits, remove every character
    in ``string.punctuation``, split on whitespace, drop stop words, and join
    the remaining words with single spaces.

    Args:
        text: The raw message. Anything that is not a ``str`` (for example
            ``None`` or a NaN from a missing cell) is treated as empty.
        stop_words: Optional collection of lowercase words to drop. When
            omitted, NLTK's English stop-word list is used; it is loaded on
            first use and cached, so applying this function to a whole
            column does not re-read the corpus for every row.

    Returns:
        The cleaned message as a single space-separated string (possibly
        empty).

    Note:
        The default path needs the NLTK ``stopwords`` corpus to be available
        (see ``nltk.download('stopwords')``).
    """
    if not isinstance(text, str):
        return ''
    # Compare against None explicitly: an empty collection is a valid choice
    # meaning "drop nothing".
    if stop_words is None:
        stop_words = _english_stop_words()
    text = _DIGIT_RUN.sub('', text.lower())
    text = text.translate(_STRIP_PUNCTUATION)
    return ' '.join(w for w in text.split() if w not in stop_words)

# Store a normalized version of every message next to the raw text, then preview.
df['cleaned_text'] = df['text'].map(preprocess_text)
df.head()

## Step 3: Vectorization

In [None]:
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['cleaned_text'])
y = df['label'].map({'ham': 0, 'spam': 1})

## Step 4: Model Training

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Naive Bayes
nb = MultinomialNB()
nb.fit(X_train, y_train)

# SVM
svm = LinearSVC()
svm.fit(X_train, y_train)

## Step 5: Evaluation

In [None]:
# Predict on the held-out split with both classifiers.
nb_pred = nb.predict(X_test)
svm_pred = svm.predict(X_test)

# Report the same three metrics for each model, in the same order.
metric_fns = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
)
for heading, predictions in (("Naive Bayes:", nb_pred), ("\nSVM:", svm_pred)):
    print(heading)
    for metric_label, metric_fn in metric_fns:
        print(metric_label, metric_fn(y_test, predictions))

## ✅ Conclusion
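This spam detector achieves high accuracy using simple ML models (Naive Bayes and a linear SVM on TF-IDF features); see the accuracy, precision, and recall reported in Step 5. The fitted vectorizer and classifier can be integrated into apps or email clients.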