<a href="https://colab.research.google.com/github/karthikskarthiks6023/Bharat-Intern-Email-SMS-Spam-Classification/blob/main/Email_SMS_spam_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report


In [None]:
# Read the raw spam dataset from the Colab working directory into a DataFrame.
# The encoding is given explicitly as latin-1 (presumably the file is not
# valid UTF-8 -- confirm against the dataset).
data = pd.read_csv(
    filepath_or_buffer="/content/spam.csv",
    encoding="latin-1",
)


In [None]:
# Keep only the label column (v1) and the text column (v2) and give them
# descriptive names.
# Selecting and renaming in one chained expression returns a fresh DataFrame,
# so later column assignments on `data` do not operate on a slice of the raw
# frame (the situation that can raise pandas' SettingWithCopyWarning), and the
# explicit old-name -> new-name mapping does not depend on column order.
data = data[['v1', 'v2']].rename(columns={'v1': 'label', 'v2': 'message'})


In [None]:
# Convert labels to binary values (0 for ham, 1 for spam).
# Series.map turns every value that is missing from the mapping into NaN, so:
#   * the already-encoded values 0 and 1 map to themselves, which makes
#     re-running this cell a no-op instead of wiping the whole column to NaN;
#   * any other value fails loudly here rather than reaching the classifier
#     as a NaN label.
label_codes = {'ham': 0, 'spam': 1}
encoded_labels = data['label'].map({**label_codes, 0: 0, 1: 1})
unmapped = encoded_labels.isna()
if unmapped.any():
    unexpected = sorted(data.loc[unmapped, 'label'].astype(str).unique())
    raise ValueError(f"Unexpected label values: {unexpected}")
# No NaN remains at this point, so the column can be stored as plain integers.
data['label'] = encoded_labels.astype('int64')


In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
messages = data['message']
labels = data['label']
X_train, X_test, y_train, y_test = train_test_split(
    messages,
    labels,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Turn the raw text into token-count features.
# The vocabulary is learned from the training messages only; the test
# messages are then encoded with that same fitted vocabulary.
# Note: X_train / X_test are rebound from text to count matrices here.
vectorizer = CountVectorizer()
X_train, X_test = (
    vectorizer.fit_transform(X_train),  # evaluated first: fits the vocabulary
    vectorizer.transform(X_test),       # evaluated second: reuses it
)


In [None]:
# Fit a multinomial Naive Bayes model on the training count features and
# their 0/1 labels.
classifier = MultinomialNB()
classifier.fit(X=X_train, y=y_train)


In [None]:
# Label every held-out message with the trained classifier.
predictions = classifier.predict(X=X_test)


In [None]:
# Score the held-out predictions: overall accuracy first, then the
# per-class precision / recall / F1 report.
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)
print("Classification Report:", classification_report(y_test, predictions), sep="\n")


Accuracy: 0.9838565022421525
Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       965
           1       0.99      0.89      0.94       150

    accuracy                           0.98      1115
   macro avg       0.98      0.95      0.96      1115
weighted avg       0.98      0.98      0.98      1115



In [None]:
# Hand-written sample messages used as a quick sanity check of the model.
new_messages = [
    "Hurrah! India won the match!",   # casual sports chatter
    "Can you review this document?",  # ordinary work request
    "Get a free trial now!",          # promotional wording
]

In [None]:
# Encode the sample messages with the already-fitted vocabulary, classify
# them, and print each message together with its predicted class.
new_messages_vectorized = vectorizer.transform(new_messages)
new_predictions = classifier.predict(new_messages_vectorized)
for message, prediction in zip(new_messages, new_predictions):
    # Label 1 is spam; anything else is reported as ham.
    verdict = "spam" if prediction == 1 else "ham"
    print(f"Message: {message}\nPredicted class: {verdict}")


Message: Hurrah! India won the match!
Predicted class: ham
Message: Can you review this document?
Predicted class: ham
Message: Get a free trial now!
Predicted class: spam
