In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Sample dataset: Spam Email Detection
# Replace this with your actual dataset
data = {
    'Email': [
        'Congratulations! You have won a free lottery ticket.',
        'Urgent: Update your account details now.',
        'Meeting schedule for next week.',
        'Get your free credit report today.',
        'Lunch with friends on Sunday?'
    ],
    'Label': ['Spam', 'Spam', 'Not Spam', 'Spam', 'Not Spam']
}

# Creating a DataFrame
df = pd.DataFrame(data)

# Splitting data into features and labels
X = df['Email']
y = df['Label']

# Converting text data into numerical format using CountVectorizer
vectorizer = CountVectorizer()
X_transformed = vectorizer.fit_transform(X)

# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=0.3, random_state=42)

# Building a Naive Bayes Classifier
model = MultinomialNB()
model.fit(X_train, y_train)

# Making predictions
y_pred = model.predict(X_test)

# Evaluating the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Example prediction
example_email = ['Win a brand new car by clicking this link.']
example_transformed = vectorizer.transform(example_email)
example_prediction = model.predict(example_transformed)
print("\nPrediction for example email:", example_prediction)


Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

    Not Spam       1.00      1.00      1.00         1
        Spam       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2


Confusion Matrix:
 [[1 0]
 [0 1]]

Prediction for example email: ['Spam']
