In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# ---------------------------------------------------------------------------
# Spam / not-spam e-mail classifier: TF-IDF features + Multinomial Naive Bayes
# ---------------------------------------------------------------------------

# --- 1. Load ---------------------------------------------------------------
# Read the labelled e-mail table from disk and show its first rows.
data = pd.read_csv('dataset.csv')
print(data.head())

# --- 2. Preprocess ---------------------------------------------------------
# Lowercase the message text; this overwrites the 'Email_Text' column of `data`.
# NOTE(review): TfidfVectorizer is documented to lowercase by default, so this
# step may be redundant -- confirm before removing.
data['Email_Text'] = data['Email_Text'].str.lower()

# --- 3. Split --------------------------------------------------------------
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    data['Email_Text'],
    data['Label'],
    test_size=0.3,
    random_state=42,
)

# --- 4. Vectorise ----------------------------------------------------------
# The vectorizer is fitted on the training messages only; the test messages
# are transformed with that already-fitted vectorizer.
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# --- 5. Train --------------------------------------------------------------
# `fit` returns the estimator itself, so construction and training are chained.
model = MultinomialNB().fit(X_train_tfidf, y_train)

# --- 6. Evaluate -----------------------------------------------------------
# Predict labels for the held-out messages, then compute every metric before
# reporting them.
y_pred = model.predict(X_test_tfidf)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Report: overall accuracy, then the confusion matrix, then the
# per-class classification report.
print(f'Accuracy: {accuracy:.2f}')

print('Confusion Matrix:', conf_matrix, sep='\n')

print('Classification Report:', class_report, sep='\n')


   Id                                         Email_Text     Label
0   1     Win money now! Click here to claim your prize.      spam
1   2  Meeting agenda for tomorrow's project discussion.  not spam
2   3  Exclusive offer just for you! Buy one get one ...      spam
3   4  Reminder: Your appointment with the dentist is...  not spam
4   5  Congratulations! You've been selected for a fr...      spam
Accuracy: 0.83
Confusion Matrix:
[[3 0]
 [1 2]]
Classification Report:
              precision    recall  f1-score   support

    not spam       0.75      1.00      0.86         3
        spam       1.00      0.67      0.80         3

    accuracy                           0.83         6
   macro avg       0.88      0.83      0.83         6
weighted avg       0.88      0.83      0.83         6

