#EMAIL SPAM DETECTION WITH MACHINE LEARNING

# Importing Libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load your dataset


In [None]:
# Read the dataset from spam.csv in the working directory, decoding it as
# latin-1, and bind the resulting DataFrame to `data`.
try:
    data = pd.read_csv('spam.csv', encoding='latin-1')
except UnicodeDecodeError as e:
    print("Error:", e)
    # Re-raise: if the load failed, `data` was never bound, and carrying on
    # would only surface later as an unrelated NameError.
    raise

# Show the column labels so the names used for features/target can be checked.
print(data.columns)

# Split dataset into features and target


In [None]:
# Pull the model input (X) and the prediction target (y) out of the frame in
# a single step.
# NOTE(review): presumably 'v2' holds the raw message text and 'v1' the
# spam/ham label -- confirm against the CSV header.
X, y = data['v2'], data['v1']

# Split data into train and test sets


In [None]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle so
# the split is reproducible between runs. stratify=y keeps the label
# proportions the same in the train and test partitions, so the evaluation
# is not skewed by an unlucky draw of the less frequent class.
# NOTE(review): stratification raises ValueError when a label occurs fewer
# than twice -- confirm every class in y has at least two rows.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Initialize TF-IDF vectorizer


In [None]:
# Text -> TF-IDF features. The vocabulary is capped at 5000 terms and the
# built-in English stop-word list is removed.
vectorizer = TfidfVectorizer(
    stop_words='english',
    max_features=5000,
)

# Learn the vocabulary and IDF weights from the training messages only, then
# reuse that fitted state for the test messages so nothing from the held-out
# data influences the features.
X_train_tfidf = vectorizer.fit_transform(raw_documents=X_train)
X_test_tfidf = vectorizer.transform(raw_documents=X_test)

# Initialize Random Forest classifier



In [None]:
# Random forest with 100 trees; the fixed seed makes the fitted model
# reproducible between runs.
rf_classifier = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)


# Train the classifier


In [None]:
# Fit the forest on the TF-IDF matrix of the training messages and their
# labels.
rf_classifier.fit(X=X_train_tfidf, y=y_train)

# Make predictions

In [None]:
# Predict a label for every held-out message from its TF-IDF features.
y_pred = rf_classifier.predict(X=X_test_tfidf)

# Evaluate the model


In [None]:
# Compare the predictions with the true held-out labels: a text report of
# the main per-class metrics plus the overall accuracy.
report = classification_report(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)