In [10]:
# EMAIL SPAM FILTERING USING NAIVE BAYES (MultinomialNB)

# Import libraries
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder

# Map CSV to a DataFrame
df = pd.read_csv("spam.csv")

# Declare labels for the DataFrame
X = df["Message"]
y = df["Category"]

# Encoding y using LabelEncoder
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

# Convert string to numerical output (Data Transformation)
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y_encoded, test_size=0.2);

# Create the model
model = MultinomialNB()

# Train the model
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Calculate and display the accuracy of the predictions
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.9829596412556054

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99       962
           1       0.93      0.95      0.94       153

    accuracy                           0.98      1115
   macro avg       0.96      0.97      0.96      1115
weighted avg       0.98      0.98      0.98      1115



In [12]:
# EMAIL SPAM FILTERING USING LOGISTIC REGRESSION

# Import libraries
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder

# Map CSV to a DataFrame
df = pd.read_csv("spam.csv")

# Declare labels for the DataFrame
X = df["Message"]
y = df["Category"]

# Encoding y using LabelEncoder
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

# Convert string to numerical output (Data Transformation)
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y_encoded, test_size=0.2);

# Create the model
model = LogisticRegression()

# Train the model
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Calculate and display the accuracy of the predictions
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.9829596412556054

Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99       976
           1       0.98      0.88      0.93       139

    accuracy                           0.98      1115
   macro avg       0.98      0.94      0.96      1115
weighted avg       0.98      0.98      0.98      1115



In [13]:
# EMAIL SPAM FILTERING USING KNNs(K-Nearest Neighbors)

# Import libraries
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import CountVectorizer

# Map CSV to a DataFrame
df = pd.read_csv("spam.csv")

# Declare labels for the DataFrame
X = df["Message"]
y = df["Category"]

# Encoding y using LabelEncoder
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

# Convert string to numerical output (Data Transformation)
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y_encoded, test_size=0.2);

# Create the model
model = KNeighborsClassifier(n_neighbors = 5)

# Train the model
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Calculate and display the accuracy of the predictions
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.9192825112107623

Classification Report:
               precision    recall  f1-score   support

           0       0.91      1.00      0.96       961
           1       1.00      0.42      0.59       154

    accuracy                           0.92      1115
   macro avg       0.96      0.71      0.77      1115
weighted avg       0.93      0.92      0.90      1115



In [14]:
# EMAIL SPAM FILTERING USING SVMs(Support Vector Machines)

# Import libraries
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder

# Map CSV to a DataFrame
df = pd.read_csv("spam.csv")

# Declare labels for the DataFrame
X = df["Message"]
y = df["Category"]

# Encoding y using LabelEncoder
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

# Convert string to numerical output (Data Transformation)
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y_encoded, test_size=0.2);

# Create the model
model = SVC()

# Train the model
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Calculate and display the accuracy of the predictions
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.9775784753363229

Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.99       966
           1       1.00      0.83      0.91       149

    accuracy                           0.98      1115
   macro avg       0.99      0.92      0.95      1115
weighted avg       0.98      0.98      0.98      1115



In [18]:
# EMAIL SPAM FILTERING USING RANDOM FOREST

# Import libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder

# Map CSV to a DataFrame
df = pd.read_csv("spam.csv")

# Declare labels for the DataFrame
X = df["Message"]
y = df["Category"]

# Encoding y using LabelEncoder
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

# Convert string to numerical output (Data Transformation)
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y_encoded, test_size=0.2);

# Create the model
model = RandomForestClassifier()

# Train the model
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Calculate and display the accuracy of the predictions
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.9748878923766816

Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.99       967
           1       1.00      0.81      0.90       148

    accuracy                           0.97      1115
   macro avg       0.99      0.91      0.94      1115
weighted avg       0.98      0.97      0.97      1115



In [8]:
# EMAIL SPAM FILTERING USING XGBOOST

# Import libraries
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder

# Map CSV to a DataFrame
df = pd.read_csv("spam.csv")

# Declare labels for the DataFrame
X = df["Message"]
y = df["Category"]

# Encoding y using LabelEncoder
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

# Convert string to numerical output (Data Transformation)
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y_encoded, test_size=0.2);

# Create the model
model = XGBClassifier()

# Train the model
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Calculate and display the accuracy of the predictions
print("Accuracy: ", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy:  0.9766816143497757

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.99      0.99       956
           1       0.96      0.87      0.91       159

    accuracy                           0.98      1115
   macro avg       0.97      0.93      0.95      1115
weighted avg       0.98      0.98      0.98      1115

