In [9]:
#Loading libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split, KFold  
from sklearn.metrics import accuracy_score
import pickle


In [44]:
import pandas as pd

# Assuming your dataset is in a CSV file
data = pd.read_csv('spam.csv')

# Separate spam and non-spam emails
spam_emails = data[data['Category'] == 'spam']
non_spam_emails = data[data['Category'] == 'ham']

# Count the number of emails in each category
num_spam_emails = len(spam_emails)
num_non_spam_emails = len(non_spam_emails)

# Determine the minimum number of emails in a category
min_num_emails = min(num_spam_emails, num_non_spam_emails)

# Sample a balanced subset from each category
balanced_spam_emails = spam_emails.sample(n=min_num_emails, random_state=42)
balanced_non_spam_emails = non_spam_emails.sample(n=min_num_emails, random_state=42)

# Concatenate the balanced subsets
balanced_data = pd.concat([balanced_spam_emails, balanced_non_spam_emails])

# Shuffle the balanced dataset
balanced_data = balanced_data.sample(frac=1, random_state=42).reset_index(drop=True)

# Save the balanced dataset to a new CSV file

balanced_data.to_csv('balanced_emails_dataset.csv', index=False)



In [11]:
#reading the dataset
data = pd.read_csv('balanced_emails_dataset.csv')
data.value_counts('Category')

Category
ham     747
spam    747
dtype: int64

In [10]:
# Split the data into features (X) - input and target variable (y) - output
X1 = data['Message']
y1 = data['Category']

In [12]:
#Convert text into numerical vectors
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(X1)
y = y1

#Saving the vectorizer
with open('vectorizer.pkl', 'wb') as f:
    pickle.dump(vectorizer, f)

In [13]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
# Initialize KFold with 10 folds
kfold = KFold(n_splits=10, shuffle=True, random_state=42)

In [42]:
# Import the MultinomialNB classifier
from sklearn.naive_bayes import MultinomialNB
model = MultinomialNB()

print("MultinomialNB")
#Training the model with KFold
# Initialize variables to track the best model and its performance
best_accuracy = 0.0
best_model = None
accuracy_scores = []

# Iterate over the folds
for train_index, test_index in kfold.split(X):
    # Split the data into training and testing sets for the current fold
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Train the model and making predictions
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Calculate f1_score for the current fold
    accuracy = accuracy_score(y_test, y_pred)
    # accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

    # Check if the current model performs better than the previous best model
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model

# Print the best accuracy achieved
print("Best accuracy using KFold:", best_accuracy)

# Save the best model for future predictions
print('Average Accuracy using KFold:',np.mean(accuracy_scores))
pickle.dump(best_model, open('MultinomialNB_KFold.pkl', 'wb'))


#Training the model with train test split
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
# accuracy = accuracy_score(y_test, y_pred)
print("Accuracy using Train Test:", accuracy)

#save the model
import pickle
pickle.dump(model, open('MultinomialNB_Train_Test.pkl','wb'))

# Load the best model for future predictions
best_model = pickle.load(open('MultinomialNB_KFold.pkl', 'rb'))

# Predict the category of a new message using the best model
#new_message = ["Hi Mom, Please let me know if you need anything. I am always there for you. Love you."]
new_message = ["Make money fast and easy! Guaranteed returns of up to 1000% in just one week! Don't miss out on this incredible opportunity. Click here to learn more."]
#new_message = ["Looking for ways to enhance your [REDACTED]? We have the solution for you. Our revolutionary product guarantees incredible results in just a few weeks. Order now and experience the difference!"]
#new_message = ["Congratulations! You have won a cash prize of $1,000,000. To claim your prize, reply to this email with your full name, address, and bank account details. Act quickly to secure your winnings."]
new_message_transformed = vectorizer.transform(new_message)
print(best_model.predict(new_message_transformed))



MultinomialNB
Best accuracy using KFold: 0.98
Average Accuracy using KFold: 0.963847874720358
Accuracy using Train Test: 0.9798657718120806
['spam']


In [47]:
# Import the support vector classifier
from sklearn.svm import SVC
model = SVC(kernel='linear')

print("Support Vector Classifier (SVC)")
#Training the model with KFold
# Initialize variables to track the best model and its performance
best_accuracy = 0.0
best_model = None
accuracy_scores = []

# Iterate over the folds
for train_index, test_index in kfold.split(X):
    # Split the data into training and testing sets for the current fold
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Train the model and making predictions
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Calculate accuracy for the current fold
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

    # Check if the current model performs better than the previous best model
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model

# Print the best accuracy achieved
print("Best accuracy using KFold:", best_accuracy)

# Save the best model for future predictions
print('Average Accuracy using KFold:',np.mean(accuracy_scores))
pickle.dump(best_model, open('SVC_KFold.pkl', 'wb'))


#Training the model with train test split
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy using Train Test:", accuracy)

#save the model
import pickle
pickle.dump(model, open('SVC_Train_Test.pkl','wb'))

# Predict the category of a new message using the best model
#new_message = ["Hi Mom, Please let me know if you need anything. I am always there for you. Love you."]
new_message = ["Make money fast and easy! Guaranteed returns of up to 1000% in just one week! Don't miss out on this incredible opportunity. Click here to learn more."]
#new_message = ["Looking for ways to enhance your [REDACTED]? We have the solution for you. Our revolutionary product guarantees incredible results in just a few weeks. Order now and experience the difference!"]
#new_message = ["Congratulations! You have won a cash prize of $1,000,000. To claim your prize, reply to this email with your full name, address, and bank account details. Act quickly to secure your winnings."]
new_message_transformed = vectorizer.transform(new_message)
new_message_transformed = new_message_transformed.toarray()
print(best_model.predict(new_message_transformed))



Support Vector Classifier (SVC)
Best accuracy using KFold: 0.9733333333333334
Average Accuracy using KFold: 0.9605100671140938
Accuracy using Train Test: 0.9731543624161074
['ham']


In [23]:
# Import the logistic regression algorithm
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()

print("Logistic Regression Algorithm")
#Training the model with KFold
# Initialize variables to track the best model and its performance
best_accuracy = 0.0
best_model = None
accuracy_scores = []

# Iterate over the folds
for train_index, test_index in kfold.split(X):
    # Split the data into training and testing sets for the current fold
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Train the model and making predictions
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Calculate accuracy for the current fold
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

    # Check if the current model performs better than the previous best model
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model

# Print the best accuracy achieved
print("Best accuracy using KFold:", best_accuracy)

# Save the best model for future predictions
print('Average Accuracy using KFold:',np.mean(accuracy_scores))
pickle.dump(best_model, open('Logistic_Regression_KFold.pkl', 'wb'))


#Training the model with train test split
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy using Train Test:", accuracy)

#save the model
import pickle
pickle.dump(model, open('Logistic_Regression_Train_Test.pkl','wb'))

# Predict the category of a new message using the best model
#new_message = ["Hi Mom, Please let me know if you need anything. I am always there for you. Love you."]
new_message = ["Make money fast and easy! Guaranteed returns of up to 1000% in just one week! Don't miss out on this incredible opportunity. Click here to learn more."]
#new_message = ["Looking for ways to enhance your [REDACTED]? We have the solution for you. Our revolutionary product guarantees incredible results in just a few weeks. Order now and experience the difference!"]
#new_message = ["Congratulations! You have won a cash prize of $1,000,000. To claim your prize, reply to this email with your full name, address, and bank account details. Act quickly to secure your winnings."]
new_message_transformed = vectorizer.transform(new_message)
print(best_model.predict(new_message_transformed))



Logistic Regression Algorithm
Best accuracy using KFold: 0.9798657718120806
Average Accuracy using KFold: 0.95917225950783
Accuracy using Train Test: 0.9798657718120806
['spam']


In [24]:
# Import the decision tree algorithm
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier()

print("Decision Tree Algorithm")
#Training the model with KFold
# Initialize variables to track the best model and its performance
best_accuracy = 0.0
best_model = None
accuracy_scores = []

# Iterate over the folds
for train_index, test_index in kfold.split(X):
    # Split the data into training and testing sets for the current fold
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Train the model and making predictions
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Calculate accuracy for the current fold
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

    # Check if the current model performs better than the previous best model
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model

# Print the best accuracy achieved
print("Best accuracy using KFold:", best_accuracy)

# Save the best model for future predictions
print('Average Accuracy using KFold:',np.mean(accuracy_scores))
pickle.dump(best_model, open('Decision_Tree_KFold.pkl', 'wb'))


#Training the model with train test split
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy using Train Test:", accuracy)

#save the model
import pickle
pickle.dump(model, open('Decision_Tree_Train_Test.pkl','wb'))

# Predict the category of a new message using the best model
#new_message = ["Hi Mom, Please let me know if you need anything. I am always there for you. Love you."]
new_message = ["Make money fast and easy! Guaranteed returns of up to 1000% in just one week! Don't miss out on this incredible opportunity. Click here to learn more."]
#new_message = ["Looking for ways to enhance your [REDACTED]? We have the solution for you. Our revolutionary product guarantees incredible results in just a few weeks. Order now and experience the difference!"]
#new_message = ["Congratulations! You have won a cash prize of $1,000,000. To claim your prize, reply to this email with your full name, address, and bank account details. Act quickly to secure your winnings."]
new_message_transformed = vectorizer.transform(new_message)
print(best_model.predict(new_message_transformed))



Decision Tree Algorithm
Best accuracy using KFold: 0.9530201342281879
Average Accuracy using KFold: 0.9250604026845638
Accuracy using Train Test: 0.9328859060402684
['ham']


In [25]:
# Import the random forest algorithm
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()

print("Random Forest Algorithm")
#Training the model with KFold
# Initialize variables to track the best model and its performance
best_accuracy = 0.0
best_model = None
accuracy_scores = []

# Iterate over the folds
for train_index, test_index in kfold.split(X):
    # Split the data into training and testing sets for the current fold
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Train the model and making predictions
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Calculate accuracy for the current fold
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

    # Check if the current model performs better than the previous best model
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model

# Print the best accuracy achieved
print("Best accuracy using KFold:", best_accuracy)

# Save the best model for future predictions
print('Average Accuracy using KFold:',np.mean(accuracy_scores))
pickle.dump(best_model, open('Random_Forest_KFold.pkl', 'wb'))


#Training the model with train test split
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy using Train Test:", accuracy)

#save the model
import pickle
pickle.dump(model, open('Random_Forest_Train_Test.pkl','wb'))

# Predict the category of a new message using the best model
#new_message = ["Hi Mom, Please let me know if you need anything. I am always there for you. Love you."]
new_message = ["Make money fast and easy! Guaranteed returns of up to 1000% in just one week! Don't miss out on this incredible opportunity. Click here to learn more."]
#new_message = ["Looking for ways to enhance your [REDACTED]? We have the solution for you. Our revolutionary product guarantees incredible results in just a few weeks. Order now and experience the difference!"]
#new_message = ["Congratulations! You have won a cash prize of $1,000,000. To claim your prize, reply to this email with your full name, address, and bank account details. Act quickly to secure your winnings."]
new_message_transformed = vectorizer.transform(new_message)
print(best_model.predict(new_message_transformed))



Random Forest Algorithm
Best accuracy using KFold: 0.9798657718120806
Average Accuracy using KFold: 0.9585055928411632
Accuracy using Train Test: 0.959731543624161
['ham']


In [26]:
# Import the KNN algorithm
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()

print("KNN Algorithm")
#Training the model with KFold
# Initialize variables to track the best model and its performance
best_accuracy = 0.0
best_model = None
accuracy_scores = []

# Iterate over the folds
for train_index, test_index in kfold.split(X):
    # Split the data into training and testing sets for the current fold
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Train the model and making predictions
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Calculate accuracy for the current fold
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

    # Check if the current model performs better than the previous best model
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model

# Print the best accuracy achieved
print("Best accuracy using KFold:", best_accuracy)

# Save the best model for future predictions
print('Average Accuracy using KFold:',np.mean(accuracy_scores))
pickle.dump(best_model, open('KNN_KFold.pkl', 'wb'))


#Training the model with train test split
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy using Train Test:", accuracy)

#save the model
import pickle
pickle.dump(model, open('KNN_Train_Test.pkl','wb'))

# Predict the category of a new message using the best model
#new_message = ["Hi Mom, Please let me know if you need anything. I am always there for you. Love you."]
new_message = ["Make money fast and easy! Guaranteed returns of up to 1000% in just one week! Don't miss out on this incredible opportunity. Click here to learn more."]
#new_message = ["Looking for ways to enhance your [REDACTED]? We have the solution for you. Our revolutionary product guarantees incredible results in just a few weeks. Order now and experience the difference!"]
#new_message = ["Congratulations! You have won a cash prize of $1,000,000. To claim your prize, reply to this email with your full name, address, and bank account details. Act quickly to secure your winnings."]
new_message_transformed = vectorizer.transform(new_message)
print(best_model.predict(new_message_transformed))



KNN Algorithm


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


Best accuracy using KFold: 0.8053691275167785
Average Accuracy using KFold: 0.7343355704697987
Accuracy using Train Test: 0.7315436241610739
['ham']


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


In [30]:
# Import the bagging classifier algorithm
from sklearn.ensemble import BaggingClassifier
model = BaggingClassifier()

print("Bagging Classifier Algorithm")
#Training the model with KFold
# Initialize variables to track the best model and its performance
best_accuracy = 0.0
best_model = None
accuracy_scores = []

# Iterate over the folds
for train_index, test_index in kfold.split(X):
    # Split the data into training and testing sets for the current fold
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Train the model and making predictions
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Calculate accuracy for the current fold
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

    # Check if the current model performs better than the previous best model
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model

# Print the best accuracy achieved
print("Best accuracy using KFold:", best_accuracy)

# Save the best model for future predictions
print('Average Accuracy using KFold:',np.mean(accuracy_scores))
pickle.dump(best_model, open('Bagging_Classifier_KFold.pkl', 'wb'))


#Training the model with train test split
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy using Train Test:", accuracy)

#save the model
import pickle
pickle.dump(model, open('Bagging_Classifier_Train_Test.pkl','wb'))

# Predict the category of a new message using the best model
#new_message = ["Hi Mom, Please let me know if you need anything. I am always there for you. Love you."]
#new_message = ["Make money fast and easy! Guaranteed returns of up to 1000% in just one week! Don't miss out on this incredible opportunity. Click here to learn more."]
#new_message = ["Looking for ways to enhance your [REDACTED]? We have the solution for you. Our revolutionary product guarantees incredible results in just a few weeks. Order now and experience the difference!"]
new_message = ["Congratulations! You have won a cash prize of $1,000,000. To claim your prize, reply to this email with your full name, address, and bank account details. Act quickly to secure your winnings."]
new_message_transformed = vectorizer.transform(new_message)
print(best_model.predict(new_message_transformed))



Bagging Classifier Algorithm
Best accuracy using KFold: 0.9533333333333334
Average Accuracy using KFold: 0.9323937360178972
Accuracy using Train Test: 0.959731543624161
['spam']


In [33]:
# Import the adaboost classifier algorithm
from sklearn.ensemble import AdaBoostClassifier
model = AdaBoostClassifier()

print("AdaBoost Classifier Algorithm")
#Training the model with KFold
# Initialize variables to track the best model and its performance
best_accuracy = 0.0
best_model = None
accuracy_scores = []

# Iterate over the folds
for train_index, test_index in kfold.split(X):
    # Split the data into training and testing sets for the current fold
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Train the model and making predictions
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Calculate accuracy for the current fold
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

    # Check if the current model performs better than the previous best model
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model

# Print the best accuracy achieved
print("Best accuracy using KFold:", best_accuracy)

# Save the best model for future predictions
print('Average Accuracy using KFold:',np.mean(accuracy_scores))
pickle.dump(best_model, open('AdaBoost_Classifier_KFold.pkl', 'wb'))


#Training the model with train test split
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy using Train Test:", accuracy)

#save the model
import pickle
pickle.dump(model, open('AdaBoost_Classifier_Train_Test.pkl','wb'))

# Predict the category of a new message using the best model
#new_message = ["Hi Mom, Please let me know if you need anything. I am always there for you. Love you."]
#new_message = ["Make money fast and easy! Guaranteed returns of up to 1000% in just one week! Don't miss out on this incredible opportunity. Click here to learn more."]
new_message = ["Looking for ways to enhance your [REDACTED]? We have the solution for you. Our revolutionary product guarantees incredible results in just a few weeks. Order now and experience the difference!"]
#new_message = ["Congratulations! You have won a cash prize of $1,000,000. To claim your prize, reply to this email with your full name, address, and bank account details. Act quickly to secure your winnings."]
new_message_transformed = vectorizer.transform(new_message)
print(best_model.predict(new_message_transformed))



AdaBoost Classifier Algorithm
Best accuracy using KFold: 0.9664429530201343
Average Accuracy using KFold: 0.9364116331096198
Accuracy using Train Test: 0.9530201342281879
['spam']


In [39]:
# Import the Gaussian Naive Bayes algorithm
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold, train_test_split
import numpy as np
import pickle

# Create an instance of the Gaussian Naive Bayes model
model = GaussianNB()

print("Gaussian Naive Bayes Algorithm")

# Training the model with KFold
# Initialize variables to track the best model and its performance
best_accuracy = 0.0
best_model = None
accuracy_scores = []


# Iterate over the folds
for train_index, test_index in kfold.split(X):
    # Split the data into training and testing sets for the current fold
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Train the model and make predictions
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Calculate accuracy for the current fold
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

    # Check if the current model performs better than the previous best model
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model

# Print the best accuracy achieved
print("Best accuracy using KFold:", best_accuracy)

# Save the best model for future predictions
print('Average Accuracy using KFold:', np.mean(accuracy_scores))
pickle.dump(best_model, open('GaussianNB_KFold.pkl', 'wb'))


# Training the model with train test split
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy using Train Test:", accuracy)

# Save the model
pickle.dump(model, open('GaussianNB_TrainTest.pkl', 'wb'))


# Predict the category of a new message using the best model
#new_message = ["Hi Mom, Please let me know if you need anything. I am always there for you. Love you."]
new_message = ["Make money fast and easy! Guaranteed returns of up to 1000% in just one week! Don't miss out on this incredible opportunity. Click here to learn more."]
#new_message = ["Looking for ways to enhance your [REDACTED]? We have the solution for you. Our revolutionary product guarantees incredible results in just a few weeks. Order now and experience the difference!"]
#new_message = ["Congratulations! You have won a cash prize of $1,000,000. To claim your prize, reply to this email with your full name, address, and bank account details. Act quickly to secure your winnings."]
new_message_transformed = vectorizer.transform(new_message)
new_message_transformed = new_message_transformed.toarray()
print(best_model.predict(new_message_transformed))



Gaussian Naive Bayes Algorithm
Best accuracy using KFold: 0.98
Average Accuracy using KFold: 0.9531275167785234
Accuracy using Train Test: 0.9331103678929766
['spam']
