In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# --- Load and clean ---------------------------------------------------------
# Read the raw CSV; the steps below use its 'Category' and 'Message' columns.
data = pd.read_csv('mail_data.csv')

# Keep every non-null cell and substitute an empty string for missing ones.
mail_data = data.where(data.notnull(), '')

# --- Encode labels: spam -> 0, ham -> 1 -------------------------------------
mail_data.loc[mail_data['Category'] == 'spam', 'Category'] = 0
mail_data.loc[mail_data['Category'] == 'ham', 'Category'] = 1

# --- Split into train / test ------------------------------------------------
x, y = mail_data['Message'], mail_data['Category']

# 80/20 split; the fixed random_state keeps the partition reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    x, y, test_size=0.2, random_state=3
)

# Cast the encoded labels to int before they reach the classifier and metrics.
Y_train = Y_train.astype('int')
Y_test = Y_test.astype('int')

# --- Turn message text into TF-IDF features ---------------------------------
feature_extraction = TfidfVectorizer(
    min_df=1,
    stop_words='english',
    lowercase=True,
)

# The vectorizer is fitted on the training messages only; the test messages
# are transformed with that already-fitted vectorizer.
X_train_features = feature_extraction.fit_transform(X_train)
X_test_features = feature_extraction.transform(X_test)

# --- Train ------------------------------------------------------------------
model = LogisticRegression()
model.fit(X_train_features, Y_train)

# --- Evaluate on both partitions --------------------------------------------
prediction_on_training_data = model.predict(X_train_features)
accuracy_on_training_data = accuracy_score(Y_train, prediction_on_training_data)
print('Accuracy on training data : ', accuracy_on_training_data)

prediction_on_test_data = model.predict(X_test_features)
accuracy_on_test_data = accuracy_score(Y_test, prediction_on_test_data)
print('Accuracy on test data : ', accuracy_on_test_data)

# Show the menu in one call; sep="\n" puts each entry on its own line.
print(
    "Instructions",
    "Press 1 for Proceed with a spam mail",
    "Press 2 for proceed with ham mail",
    "Press 3 for manual mail feeding",
    sep="\n",
)

def function(mail_text=None):
    """Classify one e-mail with the fitted model and print the verdict.

    Args:
        mail_text: Text of the message to classify. When omitted (``None``)
            the module-level ``mail`` variable is used instead, so callers
            that set ``mail`` and then call ``function()`` with no
            arguments keep working.

    Returns:
        None. The raw prediction array is printed first, followed by
        ``'Spam mail'`` when the predicted label is 0 or ``'Ham mail'``
        for any other label.

    Raises:
        NameError: If ``mail_text`` is omitted and no global ``mail`` exists.
    """
    if mail_text is None:
        # Backward-compatible path: fall back to the global set by the caller.
        mail_text = mail

    # The vectorizer expects an iterable of documents, hence the one-item list.
    input_data_features = feature_extraction.transform([mail_text])

    prediction = model.predict(input_data_features)
    print(prediction)

    # Label 0 is the spam class; every other label is reported as ham.
    if prediction[0] == 0:
        print('Spam mail')
    else:
        print('Ham mail')


# Read the menu choice; it is compared as a string, never converted to a number.
x = input()

if x not in ("1", "2", "3"):
    print("Invalid input!!!")
else:
    # Choose the text to classify. function() picks it up from the global
    # ``mail``, so it must be assigned before the call.
    if x == "1":
        # Canned spam example.
        mail = ("Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's")
    elif x == "2":
        # Canned ham example.
        mail = ("I've been searching for the right words to thank you for this breather.I promise i wont take your help for granted and will fulfil my promise. You have been wonderful and a blessing at all times")
    else:
        # Choice "3": the user types the message.
        mail = input("Please share your mail here: ")
    function()


Accuracy on training data :  0.9670181736594121
Accuracy on test data :  0.9659192825112107
Instructions
Press 1 for Proceed with a spam mail
Press 2 for proceed with ham mail
Press 3 for manual mail feeding
3
Please share your mail here: hi there get you free ticket
[1]
Ham mail
