# Data Collection

##### Importing pandas library for data wrangling and data manipulation

In [11]:
import pandas as pd

##### Reading the given CSV file into a DataFrame

In [12]:
# Load spam.csv (path is relative to the working directory) into a DataFrame
# and display it as the cell output.
spam = pd.read_csv(filepath_or_buffer='spam.csv')
spam

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,
...,...,...,...,...,...
5567,spam,This is the 2nd time we have tried 2 contact u...,,,
5568,ham,Will �_ b going to esplanade fr home?,,,
5569,ham,"Pity, * was in mood for that. So...any other s...",,,
5570,ham,The guy did some bitching but I acted like i'd...,,,


# Data cleaning

##### Removing unwanted columns

In [13]:
# Keep only the first two columns: every column from position 2 onward is
# dropped by label.
spam = spam.drop(columns=spam.columns[2:])
spam

Unnamed: 0,v1,v2
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,spam,This is the 2nd time we have tried 2 contact u...
5568,ham,Will �_ b going to esplanade fr home?
5569,ham,"Pity, * was in mood for that. So...any other s..."
5570,ham,The guy did some bitching but I acted like i'd...


##### Renaming the columns for better understanding

In [14]:
# Give the two remaining columns descriptive names: v1 holds the label and
# v2 holds the message text.
spam = spam.rename(columns={'v1': 'Category', 'v2': 'Message'})
spam

Unnamed: 0,Category,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,spam,This is the 2nd time we have tried 2 contact u...
5568,ham,Will �_ b going to esplanade fr home?
5569,ham,"Pity, * was in mood for that. So...any other s..."
5570,ham,The guy did some bitching but I acted like i'd...


##### Checking for NULL values
***No NULL values found***

In [15]:
# Count missing values per column (isna is the same check as isnull).
spam.isna().sum()

Unnamed: 0,0
Category,0
Message,0


# Data Preparation

##### Converting the Message and Category columns into numerical values

In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Turn every message into a row of TF-IDF weights (sparse matrix, one row per
# message, one column per vocabulary term learned by the vectorizer).
# NOTE(review): this fit sees the whole corpus, so the vocabulary/IDF weights
# behind X_vectorized include messages that later end up in the test split;
# scores computed on slices of this matrix are slightly optimistic.
vectorizer = TfidfVectorizer()
X_vectorized = vectorizer.fit_transform(spam['Message'])

# Encode the labels as integers: spam -> 1, ham -> 0.
# The mapping also accepts the already-encoded values 1 and 0, so running this
# cell a second time leaves the column unchanged instead of turning every
# label into NaN (Series.map yields NaN for keys missing from the dict).
category_codes = {'spam': 1, 'ham': 0, 1: 1, 0: 0}
encoded_category = spam['Category'].map(category_codes)

# Fail loudly on labels outside the mapping rather than passing NaN targets
# on to the model.
if encoded_category.isna().any():
    unknown_labels = spam.loc[encoded_category.isna(), 'Category'].unique()
    raise ValueError(f'Unexpected Category values: {list(unknown_labels)}')

spam['Category'] = encoded_category
spam

Unnamed: 0,Category,Message
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5567,1,This is the 2nd time we have tried 2 contact u...
5568,0,Will �_ b going to esplanade fr home?
5569,0,"Pity, * was in mood for that. So...any other s..."
5570,0,The guy did some bitching but I acted like i'd...


##### Taking the independent and dependent values as X and y respectively

In [17]:
# Features are the TF-IDF matrix; the target is the Category column of spam.
X, y = X_vectorized, spam['Category']

##### Splitting the data for training and testing

In [18]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing.
# - random_state pins the shuffle so the split (and the accuracy reported
#   from it) is the same on every Restart & Run All.
# - stratify=y keeps the spam/ham proportion the same in both parts, so the
#   test score is not skewed by an unlucky draw of the rarer class.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Model Building and Evaluation

##### Using Support Vector Machine
***98% Accuracy Model***

In [19]:
from sklearn.svm import SVC

# Train a support vector classifier with scikit-learn's default settings on
# the training split (fit returns the estimator itself, so it can be chained),
# then report mean accuracy on the held-out split.
model = SVC().fit(X_train, y_train)
print('Accuracy = ', model.score(X_test, y_test))

Accuracy =  0.9856502242152466


# Model Deployment

### Ham examples:-
-  Hi team, Please find attached the agenda for tomorrow's meeting. Let me know if you have any questions or need further information. Best, John.
- Hi Sarah, I wanted to give you a quick update on the project. We are on track to meet our deadlines and will have the first draft ready by next week. Regards, Mike.
### Spam examples:-
- Congratulations! You've won a $1000 gift card! Body: Click the link below to claim your prize. This offer is only valid for 24 hours. Claim Your Prize Now.
- Congratulations! You have been chosen to receive a free vacation package to the destination of your choice. Click here to claim your prize.

In [20]:
# Interactive demo: read a batch of emails from the user and print the
# predicted label ('spam' or 'ham') for each, one per line.

# Ask how many emails will follow. A non-numeric answer is reported and
# treated as "nothing to classify" instead of aborting the cell with a
# ValueError; a negative count is clamped to zero.
try:
    n = int(input('Enter the number of emails to be predicted '))
except ValueError:
    print('Please enter a whole number; no emails were read.')
    n = 0
n = max(n, 0)

print('Enter', n, 'emails')
new_emails = [input() for _ in range(n)]

# Maps the model's integer classes back to readable labels.
d = {1: 'spam', 0: 'ham'}

if new_emails:
    # Reuse the already-fitted vectorizer so the new texts are projected onto
    # the same feature columns the model was trained on.
    new_emails_transformed = vectorizer.transform(new_emails)
    predictions = model.predict(new_emails_transformed)
else:
    # scikit-learn rejects zero-sample input, so skip the transform/predict
    # step entirely when there is nothing to classify.
    predictions = []

for predicted_class in predictions:
    print(d[int(predicted_class)])

Enter the number of emails to be predcited 4
Enter 4 emails
Hi team, Please find attached the agenda for tomorrow's meeting. Let me know if you have any questions or need further information. Best, John.
Hi Sarah, I wanted to give you a quick update on the project. We are on track to meet our deadlines and will have the first draft ready by next week. Regards, Mike.
Congratulations! You've won a $1000 gift card! Body: Click the link below to claim your prize. This offer is only valid for 24 hours. Claim Your Prize Now.
Congratulations! You have been chosen to receive a free vacation package to the destination of your choice. Click here to claim your prize.
ham
ham
spam
spam
