### IMPORTS

---





In [50]:
import numpy as np
import pandas as pd

In [51]:
# Load the raw SMS dataset; the file is decoded as Latin-1 instead of the
# default UTF-8 (presumably because it is not valid UTF-8 -- confirm).
df = pd.read_csv("spam.csv", encoding="latin1")

In [52]:
# Get rid of the unnecessary columns.
# The result is reassigned instead of using inplace=True, so the cell is a
# plain expression and does not mutate a frame object that another cell may
# still be holding; `columns=` states the axis explicitly.
df = df.drop(columns=["Unnamed: 2", "Unnamed: 3", "Unnamed: 4"])

In [53]:
# Rename the two remaining columns: the class label and the message text.
df.columns = ["labels", "Message"]

In [54]:
# Preview the first rows of the frame.
df.head()

Unnamed: 0,labels,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [55]:
# Encode the text labels numerically: ham -> 0, spam -> 1.
label_to_tag = {"ham": 0, "spam": 1}
df["label_tag"] = df["labels"].map(label_to_tag)

In [56]:
# Preview the frame again, now including the numeric `label_tag` column.
df.head()

Unnamed: 0,labels,Message,label_tag
0,ham,"Go until jurong point, crazy.. Available only ...",0
1,ham,Ok lar... Joking wif u oni...,0
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,1
3,ham,U dun say so early hor... U c already then say...,0
4,ham,"Nah I don't think he goes to usf, he lives aro...",0


In [57]:
# Target vector: the numeric label (0 = ham, 1 = spam).
# Selected by name rather than by position so that adding another column to
# `df` later cannot silently change what is being predicted.
y = df["label_tag"]

In [58]:
# Feature frame: only the raw message text.
# The previous positional slice (every column after the first) also carried
# `label_tag` -- the target itself -- inside X. Keeping a one-column DataFrame
# preserves the `X_train.Message` / `X_test.Message` access used further down.
X = df[["Message"]]

In [59]:
from sklearn.model_selection import train_test_split
# Hold out 15% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=4,
)

In [60]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [61]:
# TF-IDF vectoriser: keep every term (min_df=1), drop English stop words and
# lowercase the text before tokenising.
# `lowercase` has to be a real boolean. The string 'True' used before only
# worked because any non-empty string is truthy, and scikit-learn releases
# that validate estimator parameters reject it.
tfidf = TfidfVectorizer(min_df=1, stop_words="english", lowercase=True)

In [62]:
# Learn the vocabulary and IDF weights from the training messages, then
# encode those same messages as a TF-IDF matrix.
train_messages = X_train["Message"]
x_train_feature = tfidf.fit_transform(train_messages)
print(x_train_feature)

  (0, 3103)	0.5421454775653566
  (0, 7304)	0.37271195763760273
  (0, 2177)	0.2895581893858797
  (0, 7322)	0.46470205236196555
  (0, 6211)	0.5170803953734132
  (1, 827)	0.5278284887733855
  (1, 1308)	0.5278284887733855
  (1, 5790)	0.5278284887733855
  (1, 1771)	0.4052052064295474
  (2, 4315)	0.3435706926981149
  (2, 2882)	0.35439042050157565
  (2, 2542)	0.42345558973428904
  (2, 7516)	0.3411739502624901
  (2, 3952)	0.22296547983017628
  (2, 4073)	0.28770686736778844
  (2, 4121)	0.4728599947216733
  (2, 896)	0.32336803618486987
  (3, 1962)	0.6298684420490477
  (3, 1541)	0.6298684420490477
  (3, 3298)	0.45445735929942965
  (4, 1100)	0.4604972817632959
  (4, 3198)	0.49913443882370845
  (4, 5112)	0.5248127472613591
  (4, 3310)	0.5132042924418523
  (5, 336)	0.18802808085957515
  :	:
  (4732, 1638)	0.21051377136204036
  (4732, 4115)	0.20720393443788712
  (4732, 5357)	0.22459435949047968
  (4732, 4942)	0.21290968703463373
  (4732, 2999)	0.2917394091772359
  (4732, 2124)	0.20230469749712093
  (

In [63]:
# Encode the held-out messages with the vocabulary fitted on the training
# set (transform only -- the vectoriser is not re-fitted here).
test_messages = X_test["Message"]
x_test_feature = tfidf.transform(test_messages)
print(x_test_feature)

  (0, 6888)	0.3705732510831153
  (0, 5006)	0.5291384754400474
  (0, 4987)	0.5745485256290435
  (0, 2320)	0.502575298923619
  (1, 4214)	0.657782331438692
  (1, 4202)	0.4374036792152095
  (1, 771)	0.6131887359174797
  (2, 7546)	0.14312700554450272
  (2, 7115)	0.19704744934386823
  (2, 6739)	0.12649075261531842
  (2, 6445)	0.13179418322798525
  (2, 6217)	0.15972455063700605
  (2, 5996)	0.19074646400991946
  (2, 5205)	0.13892399761086077
  (2, 4723)	0.18262308106345815
  (2, 4169)	0.2166200975035556
  (2, 3655)	0.2280693627167817
  (2, 3411)	0.15910392428201797
  (2, 3222)	0.22176837738283287
  (2, 2756)	0.20517083229032956
  (2, 2102)	0.2084967145570943
  (2, 1967)	0.4561387254335634
  (2, 1885)	0.247642010876469
  (2, 1749)	0.18892406639740694
  (2, 759)	0.23619274566324297
  :	:
  (833, 7546)	0.21778184521700455
  (833, 7449)	0.2449899790729705
  (833, 7151)	0.2470363624899714
  (833, 7037)	0.30766072997679467
  (833, 7030)	0.19488940307785724
  (833, 6750)	0.2666062921538245
  (833, 66

In [64]:
# Cast both label vectors to integers before fitting and scoring.
y_train, y_test = (labels.astype("int") for labels in (y_train, y_test))

In [66]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [67]:
# Candidate models keyed by display name.
# random_state is fixed (same seed as the train/test split) so the forest --
# and therefore the reported accuracy and every later prediction -- is
# reproducible across runs; without it each fit builds a different forest.
models = {
    "Random_Forest": RandomForestClassifier(
        n_estimators=10,
        criterion="entropy",
        random_state=4,
    ),
}

In [68]:
# Train each candidate on the training features and report its accuracy on
# the held-out set. `model` and `y_pred` stay bound to the last iteration
# after the loop and are reused by later cells.
for model_name, model in models.items():
    model.fit(x_train_feature, y_train)
    y_pred = model.predict(x_test_feature)
    print(y_pred)
    print(f"Accuracy for {model_name} model : {accuracy_score(y_test, y_pred)}")

[0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0
 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 1
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 0 1 1 0 0 1 0 0
 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 1 0 0 0 0 1 0 1 1 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 

In [69]:
# NOTE(review): within the visible notebook `a` is first assigned in a later
# cell (In [72]), so running top-to-bottom on a fresh kernel this cell raises
# NameError. It also re-fits the model before predicting. The identical
# statement appears again after `a` is defined, so this cell looks like a
# leftover -- TODO confirm and remove.
y_pred=model.fit(x_train_feature, y_train).predict(a)

In [27]:
# Two ad-hoc sample sentences (not referenced by any other cell shown here).
l = [
    "I am Ham",
    " I am not beautiful",
]

In [70]:
# Wrap one raw SMS in a single-element Series so it can be handed to the
# vectoriser, which works on a collection of documents.
d = pd.Series(
    ['SMS SERVICES. for your inclusive text credits, pls goto www.comuk.net login= 3qxj9 unsubscribe with STOP, no extra charge. help 08702840625.COMUK. 220-CM2 9AE']
)

In [71]:
# Display the Series to check its content.
d

0    SMS SERVICES. for your inclusive text credits,...
dtype: object

In [72]:
# Encode the sample message with the already-fitted vectoriser
# (transform only, so the learned vocabulary is left untouched).
a = tfidf.transform(d)

In [73]:
# Classify the encoded sample with the model that was trained in the
# evaluation loop. The model is not re-fitted here: fitting again is
# redundant work and, when the estimator has no fixed random_state, produces
# a different model from the one whose accuracy was reported.
y_pred = model.predict(a)

In [74]:
# Predicted label tag for the single sample (1 = spam, 0 = ham).
y_pred[0]

1

In [75]:
# Look up one held-out message by its row label.
X_test["Message"].loc[2276]

'Love you aathi..love u lot..'

In [44]:
# Look up another held-out message by its row label.
X_test["Message"].loc[4498]

'SMS SERVICES. for your inclusive text credits, pls goto www.comuk.net login= 3qxj9 unsubscribe with STOP, no extra charge. help 08702840625.COMUK. 220-CM2 9AE'

In [79]:
def predict(new_message):
    """Classify a single SMS text with the fitted vectoriser and model.

    Parameters
    ----------
    new_message : str
        Raw message text to classify.

    Returns
    -------
    The predicted label tag for the message: 1 for spam, 0 for ham.

    Notes
    -----
    Uses the notebook-level ``tfidf`` and ``model`` objects, both of which
    must already be fitted. The model is not re-fitted on each call:
    doing so repeats the whole training for every message and, when the
    estimator has no fixed ``random_state``, makes each call predict with a
    different model from the one that was evaluated.
    """
    # The vectoriser works on a collection of documents, so wrap the text.
    features = tfidf.transform(pd.Series(new_message))
    return model.predict(features)[0]

In [80]:
# Try the helper on a sample message; the cell output is the predicted tag.
predict('SMS SERVICES. for your inclusive text credits, pls goto www.comuk.net login= 3qxj9 unsubscribe with STOP, no extra charge. help 08702840625.COMUK. 220-CM2 9AE')

1

In [82]:
def spam_classificatio(c):
    """Translate a numeric label tag into a readable class name.

    Parameters
    ----------
    c : int
        Label tag to translate; 1 means spam.

    Returns
    -------
    str
        ``"SPAM"`` when ``c`` equals 1, ``"HAM"`` for any other value.
    """
    # Decide from the argument itself. The previous body ignored `c` and read
    # the global `y_pred[0]`, so the answer depended on whichever prediction
    # happened to be left in the kernel.
    return "SPAM" if c == 1 else "HAM"

In [83]:
# Check the helper with the spam tag.
spam_classificatio(1)

'SPAM'