In [127]:
import numpy as np
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize
import gensim
import gensim.downloader as api
from gensim.utils import simple_preprocess
from gensim.models import Word2Vec, KeyedVectors
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import warnings
warnings.filterwarnings('ignore')

In [92]:
# Load the tab-separated SMS collection; the file has no header row, so the two
# columns are named explicitly.
# NOTE(review): the path is an absolute Colab location — adjust when running elsewhere.
df = pd.read_csv(
    r'/content/SMSSpamCollection.txt',
    sep='\t',
    names=['label', 'text'],
)

In [93]:
# Create a WordNet lemmatizer instance.
# NOTE(review): `lemmatizer` is not referenced by any other visible cell — confirm
# whether lemmatization was meant to be part of the cleaning step.
lemmatizer = WordNetLemmatizer()

In [94]:
# Fetch the NLTK resources used below: the stop-word list for the filtering step
# and the 'punkt_tab' tokenizer data (presumably what sent_tokenize relies on).
nltk.download('stopwords')
nltk.download('punkt_tab')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [95]:
# Clean every message: keep letters only, lower-case, split on whitespace, drop
# English stop words, and re-join the surviving tokens with single spaces.
# The stop-word list is loaded once into a set; calling stopwords.words() inside
# the comprehension re-read it for every single word of every message.
english_stopwords = set(stopwords.words('english'))

corpus = []
for raw_text in df['text']:
    letters_only = re.sub('[^a-zA-Z]', ' ', raw_text)
    tokens = letters_only.lower().split()
    kept_tokens = [word for word in tokens if word not in english_stopwords]
    # A message made up only of stop words / non-letters becomes an empty string here.
    corpus.append(' '.join(kept_tokens))

In [96]:
# Tokenise the cleaned messages: each sentence found by sent_tokenize becomes one
# token list via gensim's simple_preprocess. An empty message yields no sentence and
# therefore contributes no entry to `words`.
words = [
    simple_preprocess(sentence_text)
    for message in corpus
    for sentence_text in sent_tokenize(message)
]

In [97]:
# Sanity check: show the token lists of the first five messages.
words[:5]

[['go',
  'jurong',
  'point',
  'crazy',
  'available',
  'bugis',
  'great',
  'world',
  'la',
  'buffet',
  'cine',
  'got',
  'amore',
  'wat'],
 ['ok', 'lar', 'joking', 'wif', 'oni'],
 ['free',
  'entry',
  'wkly',
  'comp',
  'win',
  'fa',
  'cup',
  'final',
  'tkts',
  'st',
  'may',
  'text',
  'fa',
  'receive',
  'entry',
  'question',
  'std',
  'txt',
  'rate',
  'apply'],
 ['dun', 'say', 'early', 'hor', 'already', 'say'],
 ['nah', 'think', 'goes', 'usf', 'lives', 'around', 'though']]

In [98]:
# Train a Word2Vec model on the tokenised messages using the library defaults
# (the 100 feature columns shown further down come from the default vector size).
model = Word2Vec(sentences=words)

In [37]:
# Load the pretrained 'word2vec-google-news-300' vectors through gensim's downloader.
# NOTE(review): `wv` is not referenced by any other visible cell (the features below
# come from the locally trained `model`), and this cell's execution count (37) is out
# of sequence with its neighbours — confirm whether this load is still needed.
wv = api.load('word2vec-google-news-300')



In [99]:
# Show the first 30 entries of the trained model's vocabulary (not the whole list).
model.wv.index_to_key[:30]

['call',
 'get',
 'ur',
 'gt',
 'lt',
 'ok',
 'go',
 'free',
 'know',
 'like',
 'good',
 'day',
 'got',
 'come',
 'time',
 'love',
 'send',
 'want',
 'text',
 'txt',
 'one',
 'going',
 'need',
 'home',
 'stop',
 'lor',
 'sorry',
 'today',
 'see',
 'still']

In [100]:
# Number of sentences the model was built from; the value shown (5564) equals the
# number of token lists in `words`, i.e. fewer than the 5572 rows of the raw frame.
model.corpus_count

5564

In [101]:
# Nearest neighbours of 'happy' in the trained embedding space.
# NOTE(review): every similarity shown is ~0.999, i.e. the vectors are barely
# distinguishable from one another — possibly a sign of an under-trained model on
# this small corpus; worth confirming before relying on these embeddings.
model.wv.similar_by_word('happy')

[('day', 0.9993436336517334),
 ('dear', 0.9991540908813477),
 ('new', 0.9990948438644409),
 ('wish', 0.9990916848182678),
 ('amp', 0.9990845322608948),
 ('birthday', 0.999076247215271),
 ('great', 0.9990759491920471),
 ('love', 0.9990725517272949),
 ('good', 0.9990459680557251),
 ('said', 0.9990389943122864)]

In [102]:
def avg_word2vec(doc):
    """Return the mean Word2Vec embedding of the in-vocabulary tokens of ``doc``.

    Parameters
    ----------
    doc : iterable of str
        Tokens of one message. Tokens unknown to the global ``model`` are skipped.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``model.wv.vector_size``. When no token of ``doc`` is in
        the vocabulary (including an empty ``doc``) the array is filled with NaN, so
        callers can still detect and drop such messages while always receiving a
        vector of the same shape.
    """
    keyed_vectors = model.wv
    # key_to_index is a dict, so the membership test is O(1); testing against the
    # index_to_key list scanned the whole vocabulary for every token.
    known = keyed_vectors.key_to_index
    vectors = [keyed_vectors[word] for word in doc if word in known]
    if not vectors:
        # np.mean([]) would emit a RuntimeWarning and return a 0-d NaN scalar.
        return np.full(keyed_vectors.vector_size, np.nan, dtype=np.float32)
    return np.mean(vectors, axis=0)

In [103]:
# Embed every tokenised message as the average of its word vectors, with a
# progress bar over the messages.
X = [avg_word2vec(tokens) for tokens in tqdm(words)]

100%|██████████| 5564/5564 [00:01<00:00, 5530.80it/s]


In [104]:
# Number of embedded messages (one entry per token list in `words`).
len(X)

5564

In [105]:
# Shape of the raw frame, for comparison with len(X): the raw frame has more rows
# than there are embedded messages, so the labels must be filtered to match.
df.shape

(5572, 2)

In [106]:
# Build the target vector for the messages that survived cleaning: keep only rows
# whose cleaned text is non-empty, one-hot encode the label, and take the first
# dummy column as a 0/1 integer array.
# NOTE(review): which label the first dummy column stands for depends on the column
# order produced by get_dummies — confirm which class is encoded as 1.
has_text = [len(message) > 0 for message in corpus]
label_dummies = pd.get_dummies(df.loc[has_text, 'label']).astype(int)
y = label_dummies.iloc[:, 0].values

In [107]:
# Confirm the label vector has one entry per embedded message (should equal len(X)).
y.shape

(5564,)

In [108]:
# Assemble the per-message vectors into one feature table from a single 2-D array,
# instead of building and concatenating one single-row DataFrame per message.
# avg_word2vec can yield a 0-d NaN for a message without any in-vocabulary word;
# such entries are expanded to a full NaN row here so that they are still removed
# by the later dropna() step.
# NOTE(review): this rebinds `df`, so the raw label/text frame is no longer
# reachable afterwards; the name is kept because later cells refer to `df`.
n_features = max(np.size(vec) for vec in X)
feature_rows = [
    np.ravel(vec) if np.ndim(vec) else np.full(n_features, np.nan, dtype=np.float32)
    for vec in X
]
df = pd.DataFrame(np.vstack(feature_rows))

In [109]:
# Preview the feature table: one row per message, one column per embedding dimension.
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99
0,-0.078511,0.183765,0.053795,0.0908,0.111081,-0.367333,0.150768,0.514643,-0.189338,-0.094752,-0.15602,-0.347673,-0.100849,0.118151,0.051265,-0.154576,0.06893,-0.274484,0.071496,-0.464868,0.097743,0.158133,0.080467,-0.101638,-0.052768,0.009319,-0.205429,-0.270603,-0.168774,0.016852,0.175416,0.051472,0.067228,-0.069178,-0.056084,0.19641,-0.050352,-0.237485,-0.19699,-0.482627,0.048287,-0.211404,0.002346,0.009678,0.151365,-0.057974,-0.150307,-0.114053,0.09673,0.141032,0.144075,-0.223182,-0.039815,0.009162,-0.207022,0.112373,0.096408,0.000724,-0.246231,0.07747,0.016287,0.131694,-0.110633,-0.009273,-0.255506,0.183892,0.056102,0.231327,-0.266521,0.305315,-0.199171,0.080051,0.247861,-0.137883,0.265828,0.146377,-0.025478,-0.009966,-0.181508,0.077735,-0.085491,0.029768,-0.308752,0.355198,-0.022975,0.046495,0.016685,0.260976,0.298337,0.033449,0.302434,0.143173,-0.021886,0.096823,0.38839,0.273649,0.147532,-0.27318,0.137404,-0.045766
1,-0.068497,0.161958,0.045265,0.077333,0.09402,-0.321119,0.134866,0.445965,-0.169561,-0.084136,-0.133923,-0.303991,-0.089764,0.10346,0.037269,-0.134766,0.062747,-0.236528,0.058591,-0.406298,0.079793,0.140191,0.067689,-0.088913,-0.047767,0.010487,-0.178766,-0.235801,-0.15135,0.018063,0.153888,0.043002,0.055731,-0.070421,-0.045581,0.177058,-0.043929,-0.205017,-0.167356,-0.424586,0.045262,-0.182892,0.003307,0.008943,0.134654,-0.052966,-0.13036,-0.094091,0.083165,0.1224,0.122821,-0.193481,-0.030244,0.014193,-0.18609,0.100171,0.08425,1.1e-05,-0.223276,0.061715,0.016915,0.113353,-0.0985,-0.008506,-0.220438,0.158659,0.048061,0.198573,-0.232704,0.259011,-0.173296,0.070232,0.220689,-0.117648,0.227756,0.129267,-0.021174,-0.007341,-0.154314,0.067928,-0.069454,0.021606,-0.269353,0.307706,-0.019282,0.039428,0.014384,0.235337,0.259977,0.027345,0.265677,0.124829,-0.021526,0.084265,0.338405,0.236665,0.12308,-0.243148,0.115697,-0.037108
2,-0.088543,0.201832,0.053131,0.107078,0.122256,-0.418405,0.177484,0.576422,-0.215096,-0.110669,-0.173393,-0.386792,-0.115775,0.139616,0.057219,-0.164369,0.079527,-0.304297,0.069675,-0.52383,0.107068,0.184836,0.083218,-0.117723,-0.061255,0.011448,-0.22409,-0.304356,-0.184637,0.022687,0.195134,0.059185,0.084325,-0.077113,-0.065207,0.221988,-0.04974,-0.262854,-0.214155,-0.54898,0.057291,-0.237814,0.001504,0.007009,0.169638,-0.058577,-0.172576,-0.123279,0.112674,0.16472,0.152269,-0.24674,-0.041293,0.009914,-0.234666,0.140286,0.105466,0.004555,-0.274618,0.09656,0.026111,0.149823,-0.133122,-0.008768,-0.283623,0.21587,0.057426,0.257022,-0.294762,0.347073,-0.220657,0.079465,0.281599,-0.156532,0.295503,0.165741,-0.028172,-0.014221,-0.20167,0.085668,-0.096566,0.031855,-0.352846,0.39437,-0.02343,0.051905,0.023558,0.291592,0.337578,0.031093,0.342094,0.156332,-0.019534,0.1143,0.425594,0.309428,0.165909,-0.311553,0.14908,-0.050672
3,-0.11522,0.276111,0.07375,0.133779,0.163778,-0.548939,0.215553,0.765684,-0.291025,-0.141193,-0.231763,-0.51581,-0.147683,0.168828,0.074989,-0.224668,0.106038,-0.416067,0.109305,-0.696887,0.139075,0.235468,0.11201,-0.155562,-0.084296,0.014715,-0.308804,-0.395352,-0.249243,0.027596,0.258406,0.081308,0.097579,-0.115737,-0.079592,0.297387,-0.076038,-0.360771,-0.286375,-0.724123,0.07114,-0.309016,0.006405,0.01322,0.229739,-0.088022,-0.225262,-0.165749,0.135165,0.209755,0.205533,-0.32312,-0.058121,0.019925,-0.303965,0.173923,0.139966,0.010905,-0.369434,0.121039,0.02429,0.197776,-0.161918,-0.012186,-0.380398,0.277477,0.091126,0.34391,-0.393993,0.454796,-0.297473,0.119429,0.366957,-0.206039,0.391796,0.218605,-0.041726,-0.014884,-0.263355,0.111518,-0.125657,0.050052,-0.459019,0.533542,-0.034297,0.067583,0.025402,0.392483,0.448027,0.052393,0.451238,0.213111,-0.027179,0.149107,0.576804,0.408064,0.225807,-0.409278,0.203771,-0.066639
4,-0.071212,0.169902,0.045504,0.085141,0.100748,-0.324052,0.131518,0.453935,-0.167459,-0.082431,-0.135394,-0.307722,-0.08731,0.106285,0.044576,-0.136811,0.060254,-0.245725,0.062401,-0.40754,0.083109,0.140211,0.071022,-0.087898,-0.053431,0.00657,-0.176245,-0.237084,-0.149285,0.016705,0.157986,0.045157,0.064308,-0.061481,-0.04536,0.172347,-0.041236,-0.210348,-0.169163,-0.424992,0.044495,-0.187007,0.001705,0.0052,0.134033,-0.053106,-0.136265,-0.098228,0.082379,0.127635,0.123792,-0.195567,-0.03525,0.012872,-0.188047,0.110387,0.084759,0.005219,-0.215142,0.072093,0.013471,0.118768,-0.096339,-0.009851,-0.225598,0.164907,0.048857,0.204354,-0.235323,0.27476,-0.175881,0.071348,0.218799,-0.123974,0.233456,0.130962,-0.026636,-0.013988,-0.160775,0.06617,-0.078298,0.022391,-0.272981,0.31121,-0.023196,0.043539,0.015343,0.229452,0.267015,0.023474,0.266007,0.124463,-0.023728,0.085773,0.346644,0.241635,0.133123,-0.242345,0.117546,-0.036514


In [110]:
# Attach the 0/1 labels to the feature table as the 'output' column
# (row i of the features and y[i] refer to the same message).
df = df.assign(output=y)

In [111]:
# Preview the feature table again, now with the 'output' label column appended.
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,...,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,output
0,-0.078511,0.183765,0.053795,0.0908,0.111081,-0.367333,0.150768,0.514643,-0.189338,-0.094752,-0.15602,-0.347673,-0.100849,0.118151,0.051265,-0.154576,0.06893,-0.274484,0.071496,-0.464868,0.097743,0.158133,0.080467,-0.101638,-0.052768,0.009319,-0.205429,-0.270603,-0.168774,0.016852,0.175416,0.051472,0.067228,-0.069178,-0.056084,0.19641,-0.050352,-0.237485,-0.19699,-0.482627,0.048287,-0.211404,0.002346,0.009678,0.151365,-0.057974,-0.150307,-0.114053,0.09673,0.141032,...,-0.223182,-0.039815,0.009162,-0.207022,0.112373,0.096408,0.000724,-0.246231,0.07747,0.016287,0.131694,-0.110633,-0.009273,-0.255506,0.183892,0.056102,0.231327,-0.266521,0.305315,-0.199171,0.080051,0.247861,-0.137883,0.265828,0.146377,-0.025478,-0.009966,-0.181508,0.077735,-0.085491,0.029768,-0.308752,0.355198,-0.022975,0.046495,0.016685,0.260976,0.298337,0.033449,0.302434,0.143173,-0.021886,0.096823,0.38839,0.273649,0.147532,-0.27318,0.137404,-0.045766,1
1,-0.068497,0.161958,0.045265,0.077333,0.09402,-0.321119,0.134866,0.445965,-0.169561,-0.084136,-0.133923,-0.303991,-0.089764,0.10346,0.037269,-0.134766,0.062747,-0.236528,0.058591,-0.406298,0.079793,0.140191,0.067689,-0.088913,-0.047767,0.010487,-0.178766,-0.235801,-0.15135,0.018063,0.153888,0.043002,0.055731,-0.070421,-0.045581,0.177058,-0.043929,-0.205017,-0.167356,-0.424586,0.045262,-0.182892,0.003307,0.008943,0.134654,-0.052966,-0.13036,-0.094091,0.083165,0.1224,...,-0.193481,-0.030244,0.014193,-0.18609,0.100171,0.08425,1.1e-05,-0.223276,0.061715,0.016915,0.113353,-0.0985,-0.008506,-0.220438,0.158659,0.048061,0.198573,-0.232704,0.259011,-0.173296,0.070232,0.220689,-0.117648,0.227756,0.129267,-0.021174,-0.007341,-0.154314,0.067928,-0.069454,0.021606,-0.269353,0.307706,-0.019282,0.039428,0.014384,0.235337,0.259977,0.027345,0.265677,0.124829,-0.021526,0.084265,0.338405,0.236665,0.12308,-0.243148,0.115697,-0.037108,1
2,-0.088543,0.201832,0.053131,0.107078,0.122256,-0.418405,0.177484,0.576422,-0.215096,-0.110669,-0.173393,-0.386792,-0.115775,0.139616,0.057219,-0.164369,0.079527,-0.304297,0.069675,-0.52383,0.107068,0.184836,0.083218,-0.117723,-0.061255,0.011448,-0.22409,-0.304356,-0.184637,0.022687,0.195134,0.059185,0.084325,-0.077113,-0.065207,0.221988,-0.04974,-0.262854,-0.214155,-0.54898,0.057291,-0.237814,0.001504,0.007009,0.169638,-0.058577,-0.172576,-0.123279,0.112674,0.16472,...,-0.24674,-0.041293,0.009914,-0.234666,0.140286,0.105466,0.004555,-0.274618,0.09656,0.026111,0.149823,-0.133122,-0.008768,-0.283623,0.21587,0.057426,0.257022,-0.294762,0.347073,-0.220657,0.079465,0.281599,-0.156532,0.295503,0.165741,-0.028172,-0.014221,-0.20167,0.085668,-0.096566,0.031855,-0.352846,0.39437,-0.02343,0.051905,0.023558,0.291592,0.337578,0.031093,0.342094,0.156332,-0.019534,0.1143,0.425594,0.309428,0.165909,-0.311553,0.14908,-0.050672,0
3,-0.11522,0.276111,0.07375,0.133779,0.163778,-0.548939,0.215553,0.765684,-0.291025,-0.141193,-0.231763,-0.51581,-0.147683,0.168828,0.074989,-0.224668,0.106038,-0.416067,0.109305,-0.696887,0.139075,0.235468,0.11201,-0.155562,-0.084296,0.014715,-0.308804,-0.395352,-0.249243,0.027596,0.258406,0.081308,0.097579,-0.115737,-0.079592,0.297387,-0.076038,-0.360771,-0.286375,-0.724123,0.07114,-0.309016,0.006405,0.01322,0.229739,-0.088022,-0.225262,-0.165749,0.135165,0.209755,...,-0.32312,-0.058121,0.019925,-0.303965,0.173923,0.139966,0.010905,-0.369434,0.121039,0.02429,0.197776,-0.161918,-0.012186,-0.380398,0.277477,0.091126,0.34391,-0.393993,0.454796,-0.297473,0.119429,0.366957,-0.206039,0.391796,0.218605,-0.041726,-0.014884,-0.263355,0.111518,-0.125657,0.050052,-0.459019,0.533542,-0.034297,0.067583,0.025402,0.392483,0.448027,0.052393,0.451238,0.213111,-0.027179,0.149107,0.576804,0.408064,0.225807,-0.409278,0.203771,-0.066639,1
4,-0.071212,0.169902,0.045504,0.085141,0.100748,-0.324052,0.131518,0.453935,-0.167459,-0.082431,-0.135394,-0.307722,-0.08731,0.106285,0.044576,-0.136811,0.060254,-0.245725,0.062401,-0.40754,0.083109,0.140211,0.071022,-0.087898,-0.053431,0.00657,-0.176245,-0.237084,-0.149285,0.016705,0.157986,0.045157,0.064308,-0.061481,-0.04536,0.172347,-0.041236,-0.210348,-0.169163,-0.424992,0.044495,-0.187007,0.001705,0.0052,0.134033,-0.053106,-0.136265,-0.098228,0.082379,0.127635,...,-0.195567,-0.03525,0.012872,-0.188047,0.110387,0.084759,0.005219,-0.215142,0.072093,0.013471,0.118768,-0.096339,-0.009851,-0.225598,0.164907,0.048857,0.204354,-0.235323,0.27476,-0.175881,0.071348,0.218799,-0.123974,0.233456,0.130962,-0.026636,-0.013988,-0.160775,0.06617,-0.078298,0.022391,-0.272981,0.31121,-0.023196,0.043539,0.015343,0.229452,0.267015,0.023474,0.266007,0.124463,-0.023728,0.085773,0.346644,0.241635,0.133123,-0.242345,0.117546,-0.036514,1


In [112]:
# Count missing values per column; the non-zero counts shown come from messages
# for which no embedding could be computed.
df.isna().sum()

Unnamed: 0,0
0,74
1,74
2,74
3,74
4,74
...,...
96,74
97,74
98,74
99,74


In [113]:
# Discard the rows that contain missing values (messages without an embedding).
# The index is not reset, so row labels keep gaps where rows were removed.
df = df.dropna()

In [115]:
# Re-check: every column should now report zero missing values.
df.isna().sum()

Unnamed: 0,0
0,0
1,0
2,0
3,0
4,0
...,...
96,0
97,0
98,0
99,0


In [118]:
# Feature matrix: every column except the label.
# Note that this rebinds `X`, which previously held the list of per-message vectors.
X = df.drop(columns='output')

In [119]:
# Preview the feature matrix (embedding columns only, no label).
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99
0,-0.078511,0.183765,0.053795,0.0908,0.111081,-0.367333,0.150768,0.514643,-0.189338,-0.094752,-0.15602,-0.347673,-0.100849,0.118151,0.051265,-0.154576,0.06893,-0.274484,0.071496,-0.464868,0.097743,0.158133,0.080467,-0.101638,-0.052768,0.009319,-0.205429,-0.270603,-0.168774,0.016852,0.175416,0.051472,0.067228,-0.069178,-0.056084,0.19641,-0.050352,-0.237485,-0.19699,-0.482627,0.048287,-0.211404,0.002346,0.009678,0.151365,-0.057974,-0.150307,-0.114053,0.09673,0.141032,0.144075,-0.223182,-0.039815,0.009162,-0.207022,0.112373,0.096408,0.000724,-0.246231,0.07747,0.016287,0.131694,-0.110633,-0.009273,-0.255506,0.183892,0.056102,0.231327,-0.266521,0.305315,-0.199171,0.080051,0.247861,-0.137883,0.265828,0.146377,-0.025478,-0.009966,-0.181508,0.077735,-0.085491,0.029768,-0.308752,0.355198,-0.022975,0.046495,0.016685,0.260976,0.298337,0.033449,0.302434,0.143173,-0.021886,0.096823,0.38839,0.273649,0.147532,-0.27318,0.137404,-0.045766
1,-0.068497,0.161958,0.045265,0.077333,0.09402,-0.321119,0.134866,0.445965,-0.169561,-0.084136,-0.133923,-0.303991,-0.089764,0.10346,0.037269,-0.134766,0.062747,-0.236528,0.058591,-0.406298,0.079793,0.140191,0.067689,-0.088913,-0.047767,0.010487,-0.178766,-0.235801,-0.15135,0.018063,0.153888,0.043002,0.055731,-0.070421,-0.045581,0.177058,-0.043929,-0.205017,-0.167356,-0.424586,0.045262,-0.182892,0.003307,0.008943,0.134654,-0.052966,-0.13036,-0.094091,0.083165,0.1224,0.122821,-0.193481,-0.030244,0.014193,-0.18609,0.100171,0.08425,1.1e-05,-0.223276,0.061715,0.016915,0.113353,-0.0985,-0.008506,-0.220438,0.158659,0.048061,0.198573,-0.232704,0.259011,-0.173296,0.070232,0.220689,-0.117648,0.227756,0.129267,-0.021174,-0.007341,-0.154314,0.067928,-0.069454,0.021606,-0.269353,0.307706,-0.019282,0.039428,0.014384,0.235337,0.259977,0.027345,0.265677,0.124829,-0.021526,0.084265,0.338405,0.236665,0.12308,-0.243148,0.115697,-0.037108
2,-0.088543,0.201832,0.053131,0.107078,0.122256,-0.418405,0.177484,0.576422,-0.215096,-0.110669,-0.173393,-0.386792,-0.115775,0.139616,0.057219,-0.164369,0.079527,-0.304297,0.069675,-0.52383,0.107068,0.184836,0.083218,-0.117723,-0.061255,0.011448,-0.22409,-0.304356,-0.184637,0.022687,0.195134,0.059185,0.084325,-0.077113,-0.065207,0.221988,-0.04974,-0.262854,-0.214155,-0.54898,0.057291,-0.237814,0.001504,0.007009,0.169638,-0.058577,-0.172576,-0.123279,0.112674,0.16472,0.152269,-0.24674,-0.041293,0.009914,-0.234666,0.140286,0.105466,0.004555,-0.274618,0.09656,0.026111,0.149823,-0.133122,-0.008768,-0.283623,0.21587,0.057426,0.257022,-0.294762,0.347073,-0.220657,0.079465,0.281599,-0.156532,0.295503,0.165741,-0.028172,-0.014221,-0.20167,0.085668,-0.096566,0.031855,-0.352846,0.39437,-0.02343,0.051905,0.023558,0.291592,0.337578,0.031093,0.342094,0.156332,-0.019534,0.1143,0.425594,0.309428,0.165909,-0.311553,0.14908,-0.050672
3,-0.11522,0.276111,0.07375,0.133779,0.163778,-0.548939,0.215553,0.765684,-0.291025,-0.141193,-0.231763,-0.51581,-0.147683,0.168828,0.074989,-0.224668,0.106038,-0.416067,0.109305,-0.696887,0.139075,0.235468,0.11201,-0.155562,-0.084296,0.014715,-0.308804,-0.395352,-0.249243,0.027596,0.258406,0.081308,0.097579,-0.115737,-0.079592,0.297387,-0.076038,-0.360771,-0.286375,-0.724123,0.07114,-0.309016,0.006405,0.01322,0.229739,-0.088022,-0.225262,-0.165749,0.135165,0.209755,0.205533,-0.32312,-0.058121,0.019925,-0.303965,0.173923,0.139966,0.010905,-0.369434,0.121039,0.02429,0.197776,-0.161918,-0.012186,-0.380398,0.277477,0.091126,0.34391,-0.393993,0.454796,-0.297473,0.119429,0.366957,-0.206039,0.391796,0.218605,-0.041726,-0.014884,-0.263355,0.111518,-0.125657,0.050052,-0.459019,0.533542,-0.034297,0.067583,0.025402,0.392483,0.448027,0.052393,0.451238,0.213111,-0.027179,0.149107,0.576804,0.408064,0.225807,-0.409278,0.203771,-0.066639
4,-0.071212,0.169902,0.045504,0.085141,0.100748,-0.324052,0.131518,0.453935,-0.167459,-0.082431,-0.135394,-0.307722,-0.08731,0.106285,0.044576,-0.136811,0.060254,-0.245725,0.062401,-0.40754,0.083109,0.140211,0.071022,-0.087898,-0.053431,0.00657,-0.176245,-0.237084,-0.149285,0.016705,0.157986,0.045157,0.064308,-0.061481,-0.04536,0.172347,-0.041236,-0.210348,-0.169163,-0.424992,0.044495,-0.187007,0.001705,0.0052,0.134033,-0.053106,-0.136265,-0.098228,0.082379,0.127635,0.123792,-0.195567,-0.03525,0.012872,-0.188047,0.110387,0.084759,0.005219,-0.215142,0.072093,0.013471,0.118768,-0.096339,-0.009851,-0.225598,0.164907,0.048857,0.204354,-0.235323,0.27476,-0.175881,0.071348,0.218799,-0.123974,0.233456,0.130962,-0.026636,-0.013988,-0.160775,0.06617,-0.078298,0.022391,-0.272981,0.31121,-0.023196,0.043539,0.015343,0.229452,0.267015,0.023474,0.266007,0.124463,-0.023728,0.085773,0.346644,0.241635,0.133123,-0.242345,0.117546,-0.036514


In [120]:
# Target vector: the label column of the cleaned table, so it stays aligned with
# the rows of X after the rows with missing values were removed.
y = df.loc[:, 'output']

In [121]:
# Display the label series that will be used as the training target.
y

Unnamed: 0,output
0,1
1,1
2,0
3,1
4,1
...,...
5559,0
5560,1
5561,1
5562,1


In [129]:
# Hold out 15% of the messages for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=15,
)

In [130]:
# Fit a random forest on the averaged embeddings and score it on the held-out set.
# random_state is pinned because the forest's bootstrap sampling and feature
# selection are random: without a seed the reported accuracy changes on every run.
clf = RandomForestClassifier(random_state=15)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(f'Accuracy score: {accuracy_score(y_test, y_pred)}')

Accuracy score: 0.970873786407767


In [131]:
# Per-class precision / recall / F1 on the held-out set. The classes are imbalanced,
# so the per-class rows are more informative than the overall accuracy.
report_text = classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.96      0.81      0.88       108
           1       0.97      0.99      0.98       716

    accuracy                           0.97       824
   macro avg       0.96      0.90      0.93       824
weighted avg       0.97      0.97      0.97       824

