Repeat steps until model training

In [2]:
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from keras.utils.np_utils import to_categorical
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers import Flatten
from keras import optimizers
from keras.constraints import maxnorm


import re

Using TensorFlow backend.


In [3]:
dataset = pd.read_csv('spam.csv', encoding='latin-1')

In [4]:
dataset.drop(['Unnamed: 2','Unnamed: 3','Unnamed: 4'],axis=1,inplace=True)

In [5]:
dataset['spam_or_ham']=dataset.spam_or_ham.str.strip()
dataset['message']=dataset.message.str.strip()
dataset['message']=dataset.message.str.lower()

In [6]:
maximum_number_of_features = 1500
tokenizer = Tokenizer(num_words=maximum_number_of_features, split=' ')
tokenizer.fit_on_texts(dataset['message'].values)
X = tokenizer.texts_to_sequences(dataset['message'].values)

In [7]:
X = pad_sequences(X)

In [8]:
labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(dataset['spam_or_ham'])
Y = to_categorical(integer_encoded)

In [9]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42)
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

(3733, 172) (3733, 2)
(1839, 172) (1839, 2)


LSTM Model training:
1. Create a Sequential model in which sequential model can add several layers to the model.
2. Add LSTM Layer.
2. Add several layers like Embedding with and SpatialDropout1D.

In [None]:
model = Sequential()
model.add(Embedding(1500, 128,input_length = X.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(196, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(2,activation='sigmoid'))
model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])

print(model.summary())

W0722 21:30:11.275977  3380 deprecation_wrapper.py:119] From C:\Users\kaphc\Anaconda3\envs\python3.6\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0722 21:30:11.319418  3380 deprecation_wrapper.py:119] From C:\Users\kaphc\Anaconda3\envs\python3.6\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0722 21:30:11.325401  3380 deprecation_wrapper.py:119] From C:\Users\kaphc\Anaconda3\envs\python3.6\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0722 21:30:11.385217  3380 deprecation_wrapper.py:119] From C:\Users\kaphc\Anaconda3\envs\python3.6\lib\site-packages\keras\backend\tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_defa

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 172, 128)          192000    
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 172, 128)          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 196)               254800    
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 394       
Total params: 447,194
Trainable params: 447,194
Non-trainable params: 0
_________________________________________________________________
None


Fit the model

In [None]:
model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs = 16, batch_size=100, verbose = 2)

W0722 21:30:12.989036  3380 deprecation.py:323] From C:\Users\kaphc\Anaconda3\envs\python3.6\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 3733 samples, validate on 1839 samples
Epoch 1/16
 - 62s - loss: 0.4176 - acc: 0.8602 - val_loss: 0.1901 - val_acc: 0.9342
Epoch 2/16
 - 60s - loss: 0.0897 - acc: 0.9721 - val_loss: 0.0662 - val_acc: 0.9821
Epoch 3/16
 - 61s - loss: 0.0403 - acc: 0.9879 - val_loss: 0.0576 - val_acc: 0.9842
Epoch 4/16
 - 61s - loss: 0.0294 - acc: 0.9922 - val_loss: 0.0599 - val_acc: 0.9831
Epoch 5/16
 - 61s - loss: 0.0180 - acc: 0.9954 - val_loss: 0.0602 - val_acc: 0.9848
Epoch 6/16
 - 55s - loss: 0.0095 - acc: 0.9981 - val_loss: 0.0642 - val_acc: 0.9831
Epoch 7/16
 - 58s - loss: 0.0127 - acc: 0.9968 - val_loss: 0.0611 - val_acc: 0.9831
Epoch 8/16


Find accuracy

In [None]:
score,acc = model.evaluate(X_test,Y_test,verbose=2,batch_size=40)
print(score)
print(acc)