In [10]:
import keras
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, Model
from keras.layers import Dense, Flatten, Dropout, Embedding, Conv1D, SpatialDropout1D, GlobalMaxPool1D, SimpleRNN, LSTM, MaxPooling1D
from keras.layers import Input, concatenate
from keras.layers.wrappers import Bidirectional
from keras.callbacks import ModelCheckpoint

import os
from sklearn.metrics import roc_auc_score
import matplotlib.pylab as plt
%matplotlib inline

In [3]:
output_dir = 'model_output/dense'
output_dir_conv = 'model_output/conv'
output_dir_rnn = 'model_output/rnn'
output_dir_lstm = 'model_output/lstm'

epochs = 4
batch_size = 128

n_dim = 64
n_unique_words = 10000
n_words_toskip = 50
max_review_length = 100
pad_type = trunc_type = 'pre'
drop_embed = 0.2

n_dense = 256
dropout = 0.2

n_conv = 256
k_conv = 3

n_rnn = 256
k_conv = 3

n_lstm = 256
drop_lstm = 0.2

### Loading data 

In [4]:
(x_train,y_train),(x_valid,y_valid) = imdb.load_data(num_words=n_unique_words)

#### Rstore words from index

In [22]:
word_index = imdb.get_word_index()
word_index ={k:(v+3) for k,v in word_index.items()}
word_index["PAD"] = 0
word_index["START"] = 1
word_index['UNK'] = 2

In [27]:
index_word = {v:k for k,v in word_index.items()}

In [29]:
' '.join([index_word[indx] for indx in x_train[0]])

"UNK UNK UNK UNK UNK brilliant casting location scenery story direction everyone's really suited UNK part UNK played UNK UNK could UNK imagine being there robert UNK UNK UNK amazing actor UNK now UNK same being director UNK father came UNK UNK same scottish island UNK myself UNK UNK loved UNK fact there UNK UNK real connection UNK UNK UNK UNK witty remarks throughout UNK UNK were great UNK UNK UNK brilliant UNK much UNK UNK bought UNK UNK UNK soon UNK UNK UNK released UNK UNK UNK would recommend UNK UNK everyone UNK watch UNK UNK fly UNK UNK amazing really cried UNK UNK end UNK UNK UNK sad UNK UNK know what UNK say UNK UNK cry UNK UNK UNK UNK must UNK been good UNK UNK definitely UNK also UNK UNK UNK two little UNK UNK played UNK UNK UNK norman UNK paul UNK were UNK brilliant children UNK often left UNK UNK UNK UNK list UNK think because UNK stars UNK play them UNK grown up UNK such UNK big UNK UNK UNK whole UNK UNK these children UNK amazing UNK should UNK UNK UNK what UNK UNK done do

In [5]:
x_train = pad_sequences(x_train,maxlen=max_review_length,padding=pad_type,truncating=trunc_type)
x_valid = pad_sequences(x_valid,maxlen=max_review_length,padding=pad_type,truncating=trunc_type)


In [35]:
for rev in x_train[:6]:
    print(rev)

[1415    2    2    2    2  215    2   77   52    2    2  407    2   82
    2    2    2  107  117    2    2  256    2    2    2 3766    2  723
    2   71    2  530  476    2  400  317    2    2    2    2 1029    2
  104   88    2  381    2  297   98    2 2071   56    2  141    2  194
    2    2    2  226    2    2  134  476    2  480    2  144    2    2
    2   51    2    2  224   92    2  104    2  226   65    2    2 1334
   88    2    2  283    2    2 4472  113  103    2    2    2    2    2
  178    2]
[ 163    2 3215    2    2 1153    2  194  775    2    2    2  349 2637
  148  605    2    2    2  123  125   68    2    2    2  349  165 4362
   98    2    2  228    2    2    2 1157    2  299  120    2  120  174
    2  220  175  136   50    2 4373  228    2    2    2  656  245 2350
    2    2    2  131  152  491    2    2    2    2 1212    2    2    2
  371   78    2  625   64 1382    2    2  168  145    2    2 1690    2
    2    2 1355    2    2    2   52  154  462    2   89   78  285

### NN architecture

In [45]:
model = Sequential()
model.add(Embedding(n_unique_words,n_dim,input_length=max_review_length))
model.add(Flatten())
model.add(Dense(n_dense,activation='relu'))
model.add(Dropout(dropout))
model.add(Dense(1,activation='sigmoid'))

In [46]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 100, 64)           320000    
_________________________________________________________________
flatten_3 (Flatten)          (None, 6400)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 64)                409664    
_________________________________________________________________
dropout_3 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 65        
Total params: 729,729
Trainable params: 729,729
Non-trainable params: 0
_________________________________________________________________


In [53]:
model.compile(loss='binary_crossentropy',optimizer='adam', metrics=['accuracy'])

modelchekpoints = ModelCheckpoint(filepath=output_dir+'weights.{epoch:02d}.hdf5')

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

In [54]:
model.fit(x_train,y_train,batch_size=batch_size,epochs=epochs,verbose=1,validation_data=(x_valid,y_valid),callbacks=[modelchekpoints])

Train on 25000 samples, validate on 25000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x1a3e684be0>

In [75]:
model.load_weights('model_output'+'/denseweights.02.hdf5')

In [78]:
y_hat = model.predict_proba(x_valid)

roc_auc_score(y_valid,y_hat)

In [7]:
SpatialDropout1D?

[0;31mInit signature:[0m [0mSpatialDropout1D[0m[0;34m([0m[0mrate[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Spatial 1D version of Dropout.

This version performs the same function as Dropout, however it drops
entire 1D feature maps instead of individual elements. If adjacent frames
within feature maps are strongly correlated (as is normally the case in
early convolution layers) then regular dropout will not regularize the
activations and will otherwise just result in an effective learning rate
decrease. In this case, SpatialDropout1D will help promote independence
between feature maps and should be used instead.

# Arguments
    rate: float between 0 and 1. Fraction of the input units to drop.

# Input shape
    3D tensor with shape:
    `(samples, timesteps, channels)`

# Output shape
    Same as input

# References
    - [Efficient Object Localization Using Convolutional Networks](https://arxiv.org/abs/1411.4280)
[0;31mFi

# Conv

In [9]:
model = Sequential()
model.add(Embedding(n_unique_words,n_dim,input_length=max_review_length))
model.add(SpatialDropout1D(drop_embed))
#model.add(Flatten())
model.add(Conv1D(n_conv,k_conv, activation='relu'))
model.add(GlobalMaxPool1D())
model.add(Dense(n_dense,activation='relu'))
model.add(Dropout(dropout))
model.add(Dense(1,activation='sigmoid'))

In [10]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 400, 64)           320000    
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 400, 64)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 398, 256)          49408     
_________________________________________________________________
global_max_pooling1d_1 (Glob (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 257       
Total para

In [14]:
model.compile(loss='binary_crossentropy',optimizer='adam', metrics=['accuracy'])
modelchekpoints = ModelCheckpoint(filepath=output_dir_conv+'weights.{epoch:02d}.hdf5')
if not os.path.exists(output_dir_conv):
    os.makedirs(output_dir_conv)

In [15]:
model.fit(x_train,y_train,batch_size=batch_size,epochs=epochs,verbose=1,validation_data=(x_valid,y_valid),callbacks=[modelchekpoints])

Train on 25000 samples, validate on 25000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x1a30a7f198>

In [16]:
y_hat = model.predict_proba(x_valid)
roc_auc_score(y_valid,y_hat)

0.9606502368000001

## RNN

In [11]:
model = Sequential()
model.add(Embedding(n_unique_words,n_dim,input_length=max_review_length))
model.add(SpatialDropout1D(drop_embed))
model.add(SimpleRNN(n_rnn, dropout=dropout))
model.add(Dense(1,activation='sigmoid'))

In [12]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 100, 64)           640000    
_________________________________________________________________
spatial_dropout1d_2 (Spatial (None, 100, 64)           0         
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (None, 256)               82176     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 257       
Total params: 722,433
Trainable params: 722,433
Non-trainable params: 0
_________________________________________________________________


In [13]:
model.compile(loss='binary_crossentropy',optimizer='adam', metrics=['accuracy'])
modelchekpoints = ModelCheckpoint(filepath=output_dir_rnn+'weights.{epoch:02d}.hdf5')
if not os.path.exists(output_dir_rnn):
    os.makedirs(output_dir_rnn)

In [14]:
model.fit(x_train,y_train,batch_size=batch_size,epochs=epochs,verbose=1,validation_data=(x_valid,y_valid),callbacks=[modelchekpoints])

Train on 25000 samples, validate on 25000 samples
Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


<keras.callbacks.History at 0x1a40ffb240>

In [20]:
model.load_weights('model_output'+'/rnnweights.08.hdf5')

In [21]:
y_hat = model.predict_proba(x_valid)
roc_auc_score(y_valid,y_hat)

0.8088009312000001

In [17]:
!ls model_output/

[34mconv[m[m                 denseweights.04.hdf5 rnnweights.08.hdf5
convweights.01.hdf5  [34mrnn[m[m                  rnnweights.09.hdf5
convweights.02.hdf5  rnnweights.01.hdf5   rnnweights.10.hdf5
convweights.03.hdf5  rnnweights.02.hdf5   rnnweights.11.hdf5
convweights.04.hdf5  rnnweights.03.hdf5   rnnweights.12.hdf5
[34mdense[m[m                rnnweights.04.hdf5   rnnweights.13.hdf5
denseweights.01.hdf5 rnnweights.05.hdf5   rnnweights.14.hdf5
denseweights.02.hdf5 rnnweights.06.hdf5   rnnweights.15.hdf5
denseweights.03.hdf5 rnnweights.07.hdf5   rnnweights.16.hdf5


### LSTM

In [7]:
model = Sequential()
model.add(Embedding(n_unique_words,n_dim,input_length=max_review_length))
model.add(SpatialDropout1D(drop_embed))
model.add(LSTM(n_lstm, dropout=drop_lstm))
model.add(Dense(1,activation='sigmoid'))

In [8]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 100, 64)           640000    
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 100, 64)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 256)               328704    
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 257       
Total params: 968,961
Trainable params: 968,961
Non-trainable params: 0
_________________________________________________________________


In [9]:
model.compile(loss='binary_crossentropy',optimizer='adam', metrics=['accuracy'])
modelchekpoints = ModelCheckpoint(filepath=output_dir_lstm+'weights.{epoch:02d}.hdf5')
if not os.path.exists(output_dir_lstm):
    os.makedirs(output_dir_lstm)

In [10]:
model.fit(x_train,y_train,batch_size=batch_size,epochs=epochs,verbose=1,validation_data=(x_valid,y_valid),callbacks=[modelchekpoints])

Train on 25000 samples, validate on 25000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x1a2a0b3240>

In [11]:
y_hat = model.predict_proba(x_valid)
roc_auc_score(y_valid,y_hat)

0.9242907360000001

### Bidirectional LSTM

In [5]:
model = Sequential()
model.add(Embedding(n_unique_words,n_dim,input_length=max_review_length))
model.add(SpatialDropout1D(drop_embed))
model.add(Bidirectional(LSTM(n_lstm, dropout=drop_lstm)))
model.add(Dense(1,activation='sigmoid'))

NameError: name 'n_unique_words' is not defined

### Stacked LSTM

In [None]:
model = Sequential()
model.add(Embedding(n_unique_words,n_dim,input_length=max_review_length))
model.add(SpatialDropout1D(drop_embed))
model.add(Bidirectional(LSTM(n_lstm, dropout=drop_lstm,return_sequences=True)))
model.add(Bidirectional(LSTM(n_lstm, dropout=drop_lstm)))
model.add(Dense(1,activation='sigmoid'))

### Convolution-LSTM stacking

In [None]:
model = Sequential()
model.add(Embedding(n_unique_words,n_dim,input_length=max_review_length))
model.add(SpatialDropout1D(drop_embed))
model.add(Conv1D(n_conv,k_conv, activation='relu'))
model.add(MaxPooling1D(mp_size))
model.add(Bidirectional(LSTM(n_lstm, dropout=drop_lstm)))
model.add(Dense(1,activation='sigmoid'))

### Multi-convnet

In [22]:
input_layer = Input(shape=(100,),dtype='int16',name='input')
embedding_layer = Embedding(5000, 128, input_length=100,name='embedding')(input_layer)
drop_embed_layer = SpatialDropout1D(0.2,name='drop_embed')(embedding_layer)



conv_1 = Conv1D(256,3,activation='relu',name='conv_1')(drop_embed_layer)
maxp_1 = GlobalMaxPool1D(name='maxp_1')(conv_1)

conv_2 = Conv1D(256,3,activation='relu',name='conv_2')(drop_embed_layer)
maxp_2 = GlobalMaxPool1D(name='maxp_2')(conv_2)

conv_3 = Conv1D(256,3,activation='relu',name='conv_3')(drop_embed_layer)
maxp_3 = GlobalMaxPool1D(name='maxp_3')(conv_3)


concat = concatenate([maxp_1,maxp_2,maxp_3])

dense_layer = Dense(256,activation='relu',name='dense')(concat)
drop_dense = Dropout(0.2, name='drop_dense')(dense_layer)

predictions = Dense(1,activation='sigmoid',name='output')(drop_dense)

model = Model(input_layer, predictions)

In [23]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              (None, 100)          0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 100, 128)     640000      input[0][0]                      
__________________________________________________________________________________________________
drop_embed (SpatialDropout1D)   (None, 100, 128)     0           embedding[0][0]                  
__________________________________________________________________________________________________
conv_1 (Conv1D)                 (None, 98, 256)      98560       drop_embed[0][0]                 
__________________________________________________________________________________________________
conv_2 (Co