In [44]:
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense, Dropout, Convolution1D
from keras.backend import clear_session
from keras_tqdm import TQDMNotebookCallback

from vaiutils import add_nb_path

In [2]:
# Tunable sizes: ids at or above vocab_size - 1 are later collapsed into one
# bucket, and every review is padded/truncated to seq_len tokens.
vocab_size, seq_len = 5000, 500

# No start marker, no OOV marker and no index offset are requested, so the raw
# ids are presumably the same ones imdb.get_word_index() reports
# (NOTE(review): confirm against the installed Keras version).
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    start_char=None,
    oov_char=None,
    index_from=0,
)

In [3]:
# word -> integer id mapping shipped with the IMDB dataset.
idx = imdb.get_word_index()

# The vocabulary as a list of words, ordered by ascending id.
idx_arr = sorted(idx.keys(), key=lambda word: idx[word])

In [4]:
# Reverse lookup (integer id -> word) built by flipping every (word, id) pair.
idx2word = dict((word_id, word) for word, word_id in idx.items())

# Id 0 decodes to the empty string.
idx2word.update({0: ''})

In [5]:
def _clip_and_pad(sequences):
    """Clip token ids to the vocabulary and pad/truncate to a fixed length.

    Every id greater than or equal to ``vocab_size - 1`` is replaced by
    ``vocab_size - 1``, so all rare words share one bucket and every id is a
    valid row of an ``Embedding(vocab_size, ...)`` layer. The clipped
    sequences are then handed to ``pad_sequences`` with ``maxlen=seq_len``.

    Plain Python lists are passed to ``pad_sequences`` (it accepts a list of
    lists), so this cell no longer depends on ``np``, which the notebook's
    import cell never imports.

    Args:
        sequences: iterable of integer-id sequences (one per review).

    Returns:
        Whatever ``pad_sequences`` returns for the clipped sequences.
    """
    rare_bucket = vocab_size - 1
    clipped = [[min(token, rare_bucket) for token in seq] for seq in sequences]
    return pad_sequences(clipped, maxlen=seq_len)


# The same preprocessing is applied to both splits; re-running the cell is
# harmless because clipping and padding are both idempotent.
x_train = _clip_and_pad(x_train)
x_test = _clip_and_pad(x_test)

In [36]:
# Baseline classifier: embed every token, flatten the whole review into one
# vector, then a single hidden layer with heavy dropout and a sigmoid output.
clear_session()  # start from a fresh Keras session before building a new model

model = Sequential()
model.add(Embedding(vocab_size, 32, input_length=seq_len))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.7))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
flatten_1 (Flatten)          (None, 16000)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               1600100   
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 101       
Total params: 1,760,201
Trainable params: 1,760,201
Non-trainable params: 0
_________________________________________________________________


In [13]:
def fit_model(epochs=1, lr=1e-3):
    """Train the notebook-global ``model`` on the IMDB training split.

    The learning rate is written into the optimizer's existing backend
    variable with ``K.set_value``. Assigning ``model.optimizer.lr = lr``
    instead would rebind the attribute to a plain Python float, which an
    already-built training function never sees, so later calls with a
    different ``lr`` would silently keep the old rate.

    Args:
        epochs: number of passes over ``x_train``.
        lr: learning rate to apply before training starts.

    Returns:
        None. Progress is reported through ``TQDMNotebookCallback``.
    """
    # Local import keeps this cell self-contained; keras is already a
    # dependency of the notebook.
    from keras import backend as K

    K.set_value(model.optimizer.lr, lr)

    # NOTE(review): the test split doubles as validation data here, so the
    # reported validation metrics are not an untouched hold-out estimate.
    model.fit(
        x_train,
        y_train,
        batch_size=64,
        epochs=epochs,
        verbose=0,  # Keras' own log is silenced; the tqdm callback reports progress
        callbacks=[TQDMNotebookCallback()],
        validation_data=(x_test, y_test),
    )

In [37]:
# Train the baseline model for a single epoch at the default learning rate.
fit_model(epochs=1)




In [42]:
# Convolutional variant: a strided 1-D convolution over the embedded tokens
# sits between the embedding and the dense classifier, with light dropout
# before and after it.
clear_session()  # start from a fresh Keras session before building a new model

model = Sequential()
model.add(Embedding(vocab_size, 32, input_length=seq_len))
model.add(Dropout(0.2))
model.add(Convolution1D(64, 5, strides=2, padding='same', activation='relu'))
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.7))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
dropout_1 (Dropout)          (None, 500, 32)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 250, 64)           10304     
_________________________________________________________________
flatten_1 (Flatten)          (None, 16000)             0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 16000)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               1600100   
_________________________________________________________________
dropout_3 (Dropout)          (None, 100)               0         
__________

In [43]:
# Train the convolutional model for two epochs at the default learning rate.
fit_model(epochs=2)




In [56]:
# Store the convolutional model's weights below the path reported by
# add_nb_path (presumably the notebook's directory; verify against vaiutils).
DIR_NB = add_nb_path('')
model.save_weights(filepath=DIR_NB + '/output/Sentiments/conv.h5')