In [10]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
from keras.layers import Embedding
from sklearn.model_selection import train_test_split
from keras.preprocessing.sequence import pad_sequences
from keras.datasets import imdb
from keras.preprocessing import sequence

In [11]:
unique_words = 5000
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=unique_words)

In [12]:
# unique words
len(np.unique(np.hstack(train_data)))

4998

In [13]:
X = np.concatenate((train_data, test_data), axis=0)
y = np.concatenate((train_labels, test_labels), axis=0)
print(X.shape)
print(y.shape)

(50000,)
(50000,)


In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y)
print(X_train.shape)
print(X_test.shape)

(37500,)
(12500,)


In [15]:
strlen = [len(x) for x in X_train]
print("min, max and avg strlen", min(strlen), max(strlen), np.rint(np.sum(strlen)/len(strlen)))

min, max and avg strlen 7 2494 235.0


In [16]:
timesteps = 64
X_train = sequence.pad_sequences(X_train, maxlen=timesteps)
X_test = sequence.pad_sequences(X_test, maxlen=timesteps)

In [17]:
output_dim = 64
model = Sequential()
model.add(Embedding(unique_words, output_dim, input_length=timesteps, trainable=True))
model.add(LSTM(32, activation='relu', return_sequences = True))
model.add(LSTM(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 64, 64)            320000    
_________________________________________________________________
lstm_1 (LSTM)                (None, 64, 32)            12416     
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 340,769
Trainable params: 340,769
Non-trainable params: 0
_________________________________________________________________


In [18]:
import os.path
fname = "my_model_weights.h5"
if os.path.isfile(fname) is True:
    print("Weight File Exist")
    model.load_weights(fname, by_name=True)
else:
    # fit model
    hist = model.fit(X_train, y_train, epochs=100, verbose=2)
    model.save_weights(fname)

Weight File Exist


In [19]:
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Accuracy: 94.90%


In [20]:
PRINT_NUM = 50

idx = np.random.randint(low=0, high=X_test.shape[0], size=PRINT_NUM)
yhat = model.predict(X_test[idx])
yhat = np.rint(yhat)
yhat = yhat.reshape(PRINT_NUM)
# print(yhat)
# print(y_test[idx])
# digits = np.argmax(yhat, axis=1)
# yhat = int(yhat)
# for i, d in enumerate(digits):

accuracy = np.sum(yhat == y_test[idx])/PRINT_NUM
print("Sample accuracy", accuracy*100.)

Sample accuracy 94.0
