# RNN

In [1]:
%matplotlib inline

from commons import load_glove_embedding, pad_input, load_imdb, get_max_length, WordIndex, Rating

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

keras = tf.keras

print("Tensorflow Version: " + tf.VERSION)

Tensorflow Version: 1.13.1


In [2]:
# imdb data
imdb = load_imdb()

In [3]:
(x_train, y_train), (x_test, y_test) = imdb
word_index = WordIndex()

In [4]:
# GLOVE Word Embedding
GLOVE_DIR = "D:/google drive/haw/master/mastertheisis/hauptprojekt"
EMBEDDING_DIM = 50
embedding_index = load_glove_embedding(GLOVE_DIR, EMBEDDING_DIM)

Found 400000 word vectors.


In [5]:
(embedding_matrix, unknown_words) = word_index.match_glove(embedding_index=embedding_index, embedding_dim=EMBEDDING_DIM)

28437/88587 unknown words


In [8]:
# max Input Length
max_length = get_max_length(x_train, x_test)

In [9]:
# pad input vectors
x_train_padded = pad_input(x_train, max_length)
x_test_padded = pad_input(x_test, max_length)
x_train_padded[0]

array([ 1, 11, 19, ...,  0,  0,  0])

In [27]:
# https://machinelearningmastery.com/use-word-embedding-layers-deep-learning-keras/

embedding_layer = keras.layers.Embedding(len(word_index.index) + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=max_length,
                            trainable=False)

model = keras.Sequential([
    embedding_layer,
    keras.layers.Bidirectional(keras.layers.LSTM(128, dropout=0.4, recurrent_dropout=0.4, activation='relu', return_sequences=True)),
    keras.layers.Bidirectional(keras.layers.CuDNNLSTM(64, return_sequences = True)),
    keras.layers.Flatten(),
    keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_11 (Embedding)     (None, 2697, 50)          4429400   
_________________________________________________________________
bidirectional_12 (Bidirectio (None, 2697, 256)         183296    
_________________________________________________________________
bidirectional_13 (Bidirectio (None, 2697, 128)         164864    
_________________________________________________________________
flatten_3 (Flatten)          (None, 345216)            0         
_________________________________________________________________
dense_14 (Dense)             (None, 1)                 345217    
Total params: 5,122,777
Trainable params: 693,377
Non-trainable params: 4,429,400
_________________________________________________________________


In [None]:
tb_callback = keras.callbacks.TensorBoard(
    log_dir='./logs', 
    histogram_freq=0,
    write_graph=True, 
    write_images=True) 

model.fit(x_train_padded, y_train, validation_split=0.05, epochs=2, callbacks=[tb_callback])
loss, accuracy = model.evaluate(x_train_padded, y_train)

Train on 23750 samples, validate on 1250 samples
Epoch 1/2
   64/23750 [..............................] - ETA: 4:35:57 - loss: 1.1354 - acc: 0.5000

In [16]:
def test_model(x, y):
    test_result = np.round(model.predict(x))
    test_errors = np.squeeze(test_result) != y
    correct_percentage = np.sum(test_errors) / len(y)
    print("%i / %i (%.2f%%) are correct" % (len(y) - np.sum(test_errors), len(y), 100 * (1 - correct_percentage)))

In [17]:
test_model(x_test_padded, y_test)
test_model(x_train_padded, y_train)

12500 / 25000 (50.00%) are correct
12500 / 25000 (50.00%) are correct


In [18]:
sentences = [
    "this was a very bad movie",
    "this was a very good movie",
    "I did not like this movie at all",
    "I hope there will be a sequal",
    "not bad",
    "bad",
    "not good",
    "one of the best movies of the year",
    "the first part was bad but the second part got better",
    "the first part was not bad but after that it just got worse",
    "this film was just brilliant casting location scenery story direction everyone's really suited part they played you could just imagine being there robert redford's is an amazing actor now same being director norman's father came from same scottish island as myself so i loved fact there was a real connection with this film witty remarks throughout film were great it was just brilliant so much that i bought"
]

rating = Rating(word_index, model)
rating.print(rating.of(sentences, max_length))

⭐⭐⭐⭐⭐ (49.90%)
this was a very bad movie

⭐⭐⭐⭐⭐ (49.90%)
this was a very good movie

⭐⭐⭐⭐⭐ (49.90%)
I did not like this movie at all

⭐⭐⭐⭐⭐ (49.90%)
I hope there will be a sequal

⭐⭐⭐⭐⭐ (49.90%)
not bad

⭐⭐⭐⭐⭐ (49.90%)
bad

⭐⭐⭐⭐⭐ (49.90%)
not good

⭐⭐⭐⭐⭐ (49.90%)
one of the best movies of the year

⭐⭐⭐⭐⭐ (49.90%)
the first part was bad but the second part got better

⭐⭐⭐⭐⭐ (49.90%)
the first part was not bad but after that it just got worse

⭐⭐⭐⭐⭐ (49.90%)
this film was just brilliant casting location scenery story direction everyone's really suited part they played you could just imagine being there robert redford's is an amazing actor now same being director norman's father came from same scottish island as myself so i loved fact there was a real connection with this film witty remarks throughout film were great it was just brilliant so much that i bought

