# CNN sentiment classifier with dropout (IMDB reviews, GloVe embeddings)

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# Short alias for the Keras API bundled with TensorFlow.
keras = tf.keras

# Report the TensorFlow version this notebook was run with.
print("Tensorflow Version: %s" % (tf.__version__,))

Tensorflow Version: 1.13.1


In [2]:
# commons package
import os
import sys
import importlib

# Absolute path of the parent directory; it is appended to sys.path only if
# it is not already listed, so re-running the cell does not add duplicates.
module_path = os.path.abspath('..')
parent_is_importable = module_path in sys.path
if not parent_is_importable:
    sys.path.append(module_path)

# Import the project-local helper module, then reload it so that edits made to
# commons.py are picked up without restarting the kernel.
import commons as cm
importlib.reload(cm)

<module 'commons' from 'D:\\Tom\\Documents\\gitworkspace\\master\\ml-probability\\tfp_word_embeddings\\commons.py'>

In [3]:
# IMDB data: one (features, labels) pair for training and one for testing.
imdb = cm.load_imdb()
train_split, test_split = imdb
x_train, y_train = train_split
x_test, y_test = test_split

# Word index helper provided by the commons module.
word_index = cm.WordIndex()

In [4]:
# GloVe word embedding
# The directory holding the GloVe files is machine-specific, so it can be
# overridden with the GLOVE_DIR environment variable; the original location
# remains the default when the variable is not set.
GLOVE_DIR = os.environ.get(
    "GLOVE_DIR",
    "D:/google drive/haw/master/mastertheisis/hauptprojekt",
)
# Embedding dimension; the same value is passed to the GloVe loader, to
# match_glove and to the Keras Embedding layer.
EMBEDDING_DIM = 50
embedding_index = cm.load_glove_embedding(GLOVE_DIR, EMBEDDING_DIM)

Found 400000 word vectors.


In [5]:
# Match the word index against the loaded GloVe vectors. The call yields the
# embedding matrix (used as the Embedding layer weights) and the unknown words.
embedding_matrix, unknown_words = word_index.match_glove(
    embedding_index=embedding_index,
    embedding_dim=EMBEDDING_DIM,
)

17361/88587 unknown words


In [6]:
# One shared sequence length computed from both splits; it is reused for
# padding the inputs and as the Embedding layer's input_length.
max_length = cm.get_max_length(x_train, x_test)

In [7]:
# pad input vectors (training split first, then test split) to max_length
x_train_padded, x_test_padded = (
    cm.pad_input(sequences, max_length) for sequences in (x_train, x_test)
)
# Display the first padded training sample as a quick visual check.
x_train_padded[0]

array([ 1, 11, 19, ...,  0,  0,  0])

In [9]:
# https://machinelearningmastery.com/use-word-embedding-layers-deep-learning-keras/

# Frozen embedding layer initialised with the matched GloVe matrix.
# NOTE(review): the "+ 1" presumably reserves index 0 for padding - confirm in commons.py.
vocabulary_size = len(word_index.index) + 1
embedding_layer = keras.layers.Embedding(
    input_dim=vocabulary_size,
    output_dim=EMBEDDING_DIM,
    weights=[embedding_matrix],
    input_length=max_length,
    trainable=False,
)

# The same dropout rate is applied after every stage of the network.
dropout_rate = 0.25

# Two 1D convolutions, max pooling, then a small dense head with a sigmoid output.
model = keras.Sequential([
    embedding_layer,
    keras.layers.Dropout(rate=dropout_rate),
    keras.layers.Conv1D(filters=64, kernel_size=15, activation="relu"),
    keras.layers.Dropout(rate=dropout_rate),
    keras.layers.Conv1D(filters=64, kernel_size=5, activation="relu"),
    keras.layers.MaxPooling1D(pool_size=10),
    keras.layers.Dropout(rate=dropout_rate),
    keras.layers.Flatten(),
    keras.layers.Dropout(rate=dropout_rate),
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dropout(rate=dropout_rate),
    keras.layers.Dense(units=1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 2697, 50)          4429400   
_________________________________________________________________
dropout_5 (Dropout)          (None, 2697, 50)          0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 2683, 64)          48064     
_________________________________________________________________
dropout_6 (Dropout)          (None, 2683, 64)          0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 2679, 64)          20544     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 267, 64)           0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 267, 64)           0         
__________

In [10]:
# TensorBoard logging: graph and images are written to ./logs, histograms are off.
tensorboard_options = dict(
    log_dir='./logs',
    histogram_freq=0,
    write_graph=True,
    write_images=True,
)
tb_callback = keras.callbacks.TensorBoard(**tensorboard_options)

# Train for 8 epochs, holding back 5% of the training data for validation.
model.fit(
    x_train_padded,
    y_train,
    validation_split=0.05,
    epochs=8,
    callbacks=[tb_callback],
)
# Loss and accuracy of the fitted model on the training data.
loss, accuracy = model.evaluate(x_train_padded, y_train)

Train on 23750 samples, validate on 1250 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [11]:
def test_model(x, y):
    test_result = np.round(model.predict(x))
    test_errors = np.squeeze(test_result) != y
    correct_percentage = np.sum(test_errors) / len(y)
    print("%i / %i (%.2f%%) are correct" % (len(y) - np.sum(test_errors), len(y), 100 * (1 - correct_percentage)))

In [12]:
# Report accuracy on the test split first, then on the training split.
for features, labels in ((x_test_padded, y_test), (x_train_padded, y_train)):
    test_model(features, labels)

21097 / 25000 (84.39%) are correct
22205 / 25000 (88.82%) are correct


In [13]:
# Probe sentences to score with the trained model: short positive/negative
# statements, negations, mixed-sentiment sentences and one long review-style text.
sentences = [
    "this was a very bad movie",
    "this was a very good movie",
    "I did not like this movie at all",
    "I hope there will be a sequal",
    "not bad",
    "bad",
    "not good",
    "one of the best movies of the year",
    "the first part was bad but the second part got better",
    "the first part was not bad but after that it just got worse",
    "this film was just brilliant casting location scenery story direction everyone's really suited part they played you could just imagine being there robert redford's is an amazing actor now same being director norman's father came from same scottish island as myself so i loved fact there was a real connection with this film witty remarks throughout film were great it was just brilliant so much that i bought"
]

# cm.Rating receives the word index and the trained model; rating.of() scores
# the sentences and rating.print() renders a star bar with a percentage for each.
# NOTE(review): how the text is tokenised/encoded is defined in commons.py - confirm there.
rating = cm.Rating(word_index, model)
rating.print(rating.of(sentences))

⭐ (6.62%)
this was a very bad movie

⭐⭐⭐⭐⭐⭐⭐⭐⭐ (94.83%)
this was a very good movie

⭐⭐⭐⭐⭐⭐⭐ (67.09%)
I did not like this movie at all

⭐⭐⭐⭐⭐⭐ (60.73%)
I hope there will be a sequal

⭐⭐⭐⭐⭐⭐ (57.35%)
not bad

⭐⭐⭐⭐⭐⭐ (58.33%)
bad

⭐⭐⭐⭐⭐⭐⭐ (70.76%)
not good

⭐⭐⭐⭐⭐⭐⭐⭐ (79.93%)
one of the best movies of the year

⭐⭐⭐ (29.11%)
the first part was bad but the second part got better

⭐⭐ (21.82%)
the first part was not bad but after that it just got worse

⭐⭐⭐⭐⭐⭐⭐⭐⭐⭐ (96.65%)
this film was just brilliant casting location scenery story direction everyone's really suited part they played you could just imagine being there robert redford's is an amazing actor now same being director norman's father came from same scottish island as myself so i loved fact there was a real connection with this film witty remarks throughout film were great it was just brilliant so much that i bought

