# Deep NN: IMDB sentiment classification with pre-trained GloVe word embeddings

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# Short alias so the remaining cells can write `keras.` instead of `tf.keras.`
keras = tf.keras

# Show which TensorFlow build produced the outputs recorded in this notebook
print("Tensorflow Version: %s" % tf.__version__)

Tensorflow Version: 2.0.0-beta0


In [2]:
# commons package
# The shared helpers live one directory above this notebook, so that
# directory has to be on the import path before `commons` can be imported.
import os
import sys
import importlib

module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

import commons as cm
# Reload so that edits made to commons.py are picked up by a running kernel
importlib.reload(cm)

<module 'commons' from 'D:\\Tom\\Documents\\gitworkspace\\master\\ml-probability\\tfp_word_embeddings\\commons.py'>

In [3]:
# imdb data
# load_imdb() yields a (train, test) pair, each of which is an (inputs, labels) pair
imdb = cm.load_imdb()
train_split, test_split = imdb
x_train, y_train = train_split
x_test, y_test = test_split
word_index = cm.WordIndex()

In [4]:
# GLOVE Word Embedding
# The location of the GloVe files is machine-specific. It can be overridden
# with the GLOVE_DIR environment variable; when that is not set, the original
# hard-coded directory is used, so existing setups keep working unchanged.
GLOVE_DIR = os.environ.get(
    "GLOVE_DIR",
    "D:/google drive/haw/master/mastertheisis/hauptprojekt",
)
# Passed to the loader here and reused as the embedding dimension further down
EMBEDDING_DIM = 50
embedding_index = cm.load_glove_embedding(GLOVE_DIR, EMBEDDING_DIM)

Found 400000 word vectors.


In [5]:
# Match the GloVe vectors against the word index; the helper also returns
# the words it reports as unknown.
embedding_matrix, unknown_words = word_index.match_glove(
    embedding_index=embedding_index,
    embedding_dim=EMBEDDING_DIM,
)

17361/88587 unknown words


In [6]:
# raw data representation
# First training sample as stored: a sequence of integers followed by its label
first_review, first_label = x_train[0], y_train[0]
print(first_review, first_label)

[1, 11, 19, 13, 40, 527, 970, 1619, 1382, 62, 455, 4465, 63, 3938, 1, 170, 33, 253, 2, 22, 97, 40, 835, 109, 47, 667, 22662, 6, 32, 477, 281, 2, 147, 1, 169, 109, 164, 21628, 333, 382, 36, 1, 169, 4533, 1108, 14, 543, 35, 10, 444, 1, 189, 47, 13, 3, 144, 2022, 16, 11, 19, 1, 1917, 4610, 466, 1, 19, 68, 84, 9, 13, 40, 527, 35, 73, 12, 10, 1244, 1, 19, 14, 512, 14, 9, 13, 623, 15, 19190, 2, 59, 383, 9, 5, 313, 5, 103, 2, 1, 2220, 5241, 13, 477, 63, 3782, 30, 1, 127, 9, 13, 35, 616, 2, 22, 121, 48, 33, 132, 45, 22, 1412, 30, 3, 19, 9, 212, 25, 74, 49, 2, 11, 404, 13, 79, 10308, 5, 1, 104, 114, 5949, 12, 253, 1, 31047, 4, 3763, 2, 720, 33, 68, 40, 527, 473, 23, 397, 314, 43, 4, 1, 12115, 1026, 10, 101, 85, 1, 378, 12, 294, 95, 29, 2068, 53, 23, 138, 3, 191, 7483, 15, 1, 223, 19, 18, 131, 473, 23, 477, 2, 141, 27, 5532, 15, 48, 33, 25, 221, 89, 22, 101, 1, 223, 62, 13, 35, 1331, 85, 9, 13, 280, 2, 13, 4469, 110, 100, 29, 12, 13, 5342, 16, 175, 29] 1


In [8]:
# parsed sentence
# Same first training sample, converted back to text through the word index
first_sentence = word_index.vec2sentence(x_train[0])
print(first_sentence)

<START> this film was just brilliant casting location scenery story direction everyone's really suited <START> part they played <UNK> you could just imagine being there robert redford's is an amazing actor <UNK> now <START> same being director norman's father came from <START> same scottish island as myself so i loved <START> fact there was a real connection with this film <START> witty remarks throughout <START> film were great it was just brilliant so much that i bought <START> film as soon as it was released for retail <UNK> would recommend it to everyone to watch <UNK> <START> fly fishing was amazing really cried at <START> end it was so sad <UNK> you know what they say if you cry at a film it must have been good <UNK> this definitely was also congratulations to <START> two little boy's that played <START> part's of norman <UNK> paul they were just brilliant children are often left out of <START> praising list i think because <START> stars that play them all grown up are such a big

In [6]:
# max Input Length
# Computed by cm.get_max_length from both the training and the test inputs
# (presumably the longest review length -- confirm in commons.get_max_length).
# Used below as the padding target and as the embedding layer's input_length.
max_length = cm.get_max_length(x_train, x_test)

In [7]:
# pad input vectors
# Both splits go through the same helper with the same target length
x_train_padded, x_test_padded = (
    cm.pad_input(split, max_length) for split in (x_train, x_test)
)
# Show the first padded training sample
x_train_padded[0]

array([ 1, 11, 19, ...,  0,  0,  0])

In [8]:
# Frozen embedding layer initialised from the pre-computed embedding matrix
vocabulary_size = len(word_index.index) + 1
embedding_layer = keras.layers.Embedding(
    input_dim=vocabulary_size,
    output_dim=EMBEDDING_DIM,
    weights=[embedding_matrix],
    input_length=max_length,
    trainable=False,
)

# Three ReLU layers stacked on the embedding output, then flattened into a
# single sigmoid unit for the binary label.
hidden_layers = [
    keras.layers.Dense(units, activation='relu') for units in (256, 128, 128)
]
output_layers = [
    keras.layers.Flatten(),
    keras.layers.Dense(1, activation='sigmoid'),
]
model = keras.Sequential([embedding_layer] + hidden_layers + output_layers)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 2697, 50)          4429400   
_________________________________________________________________
dense (Dense)                (None, 2697, 256)         13056     
_________________________________________________________________
dense_1 (Dense)              (None, 2697, 128)         32896     
_________________________________________________________________
dense_2 (Dense)              (None, 2697, 128)         16512     
_________________________________________________________________
flatten (Flatten)            (None, 345216)            0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 345217    
Total params: 4,837,081
Trainable params: 407,681
Non-trainable params: 4,429,400
________________________________________

In [13]:
# Write TensorBoard event files into the 'logs' directory while training
tb_callback = keras.callbacks.TensorBoard(log_dir='logs')

# 5% of the training data is held out for validation during the two epochs
fit_options = dict(validation_split=0.05, epochs=2, callbacks=[tb_callback])
model.fit(x_train_padded, y_train, **fit_options)

# NOTE(review): this evaluates on the same data the model was fitted on, and
# neither `loss` nor `accuracy` is read again in the visible cells -- confirm
# whether the test split was intended here.
loss, accuracy = model.evaluate(x_train_padded, y_train)

Train on 23750 samples, validate on 1250 samples
Epoch 1/2
Epoch 2/2


In [14]:
def test_model(x, y):
    test_result = np.round(model.predict(x))
    test_errors = np.squeeze(test_result) != y
    correct_percentage = np.sum(test_errors) / len(y)
    print("%i / %i (%.2f%%) are correct" % (len(y) - np.sum(test_errors), len(y), 100 * (1 - correct_percentage)))

In [15]:
# Score the trained model on the padded test split
test_model(x=x_test_padded, y=y_test)

21007 / 25000 (84.03%) are correct


In [16]:
# Short hand-written probes, including negations and mixed opinions
short_probes = [
    "this was a very bad movie",
    "this was a very good movie",
    "I did not like this movie at all",
    "I hope there will be a sequal",
    "not bad",
    "bad",
    "not good",
    "one of the best movies of the year",
    "the first part was bad but the second part got better",
    "the first part was not bad but after that it just got worse",
]

# Longer passage adapted from the first training review printed above
long_probe = "this film was just brilliant casting location scenery story direction everyone's really suited part they played you could just imagine being there robert redford's is an amazing actor now same being director norman's father came from same scottish island as myself so i loved fact there was a real connection with this film witty remarks throughout film were great it was just brilliant so much that i bought"

sentences = short_probes + [long_probe]

# Rate every sentence with the trained model and print the results
rating = cm.Rating(word_index, model)
rated_sentences = rating.of(sentences)
rating.print(rated_sentences)

⭐⭐⭐⭐ (38.17%)
this was a very bad movie

⭐⭐⭐⭐⭐⭐⭐ (70.54%)
this was a very good movie

⭐⭐⭐⭐⭐⭐ (62.31%)
I did not like this movie at all

⭐⭐⭐⭐⭐ (45.38%)
I hope there will be a sequal

⭐⭐⭐ (28.62%)
not bad

⭐⭐⭐⭐⭐ (52.00%)
bad

⭐⭐⭐⭐⭐⭐ (59.55%)
not good

⭐⭐⭐⭐⭐⭐⭐ (67.32%)
one of the best movies of the year

⭐⭐⭐ (29.83%)
the first part was bad but the second part got better

⭐ (12.60%)
the first part was not bad but after that it just got worse

⭐⭐⭐⭐⭐⭐⭐⭐⭐⭐ (99.61%)
this film was just brilliant casting location scenery story direction everyone's really suited part they played you could just imagine being there robert redford's is an amazing actor now same being director norman's father came from same scottish island as myself so i loved fact there was a real connection with this film witty remarks throughout film were great it was just brilliant so much that i bought

