# Lesson 10
# Peter Lorenz

## 0. Preparation
Import libraries:

In [14]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences

Set global options:

In [3]:
# Display multiple cell outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Suppress scientific notation
np.set_printoptions(suppress=True)
np.set_printoptions(precision=3)

Set constants:

In [12]:
# Declare constants
EMBEDDING_VECOR_LENGTH = 250
NUM_WORDS = 5000
MAX_LEN = 500
MAX_REVIEW_LENGTH = 300

## 1. Read data set into training and testing
In this section we read the Reuters data set into training and test data sets:

In [5]:
# Deal with pickle bug
# See https://stackoverflow.com/questions/57176714/how-to-fix-error-when-load-dataset-in-keras
old = np.load
np.load = lambda *a,**k: old(*a,allow_pickle=True)

# Load data set
(X_train, y_train), (X_test, y_test) = \
    tf.keras.datasets.reuters.load_data(
        path='reuters.npz', num_words=NUM_WORDS, skip_top=10, maxlen=MAX_LEN, 
        test_split=0.2, seed=1)

# Restore numpy load
np.load = old
del(old)

Examine the shape of the data:

In [6]:
# Examine data
X_train.shape
X_test.shape
y_train.shape
y_test.shape

(8621,)

(2156,)

(8621,)

(2156,)

## 2. Prepare the data set
In this section we prepare the data set. We begin by padding the sequences in the training and test data:

In [9]:
# Pad data
x_train_padded = pad_sequences(X_train, maxlen = MAX_REVIEW_LENGTH)
x_test_padded = pad_sequences(X_test, maxlen = MAX_REVIEW_LENGTH)

Next we verify the shape of the data:

In [11]:
# Display shape
x_train_padded.shape
x_test_padded.shape

(8621, 300)

(2156, 300)

As a sanity check we examine the padded data itself:

In [10]:
x_train_padded[0]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,   

With the data prepared, we can now build our models.

## 3. Build and compile 3 different models
In this section we build and compile 3 different models using Keras LTSM, ideally improving model at each iteration.

### Model 1
Our first model is a basic recurrent neural network with a single hidden layer:

In [22]:
# Build model
model = keras.models.Sequential()
model.add(keras.layers.Embedding(NUM_WORDS, EMBEDDING_VECOR_LENGTH, 
                                 input_length = MAX_REVIEW_LENGTH))
model.add(keras.layers.LSTM(32))
model.add(keras.layers.Dense(46, activation = 'sigmoid'))
model.compile(loss = 'sparse_categorical_crossentropy', optimizer = 'adam', 
              metrics = ['accuracy'])
print(model.summary())

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 300, 250)          1250000   
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                36224     
_________________________________________________________________
dense_2 (Dense)              (None, 46)                1518      
Total params: 1,287,742
Trainable params: 1,287,742
Non-trainable params: 0
_________________________________________________________________
None


Now we train our model:

In [23]:
model.fit(x_train_padded, y_train, validation_data = (x_test_padded, y_test), 
          epochs = 2, batch_size = 128)

Train on 8621 samples, validate on 2156 samples
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0xed821d7148>

With the model trained, we check the accuracy:

In [24]:
# Evaluate model
scores = model.evaluate(x_test_padded, y_test)
print("Accuracy: {:.2f}%".format(scores[1]*100))

Accuracy: 37.62%


The accuracy of our initial model is rather poor with just 38% accuracy.

### Model 2
Next we try a second model with an additional hidden layer:

In [26]:
# Build model
model2 = keras.models.Sequential()
model2.add(keras.layers.Embedding(NUM_WORDS, EMBEDDING_VECOR_LENGTH, 
                                  input_length = MAX_REVIEW_LENGTH))
model2.add(keras.layers.LSTM(32))
model2.add(keras.layers.Dense(92, activation = 'relu'))
model2.add(keras.layers.Dense(46, activation = 'sigmoid'))
model2.compile(loss = 'sparse_categorical_crossentropy', optimizer = 'adam', 
               metrics = ['accuracy'])
print(model2.summary())

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 300, 250)          1250000   
_________________________________________________________________
lstm_5 (LSTM)                (None, 32)                36224     
_________________________________________________________________
dense_3 (Dense)              (None, 92)                3036      
_________________________________________________________________
dense_4 (Dense)              (None, 46)                4278      
Total params: 1,293,538
Trainable params: 1,293,538
Non-trainable params: 0
_________________________________________________________________
None


Now we train the model:

In [27]:
model2.fit(x_train_padded, y_train, validation_data = (x_test_padded, y_test), 
           epochs = 2, batch_size = 128)

Train on 8621 samples, validate on 2156 samples
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0xed826b7408>

Finally, we check the accuracy:

In [28]:
# Evaluate model
scores = model2.evaluate(x_test_padded, y_test)
print("Accuracy: {:.2f}%".format(scores[1]*100))

Accuracy: 37.62%


Adding an additional hidden layer did not improve the model accuracy, which remains at about 38%.

## Model 3
We now try a third model by experimenting with various hyperparameters:

## 4. Describe and explain your findings
In this assignment ...