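# Keras MLP (Multilayer Perceptron)

This is a walkthrough of how to make a simple feed-forward neural net (a Multilayer Perceptron, not a recurrent net) using Keras, trained to classify the MNIST digit images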

### Import dependencies

In [1]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from keras.datasets import mnist
from keras.utils import np_utils

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Data Cleanup

### Set our hyperparameters

In [2]:
# number of full passes over the training data
nb_epoch = 100
# number of samples fed to the network per weight update
batch_size = 128

### Define our possible outcomes

In [3]:
nb_classes = 10  # number of possible outcomes: one class per digit label, 0 through 9

### Import our data

In [4]:
# Load the MNIST dataset through keras.datasets.
# load_data() hands back two (inputs, labels) tuples, which we unpack
# straight into a training set and a testing set.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [5]:
# X_train holds 60k samples, each a 28x28 pixel image
# X_train.shape = (60000, 28, 28), which means it is a 3D tensor:
# in practice, a stack of 60k 2D pixel matrices
X_train.shape

(60000, 28, 28)

### Reshape inputs as a 1D vector

In [6]:
# Flatten every 28x28 image into one 784-long vector (28 * 28 = 784),
# turning the 3D tensor into a 2D matrix with one row per image.
# The row count is read from the array itself and -1 lets numpy work out
# the row length, so the dataset size is not hard-coded here.
X_train = X_train.reshape(X_train.shape[0], -1)
# X_train.shape = (60000, 784)
X_train.shape

(60000, 784)

In [7]:
# X_test has the same layout as X_train did before flattening,
# but it has only 10k samples
X_test.shape

(10000, 28, 28)

In [8]:
# Flatten the test images the same way: (10000, 28, 28) -> (10000, 784).
# The sample count comes from the array and -1 lets numpy infer the
# flattened length, so no sizes are hard-coded.
X_test = X_test.reshape(X_test.shape[0], -1)
X_test.shape

(10000, 784)

In [9]:
# let's look at the datatype of our data - `uint8`
# (unsigned 8-bit integers, i.e. whole numbers from 0 to 255)
X_train.dtype

dtype('uint8')

In [10]:
# Convert the pixel values from integers (`uint8`) to floats (`float32`).
# Both types are numbers already; floats are needed so that the scaling
# step that follows can store fractional values rather than whole numbers.
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# show the new dtype (a cell only displays its final expression)
X_test.dtype

dtype('float32')

In [11]:
# Scale the pixel intensities from the raw 0..255 range down to 0..1.
# Small, similarly-sized inputs generally make gradient-descent training
# better behaved than raw 0..255 values.
#
# The division happens in place, so running this cell twice would shrink the
# data twice. The `max() > 1` guards skip arrays that are already scaled,
# which keeps the cell safe to re-run.
if X_train.max() > 1:
    X_train /= 255
if X_test.max() > 1:
    X_test /= 255

# The preview looks the same before and after scaling because numpy only
# prints the first/last few entries of the first/last few rows, and those
# entries are all 0 (0 / 255 is still 0). To see the change, compare
# X_train.max() before and after: it should go from 255.0 to 1.0.
X_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [12]:
# The labels are 1D vectors with one entry per image
# (y_train.shape is (60000,); y_test likewise has one entry per test image)
# and every entry is a scalar value: the class of that image.
y_train

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [13]:
# to_categorical() turns each scalar label into a 1D binary class vector
# (a "one-hot" row with a single 1 in the position of the class),
# which is the target format used with a softmax output layer
Y_Train = np_utils.to_categorical(y_train, num_classes=nb_classes)
Y_Test = np_utils.to_categorical(y_test, num_classes=nb_classes)

# every scalar label has become a vector of length nb_classes
# (60000,) -> (60000, 10)
Y_Train.shape

(60000, 10)

## The Neural Net Architecture

In [14]:
# Multilayer Perceptron model
# the simplest neural net model
# it is instantiated from keras.models.Sequential
# Sequential() is a linear stack of layers: each layer feeds only the next one
model = Sequential()

# let's look at our neural net so far (no layers yet, so 0 params)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________


In [15]:
# add the first layer to the model
# it will be a `Dense` (fully connected) layer: every input feeds every unit of the layer
model.add(Dense(
    units=625,
    input_dim=784,
    kernel_initializer='random_normal',
    activation='sigmoid'
))
# Dense.units -> dimensionality of the output space
# Dense.input_dim -> dimensionality of the input vector (784 = 28 * 28 pixels)
# Dense.kernel_initializer -> how the layer's kernel (its weight matrix) should start out
# Dense.activation -> the nonlinearity applied

# units=625 is a tunable hyperparameter; nothing in this notebook derives it,
# so treat it as an arbitrary choice -- TODO try other sizes and compare accuracy

# let's look at our neural net so far
# (490,625 params = 784 * 625 weights + 625 biases)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 625)               490625    
Total params: 490,625
Trainable params: 490,625
Non-trainable params: 0
_________________________________________________________________


In [16]:
# let's add one more dense layer
# notice how input_dim=625 matches the 625 output units of the previous layer,
# and this layer again outputs units=625
# ('random_normal' is spelled out so the initializer name matches the first layer)
model.add(Dense(units=625, input_dim=625, kernel_initializer='random_normal', activation='sigmoid'))

In [17]:
# and then end off with a final softmax layer
# notice the input_dim=625 but the output units=10 (one output per class)
# also notice how our non-linearity is softmax this time: it turns the
# 10 outputs into class probabilities that sum to 1
# ('random_normal' is spelled out so the initializer name matches the first layer)
model.add(Dense(units=10, input_dim=625, kernel_initializer='random_normal', activation='softmax'))

# let's look at our neural net so far
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 625)               490625    
_________________________________________________________________
dense_2 (Dense)              (None, 625)               391250    
_________________________________________________________________
dense_3 (Dense)              (None, 10)                6260      
Total params: 888,135
Trainable params: 888,135
Non-trainable params: 0
_________________________________________________________________


In [18]:
# now we're ready to compile our model
# choose which settings to use
model.compile(
    optimizer=SGD(lr=0.05),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
# optimizer=SGD(lr=0.05) means use Stochastic Gradient Descent with a learning rate (step size) of 0.05
# loss='categorical_crossentropy' is the chosen loss function we want to minimize
# metrics=['accuracy'] means accuracy is reported alongside the loss when the model is evaluated

# now let's look at our model (the layers and parameter counts are the same as before compiling)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 625)               490625    
_________________________________________________________________
dense_2 (Dense)              (None, 625)               391250    
_________________________________________________________________
dense_3 (Dense)              (None, 10)                6260      
Total params: 888,135
Trainable params: 888,135
Non-trainable params: 0
_________________________________________________________________


## The Training Phase

In [19]:
# Fit our model on the training data
# we pass in our training input, X_train.shape = (60000, 784)
# we pass in our training labels, Y_Train.shape = (60000, 10)
history = model.fit(X_train, Y_Train, epochs=nb_epoch, batch_size=batch_size, verbose=1)

# epochs = # of epochs to train for (nb_epoch). each epoch trains over all of X_train
# batch_size = # of images in each training batch. multiple batches add up to X_train in 1 epoch
# verbose = show logged output of training progress

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


## The Validation Phase

In [20]:
# With training finished, measure the model on the held-out test set
# X_test.shape=(10000, 784) and Y_Test.shape=(10000, 10)
evaluation = model.evaluate(X_test, Y_Test, verbose=1)

# evaluation[0] is reported as the loss and evaluation[1] as the accuracy
test_loss, test_accuracy = evaluation[0], evaluation[1]
print('Summary: Loss over the test dataset: %.2f, Accuracy: %.2f' % (test_loss, test_accuracy))

Summary: Loss over the test dataset: 0.12, Accuracy: 0.96


## Done