# Keras RNN: classifying MNIST digits with a SimpleRNN

In [1]:
from keras.layers import SimpleRNN, Activation, Dense
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.optimizers import Adam

import numpy as np
# Seed NumPy's global random generator so repeated runs draw the same numbers.
# NOTE(review): only NumPy is seeded here; whether the backend's own RNG is
# also pinned cannot be seen from this cell — confirm if exact reproducibility matters.
np.random.seed(2018)

Using TensorFlow backend.


In [2]:
# Each image is fed to the RNN one pixel row per time step.
TIME_STEPS = 28     # same as the height of the image
INPUT_SIZE = 28     # same as the width of the image
BATCH_SIZE = 50     # samples per training batch
BATCH_INDEX = 0     # start offset of the next batch in X_train; advanced by the training loop
OUTPUT_SIZE = 10  # 10 classes
CELL_SIZE = 50    # number of hidden units in the RNN
LR = 0.001        # learning rate passed to the Adam optimizer

## Data Load

In [3]:
# mnist.load_data() returns the images and their integer labels:
#   X_train (60000, 28, 28), y_train (60000,)
#   X_test  (10000, 28, 28), y_test  (10000,)
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Normalise pixel values from [0, 255] to [0, 1].
# Note: without this normalisation the results are much worse.
# The reshape uses the config constants so the data layout stays in sync with
# the (TIME_STEPS, INPUT_SIZE) input shape declared for the RNN.
X_train = X_train.reshape(-1, TIME_STEPS, INPUT_SIZE) / 255.
X_test = X_test.reshape(-1, TIME_STEPS, INPUT_SIZE) / 255.

# One-hot encode the labels into OUTPUT_SIZE columns.
y_train = np_utils.to_categorical(y_train, num_classes=OUTPUT_SIZE)
y_test = np_utils.to_categorical(y_test, num_classes=OUTPUT_SIZE)

# Sanity check: one sample (image shape and its one-hot label) ...
print(X_train[0].shape)
print(y_train[0])

# ... and the shapes of the full training arrays.
print()
print(X_train.shape)
print(y_train.shape)

(28, 28)
[ 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.]

(60000, 28, 28)
(60000, 10)


## build RNN model

In [4]:
# Empty Sequential container; the layers are appended to it one by one in later cells.
model = Sequential()

## RNN cell

In [5]:
# The leading None in batch_input_shape leaves the batch size unspecified, so the
# same model accepts both the BATCH_SIZE-sample training batches and the full
# test set passed to model.evaluate(). With the TensorFlow backend a fixed batch
# size here makes model.evaluate() fail.
model.add(
    SimpleRNN(
        # Keras 2 name for the hidden-state size (the legacy spelling was output_dim).
        units=CELL_SIZE,
        # Or: input_dim=INPUT_SIZE, input_length=TIME_STEPS,
        batch_input_shape=(None, TIME_STEPS, INPUT_SIZE),
        # unroll=True,
    )
)



## output layer

In [6]:
# Classification head: a Dense projection to OUTPUT_SIZE scores, followed by a
# softmax that normalises those scores into class probabilities.
# (tanh is the default activation of the SimpleRNN layer, not of this head.)
for head_layer in (Dense(OUTPUT_SIZE), Activation('softmax')):
    model.add(head_layer)

## optimizer

In [7]:
# Adam optimizer configured with the learning rate from the config cell.
adam = Adam(LR)

# Cross-entropy against the one-hot labels; accuracy is reported next to the loss.
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

## training

In [8]:
N_STEPS = 4001      # training batches to run; the last step (4000) lands on an evaluation
EVAL_EVERY = 500    # evaluate on the test set every this many steps

# NOTE: BATCH_INDEX is the notebook-level batch cursor, so running this cell
# again continues from the batch where the previous run stopped.
for step in range(N_STEPS):
    # batch shapes: X (batch, steps, inputs), Y (batch, outputs)
    batch_end = BATCH_INDEX + BATCH_SIZE
    X_batch = X_train[BATCH_INDEX:batch_end]
    Y_batch = y_train[BATCH_INDEX:batch_end]

    # One gradient update. The returned training loss/metrics are deliberately
    # discarded; `cost` is reserved for the test loss computed below.
    model.train_on_batch(X_batch, Y_batch)

    # Advance the cursor and start over from the first sample after the last batch.
    BATCH_INDEX = batch_end
    if BATCH_INDEX >= X_train.shape[0]:
        BATCH_INDEX = 0

    if step % EVAL_EVERY == 0:
        # Score the whole test set in a single batch.
        cost, accuracy = model.evaluate(X_test, y_test, batch_size=y_test.shape[0], verbose=False)
        print('%4d | test cost: %10f | test accuracy: %10f' %(step, cost, accuracy))

   0 | test cost:   2.325950 | test accuracy:   0.091900
 500 | test cost:   0.580084 | test accuracy:   0.823800
1000 | test cost:   0.430510 | test accuracy:   0.869600
1500 | test cost:   0.354676 | test accuracy:   0.891300
2000 | test cost:   0.351815 | test accuracy:   0.900900
2500 | test cost:   0.283916 | test accuracy:   0.919000
3000 | test cost:   0.250450 | test accuracy:   0.928300
3500 | test cost:   0.236479 | test accuracy:   0.932100
4000 | test cost:   0.229886 | test accuracy:   0.934300
