In [1]:
from argparse import Namespace
import numpy as np

from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, Activation
from keras.optimizers import Adam

Using TensorFlow backend.


In [2]:
# Seed NumPy's global random number generator.
np.random.seed(1337)

# Hyperparameters shared by the data-loading, model-building and training
# functions; they are read as attributes (e.g. ``args.BATCH_SIZE``).
args = Namespace(**{
    'TIME_STEPS': 28,    # sequence length: one step per image row (image height)
    'INPUT_SIZE': 28,    # features per step: pixels in one row (image width)
    'BATCH_SIZE': 50,    # TODO: a final batch smaller than BATCH_SIZE causes an error
    'BATCH_INDEX': 0,    # start offset of the next training batch; advanced by train()
    'OUTPUT_SIZE': 10,   # number of classes used for the one-hot labels / output layer
    'CELL_SIZE': 50,     # size of the SimpleRNN layer
    'EPOCHES': 4001,     # iterations of the loop in train(); each one trains on ONE batch
    'LR': 1e-3,          # value passed to the Adam optimizer
})

In [3]:
def load_data(args):
    """Load MNIST and prepare it for a row-by-row sequence model.

    Each image is reshaped to ``(args.TIME_STEPS, args.INPUT_SIZE)`` so the
    data layout follows the same configuration values that the model's input
    layer is built from, instead of a hard-coded 28x28. Pixel values are
    divided by 255 and the integer labels are one-hot encoded.

    Args:
        args: configuration namespace. Reads ``TIME_STEPS``, ``INPUT_SIZE``
            and ``OUTPUT_SIZE``.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` where the ``X`` arrays
        have shape ``(n_samples, TIME_STEPS, INPUT_SIZE)`` and the ``y``
        arrays are one-hot matrices with ``OUTPUT_SIZE`` columns.

    Raises:
        ValueError: from ``reshape`` if the image data cannot be divided into
            ``TIME_STEPS * INPUT_SIZE`` sized samples.
    """
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    # -1 lets NumPy infer the number of samples.
    sample_shape = (-1, args.TIME_STEPS, args.INPUT_SIZE)

    # Float divisor makes the true division explicit (uint8 pixels -> floats).
    X_train = X_train.reshape(sample_shape) / 255.0
    X_test = X_test.reshape(sample_shape) / 255.0

    y_train = np_utils.to_categorical(y_train, num_classes=args.OUTPUT_SIZE)
    y_test = np_utils.to_categorical(y_test, num_classes=args.OUTPUT_SIZE)

    return X_train, y_train, X_test, y_test

In [4]:
def build_model(args):
    """Build an (uncompiled) SimpleRNN classifier.

    Architecture: ``SimpleRNN(CELL_SIZE) -> Dense(OUTPUT_SIZE) -> softmax``.

    The input is declared with ``input_shape`` (no fixed batch dimension)
    rather than ``batch_input_shape``: the RNN is not stateful, so pinning the
    batch size only made the model reject batches of any other size. The
    layer width is passed as ``units``, the Keras 2 name for the legacy
    ``output_dim`` keyword.

    Args:
        args: configuration namespace. Reads ``TIME_STEPS``, ``INPUT_SIZE``,
            ``CELL_SIZE`` and ``OUTPUT_SIZE``.

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    model = Sequential()

    # Recurrent layer. Its activation is left at the Keras default (tanh,
    # per the Keras documentation), so its outputs lie in [-1, 1].
    model.add(SimpleRNN(
        units=args.CELL_SIZE,
        input_shape=(args.TIME_STEPS, args.INPUT_SIZE),
    ))

    # Output layer: one score per class, turned into probabilities by softmax.
    model.add(Dense(args.OUTPUT_SIZE))
    model.add(Activation('softmax'))

    return model

In [5]:
def train(model, args, X_train, y_train, X_test, y_test, eval_every=500):
    """Train ``model`` one mini-batch per step and periodically print test metrics.

    Batches are consecutive slices of the training arrays starting at
    ``args.BATCH_INDEX``. The offset wraps back to 0 as soon as the *next*
    batch would run past the end of the data, so every batch has exactly
    ``args.BATCH_SIZE`` samples even when the number of training samples is
    not a multiple of the batch size (the leftover tail is skipped). If
    ``args.BATCH_SIZE`` exceeds the number of samples, each batch is simply
    all available samples.

    Args:
        model: object exposing ``train_on_batch(X, y)`` and
            ``evaluate(X, y, batch_size=..., verbose=...)``. ``evaluate`` must
            return exactly two values (loss, metric), i.e. a Keras model
            compiled with a single metric.
        args: configuration namespace. Reads ``EPOCHES`` (number of
            single-batch training steps), ``BATCH_SIZE`` and ``BATCH_INDEX``.
            NOTE: ``args.BATCH_INDEX`` is updated in place, so a later call
            resumes from where this one stopped.
        X_train, y_train: training inputs / targets, indexed along axis 0.
        X_test, y_test: data passed to ``model.evaluate``.
        eval_every: evaluate and print every this many steps (step 0
            included). A falsy value (0 / None) disables evaluation.

    Returns:
        The same ``model`` object, after training.
    """
    n_samples = X_train.shape[0]

    for step in range(args.EPOCHES):
        # data shape = (batch_size, steps, inputs/outputs)
        start = args.BATCH_INDEX
        stop = start + args.BATCH_SIZE
        X_batch = X_train[start:stop]
        y_batch = y_train[start:stop]

        # The per-batch training loss/metrics are not used.
        model.train_on_batch(X_batch, y_batch)

        # Advance to the next batch; wrap around if a full batch no longer fits.
        args.BATCH_INDEX = stop if stop + args.BATCH_SIZE <= n_samples else 0

        if eval_every and step % eval_every == 0:
            cost, acc = model.evaluate(X_test, y_test, batch_size=args.BATCH_SIZE, verbose=False)
            print('test cost: ', cost, 'test accuracy: ', acc)

    return model

In [6]:
# Prepare the dataset according to the shared configuration.
X_train, y_train, X_test, y_test = load_data(args)

# Optimizer: Adam, given the configured LR as its first argument.
adam = Adam(args.LR)

# Build the network, attach loss / optimizer / metric, then run the training loop.
model = build_model(args)
model.compile(
    loss='categorical_crossentropy',
    optimizer=adam,
    metrics=['accuracy'],
)
model = train(model, args, X_train, y_train, X_test, y_test)

Instructions for updating:
Colocations handled automatically by placer.


  


Instructions for updating:
Use tf.cast instead.
test cost:  2.4058232843875884 test accuracy:  0.03909999929368496
test cost:  0.5946149977296591 test accuracy:  0.8252999976277351
test cost:  0.4534797954000533 test accuracy:  0.8631999951601028
test cost:  0.3418855496309698 test accuracy:  0.900799997150898
test cost:  0.3061706231161952 test accuracy:  0.9113999980688096
test cost:  0.2858938118815422 test accuracy:  0.9154999980330467
test cost:  0.30916016990318895 test accuracy:  0.9084999963641167
test cost:  0.23095018286723643 test accuracy:  0.9320999968051911
test cost:  0.22944507162552327 test accuracy:  0.934399998486042
