# Tutorial Keras



In [1]:
import numpy as np
import tensorflow as tf
import tensorlayer as tl
import time
from keras import backend as K
from keras.layers import *
from tensorlayer.layers import *

Using TensorFlow backend.


Note: Keras could use Tensorflow or theano as the backend, but the default setting is tensorflow.

Load Data

In [3]:
X_train, y_train, X_val, y_val, X_test, y_test =tl.files.load_mnist_dataset(shape=(-1, 784))

Load or Download MNIST > data/mnist/
Downloading train-images-idx3-ubyte.gz...100%
Succesfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
data/mnist/train-images-idx3-ubyte.gz
Downloading train-labels-idx1-ubyte.gz...113%
Succesfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Downloading t10k-images-idx3-ubyte.gz...100%
Succesfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
data/mnist/t10k-images-idx3-ubyte.gz
Downloading t10k-labels-idx1-ubyte.gz...180%
Succesfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.


In [4]:
sess = tf.InteractiveSession()

In [5]:
batch_size = 128
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.int64, shape=[None,])

In [6]:
def keras_block(x):
    x = Dropout(0.8)(x)
    x = Dense(800, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(800, activation='relu')(x)
    x = Dropout(0.5)(x)
    logits = Dense(10, activation='linear')(x)
    return logits

In [7]:
network = InputLayer(x, name='input')
network = KerasLayer(network, keras_layer=keras_block, name='keras')

  [TL] InputLayer  input: (?, 784)
  [TL] KerasLayer keras: <function keras_block at 0x0000014CE9E96620>
       This API will be removed, please use LambdaLayer instead.


In [8]:
y = network.outputs

In [9]:
y

<tf.Tensor 'keras/dense_3/BiasAdd:0' shape=(?, 10) dtype=float32>

In [12]:
network.print_params(False)

  param   0: (784, 800)         keras/dense_1/kernel:0
  param   1: (800,)             keras/dense_1/bias:0
  param   2: (800, 800)         keras/dense_2/kernel:0
  param   3: (800,)             keras/dense_2/bias:0
  param   4: (800, 10)          keras/dense_3/kernel:0
  param   5: (10,)              keras/dense_3/bias:0
  num of params: 1276810


In [13]:
network.print_layers()

  layer 0: Tensor("keras/dense_3/BiasAdd:0", shape=(?, 10), dtype=float32)


In [14]:
cost = tl.cost.cross_entropy(y, y_, 'cost')
correct_prediction = tf.equal(tf.argmax(y, 1), y_)
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [15]:
n_epoch = 200
learning_rate = 0.0001

In [16]:
train_params = network.all_params

In [17]:
train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999,
    epsilon=1e-08, use_locking=False).minimize(cost, var_list=train_params)

In [18]:
tl.layers.initialize_global_variables(sess)

We began to train:

In [None]:
for epoch in range(n_epoch):
    start_time = time.time()
    ## Training
    for X_train_a, y_train_a in tl.iterate.minibatches(
                                X_train, y_train, batch_size, shuffle=True):
        _, _ = sess.run([cost, train_op], feed_dict={x: X_train_a, y_: y_train_a,
                                K.learning_phase(): 1})

    print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
    ## Evaluation
    train_loss, train_acc, n_batch = 0, 0, 0
    for X_train_a, y_train_a in tl.iterate.minibatches(
                            X_train, y_train, batch_size, shuffle=False):
        err, ac = sess.run([cost, acc], feed_dict={x: X_train_a, y_: y_train_a,
                                K.learning_phase(): 0})
        train_loss += err; train_acc += ac; n_batch += 1
    print("   train loss: %f" % (train_loss/ n_batch))
    print("   train acc: %f" % (train_acc/ n_batch))
    val_loss, val_acc, n_batch = 0, 0, 0
    for X_val_a, y_val_a in tl.iterate.minibatches(
                                X_val, y_val, batch_size, shuffle=False):
        err, ac = sess.run([cost, acc], feed_dict={x: X_val_a, y_: y_val_a,
                                K.learning_phase(): 0})
        val_loss += err; val_acc += ac; n_batch += 1
    print("   val loss: %f" % (val_loss/ n_batch))
    print("   val acc: %f" % (val_acc/ n_batch))

Epoch 1 of 200 took 12.526583s
   train loss: 0.815532
   train acc: 0.822456
   val loss: 0.776519
   val acc: 0.843450
Epoch 2 of 200 took 12.771707s
   train loss: 0.591552
   train acc: 0.855929
   val loss: 0.551409
   val acc: 0.871695
Epoch 3 of 200 took 11.570901s
   train loss: 0.524391
   train acc: 0.874359
   val loss: 0.485615
   val acc: 0.887420
Epoch 4 of 200 took 11.518147s
   train loss: 0.484785
   train acc: 0.877344
   val loss: 0.447274
   val acc: 0.888922
Epoch 5 of 200 took 11.721315s
   train loss: 0.453363
   train acc: 0.891707
   val loss: 0.417462
   val acc: 0.901242
Epoch 6 of 200 took 12.269288s
   train loss: 0.426012
   train acc: 0.895272
   val loss: 0.390113
   val acc: 0.907352
Epoch 7 of 200 took 11.807560s
   train loss: 0.403468
   train acc: 0.897256
   val loss: 0.369989
   val acc: 0.910156
Epoch 8 of 200 took 11.738737s
   train loss: 0.386020
   train acc: 0.902544
   val loss: 0.354183
   val acc: 0.913562
Epoch 9 of 200 took 12.013738s
 

# Credit

Credit goes to [TensorLayer Example](https://github.com/zsdonghao/tensorlayer/tree/master/example) for the majority of this code. I've merely create a jupyter notebook to make it more readable.