In [1]:
# -*- coding: utf-8 -*-

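""" Deep Neural Network for MNIST dataset classification task, trained with
several optimizers (SGD, Momentum, NAG, AdaGrad, AdaDelta, Adam, Nadam,
AMSGrad) so that their results can be compared.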
References:
    Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based
    learning applied to document recognition." Proceedings of the IEEE,
    86(11):2278-2324, November 1998.
Links:
    [MNIST Dataset] http://yann.lecun.com/exdb/mnist/
"""
from __future__ import division, print_function, absolute_import

import tflearn
import tensorflow as tf
from AMSGrad import *

# Data loading and preprocessing
import tflearn.datasets.mnist as mnist

# Number of epochs passed as `n_epoch` to each training run below.
N_EPOCHS = 15

# MNIST arrays: training inputs/labels and test inputs/labels, with the
# labels one-hot encoded (one_hot=True).
X, Y, testX, testY = mnist.load_data(one_hot=True)

def get_layers(n_inputs=784, hidden_units=(64, 64, 64), n_classes=10,
               activation='tanh', regularizer='L2'):
    """Build the fully connected classifier and return its output layer.

    Called with no arguments this produces the network used by the
    notebook: an input of width 784, three 64-unit tanh layers that use
    the 'L2' regularizer, and a 10-unit softmax output layer.

    Args:
        n_inputs: Number of features per input sample.
        hidden_units: Iterable giving the width of each hidden layer, in
            order. An empty iterable connects the input directly to the
            output layer.
        n_classes: Number of units in the softmax output layer.
        activation: Activation name passed to every hidden layer.
        regularizer: Regularizer name passed to every hidden layer.

    Returns:
        The softmax output layer, suitable as the first argument of
        `tflearn.regression`.
    """
    # Building deep neural network
    net = tflearn.input_data(shape=[None, n_inputs])

    # Stack the hidden layers; each one consumes the previous layer's output.
    for n_units in hidden_units:
        net = tflearn.fully_connected(net, n_units, activation=activation,
                                      regularizer=regularizer)

    # The output layer is not regularized, matching the original network.
    return tflearn.fully_connected(net, n_classes, activation='softmax')

  from ._conv import register_converters as _register_converters


Extracting mnist/train-images-idx3-ubyte.gz
Extracting mnist/train-labels-idx1-ubyte.gz
Extracting mnist/t10k-images-idx3-ubyte.gz
Extracting mnist/t10k-labels-idx1-ubyte.gz


In [2]:
# SGD: tflearn's SGD optimizer with learning rate 0.01.
tf.reset_default_graph()  # start from an empty default graph

sgd = tflearn.SGD(learning_rate=0.01)
net = tflearn.regression(
    get_layers(),
    optimizer=sgd,
    loss='categorical_crossentropy')

# Train for N_EPOCHS epochs, validating on the test split; run_id names the run.
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(
    X, Y,
    n_epoch=N_EPOCHS,
    validation_set=(testX, testY),
    show_metric=True,
    run_id="sgd")

Training Step: 12899  | total loss: [1m[32m0.53668[0m[0m | time: 2.686s
| SGD | epoch: 015 | loss: 0.53668 - acc: 0.8689 -- iter: 54976/55000
Training Step: 12900  | total loss: [1m[32m0.51576[0m[0m | time: 3.756s
| SGD | epoch: 015 | loss: 0.51576 - acc: 0.8774 | val_loss: 0.40393 - val_acc: 0.8898 -- iter: 55000/55000
--


In [3]:
# Momentum: TensorFlow's MomentumOptimizer (learning rate 0.01, momentum 0.9),
# passed to tflearn as a ready-made optimizer object.
tf.reset_default_graph()  # start from an empty default graph

momentum = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)
net = tflearn.regression(
    get_layers(),
    optimizer=momentum,
    loss='categorical_crossentropy')

# Train for N_EPOCHS epochs, validating on the test split; run_id names the run.
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(
    X, Y,
    n_epoch=N_EPOCHS,
    validation_set=(testX, testY),
    show_metric=True,
    run_id="momentum")

Training Step: 12899  | total loss: [1m[32m0.06313[0m[0m | time: 2.295s
| Optimizer | epoch: 015 | loss: 0.06313 - acc: 0.9770 -- iter: 54976/55000
Training Step: 12900  | total loss: [1m[32m0.05888[0m[0m | time: 3.366s
| Optimizer | epoch: 015 | loss: 0.05888 - acc: 0.9793 | val_loss: 0.09902 - val_acc: 0.9704 -- iter: 55000/55000
--


In [4]:
# NAG: the same MomentumOptimizer settings as the Momentum cell
# (learning rate 0.01, momentum 0.9) but with use_nesterov=True.
tf.reset_default_graph()  # start from an empty default graph

nag = tf.train.MomentumOptimizer(
    learning_rate=0.01,
    momentum=0.9,
    use_nesterov=True)
net = tflearn.regression(
    get_layers(),
    optimizer=nag,
    loss='categorical_crossentropy')

# Train for N_EPOCHS epochs, validating on the test split; run_id names the run.
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(
    X, Y,
    n_epoch=N_EPOCHS,
    validation_set=(testX, testY),
    show_metric=True,
    run_id="nag")

Training Step: 12899  | total loss: [1m[32m0.27147[0m[0m | time: 2.262s
| Optimizer | epoch: 015 | loss: 0.27147 - acc: 0.9695 -- iter: 54976/55000
Training Step: 12900  | total loss: [1m[32m0.24626[0m[0m | time: 3.332s
| Optimizer | epoch: 015 | loss: 0.24626 - acc: 0.9725 | val_loss: 0.09437 - val_acc: 0.9721 -- iter: 55000/55000
--


In [5]:
# Adagrad: tflearn's AdaGrad optimizer with learning rate 0.01.
tf.reset_default_graph()  # start from an empty default graph

adagrad = tflearn.AdaGrad(learning_rate=0.01)
net = tflearn.regression(
    get_layers(),
    optimizer=adagrad,
    loss='categorical_crossentropy')

# Train for N_EPOCHS epochs, validating on the test split; run_id names the run.
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(
    X, Y,
    n_epoch=N_EPOCHS,
    validation_set=(testX, testY),
    show_metric=True,
    run_id="adagrad")

Training Step: 12899  | total loss: [1m[32m0.30148[0m[0m | time: 2.286s
| AdaGrad | epoch: 015 | loss: 0.30148 - acc: 0.9457 -- iter: 54976/55000
Training Step: 12900  | total loss: [1m[32m0.28427[0m[0m | time: 3.358s
| AdaGrad | epoch: 015 | loss: 0.28427 - acc: 0.9464 | val_loss: 0.18653 - val_acc: 0.9512 -- iter: 55000/55000
--


In [6]:
# Adadelta: tflearn's AdaDelta optimizer. Note the learning rate is 1 here,
# unlike the 0.01 used by the other optimizer cells.
tf.reset_default_graph()  # start from an empty default graph

adadelta = tflearn.AdaDelta(learning_rate=1)
net = tflearn.regression(
    get_layers(),
    optimizer=adadelta,
    loss='categorical_crossentropy')

# Train for N_EPOCHS epochs, validating on the test split; run_id names the run.
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(
    X, Y,
    n_epoch=N_EPOCHS,
    validation_set=(testX, testY),
    show_metric=True,
    run_id="adadelta")

Training Step: 12899  | total loss: [1m[32m0.30006[0m[0m | time: 2.543s
| AdaDelta | epoch: 015 | loss: 0.30006 - acc: 0.9340 -- iter: 54976/55000
Training Step: 12900  | total loss: [1m[32m0.28531[0m[0m | time: 3.613s
| AdaDelta | epoch: 015 | loss: 0.28531 - acc: 0.9344 | val_loss: 0.15131 - val_acc: 0.9547 -- iter: 55000/55000
--


In [7]:
# Adam: tflearn's Adam optimizer with learning rate 0.01.
tf.reset_default_graph()  # start from an empty default graph

adam = tflearn.Adam(learning_rate=0.01)
net = tflearn.regression(
    get_layers(),
    optimizer=adam,
    loss='categorical_crossentropy')

# Train for N_EPOCHS epochs, validating on the test split; run_id names the run.
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(
    X, Y,
    n_epoch=N_EPOCHS,
    validation_set=(testX, testY),
    show_metric=True,
    run_id="adam")

Training Step: 12899  | total loss: [1m[32m0.23974[0m[0m | time: 2.408s
| Adam | epoch: 015 | loss: 0.23974 - acc: 0.9285 -- iter: 54976/55000
Training Step: 12900  | total loss: [1m[32m0.22898[0m[0m | time: 3.483s
| Adam | epoch: 015 | loss: 0.22898 - acc: 0.9325 | val_loss: 0.28020 - val_acc: 0.9183 -- iter: 55000/55000
--


In [8]:
# Nadam: NadamOptimizer from tf.contrib.opt with learning rate 0.01,
# passed to tflearn as a ready-made optimizer object.
tf.reset_default_graph()  # start from an empty default graph

nadam = tf.contrib.opt.NadamOptimizer(learning_rate=0.01)
net = tflearn.regression(
    get_layers(),
    optimizer=nadam,
    loss='categorical_crossentropy')

# Train for N_EPOCHS epochs, validating on the test split; run_id names the run.
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(
    X, Y,
    n_epoch=N_EPOCHS,
    validation_set=(testX, testY),
    show_metric=True,
    run_id="nadam")

Training Step: 12899  | total loss: [1m[32m0.22663[0m[0m | time: 5.359s
| Optimizer | epoch: 015 | loss: 0.22663 - acc: 0.9320 -- iter: 54976/55000
Training Step: 12900  | total loss: [1m[32m0.22601[0m[0m | time: 6.432s
| Optimizer | epoch: 015 | loss: 0.22601 - acc: 0.9310 | val_loss: 0.23752 - val_acc: 0.9321 -- iter: 55000/55000
--


In [9]:
# AMSGrad: optimizer with learning rate 0.01. The AMSGrad name is presumably
# supplied by the `from AMSGrad import *` at the top of the notebook.
tf.reset_default_graph()  # start from an empty default graph

amsgrad = AMSGrad(learning_rate=0.01)
net = tflearn.regression(
    get_layers(),
    optimizer=amsgrad,
    loss='categorical_crossentropy')

# Train for N_EPOCHS epochs, validating on the test split; run_id names the run.
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(
    X, Y,
    n_epoch=N_EPOCHS,
    validation_set=(testX, testY),
    show_metric=True,
    run_id="amsgrad")

Training Step: 12899  | total loss: [1m[32m0.35223[0m[0m | time: 2.857s
| Optimizer | epoch: 015 | loss: 0.35223 - acc: 0.9201 -- iter: 54976/55000
Training Step: 12900  | total loss: [1m[32m0.33797[0m[0m | time: 3.934s
| Optimizer | epoch: 015 | loss: 0.33797 - acc: 0.9219 | val_loss: 0.17924 - val_acc: 0.9467 -- iter: 55000/55000
--
