In [1]:
from __future__ import print_function 
import os, sys
import numpy as np
from six.moves import cPickle
from collections import OrderedDict

import tensorflow as tf
sys.path.append('..')
from tfomics import layers, utils, init, learn, explore
from tfomics import neuralnetwork as nn

# Load data

In [2]:
from tensorflow.examples.tutorials.mnist import input_data

# Spatial size and channel count used when un-flattening the image vectors.
IMAGE_SIZE = 28
NUM_CHANNELS = 1


def to_image_batch(flat_images):
    """Reshape a batch of flattened images into a 4-D image tensor.

    Parameters
    ----------
    flat_images : array whose first axis indexes samples
        (presumably shape (N, 784) as returned by the MNIST reader -- TODO confirm).

    Returns
    -------
    numpy array of shape (N, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS).
    """
    num_samples = flat_images.shape[0]
    return np.reshape(flat_images, (num_samples, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))


# read the MNIST archives from ./MNIST_data with one-hot encoded labels
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# training split
X_train = to_image_batch(mnist.train.images)
y_train = mnist.train.labels

# validation split
X_valid = to_image_batch(mnist.validation.images)
y_valid = mnist.validation.labels

# test split
X_test = to_image_batch(mnist.test.images)
y_test = mnist.test.labels

# get shapes
num_data, height, width, dim = X_train.shape
input_shape = [None, height, width, dim]   # None leaves the batch dimension unspecified
num_labels = y_train.shape[1]   # number of labels (output units)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


# Build network

In [3]:
# ---------------------------------------------------------------------------
# Placeholders: the values fed into the graph at run time
# ---------------------------------------------------------------------------
inputs = utils.placeholder(shape=input_shape, name='input')
targets = utils.placeholder(shape=(None, num_labels), name='output')
# boolean switch: training vs. testing
is_training = tf.placeholder(tf.bool, name='is_training')
# dropout probability, shared by every dropout layer below
keep_prob = tf.placeholder(tf.float32, name='keep_prob')

# placeholder dictionary (handed to NeuralNet / NeuralTrainer later on)
placeholders = {}
placeholders['inputs'] = inputs
placeholders['targets'] = targets
placeholders['keep_prob'] = keep_prob
placeholders['is_training'] = is_training

# ---------------------------------------------------------------------------
# Layers: an ordered mapping, so insertion order defines the network order
# ---------------------------------------------------------------------------
net = OrderedDict()
net['inputs'] = layers.InputLayer(inputs)

# convolution block 1: 5x5 conv (32 filters) -> batch norm -> relu -> 2x2 max-pool
net['conv1'] = layers.Conv2DLayer(
    net['inputs'], filter_size=(5,5), num_filters=32, padding='SAME')
net['conv1_norm'] = layers.BatchNormLayer(net['conv1'], is_training)
net['conv1_active'] = layers.ActivationLayer(
    net['conv1_norm'], function='relu')
net['conv1_pool'] = layers.MaxPool2DLayer(
    net['conv1_active'], pool_size=(2,2))

# convolution block 2: strided 5x5 conv (64 filters) -> batch norm -> relu -> dropout
net['conv2'] = layers.Conv2DLayer(
    net['conv1_pool'], filter_size=(5,5), num_filters=64,
    strides=(2,2), padding='VALID')
net['conv2_norm'] = layers.BatchNormLayer(net['conv2'], is_training)
net['conv2_active'] = layers.ActivationLayer(
    net['conv2_norm'], function='relu')
net['conv2_dropout'] = layers.DropoutLayer(
    net['conv2_active'], keep_prob=keep_prob)

# fully connected block: 512 units without a bias (b=None), then batch norm -> relu -> dropout
net['dense1'] = layers.DenseLayer(
    net['conv2_dropout'], num_units=512, W=init.HeNormal(), b=None)
net['dense1_norm'] = layers.BatchNormLayer(net['dense1'], is_training)
net['dense1_active'] = layers.ActivationLayer(
    net['dense1_norm'], function='relu')
net['dense1_dropout'] = layers.DropoutLayer(
    net['dense1_active'], keep_prob=keep_prob)

# output block: one unit per label
# NOTE(review): the output uses a sigmoid while the objective below is
# "categorical"; the residual model later in this notebook ends in a softmax.
# Confirm which activation tfomics expects for this objective.
net['dense2'] = layers.DenseLayer(
    net['dense1_dropout'], num_units=num_labels,
    W=init.HeNormal(), b=init.Constant(0.05))
net['output'] = layers.ActivationLayer(net['dense2'], function='sigmoid')

# ---------------------------------------------------------------------------
# Optimisation settings consumed by the trainer
# ---------------------------------------------------------------------------
optimization = {}
optimization["objective"] = "categorical"   # (binary, categorical, squared_error)
optimization["optimizer"] = "adam"
optimization["learning_rate"] = 0.001       # learning rate
optimization["clip_value"] = True           # clip prediction values (True for classification)
optimization["l2"] = .00001                 # l-2 weight decay

In [6]:
# wrap the layer dictionary in a model object and list every layer
nnmodel = nn.NeuralNet(net, placeholders)
nnmodel.inspect_layers()

# where the trainer writes its checkpoint: <results/MNIST dir>/test
filename = 'test'
save_path = utils.make_directory('results', 'MNIST')
filepath = os.path.join(save_path, filename)

# compile neural trainer; save='best' together with `filepath` controls checkpointing
nntrainer = nn.NeuralTrainer(
    nnmodel,
    placeholders,
    optimization,
    save='best',
    filepath=filepath)

----------------------------------------------------------------------------
Network architecture:
----------------------------------------------------------------------------
layer1: inputs
(?, 28, 28, 1)
layer2: conv1
(?, 28, 28, 32)
layer3: conv1_norm
(?, 28, 28, 32)
layer4: conv1_active
(?, 28, 28, 32)
layer5: conv1_pool
(?, 14, 14, 32)
layer6: conv2
(?, 5, 5, 64)
layer7: conv2_norm
(?, 5, 5, 64)
layer8: conv2_active
(?, 5, 5, 64)
layer9: conv2_dropout
(?, 5, 5, 64)
layer10: dense1
(?, 512)
layer11: dense1_norm
(?, 512)
layer12: dense1_active
(?, 512)
layer13: dense1_dropout
(?, 512)
layer14: dense2
(?, 10)
layer15: output
(?, 10)
----------------------------------------------------------------------------


In [7]:
# open the TensorFlow session used by the training and test cells
sess = tf.Session()

# build the initializer op, then run it so every variable gets its starting value
# NOTE(review): later TensorFlow 1.x releases deprecate this call in favour of
# tf.global_variables_initializer() -- confirm the installed version before switching.
init_op = tf.initialize_all_variables()
sess.run(init_op)

In [8]:
# Feed dictionaries: the keys mirror those of the `placeholders` dictionary.
# Training feeds keep_prob 0.6 with is_training on; validation feeds keep_prob 1 with it off.
X = {
    'inputs': X_train,
    'targets': y_train,
    'keep_prob': 0.6,
    'is_training': True,
}
X2 = {
    'inputs': X_valid,
    'targets': y_valid,
    'keep_prob': 1,
    'is_training': False,
}
data = {'train': X, 'valid': X2}

# mini-batch training
# (patience presumably enables early stopping on the validation set -- TODO confirm in tfomics.learn)
learn.train_minibatch(
    sess, nntrainer, data,
    batch_size=128,
    num_epochs=500,
    patience=10,
    verbose=1,
    shuffle=True)

Epoch 1 out of 500 
  valid loss:		1.52001
  valid accuracy:	0.99176+/-0.00211
  valid auc-roc:	0.99907+/-0.00085
  valid auc-pr:		0.99514+/-0.00350
lower cross-validation found
saving model to:  results/MNIST/test_best.ckpt
Epoch 2 out of 500 
  valid loss:		1.50890
  valid accuracy:	0.99410+/-0.00231
  valid auc-roc:	0.99947+/-0.00054
  valid auc-pr:		0.99736+/-0.00187
lower cross-validation found
saving model to:  results/MNIST/test_best.ckpt
Epoch 3 out of 500 
  valid loss:		1.50358
  valid accuracy:	0.99562+/-0.00198
  valid auc-roc:	0.99955+/-0.00046
  valid auc-pr:		0.99783+/-0.00135
lower cross-validation found
saving model to:  results/MNIST/test_best.ckpt
Epoch 4 out of 500 

KeyboardInterrupt: 

In [10]:
# restore the parameters from the checkpoint written during training
best_ckpt = filepath + '_best.ckpt'
nnmodel.load_model_parameters(sess, best_ckpt)

# evaluate on the held-out test split with keep_prob 1 and is_training off
test = {
    'inputs': X_test,
    'targets': y_test,
    'keep_prob': 1,
    'is_training': False,
}
test_loss = nntrainer.test_model(sess, test, batch_size=512)

loading model from:  results/MNIST/test_best.ckpt
  test  loss:		1.50271
  test  accuracy:	0.99566+/-0.00170
  test  auc-roc:	0.99987+/-0.00008
  test  auc-pr:		0.99897+/-0.00063


# Residual blocks

In [16]:
def add_residual_block(net, last_layer, name, num_filters, filter_size,
                       is_training, keep_prob):
    """Append a two-convolution residual block to `net` and return its output key.

    The layers are inserted in this order, each key prefixed with `name`:
    `_1resid`, `_1resid_norm`, `_1resid_active`, `_1resid_dropout`,
    `_2resid`, `_2resid_norm`, `_residual` (element-wise sum of the block
    input and `_2resid_norm`) and `_resid` (relu applied to that sum).

    Parameters
    ----------
    net : OrderedDict of layers; modified in place.
    last_layer : key of the existing layer in `net` that feeds the block and
        is added back in by the skip connection.
    name : prefix for the keys of the new layers.
    num_filters : number of filters of both convolutions.
    filter_size : filter size of both convolutions, e.g. (5, 5).
    is_training : placeholder passed to the batch-norm layers.
    keep_prob : placeholder passed to the dropout layer.

    Returns
    -------
    str : the key of the block's final layer (`name + '_resid'`).
    """
    # first convolution: conv -> batch norm -> relu -> dropout
    net[name+'_1resid'] = layers.Conv2DLayer(net[last_layer], filter_size=filter_size,
                                             num_filters=num_filters, padding='SAME')
    net[name+'_1resid_norm'] = layers.BatchNormLayer(net[name+'_1resid'], is_training)
    net[name+'_1resid_active'] = layers.ActivationLayer(net[name+'_1resid_norm'], function='relu')
    net[name+'_1resid_dropout'] = layers.DropoutLayer(net[name+'_1resid_active'], keep_prob=keep_prob)

    # second convolution: conv -> batch norm (the activation comes after the sum)
    net[name+'_2resid'] = layers.Conv2DLayer(net[name+'_1resid_dropout'], filter_size=filter_size,
                                             num_filters=num_filters, padding='SAME')
    net[name+'_2resid_norm'] = layers.BatchNormLayer(net[name+'_2resid'], is_training)

    # skip connection: add the block input, then apply the non-linearity
    net[name+'_residual'] = layers.ElementwiseSumLayer([net[last_layer], net[name+'_2resid_norm']])
    net[name+'_resid'] = layers.ActivationLayer(net[name+'_residual'], function='relu')
    return name+'_resid'


# design a neural network model
input_vars = utils.placeholder(shape=input_shape, name='input')
target_vars = utils.placeholder(shape=(None,num_labels), name='output')
is_training = tf.placeholder(tf.bool, name='is_training')   # variable to specify training or testing
keep_prob = tf.placeholder(tf.float32, name='keep_prob')   # dropout probability

# placeholder dictionary
# Uses the placeholders created in this cell. The previous version referenced
# `inputs`/`targets`, which only existed because the first model's cell had
# already been run in the same kernel.
placeholders = {'inputs': input_vars,
                'targets': target_vars,
                'keep_prob': keep_prob,
                'is_training': is_training}

net = OrderedDict()
net['inputs'] = layers.InputLayer(input_vars)

# 1st convolution layer
net['conv1'] = layers.Conv2DLayer(net['inputs'], filter_size=(5,5), num_filters=32, padding='SAME')
net['conv1_norm'] = layers.BatchNormLayer(net['conv1'], is_training)
net['conv1_active'] = layers.ActivationLayer(net['conv1_norm'], function='relu')

# residual block 1 (32 filters), followed by 2x2 max-pooling and dropout
block_out = add_residual_block(net, last_layer='conv1_active', name='conv1_2',
                               num_filters=32, filter_size=(5,5),
                               is_training=is_training, keep_prob=keep_prob)
net['conv1_pool'] = layers.MaxPool2DLayer(net[block_out], pool_size=(2,2))
net['conv1_dropout'] = layers.DropoutLayer(net['conv1_pool'], keep_prob=keep_prob)

# 2nd convolution layer
net['conv2'] = layers.Conv2DLayer(net['conv1_dropout'], filter_size=(5,5), num_filters=64,
                                  strides=(1,1), padding='VALID')
net['conv2_norm'] = layers.BatchNormLayer(net['conv2'], is_training)
net['conv2_active'] = layers.ActivationLayer(net['conv2_norm'], function='relu')

# residual block 2 (64 filters), followed by 2x2 max-pooling and dropout
block_out = add_residual_block(net, last_layer='conv2_active', name='conv2_2',
                               num_filters=64, filter_size=(5,5),
                               is_training=is_training, keep_prob=keep_prob)
net['conv2_pool'] = layers.MaxPool2DLayer(net[block_out], pool_size=(2,2))
net['conv2_dropout'] = layers.DropoutLayer(net['conv2_pool'], keep_prob=keep_prob)

# dense layer 1
net['dense1'] = layers.DenseLayer(net['conv2_dropout'], num_units=128,
                                  W=init.HeNormal(), b=init.Constant(0.05))
net['dense1_norm'] = layers.BatchNormLayer(net['dense1'], is_training)
net['dense1_active'] = layers.ActivationLayer(net['dense1_norm'], function='relu')
net['dense1_dropout'] = layers.DropoutLayer(net['dense1_active'], keep_prob=keep_prob)

# dense layer 2
net['dense2'] = layers.DenseLayer(net['dense1_dropout'], num_units=num_labels,
                                  W=init.HeNormal(), b=init.Constant(0.05))
net['output'] = layers.ActivationLayer(net['dense2'], function='softmax')


optimization = {"objective": "categorical",  # (binary, categorical, squared_error)
                "optimizer": "adam",
                "learning_rate": 0.001, # learning rate
                "clip_value": True,     # clip prediction values (True for classification)
                "l2": .00001            # l-2 weight decay
                }


In [19]:
# wrap the residual network's layer dictionary in a model object and list every layer
nnmodel = nn.NeuralNet(net, placeholders)
nnmodel.inspect_layers()

# where the trainer writes its checkpoint: <results/MNIST dir>/test_resnet
filename = 'test_resnet'
save_path = utils.make_directory('results', 'MNIST')
filepath = os.path.join(save_path, filename)

# compile neural trainer; save='best' together with `filepath` controls checkpointing
nntrainer = nn.NeuralTrainer(
    nnmodel,
    placeholders,
    optimization,
    save='best',
    filepath=filepath)

----------------------------------------------------------------------------
Network architecture:
----------------------------------------------------------------------------
layer1: inputs
(?, 28, 28, 1)
layer2: conv1
(?, 28, 28, 32)
layer3: conv1_norm
(?, 28, 28, 32)
layer4: conv1_active
(?, 28, 28, 32)
layer5: conv1_2_1resid
(?, 28, 28, 32)
layer6: conv1_2_1resid_norm
(?, 28, 28, 32)
layer7: conv1_2_1resid_active
(?, 28, 28, 32)
layer8: conv1_2_1resid_dropout
(?, 28, 28, 32)
layer9: conv1_2_2resid
(?, 28, 28, 32)
layer10: conv1_2_2resid_norm
(?, 28, 28, 32)
layer11: conv1_2_residual
(?, 28, 28, 32)
layer12: conv1_2_resid
(?, 28, 28, 32)
layer13: conv1_pool
(?, 14, 14, 32)
layer14: conv1_dropout
(?, 14, 14, 32)
layer15: conv2
(?, 10, 10, 64)
layer16: conv2_norm
(?, 10, 10, 64)
layer17: conv2_active
(?, 10, 10, 64)
layer18: conv2_2_1resid
(?, 10, 10, 64)
layer19: conv2_2_1resid_norm
(?, 10, 10, 64)
layer20: conv2_2_1resid_active
(?, 10, 10, 64)
layer21: conv2_2_1resid_dropout
(?, 10,

In [20]:
# open a new TensorFlow session for the residual model
sess = tf.Session()

# build the initializer op, then run it so every variable gets its starting value
# NOTE(review): later TensorFlow 1.x releases deprecate this call in favour of
# tf.global_variables_initializer() -- confirm the installed version before switching.
init_op = tf.initialize_all_variables()
sess.run(init_op)

# Feed dictionaries: the keys mirror those of the `placeholders` dictionary.
# Training feeds keep_prob 0.6 with is_training on; validation feeds keep_prob 1 with it off.
X = {
    'inputs': X_train,
    'targets': y_train,
    'keep_prob': 0.6,
    'is_training': True,
}
X2 = {
    'inputs': X_valid,
    'targets': y_valid,
    'keep_prob': 1,
    'is_training': False,
}
data = {'train': X, 'valid': X2}

# mini-batch training
# (patience presumably enables early stopping on the validation set -- TODO confirm in tfomics.learn)
learn.train_minibatch(
    sess, nntrainer, data,
    batch_size=128,
    num_epochs=500,
    patience=10,
    verbose=1,
    shuffle=True)

Epoch 1 out of 500 
  valid loss:		2.30239
  valid accuracy:	0.90000+/-0.00752
  valid auc-roc:	0.99799+/-0.00049
  valid auc-pr:		0.93699+/-0.00836
lower cross-validation found
saving model to:  results/MNIST/test_resnet_best.ckpt
Epoch 2 out of 500 
  valid loss:		2.30194
  valid accuracy:	0.90000+/-0.00752
  valid auc-roc:	0.99789+/-0.00060
  valid auc-pr:		0.93678+/-0.00889
lower cross-validation found
saving model to:  results/MNIST/test_resnet_best.ckpt
Epoch 3 out of 500 

KeyboardInterrupt: 

In [None]:
# restore the parameters from the checkpoint written during training
best_ckpt = filepath + '_best.ckpt'
nnmodel.load_model_parameters(sess, best_ckpt)

# evaluate on the held-out test split with keep_prob 1 and is_training off
test = {
    'inputs': X_test,
    'targets': y_test,
    'keep_prob': 1,
    'is_training': False,
}
test_loss = nntrainer.test_model(sess, test, batch_size=512)