In [2]:
%matplotlib inline
import os
os.environ['DATA_PATH'] = "/root/work/data"
import matplotlib.pyplot as plt
from invoke import task
from collections import OrderedDict
import numpy as np
from model import build_dense, build_conv, build_dense_resid, build_ciresan
import theano
import theano.tensor as T
from lasagne import layers, objectives, updates
from lasagnekit.datasets.mnist import MNIST
from lasagnekit.datasets.infinite_image_dataset import Transform
from helpers import iterate_minibatches, flip, rotate_scale, rotate_scale_one, elastic_transform, elastic_transform_one
from tabulate import tabulate
from time import time

In [88]:
# --- Hyper-parameters --------------------------------------------------------
nb_epochs = 100   # number of passes of the training loop
c = 1             # image channels
w = 28            # image width
h = 28            # image height
# Kept in a Theano shared variable so its value can be changed with
# set_value() after the training function has been compiled.
learning_rate = theano.shared(np.array(0.001).astype(np.float32))
momentum = 0      # momentum coefficient passed to updates.momentum
batchsize = 128   # minibatch size passed to iterate_minibatches

# --- Symbolic inputs ---------------------------------------------------------
X = T.tensor4()   # symbolic 4-D batch of images
y = T.ivector()   # symbolic integer vector of target labels

# --- Network -----------------------------------------------------------------
# The height is passed as `h` (it used to be `h=w`, which only worked because
# the images are square).
net = build_ciresan(
    w=w, h=h, c=c,
    nb_outputs=10)
#net = build_dense(
#    w=w, h=h, c=c,
#    nb_hidden=500, 
#    nb_outputs=10, 
#    nb_blocks=4, layer_per_block=2)

#net = build_conv(
#	w=w, h=h, c=c,
#	nb_filters=16,
#	filter_size=5,
#	nb_outputs=10,
#	nb_blocks=2,
#	layer_per_block=3,
#	pool=True
#)

print('Compiling the net...')

# Two symbolic forward passes over the same network:
#   * y_pred      -- default pass, used to build the training objective;
#   * y_pred_detm -- deterministic=True pass, used for evaluation so that any
#                    stochastic layers in the net (e.g. dropout) are disabled.
y_pred = layers.get_output(net, X)
y_pred_detm = layers.get_output(net, X, deterministic=True)
#predict_fn = theano.function([X], y_pred)

# Training objective: mean categorical cross-entropy over the minibatch.
loss = objectives.categorical_crossentropy(y_pred, y).mean()

# Evaluation metrics. Both are derived from the deterministic output so the
# reported loss and accuracy describe the same forward pass (the loss used to
# be computed from `y_pred` by mistake).
loss_detm = objectives.categorical_crossentropy(y_pred_detm, y).mean()
y_acc_detm = T.eq(y_pred_detm.argmax(axis=1), y).mean()

loss_fn = theano.function([X, y], loss_detm)
acc_fn = theano.function([X, y], y_acc_detm)

# One momentum update step on every trainable parameter; train_fn returns the
# training loss of the minibatch it was called with.
params = layers.get_all_params(net, trainable=True)
grad_updates = updates.momentum(loss, params, learning_rate=learning_rate, momentum=momentum)
train_fn = theano.function([X, y], loss, updates=grad_updates)




Compiling the net...


In [94]:
print('Loading data...')


def preprocess(data):
    """Reshape `data` into a 4-D array of shape (data.shape[0], c, w, h).

    Reads the notebook-level globals `c`, `w` and `h`.

    NOTE(review): the trailing axes are ordered (w, h). That is harmless while
    w == h; confirm the layout the model expects before using non-square
    images.
    """
    nb_examples = data.shape[0]
    return data.reshape((nb_examples, c, w, h))


# Training split, truncated to its first 128 examples (images and labels).
# NOTE(review): 128 equals `batchsize`, so this looks like a deliberate
# single-minibatch run -- confirm before training on the full set.
train = MNIST(which='train')
train.load()
train.X = preprocess(train.X)[:128]
train.y = train.y[:128]

# Test split, kept whole.
test = MNIST(which='test')
test.load()
test.X = preprocess(test.X)

Loading data...


In [95]:
# Per-epoch statistics; the training loop appends one record per epoch.
history = list()

In [None]:
# (Re)set the shared learning rate before training starts.
lr = 0.001
learning_rate.set_value(np.asarray(lr, dtype=np.float32))

print('Training...')

for epoch in range(1, nb_epochs + 1):
    # Wall-clock seconds spent per minibatch in augmentation / in the update.
    data_aug_time, train_time = [], []
    for train_X, train_y in iterate_minibatches(train.X, train.y, batchsize):
        # --- data augmentation -------------------------------------------
        # Channel 0 is taken out, the images are rotated/scaled and then
        # elastically distorted.
        t = time()
        train_X = elastic_transform(
            rotate_scale(
                train_X[:, 0],
                min_angle=-15, max_angle=15,
                min_scale=0.85, max_scale=1.15),
            min_alpha=36, max_alpha=38, min_sigma=5, max_sigma=6)
        data_aug_time.append(time() - t)

        # --- parameter update --------------------------------------------
        t = time()
        train_X = train_X[:, np.newaxis]  # put the channel axis back
        train_fn(train_X, train_y)
        train_time.append(time() - t)

    # End-of-epoch evaluation on the (un-augmented) train and test arrays.
    stats = OrderedDict([
        ('train_loss', loss_fn(train.X, train.y)),
        ('test_loss', loss_fn(test.X, test.y)),
        ('train_acc', acc_fn(train.X, train.y)),
        ('test_acc', acc_fn(test.X, test.y)),
        ('data_aug_time', np.sum(data_aug_time)),
        ('train_time', np.sum(train_time)),
        ('epoch', epoch),
    ])

    history.append(stats)
    print(tabulate([stats], headers="keys"))

    # Learning-rate schedule hook: the rate is read back and written again
    # unchanged; re-enable the decay line below to shrink it every epoch.
    lr = learning_rate.get_value()
    #lr *= 0.99
    learning_rate.set_value(np.asarray(lr, dtype=np.float32))

Training...


In [None]:
# Accuracy curves, one point per record in `history`.
# The comprehension variable is deliberately not named `h`: that name is the
# global image height, and under Python 2 a list-comprehension variable leaks
# into the enclosing scope and would overwrite it, breaking any later re-run
# of preprocess().
train_acc = [record['train_acc'] for record in history]
test_acc = [record['test_acc'] for record in history]
plt.plot(train_acc, label='train')
plt.plot(test_acc, label='test')
plt.xlabel('epoch (index into history)')
plt.ylabel('accuracy')
plt.legend()
plt.show()