In [1]:
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import time
import Image

import theano
import theano.tensor as T

import lasagne

from collections import OrderedDict
from lasagne import utils

import subprocess

In [2]:
def make_seed(n):
    """Seed NumPy's global random generator with ``n`` and register it with Lasagne.

    The ``np.random`` module itself is what gets passed to
    ``lasagne.random.set_rng``.
    """
    global_rng = np.random
    global_rng.seed(n)
    lasagne.random.set_rng(global_rng)

In [3]:
def adam_update(loss_or_grads, params, learning_rate=1e-3, beta1=0.9,
                        beta2=0.999, epsilon=1e-4):
    """Build Adam update expressions for ``params``.

    Parameters
    ----------
    loss_or_grads
        Forwarded to ``lasagne.updates.get_or_compute_grads`` together with
        ``params`` to obtain one gradient expression per parameter.
    params
        Shared variables to update.
    learning_rate, beta1, beta2, epsilon
        Step size, decay factors of the first/second moment estimates, and
        the constant added to the square root in the denominator.

    Returns
    -------
    OrderedDict
        Maps every moment buffer, every parameter and the step counter to its
        update expression.
    """
    def _zero_buffer(template, pattern):
        # Shared zeros with the shape/dtype of ``template`` and the
        # broadcast pattern of the parameter they belong to.
        return theano.shared(np.zeros(template.shape, dtype=template.dtype),
                             broadcastable=pattern)

    grads = lasagne.updates.get_or_compute_grads(loss_or_grads, params)
    step_count = theano.shared(utils.floatX(0.))
    updates = OrderedDict()

    next_count = step_count + 1
    # Learning rate scaled by sqrt(1 - beta2**t) / (1 - beta1**t).
    scaled_rate = learning_rate * T.sqrt(1 - beta2 ** next_count) / (1 - beta1 ** next_count)

    for param, grad in zip(params, grads):
        current = param.get_value(borrow=True)
        first_moment = _zero_buffer(current, param.broadcastable)
        second_moment = _zero_buffer(current, param.broadcastable)

        new_first = beta1 * first_moment + (1 - beta1) * grad
        new_second = beta2 * second_moment + (1 - beta2) * grad ** 2
        delta = scaled_rate * new_first / (T.sqrt(new_second) + epsilon)

        updates[first_moment] = new_first
        updates[second_moment] = new_second
        updates[param] = param - delta

    updates[step_count] = next_count
    return updates

In [4]:
def build_mlp(input_var=None, BN=False):
    """Assemble the MLP: input, three 100-unit rectifier layers, 10-way softmax.

    The input layer has shape ``(None, 1, sz, sz)``, where ``sz`` is the
    module-level global read when this function is called. With ``BN`` true
    the input layer and every hidden layer are wrapped with
    ``lasagne.layers.batch_norm``; the output layer never is.

    Returns the output layer.
    """
    def _wrap(layer):
        # Apply batch normalisation only when it was requested.
        return lasagne.layers.batch_norm(layer) if BN else layer

    relu = lasagne.nonlinearities.rectify
    network = _wrap(lasagne.layers.InputLayer(shape=(None, 1, sz, sz),
                                              input_var=input_var))

    # Only the first hidden layer names its weight initialiser explicitly;
    # the other two are built without a ``W`` argument.
    per_layer_extras = ({'W': lasagne.init.GlorotUniform()}, {}, {})
    for extras in per_layer_extras:
        network = _wrap(lasagne.layers.DenseLayer(
            network, num_units=100, nonlinearity=relu, **extras))

    return lasagne.layers.DenseLayer(
        network, num_units=10,
        nonlinearity=lasagne.nonlinearities.softmax)

In [5]:
def get_data(n=10000, side=60, data_dir="clMNIST"):
    """Run the external ``th`` generator, then load the examples it left on disk.

    Parameters
    ----------
    n : int
        Number of examples to read (files ``example0.png`` .. ``example<n-1>.png``
        and ``y0`` .. ``y<n-1>``).
    side : int
        Side length of the square images; every image must flatten to
        ``side * side`` values.
    data_dir : str
        Directory holding the example files. NOTE(review): presumably this is
        where ``my_example`` writes its output -- confirm against that script.

    Returns
    -------
    (images, labels)
        ``images`` is a uint8 array of shape ``(n, 1, side, side)`` and
        ``labels`` a flat uint8 array of length ``n``.
    """
    import warnings  # local import so this cell does not depend on an edit elsewhere

    status = subprocess.call(['th', '-l', 'my_example'])
    if status != 0:
        # The exit status used to be discarded, so a failed generator run
        # silently fell through to whatever files were already on disk.
        warnings.warn("'th -l my_example' exited with status %d; the files in "
                      "%r may be stale or incomplete" % (status, data_dir))

    images = np.zeros((n, side * side), dtype=np.uint8)
    labels = np.zeros(n, dtype=np.uint8)
    for i in range(n):
        picture = Image.open(os.path.join(data_dir, "example%d.png" % i))
        # Assumes single-band pixel data of length side * side; anything else
        # fails on this row assignment.
        images[i] = np.array(picture.getdata())
        labels[i] = np.uint(np.loadtxt(os.path.join(data_dir, "y%d" % i)))
    return np.reshape(images, (-1, 1, side, side)), np.ravel(labels)

In [6]:
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield ``(inputs, targets)`` mini-batches holding ``batchsize`` items each.

    ``inputs`` and ``targets`` must have the same length. A trailing remainder
    shorter than ``batchsize`` is not yielded. With ``shuffle`` the items are
    picked through an index permutation produced by ``np.random.shuffle``;
    otherwise consecutive slices are taken in order.
    """
    total = len(inputs)
    assert total == len(targets)

    order = None
    if shuffle:
        order = np.arange(total)
        np.random.shuffle(order)

    for first in range(0, total - batchsize + 1, batchsize):
        window = slice(first, first + batchsize)
        # Shuffled batches index through the permutation, plain ones slice directly.
        picker = order[window] if shuffle else window
        yield inputs[picker], targets[picker]

In [7]:
# Side length of the square network input; build_mlp reads this global when
# it declares its (None, 1, sz, sz) input layer.
sz = 60

In [8]:
def _select_updates(method, loss, params, alpha, mu, beta1, beta2):
    """Call the update rule ``method`` with the hyper-parameters it accepts.

    Momentum additionally receives ``mu``; the Adam variants receive ``beta1``
    and ``beta2``; every other rule is called with the learning rate only.
    """
    if method == lasagne.updates.momentum:
        return method(loss, params, learning_rate=alpha, momentum=mu)

    # ``adam_update2`` is not defined in the visible part of this notebook, so
    # it is looked up lazily. Naming it directly raised NameError for every
    # rule that reached the comparison, which made the generic fallback below
    # unreachable.
    adam_like = (lasagne.updates.adam, adam_update, globals().get('adam_update2'))
    for candidate in adam_like:
        if candidate is not None and method == candidate:
            # lasagne.updates.adam used to be called without beta2, which
            # silently ignored the value given by the caller.
            return method(loss, params, learning_rate=alpha, beta1=beta1, beta2=beta2)

    return method(loss, params, learning_rate=alpha)


def _evaluate_on_fresh_data(eval_fn, batch_size):
    """Fetch a new set from get_data() and return (mean loss, mean accuracy in %)."""
    eval_inputs, eval_targets = get_data()
    total_err = 0
    total_acc = 0
    n_batches = 0
    for inputs, targets in iterate_minibatches(eval_inputs, eval_targets, batch_size, shuffle=False):
        err, acc = eval_fn(inputs, targets)
        total_err += err
        total_acc += acc
        n_batches += 1
    return total_err / n_batches, total_acc / n_batches * 100


def run_method(method, model='mlp', BN=False, num_epochs=50, alpha=0.1, mu=0.9, beta1=0.9, beta2=0.999, echo=False,
               batch_size=100):
    """Train a network with the update rule ``method`` and record its learning curves.

    A new data set is requested from ``get_data()`` before every training
    pass, before every validation pass and before the final test pass.

    Parameters
    ----------
    method : callable
        Update rule, called as ``method(loss, params, learning_rate=alpha, ...)``.
    model : str
        Only ``'mlp'`` is recognised.
    BN : bool
        Forwarded to ``build_mlp``.
    num_epochs : int
        Number of training epochs.
    alpha : float
        Learning rate.
    mu : float
        Momentum; used only when ``method`` is ``lasagne.updates.momentum``.
    beta1, beta2 : float
        Passed to ``lasagne.updates.adam``, ``adam_update`` and, if it exists,
        ``adam_update2``.
    echo : bool
        Print progress messages.
    batch_size : int
        Mini-batch size for training and evaluation.

    Returns
    -------
    dict or None
        ``None`` (after printing a message) for an unrecognised ``model``.
        Otherwise a dict with per-epoch arrays ``'train_err'``, ``'val_err'``,
        ``'train_acc'``, ``'val_acc'`` and the scalars ``'test_err'``,
        ``'test_acc'``; accuracies are percentages.
    """
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    if echo:
        print("Building model and compiling functions...")
    if model == 'mlp':
        network = build_mlp(input_var, BN)
    else:
        print("Unrecognized model type %r." % model)
        return

    # Training graph (non-deterministic network output).
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    train_acc_expr = T.mean(T.eq(T.argmax(prediction, axis=1), target_var),
                            dtype=theano.config.floatX)

    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = _select_updates(method, loss, params, alpha, mu, beta1, beta2)

    # Evaluation graph (deterministic network output).
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    test_acc_expr = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                           dtype=theano.config.floatX)

    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    train_fn_acc = theano.function([input_var, target_var], train_acc_expr)
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc_expr])

    if echo:
        print("Starting training...")

    arr_train_err = []
    arr_val_err = []
    arr_train_acc = []
    arr_val_acc = []

    for epoch in range(num_epochs):
        start_time = time.time()

        train_inputs, train_targets = get_data()
        epoch_err = 0
        epoch_acc = 0
        train_batches = 0
        for inputs, targets in iterate_minibatches(train_inputs, train_targets, batch_size, shuffle=True):
            epoch_err += train_fn(inputs, targets)
            # Separate forward pass: the accuracy is measured after this
            # batch's parameter update has been applied.
            epoch_acc += train_fn_acc(inputs, targets)
            train_batches += 1

        arr_train_err.append(epoch_err / train_batches)
        arr_train_acc.append(epoch_acc / train_batches * 100)

        val_err, val_acc = _evaluate_on_fresh_data(val_fn, batch_size)
        arr_val_err.append(val_err)
        arr_val_acc.append(val_acc)

        if echo:
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs, time.time() - start_time))
            print("  training loss:\t\t{:.6f}".format(epoch_err / train_batches))

    test_err, test_acc = _evaluate_on_fresh_data(val_fn, batch_size)

    if echo:
        print("Final results:")
        print("  test loss:\t\t\t{:.6f}".format(test_err))
        print("  test accuracy:\t\t{:.2f} %".format(test_acc))

    res = dict()
    res['train_err'] = np.array(arr_train_err)
    res['val_err'] = np.array(arr_val_err)
    res['train_acc'] = np.array(arr_train_acc)
    res['val_acc'] = np.array(arr_val_acc)
    res['test_err'] = test_err
    res['test_acc'] = test_acc

    return res

#### alpha = 1e-4

In [17]:
%%time
# Learning-rate sweep, alpha = 1e-4, using the notebook's own adam_update.
# The seed is reset to 1 before the run, as in every other run of the sweep.
make_seed(1)
adam1 = run_method(adam_update, num_epochs=50, alpha=1e-4)

CPU times: user 21min 31s, sys: 9min 22s, total: 30min 53s
Wall time: 30min 31s


#### alpha = 5e-4

In [18]:
%%time
# Learning-rate sweep, alpha = 5e-4 (seed reset to 1 first).
make_seed(1)
adam2 = run_method(adam_update, num_epochs=50, alpha=5e-4)

CPU times: user 18min 15s, sys: 8min 33s, total: 26min 49s
Wall time: 24min 30s


#### alpha = 1e-3

In [19]:
%%time
# Learning-rate sweep, alpha = 1e-3 (seed reset to 1 first).
# This result is also plotted against the batch-normalised run.
make_seed(1)
adam3 = run_method(adam_update, num_epochs=50, alpha=1e-3)

CPU times: user 17min 27s, sys: 8min 6s, total: 25min 34s
Wall time: 22min 33s


#### alpha = 5e-3

In [20]:
%%time
# Learning-rate sweep, alpha = 5e-3 (seed reset to 1 first).
make_seed(1)
adam4 = run_method(adam_update, num_epochs=50, alpha=5e-3)

CPU times: user 17min 22s, sys: 8min 13s, total: 25min 36s
Wall time: 22min 33s


#### alpha = 1e-2

In [21]:
%%time
# Learning-rate sweep, alpha = 1e-2 (seed reset to 1 first).
make_seed(1)
adam5 = run_method(adam_update, num_epochs=50, alpha=1e-2)

CPU times: user 17min 52s, sys: 8min 14s, total: 26min 7s
Wall time: 23min


#### alpha = 1e-1

In [22]:
%%time
# Learning-rate sweep, alpha = 1e-1 (seed reset to 1 first).
make_seed(1)
adam6 = run_method(adam_update, num_epochs=50, alpha=1e-1)

CPU times: user 17min 40s, sys: 8min 7s, total: 25min 47s
Wall time: 22min 46s


In [23]:
import pickle

# The six sweep results are written back to back into one file, so reading
# them again takes six consecutive pickle.load calls in this same order.
with open('lrate_adam', 'wb') as f:
    for sweep_result in (adam1, adam2, adam3, adam4, adam5, adam6):
        pickle.dump(sweep_result, f)

In [24]:
%%time
# Same setup as the alpha = 1e-3 sweep run, but with BN=True passed through
# to build_mlp (seed reset to 1 first).
make_seed(1)
bn_adam = run_method(adam_update, num_epochs=50, alpha=1e-3, BN=True)

CPU times: user 20min 52s, sys: 15min 59s, total: 36min 52s
Wall time: 25min 10s


In [25]:
import pickle
# Save the result of the batch-normalised Adam run to the file 'bn_adam'.
with open('bn_adam', 'wb') as f:
    pickle.dump(bn_adam, f)

In [45]:
# Training curves are dashed, validation curves solid; plain Adam is blue,
# the batch-normalised run is red.
comparison_curves = [
    (adam3['train_acc'], 'b--', 'Adam 1e-3 train'),
    (adam3['val_acc'], 'b', 'Adam 1e-3 validation'),
    (bn_adam['train_acc'], 'r--', 'BN Adam 1e-3 train'),
    (bn_adam['val_acc'], 'r', 'BN Adam 1e-3 validation'),
]
for curve in comparison_curves:
    plt.plot(curve[0], curve[1])
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.xlim(xmax=49)
plt.legend([curve[2] for curve in comparison_curves], loc=0, fontsize=12)
plt.title('Adam comparison')
plt.show()

In [43]:
# One validation-accuracy curve per learning rate of the sweep.
alpha_curves = [
    (adam1, 'b', '1e-4'),
    (adam2, 'g', '5e-4'),
    (adam3, 'r', '1e-3'),
    (adam4, 'k', '5e-3'),
    (adam5, 'y', '1e-2'),
    (adam6, 'c', '1e-1'),
]
for curve in alpha_curves:
    plt.plot(curve[0]['val_acc'], curve[1])
plt.xlim(xmax=49)
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.title('Adam validation accuracy, different alpha')
plt.legend([curve[2] for curve in alpha_curves], loc=0, fontsize=12)
plt.show()