In [1]:
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import time
import Image

import theano
import theano.tensor as T

import lasagne

from collections import OrderedDict
from lasagne import utils

In [2]:
def get_data(n=500000, side=60, data_dir="clMNIST"):
    """Load ``n`` images and their labels from ``data_dir``.

    Example ``i`` is read from ``<data_dir>/example<i>.png`` (pixel values
    via ``Image.getdata()``) and ``<data_dir>/y<i>`` (label, parsed with
    ``np.loadtxt``).

    Parameters
    ----------
    n : int
        Number of examples to load; files are indexed ``0 .. n-1``.
    side : int
        Edge length of each image in pixels. Every image must flatten to
        exactly ``side * side`` values, otherwise the row assignment raises.
    data_dir : str
        Directory that holds the ``example<i>.png`` and ``y<i>`` files.

    Returns
    -------
    (images, labels)
        ``images`` is a uint8 array of shape ``(n, 1, side, side)`` and
        ``labels`` is a uint8 array of shape ``(n,)``.

    NOTE(review): relies on the module-level ``Image`` name; the notebook
    imports it with the legacy ``import Image`` spelling -- confirm that the
    installed imaging library still supports it.
    """
    d = side * side
    train_images = np.zeros((n, d), dtype=np.uint8)
    train_labels = np.zeros(n, dtype=np.uint8)
    for i in range(n):
        image_path = os.path.join(data_dir, "example" + str(i) + ".png")
        label_path = os.path.join(data_dir, "y" + str(i))
        # The image object is a temporary here, so it is released as soon as
        # its pixels have been copied into the preallocated row.
        train_images[i] = np.array(Image.open(image_path).getdata())
        train_labels[i] = np.uint(np.loadtxt(label_path))
    return (np.reshape(train_images, (n, 1, side, side)),
            np.ravel(train_labels))

In [3]:
%%time
# Load every example from disk in one pass; get_data() returns the image
# tensor and the label vector. The recorded run took about 6min 46s wall time.
train_input, train_target = get_data()

CPU times: user 5min 48s, sys: 31.5 s, total: 6min 20s
Wall time: 6min 46s


In [5]:
# Keep handles on the complete dataset under neutral names before splitting.
data_input = train_input
data_target = train_target

In [6]:
# Test split: every example from index 400000 onward.
test_input = data_input[400000:]
test_target = data_target[400000:]

In [7]:
# Validation split: examples 300000 through 399999.
val_input = data_input[300000:400000]
val_target = data_target[300000:400000]

In [8]:
# Training split: the first 300000 examples. This rebinds train_input /
# train_target, which until this point referred to the full dataset.
train_input = data_input[:300000]
train_target = data_target[:300000]

In [13]:
def make_seed(n):
    """Seed NumPy's global random generator with ``n`` and register that
    same generator (the ``np.random`` module) as Lasagne's RNG."""
    global_rng = np.random
    global_rng.seed(n)
    lasagne.random.set_rng(global_rng)

In [14]:
def _adam_updates(loss_or_grads, params, learning_rate, beta1, beta2,
                  epsilon, epsilon_inside_sqrt):
    """Build the Adam update dictionary shared by both public variants.

    ``epsilon_inside_sqrt`` selects where the stabiliser enters the
    denominator of the step: ``sqrt(v_t) + epsilon`` when False,
    ``sqrt(v_t + epsilon)`` when True. Everything else is identical.
    """
    all_grads = lasagne.updates.get_or_compute_grads(loss_or_grads, params)
    # Shared step counter; its own update (t_prev -> t_prev + 1) is added last.
    t_prev = theano.shared(utils.floatX(0.))
    updates = OrderedDict()

    t = t_prev + 1
    # Step size with the bias corrections for both moment estimates folded in.
    a_t = learning_rate * T.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)

    for param, g_t in zip(params, all_grads):
        value = param.get_value(borrow=True)
        # Zero-initialised moment accumulators with the parameter's shape,
        # dtype and broadcast pattern.
        m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)
        v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)

        m_t = beta1 * m_prev + (1 - beta1) * g_t
        v_t = beta2 * v_prev + (1 - beta2) * g_t ** 2
        if epsilon_inside_sqrt:
            denominator = T.sqrt(v_t + epsilon)
        else:
            denominator = T.sqrt(v_t) + epsilon
        step = a_t * m_t / denominator

        updates[m_prev] = m_t
        updates[v_prev] = v_t
        updates[param] = param - step

    updates[t_prev] = t
    return updates


def adam_update(loss_or_grads, params, learning_rate=1e-3, beta1=0.9,
                        beta2=0.999, epsilon=1e-8):
    """Adam updates with epsilon added outside the square root.

    Parameters
    ----------
    loss_or_grads : passed to ``lasagne.updates.get_or_compute_grads``
        together with ``params`` to obtain one gradient per parameter.
    params : list of Theano shared variables
        Parameters to update (``get_value`` and ``broadcastable`` are read).
    learning_rate, beta1, beta2 : step size and moment decay rates.
    epsilon : added to ``sqrt(v_t)`` in the denominator of the step.

    Returns
    -------
    OrderedDict
        Maps each moment accumulator, each parameter and the step counter
        to its update expression.
    """
    return _adam_updates(loss_or_grads, params, learning_rate, beta1, beta2,
                         epsilon, epsilon_inside_sqrt=False)


def adam_update2(loss_or_grads, params, learning_rate=1e-3, beta1=0.9,
                 beta2=0.999, epsilon=1e-6):
    """Adam updates with epsilon added inside the square root.

    Same contract as ``adam_update`` except that the step denominator is
    ``sqrt(v_t + epsilon)`` and the default ``epsilon`` is ``1e-6``.
    """
    return _adam_updates(loss_or_grads, params, learning_rate, beta1, beta2,
                         epsilon, epsilon_inside_sqrt=True)

In [None]:
def build_mlp(input_var=None, BN=False, input_size=None,
              hidden_units=(100, 100, 100)):
    """Build a fully connected softmax classifier over square 1-channel input.

    Parameters
    ----------
    input_var : Theano variable or None
        Forwarded to ``InputLayer`` as its ``input_var``.
    BN : bool
        When True, ``lasagne.layers.batch_norm`` is applied to the input
        layer and to every hidden layer.
    input_size : int or None
        Edge length of the input images. ``None`` falls back to the
        notebook-level global ``sz`` that this function read originally.
    hidden_units : sequence of int
        Width of each rectifier hidden layer, in order. The default
        reproduces the original three layers of 100 units.

    Returns
    -------
    The output layer: a 10-unit softmax ``DenseLayer``.
    """
    if input_size is None:
        # Backward-compatible default; raises NameError if ``sz`` is not
        # defined in the notebook, exactly as the original did.
        input_size = sz

    network = lasagne.layers.InputLayer(
            shape=(None, 1, input_size, input_size),
            input_var=input_var)
    if BN:
        network = lasagne.layers.batch_norm(network)

    for num_units in hidden_units:
        # GlorotUniform is passed explicitly for every layer; it is also
        # DenseLayer's documented default, so this matches the original
        # layers that omitted W.
        network = lasagne.layers.DenseLayer(
                network, num_units=num_units,
                nonlinearity=lasagne.nonlinearities.rectify,
                W=lasagne.init.GlorotUniform())
        if BN:
            network = lasagne.layers.batch_norm(network)

    return lasagne.layers.DenseLayer(
            network, num_units=10,
            nonlinearity=lasagne.nonlinearities.softmax)