In [1]:
# theano imports
import theano
from theano import tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
from theano.tensor.nnet.conv import conv2d
# from theano.tensor.signal.downsample import max_pool_2d
from theano.tensor.signal.pool import pool_2d as max_pool_2d
from theano.tensor.nnet import batch_normalization

# other imports
from sklearn.cross_validation import train_test_split
import numpy as np
import pandas as pd
import pickle

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Module-level random stream; dropout() draws its binary keep-masks from it.
srng = RandomStreams()

def floatX(X):
    """Return ``X`` as a NumPy array cast to Theano's configured float dtype."""
    target_dtype = theano.config.floatX
    return np.asarray(X, dtype=target_dtype)

def glorot_init_weights(shape):
    """Build a shared weight matrix sampled uniformly around zero.

    Parameters
    ----------
    shape : tuple of two ints
        ``(rows, cols)`` of the matrix; it is unpacked into exactly two values.

    Returns
    -------
    Theano shared variable holding samples from
    ``[-sqrt(6) / sqrt(rows + cols), +sqrt(6) / sqrt(rows + cols))``.
    """
    rows, cols = shape
    # Full width of the sampling interval. The trailing 1.0 is a gain factor;
    # the original note suggested 0.25 for sigmoid, 0.1 for softmax and 1.0
    # for tanh/relu.
    interval_width = 2.0 * (6**0.5) / ((rows + cols)**0.5) * 1.0
    # random_sample is uniform on [0, 1); subtracting 0.5 centres it on zero.
    centred = np.random.random_sample(shape) - 0.5
    return theano.shared(floatX(centred * interval_width))

def init_weights(shape):
    """Build a shared variable of ``shape`` filled with Gaussian noise scaled by 0.01."""
    noise = np.random.randn(*shape)
    scaled = noise * 0.01
    return theano.shared(floatX(scaled))

def activate(X):
    """Apply Theano's ReLU (``T.nnet.relu``) to ``X``."""
    relu = T.nnet.relu
    return relu(X)

def rectify(X):
    """Leaky rectifier: element-wise maximum of ``X`` and ``0.01 * X``."""
    leaked = 0.01 * X
    return T.maximum(X, leaked)

def ELU(X, alpha=0.1):
    """Exponential linear unit.

    Returns ``X`` where ``X > 0`` and ``alpha * (exp(X) - 1)`` elsewhere.
    """
    is_positive = X > 0
    negative_branch = alpha * (T.exp(X) - 1)
    return T.switch(is_positive, X, negative_branch)
    
def softmax(X):
    """Numerically stable softmax along axis 1 of a symbolic tensor.

    The per-slice maximum is subtracted before exponentiation so that
    ``exp`` never sees a large positive argument.

    Parameters
    ----------
    X : Theano tensor with at least two dimensions.

    Returns
    -------
    Tensor of the same shape as ``X`` whose entries sum to 1 along axis 1.
    """
    # keepdims=True leaves axis 1 in place as a broadcastable size-1 axis, so
    # the reductions line up with X for any rank. The previous
    # dimshuffle(0, 1, 'x', 'x') re-inserted axes in the wrong positions after
    # axis 1 had been reduced away.
    shifted = X - X.max(axis=1, keepdims=True)
    e_x = T.exp(shifted)
    return e_x / e_x.sum(axis=1, keepdims=True)

def dropout(X, p=0.0):
    """Randomly zero entries of ``X`` with probability ``p`` and rescale the rest.

    When ``p`` is not greater than zero, ``X`` is returned untouched.
    Otherwise ``X`` is multiplied by a binomial mask drawn from ``srng`` with
    keep-probability ``1 - p`` and then divided by that keep-probability.
    """
    if not (p > 0):
        return X

    keep_prob = 1 - p
    mask = srng.binomial(X.shape, p=keep_prob, dtype=theano.config.floatX)
    X *= mask
    X /= keep_prob
    return X

def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    """Build RMSprop update pairs for ``params`` that minimise ``cost``.

    For each parameter a shared accumulator, initialised to zeros of the
    parameter's shape, tracks a running average of the squared gradient with
    decay ``rho``. The gradient is divided by ``sqrt(accumulator + epsilon)``
    before the step of size ``lr`` is taken.

    Returns
    -------
    list of ``(shared_variable, new_value)`` tuples: for every parameter, the
    accumulator update followed by the parameter update.
    """
    gradients = T.grad(cost=cost, wrt=params)
    updates = []

    for param, grad in zip(params, gradients):
        sq_avg = theano.shared(param.get_value() * 0.)
        sq_avg_next = rho * sq_avg + (1 - rho) * grad ** 2
        scaled_grad = grad / T.sqrt(sq_avg_next + epsilon)
        updates += [
            (sq_avg, sq_avg_next),
            (param, param - lr * scaled_grad),
        ]

    return updates

def _batch_norm_unit_std(activations, gamma, beta):
    """Centre ``activations`` on their own mini-batch mean, then apply gamma/beta.

    The std passed to ``batch_normalization`` is all ones (same shape as the
    mean), so only the mean is normalised; the variance is left as is.
    NOTE(review): the unit std is kept from the original code -- confirm that
    skipping variance normalisation is intended.
    """
    batch_mean = activations.mean((0,), keepdims=True)
    unit_std = T.ones_like(batch_mean)
    return batch_normalization(activations, gamma=gamma, beta=beta,
                               mean=batch_mean, std=unit_std,
                               mode='high_mem')


def model(X, wi, gi, bbi, wh, bh, gh, bbh, wo, bo, go, bbo, p_drop_conv, p_drop_hidden):
    """Build the symbolic graph: convolution -> hidden layer -> output layer -> softmax.

    Parameters
    ----------
    X : input data tensor fed to ``conv2d``.
    wi, gi, bbi : convolution filters, batch-norm gamma and beta for the input layer.
    wh, bh, gh, bbh : weights, bias, batch-norm gamma and beta for the hidden layer.
    wo, bo, go, bbo : weights, bias, batch-norm gamma and beta for the output layer.
    p_drop_conv : dropout probability applied after the convolution layer.
    p_drop_hidden : dropout probability applied after the hidden and output layers.

    Returns
    -------
    tuple ``(layer_1, layer_2, layer_3, pyx)`` where ``pyx`` is
    ``T.nnet.softmax(layer_3)``.

    Each layer is normalised with statistics of its own activations. The
    previous code passed the mean of the raw input ``X`` to every
    ``batch_normalization`` call (and took layer 1's std shape from ``X``),
    which does not match the 2-D activations being normalised.
    """
    # --- layer 1: convolution, flattened to 2-D -------------------------------
    layer_1 = conv2d(X, wi, border_mode='valid')
    # 256 is hard-coded; presumably the number of convolution filters -- confirm.
    layer_1 = layer_1.reshape((-1, 256))
    layer_1 = _batch_norm_unit_std(layer_1, gi, bbi)
    layer_1 = dropout(layer_1, p_drop_conv)

    # --- layer 2: hidden layer with leaky rectifier ----------------------------
    layer_2 = T.dot(layer_1, wh) + bh
    layer_2 = _batch_norm_unit_std(layer_2, gh, bbh)
    layer_2 = rectify(layer_2)
    layer_2 = dropout(layer_2, p_drop_hidden)

    # --- layer 3: output layer -------------------------------------------------
    layer_3 = T.dot(layer_2, wo) + bo
    layer_3 = _batch_norm_unit_std(layer_3, go, bbo)
    layer_3 = dropout(layer_3, p_drop_hidden)

    # --- class probabilities ---------------------------------------------------
    pyx = T.nnet.softmax(layer_3)
    return layer_1, layer_2, layer_3, pyx

In [3]:
# --- network hyperparameters ---

# number of examples per mini-batch
mbs = 128

# number of feature maps produced by the convolution
n_conv = 256

# width of the hidden layer
h_depth = 600

# width of the output layer
o_depth = 6

# ------------------------------------------------------------------------------

# initialize model parameters as Theano shared variables:
# wi, gi, bbi, wh, bh, gh, bbh, wo, bo, go, bbo

# input (convolution) parameters
# wi is created through init_weights so that it is a shared floatX tensor like
# the other parameters; np.random.rand does not accept a shape tuple.
# Shape is presumably (filters, input channels, rows, cols) -- confirm against the data.
wi = init_weights((n_conv, 1, 13, 21))
# gamma/beta for batch normalisation: ones/zeros, wrapped as shared variables
# to match gh/bbh and go/bbo below.
gi = theano.shared(floatX(np.ones(n_conv)))
bbi = theano.shared(floatX(np.zeros(n_conv)))

# hidden parameters
wh = glorot_init_weights((n_conv, h_depth))
bh = theano.shared(floatX(np.zeros(h_depth)))
gh = theano.shared(floatX(np.ones(h_depth)))
bbh = theano.shared(floatX(np.zeros(h_depth)))

# output parameters
wo = glorot_init_weights((h_depth, o_depth))
bo = theano.shared(floatX(np.zeros(o_depth)))
go = theano.shared(floatX(np.ones(o_depth)))
bbo = theano.shared(floatX(np.zeros(o_depth)))

# ------------------------------------------------------------------------------

<TensorType(float32, 4D)>