In [2]:
import theano
import theano.tensor as T
import numpy as np

In [3]:
class layer:
    '''
    One fully connected layer: output = activation(WX+b)
    '''
    def __init__(self, activation_func, W_init, b_init):
        '''
        Keep the activation function and wrap the initial weights and bias
        in Theano shared variables.

        X is the data matrix (each column is one example).
        W has shape (n_out, n_in): it takes 'n_in' inputs from the previous
        layer and produces 'n_out' outputs for the next layer.

        parameters:
        activation_func: activation function applied to WX+b
            theano.tensor.elemwise.Elemwise
        W_init: initial weight values, 2-D with shape (n_out, n_in)
            np.ndarray
        b_init: initial bias values, n_out elements in total
            np.ndarray
        '''
        float_type = theano.config.floatX
        # Only the row count is used below; the two-name unpacking still
        # requires W_init to be 2-D.
        n_out, _ = W_init.shape

        self.activation_func = activation_func

        # borrow=True: Theano is allowed to use the memory of the array it is given
        self.W = theano.shared(value=W_init.astype(float_type), borrow=True)

        # The bias is stored as an (n_out, 1) column whose second axis is
        # declared broadcastable, so it can be added to every example column.
        bias_column = b_init.reshape(n_out, 1).astype(float_type)
        self.b = theano.shared(value=bias_column,
                               borrow=True,
                               broadcastable=(False, True))

        self.params = [self.W, self.b]

    def output(self, X):
        '''
        Gives the output for data matrix X: activation(WX+b)
        '''
        weighted_input = T.dot(self.W, X)
        return self.activation_func(weighted_input + self.b)

In [4]:
class mlp:
    def __init__(self, n_layer, output_activation_func, hidden_activation_func, W_init, b_init):
        '''
        Multi-layer perceptron: sequential layers, each fed the output of the previous one.

        parameters:
        n_layer: total number of layers (n_layer-1 hidden layers plus one output layer);
                 values below 1 still build the single output layer
            int
        output_activation_func: activation function of the output layer
        hidden_activation_func: activation function of every hidden layer
        W_init: initial weight values; either one array reused for every layer,
                or a list/tuple holding one (n_out, n_in) array per layer, first layer first
            np.ndarray, or list/tuple of np.ndarray
        b_init: initial bias values; same two forms as W_init
            np.ndarray, or list/tuple of np.ndarray

        raises:
        ValueError if a list/tuple W_init or b_init does not hold exactly one entry per layer

        Note: each layer computes dot(W, X), so its n_out rows become the next layer's
        n_in columns. Reusing a single array for every layer therefore only chains
        correctly when that array is square; pass per-layer arrays otherwise.
        '''
        n_hidden = max(n_layer - 1, 0)
        n_total = n_hidden + 1
        weights = self._per_layer(W_init, n_total, 'W_init')
        biases = self._per_layer(b_init, n_total, 'b_init')
        activations = [hidden_activation_func] * n_hidden + [output_activation_func]

        # The loop variables are deliberately not called 'layer', so the class
        # of that name is never shadowed inside this method.
        self.layers = [layer(act, W, b) for act, W, b in zip(activations, weights, biases)]

        #mlp.params is a flat list of parameters, not a list of lists of parameters
        self.params = [param for lyr in self.layers for param in lyr.params]

    @staticmethod
    def _per_layer(init, n_total, name):
        '''
        Expand an initial value into a list with one entry per layer.

        A list/tuple is taken as per-layer values and must have n_total entries;
        anything else is reused unchanged for every layer.
        '''
        if isinstance(init, (list, tuple)):
            if len(init) != n_total:
                raise ValueError('%s has %d entries but the network has %d layers'
                                 % (name, len(init), n_total))
            return list(init)
        return [init] * n_total

    def output(self, X):
        '''
        Pass X through every layer in order and return the last layer's output.
        '''
        for lyr in self.layers:
            X = lyr.output(X)
        return X

    def error(self, X, y):
        '''
        Sum of squared differences between y and the network output for X.
        '''
        return T.sum((y - self.output(X)) ** 2)

In [5]:
def gradient_update(loss, params, eta):
    '''
    Build the update pairs for one gradient-descent step.

    parameters:
    loss: expression to differentiate
    params: parameters to update, each differentiated separately with T.grad
    eta: step size multiplying the gradient

    returns:
    list of (param, param - eta*gradient) tuples, in the order of params
    '''
    return [(param, param + -eta * T.grad(loss, param)) for param in params]

In [6]:
#Placeholder initial values: choose appropriate W and b
#NOTE(review): mlp hands this same (800, 784) W_init to every layer it builds,
#so layers after the first would receive an 800-row input for a 784-column
#weight matrix -- looks like a shape mismatch once data is fed in; confirm.
b_init = np.zeros(800)
W_init = np.zeros((800, 784))

#step size for the gradient updates
eta = 0.1

#symbolic input measurement matrix (each column is one example)
X = T.matrix('X')

#symbolic classification vector
#NOTE(review): the network output is a matrix while y is a vector, so the
#error relies on broadcasting -- confirm this is the intended label layout.
y = T.vector('y')

#n_layer=3: mlp builds n_layer-1 = 2 layers with the hidden activation (relu)
#followed by one output layer (sigmoid)
MLP = mlp(3, T.nnet.sigmoid, T.nnet.relu, W_init, b_init)

#summed squared error of the network on (X, y)
cost = MLP.error(X, y)

#training(X, y) returns the cost and applies one gradient step to MLP.params
training = theano.function(inputs=[X, y],
                           outputs=cost,
                           updates=gradient_update(cost, MLP.params, eta))

#y_hat(X) returns the network output for X
y_hat = theano.function(inputs=[X], outputs=MLP.output(X))