# In [0]:
# -*- coding: utf-8 -*-
"""ann_theano_model.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1xwn7DYqyEDKrdQ9atMc6LRIAOWGyLKMV
"""

import numpy as np
import theano
import theano.tensor as T
import matplotlib.pyplot as plt

def init_weights(M1, M2):
    """Return freshly initialised ``(W, b)`` for a layer mapping M1 inputs to M2 outputs.

    Args:
        M1: Number of input units (rows of ``W``).
        M2: Number of output units (columns of ``W`` and length of ``b``).

    Returns:
        A tuple ``(W, b)`` where ``W`` is an ``(M1, M2)`` array of standard
        normal samples divided by ``sqrt(M1)`` and ``b`` is a length-``M2``
        array of zeros.
    """
    # Dividing by sqrt(fan-in) shrinks the spread of the draws as M1 grows.
    fan_in_scale = np.sqrt(M1)
    weights = np.random.randn(M1, M2) / fan_in_scale
    biases = np.zeros(M2)
    return weights, biases

class HiddenLayer(object):
    """A single fully connected layer computing ``f(X.dot(W) + b)``.

    The weight matrix and bias vector are stored as Theano shared variables
    and exposed together through ``params``.
    """

    def __init__(self, M1, M2, f):
        """Create the layer's shared parameters.

        Args:
            M1: Input dimensionality.
            M2: Output dimensionality.
            f: Activation callable applied to the affine output.
        """
        self.M1, self.M2, self.f = M1, M2, f
        initial_W, initial_b = init_weights(M1, M2)
        self.W = theano.shared(initial_W)
        self.b = theano.shared(initial_b)
        self.params = [self.W, self.b]

    def forward(self, X):
        """Return the layer's activation for the input ``X``.

        When the activation is ``T.nnet.relu`` it is called with
        ``alpha=0.1``; every other activation is called with the affine
        output only.
        """
        pre_activation = X.dot(self.W) + self.b
        if self.f == T.nnet.relu:
            # alpha is forwarded to Theano's relu (its slope for negative inputs).
            return self.f(pre_activation, alpha=0.1)
        return self.f(pre_activation)

class ANN(object):
    """Feed-forward classifier: a stack of ``HiddenLayer`` objects with a softmax output.

    The layers, the compiled Theano functions and the parameter list are all
    created inside ``fit``; ``predict`` and ``score`` are only usable after
    ``fit`` has been called.
    """

    def __init__(self, hidden_layer_sizes):
        """Store the hidden layer widths.

        Args:
            hidden_layer_sizes: Iterable of hidden layer widths, in order
                from the input side to the output side.
        """
        self.hidden_layer_sizes = hidden_layer_sizes

    def fit(
        self,
        X,
        Y,
        X_test,
        Y_test,
        activation=T.nnet.relu,
        learning_rate=1e-3,
        mu=0.99,
        reg=0,
        epochs=100,
        batch_size=None,
        print_period=10,
        show_fig=True,
        decay=0.999,
        eps=10e-10,
    ):
        """Build the network and train it with RMSProp plus momentum.

        Args:
            X: Training inputs, a 2-D array of shape ``(N, D)``.
            Y: Training targets, a 2-D array with one column per class
                (presumably one-hot indicator rows; confirm with the caller).
            X_test: Inputs used to compute the classification rate after
                every epoch.
            Y_test: Targets matching ``X_test``, same layout as ``Y``.
            activation: Activation callable for every hidden layer.
            learning_rate: Step size of the gradient update.
            mu: Momentum coefficient applied to the velocity.
            reg: Multiplier of the weight penalty added to the cost.
            epochs: Index of the last epoch; the loop runs for
                ``i = 0 .. epochs`` inclusive, i.e. ``epochs + 1`` passes.
            batch_size: Rows per mini-batch. ``None`` means the whole
                training set; values larger than ``N`` are treated as ``N``.
            print_period: Progress is printed when ``i % print_period == 0``.
            show_fig: When true, plot the cost and classification-rate
                histories after training.
            decay: Decay rate of the running average of squared gradients.
            eps: Constant added to the denominator of the update step.
        """
        X = X.astype(np.float32)
        Y = Y.astype(np.int32)
        N, D = X.shape

        # Hidden layers first, chaining each layer's output width into the next.
        self.layers = []
        fan_in = D
        for fan_out in self.hidden_layer_sizes:
            self.layers.append(HiddenLayer(fan_in, fan_out, activation))
            fan_in = fan_out

        # Softmax output layer with one unit per target column.
        K = Y.shape[1]
        self.layers.append(HiddenLayer(fan_in, K, T.nnet.softmax))

        if batch_size is None:
            batch_size = N
        # A batch larger than the data set would give zero batches per epoch
        # and leave the recorded cost undefined; fall back to full-batch.
        batch_size = min(batch_size, N)

        self.weights = []
        for layer in self.layers:
            self.weights += layer.params

        # Symbolic inputs and the forward pass.
        thX = T.matrix('X')
        thY = T.matrix('Y')
        p_y_given_x = self.forward(thX)

        velocities = [theano.shared(np.zeros_like(w.get_value())) for w in self.weights]
        caches = [theano.shared(np.ones_like(w.get_value())) for w in self.weights]

        # Cross-entropy plus the weight penalty. The penalty must be ADDED to
        # the cost being minimised, otherwise it rewards large weights.
        reg_pen = reg * T.mean([(w * w).sum() for w in self.weights])
        cost = -(thY * T.log(p_y_given_x)).sum() + reg_pen
        predictions = T.argmax(p_y_given_x, axis=1)
        grads = T.grad(cost, self.weights)

        # RMSProp with momentum. The velocity has to enter the graph as the
        # symbolic shared variable: calling get_value() here would freeze it
        # at its initial zeros and silently disable momentum.
        updates = []
        for w, v, cache, g in zip(self.weights, velocities, caches, grads):
            new_cache = decay * cache + (1 - decay) * (g * g)
            new_v = mu * v - learning_rate * g / (T.sqrt(new_cache) + eps)
            updates.append((cache, new_cache))
            updates.append((v, new_v))
            updates.append((w, w + new_v))

        train = theano.function(
            inputs=[thX, thY],
            updates=updates,
            outputs=[cost, predictions]
        )

        self.get_prediction = theano.function(
            inputs=[thX],
            outputs=predictions
        )

        # Main training loop. Rows beyond n_batches * batch_size are not
        # visited, and the cost recorded per epoch is that of the last batch.
        costs = []
        c_rates = []
        n_batches = N // batch_size
        for i in range(epochs + 1):
            for j in range(n_batches):
                start = j * batch_size
                stop = start + batch_size
                batch_cost, _ = train(X[start:stop], Y[start:stop])

            costs.append(batch_cost)
            c_r = self.score(X_test, Y_test)
            c_rates.append(c_r)
            if i % print_period == 0:
                print('Epoch ', i, '/', epochs, ': Cost: ', batch_cost, ' Classification rate: ', c_r)

        if show_fig:
            plt.plot(costs)
            plt.show()
            plt.plot(c_rates)
            plt.show()

    def forward(self, X):
        """Pass ``X`` through every layer in order and return the final output."""
        out = X
        for layer in self.layers:
            out = layer.forward(out)
        return out

    def predict(self, X):
        """Return the predicted class index for each row of ``X``."""
        return self.get_prediction(X)

    def score(self, X, Y):
        """Return the fraction of rows whose predicted class equals ``Y.argmax(axis=1)``."""
        predicted = self.get_prediction(X)
        # Named 'targets' rather than 'T' to avoid shadowing the theano.tensor alias.
        targets = Y.argmax(axis=1)
        return np.mean(predicted == targets)