# Footy Tipping with Neural Networks

In [4]:
%matplotlib inline
from IPython.display import Image
import matplotlib.pyplot as plt
import statsmodels.formula.api as sm
import numpy as np
import pandas as pd
from scipy.io import loadmat
from scipy.optimize import minimize

In [5]:
# load in training data
data_train = pd.read_csv('afl_train.csv')
# drop if year 2005
data_train = data_train.drop(data_train[data_train.season==2000].index)
data_train.fillna(0)
# make X, y variables
X = data_train[['home_percentage', 'away_percentage', 
                       'home_last_season_percentage', 'away_last_season_percentage']].as_matrix()
y = data_train[['home_team_win']].as_matrix()
m, n = X.shape
X_cols = ['home_percentage', 'away_percentage', 
                       'home_last_season_percentage', 'away_last_season_percentage']
y_col = ['home_team_win']

# load in cross validation data
data_train = pd.read_csv('afl_cval.csv')
X_cv = data_train[['home_percentage', 'away_percentage', 
                       'home_last_season_percentage', 'away_last_season_percentage']].as_matrix()
y_cv = data_train[['home_team_win']].as_matrix()
m_cv, n_cv = X.shape
X_cols_cv = ['home_percentage', 'away_percentage', 
                       'home_last_season_percentage', 'away_last_season_percentage']
y_col_cv = ['home_team_win']

In [177]:
class NN_3_layer:
    
    def __init__(self, X, y, hidden_layer_size, lamda, eps_init):
        """
        Initialises the class NN_3_layer, 
        which computes a neural network with one hidden layer.
        X is a m, input_layer_size size numpy array
        Y is a m, output_layer_size size numpy array
        m is the number of observations/training sets
        input_layer_size is the number of features
        output_layer_size is the size of the output layer
        hidden_layer_size is the size of the hidden layer
        lamda is the regularisation constant
        eps_init is a constant initialising the initial theta
        """
        self.X = X
        self.y = y
        self.hidden_layer_size = hidden_layer_size
        self.lamda = lamda
        self.eps_init = eps_init
        self.m = X.shape[0]
        self.input_layer_size = X.shape[1]
        self.output_layer_size = y.shape[1]

    def sigmoid(self, z):
        """
        Compute the sigmoid function
        """
        denominator = 1.0 + np.exp(-1.0 * z)
        return 1.0 / denominator
    
    def sigmoid_gradient(self, a):
        """
        computes the gradient of the sigmoid function at input value z
        """
        a = np.array(a)
        output_shape = a.shape
        output = np.array([a * (1 - a)])
        output.reshape(output_shape)
        return output
    
    def square_up(self, theta_vector):
        """
        Returns the theta vector to matrices
        """
        vector_1_length = (self.input_layer_size + 1) * self.hidden_layer_size
        matrix_1 = theta_vector[0:vector_1_length].reshape(
            (self.hidden_layer_size, self.input_layer_size + 1))
        matrix_2 = theta_vector[vector_1_length:]
        matrix_2 = matrix_2.reshape(
            (self.output_layer_size, self.hidden_layer_size + 1))
        return (matrix_1, matrix_2)        

    def flatten_out(self, thetas):
        """
        Converts features from matrices to vectors.
        """
        return np.hstack((thetas[0].reshape(-1), thetas[1].reshape(-1)))

    def forward_propogation(self, thetas, x):
        """
        Forward propogation for a three layer nn
        Inputs:
            i = input layer
            h = hidden layer
            o = output layer (number of classes)
            m = number of training sets / observations
            theta1: (i+1) x h numpy array
            theta2: (h+1) x o numpy array
            X: m x i numpy array

        """
        (self.theta1, self.theta2) = self.square_up(thetas)
        m_fp = x.shape[0]
        # input layer
        a1 = np.ones((m_fp, self.input_layer_size+1))
        a1[:,1:] = x
        # hidden Layer
        z2 = np.dot(self.theta1, a1.T)
        a2_0 = self.sigmoid(z2)
        a2 = np.ones((a2_0.shape[0]+1, m_fp))
        a2[1:,:] = a2_0
        # output layer
        z3 = np.dot(self.theta2, a2)
        a3 = self.sigmoid(z3)    
        return a1, z2, a2, z3, a3

    def cost_function(self, thetas):
        """
        Calculates the cost function J after a round of forward propogation
        Inputs
            i = input layer
            h = hidden layer
            o = output layer (number of classes)
            m = number of training sets / observations
            theta1: (i+1) x h numpy array
            theta2: (h+1) x o numpy array
            X: m x i numpy array
        Output is a float
        """
        a1, z1, a2, z2, a3 = self.forward_propogation(thetas, self.X)
        h = a3
        j1 = np.dot(np.log(h), self.y)
        j2 = np.dot(np.log(1 - h), (1-self.y))
        J = (-1./len(y)) * (j1+j2)
        J = J[0][0]
        return J
    
    def cost_function_reg(self, thetas):
        """
        Calculates the regularized cost function J after a round of forward propogation
        Inputs
            thetas are rolled feature spaces
            lamda is a float
            i = input layer
            h = hidden layer
            o = output layer (number of classes)
            m = number of training sets / observations
            theta1: (i+1) x h numpy array
            theta2: (h+1) x o numpy array
            X: m x i numpy array
        Output is a float
        """
        base = self.cost_function(thetas)
        reg_0 = self.lamda / float(2*self.m)
        theta_flat = self.flatten_out(thetas)
        reg_1 = sum([t**2 for t in theta_flat])
        reg_term = reg_0 * reg_1
        return base + reg_term

    def cost_gradient(self, thetas):
        """
        Approximates the gradient vector of the NN via backpropagation
        """
        (theta1, theta2) = self.square_up(thetas) 
        init1_shape = (self.hidden_layer_size, self.input_layer_size+1)
        init2_shape = (self.output_layer_size, self.hidden_layer_size+1)
        delta_1 = np.zeros(init1_shape)
        delta_2 = np.zeros(init2_shape)
        count = 0
        ## back propagation
        for obs in self.X:
            a1, z2, a2, z3, a3 = self.forward_propogation(thetas,
                                obs.reshape(1, self.input_layer_size))
            # layer three
            # layer three
            delta_3_k = (a3 - self.y[count])[0][0]
            # layer two
            term1 = np.dot(theta2.T, delta_3_k) 
            term2 = self.sigmoid_gradient(a2)[0]
            delta_2_k = term1 * term2
            delta_2_k = delta_2_k[1:]
            # calculating delta terms
            term_2_ij = np.dot(delta_3_k, a2.T)
            term_1_ij = np.dot(delta_2_k, a1)
            delta_2 = delta_2 + term_2_ij
            delta_1 = delta_1 + term_1_ij
            count+=1

        delta_1 = delta_1 / float(m)
        delta_2 = delta_2 / float(m)
        deltas = self.flatten_out((delta_1, delta_2))
        return deltas
    
    def cost_gradient_reg(self, thetas):
        """
        Approximates the regularised gradient vector of the NN via backpropagation

        """
        (theta1, theta2) = self.square_up(thetas)
        grad = self.cost_gradient(thetas)
        grad = self.square_up(grad)
        term1 = self.lamda / self.m
        t1 = np.zeros(theta1.shape)
        t1[:,1:] = self.theta1[:,1:]
        t2 = np.zeros(theta2.shape)
        t2[:,1:] = self.theta2[:,1:]
        grad1 = grad[0] + (term1 * t1)
        grad2 = grad[1] + (term1 * t2)
        grad = self.flatten_out((grad1, grad2))
        return grad
    
    def numerical_gradient(self):
        """
        Approximates the gradient of the cost function by perturbing 
        element ij of layer ell by eps
        """
        thetas = flatten_out((theta1, theta2))
        f = []
        eps_g = 1e-3
        for i in range(len(self.theta_flat)):
            print "Element", i, "of", len(self.theta_flat)
            theta_high = self.theta_flat
            theta_low = self.theta_flat
            theta_high[i] = theta_high[i] + eps_g
            theta_low[i] = theta_low[i] - eps_g
            j_high = self.cost_function_reg(theta_high)
            j_low = self.cost_function_reg(theta_low)
            f_theta_approx = (j_high - j_low) / (2 * eps_g)
            f.append(f_theta_approx[0])
        return np.array(f)
    
    def prediction(self, a, y):
        pr = np.argmax(a.T, axis=1).reshape(y.shape)
        accuracy = 100 * sum((pr==y)) / float(len(y))
        print 'The neural network correctly predicts %f percent of the cells' % accuracy
        return
    
    def theta_init(self):
        "Initialises the theta"
        self.theta1 = np.random.rand(self.hidden_layer_size, self.input_layer_size+1
                              ) * 2 * self.eps_init - self.eps_init
        self.theta2 = np.random.rand(self.output_layer_size, self.hidden_layer_size+1
                              ) * 2 * self.eps_init - self.eps_init
        self.thetas_flat = self.flatten_out((self.theta1, self.theta2))
    
    def opt_finder(self):
        self.theta_init()
        self.theta_opt = minimize(
            self.cost_function_reg, x0=self.thetas_flat, 
            method="TNC", jac=self.cost_gradient_reg, 
            options={"maxiter":500, "disp":True}).x

In [173]:
nn = NN_3_layer(X, y, 4, 0, 0.12)
nn.opt_finder()

In [180]:
a1, z2, a2, z3, a3 = nn.forward_propogation(nn.theta_opt, X)
nn.prediction(a3)

The neural network correctly predicts 42.045455 percent of the cells
