# In [2]:
import numpy as np
import pandas as pd
import time
import random
import operator

# In [35]:
class Neural_Network(object):
    """Fully connected feed-forward network with sigmoid units on every layer.

    ``layers_size`` lists the width of each layer, input first and output
    last. One weight matrix of shape ``(layers_size[i-1], layers_size[i])``
    and one bias row of shape ``(1, layers_size[i])`` is kept per connection.
    Training is mini-batch gradient descent on the gradients computed by
    :meth:`backprop`.
    """

    def __init__(self, layers_size,
                 do_random_seed=True, random_seed=1):
        """Create the network and initialise its parameters.

        Args:
            layers_size: sequence of layer widths (input, hidden..., output).
            do_random_seed: when truthy, seed NumPy's global RNG first so
                weight initialisation and shuffling are reproducible.
            random_seed: seed passed to ``np.random.seed``.
        """
        self.layers_size = layers_size
        if do_random_seed:
            np.random.seed(random_seed)

        self.init_weights()
        self.init_biases()

        self.regularize = False
        self.do_l2 = False
        self.do_dropout = False
        self.test_time = False

    def init_biases(self):
        """(Re)create ``self.biases``, one zero row vector per non-input layer."""
        self.biases = [self.get_biases(size) for size in self.layers_size[1:]]

    def get_biases(self, output_size):
        """Return a zero bias row of shape ``(1, output_size)``."""
        return np.zeros((1, output_size))

    def init_weights(self):
        """(Re)create ``self.weights``, one matrix per pair of adjacent layers."""
        self.weights = [
            self.get_weights(fan_in, fan_out)
            for fan_in, fan_out in zip(self.layers_size[:-1], self.layers_size[1:])
        ]

    def get_weights(self, input_size, output_size):
        """Return a random ``(input_size, output_size)`` matrix.

        Entries are standard-normal draws scaled by ``sqrt(2 / input_size)``.
        """
        return np.random.randn(input_size, output_size) * np.sqrt(2.0 / input_size)

    @staticmethod
    def sigmoid(x):
        """Element-wise logistic function, evaluated without overflow.

        ``exp`` is only ever applied to non-positive values: for ``x >= 0``
        the result is ``1 / (1 + exp(-x))`` and for ``x < 0`` it is
        ``exp(x) / (1 + exp(x))``.
        """
        x = np.asarray(x, dtype=float)
        z = np.exp(-np.abs(x))
        return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))

    @staticmethod
    def softmax(x):
        """Row-wise softmax of a 2-D array.

        The maximum of *each row* is subtracted before exponentiating, so a
        row whose values lie far below another row's cannot underflow to an
        all-zero numerator and produce NaN.
        """
        x = np.asarray(x, dtype=float)
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def cross_entropy(self, yHat, y):
        """Binary cross-entropy averaged over rows, plus the L2 penalty if enabled.

        Args:
            yHat: predicted probabilities, first axis indexes samples.
            y: targets with the same shape as ``yHat``.

        Returns:
            The scalar loss. Predictions are clipped away from exactly 0 and 1
            so saturated outputs give a finite value instead of NaN/inf.
        """
        eps = 1e-12
        clipped = np.clip(yHat, eps, 1.0 - eps)
        log_likelihood = np.sum(y * np.log(clipped) + (1 - y) * np.log(1 - clipped))
        return -log_likelihood / yHat.shape[0] + self.__loss_regularization_term()

    def forward(self, x):
        """Propagate ``x`` through the network.

        Stores the batch size in ``self.training_size`` and every layer's
        activation (input included) in ``self.activations``.

        Returns:
            The activation of the last layer.
        """
        self.training_size = x.shape[0]
        activation = x
        self.activations = [activation]
        for w, b in zip(self.weights, self.biases):
            activation = self.sigmoid(np.dot(activation, w) + b)
            self.activations.append(activation)
        return activation

    def backprop(self, x, y):
        """Compute gradients from the activations stored by the last ``forward``.

        Args:
            x: unused; kept for interface compatibility.
            y: targets matching the last forward pass.

        Returns:
            ``(deltas, dJdWeights, dJdBiases)``; each list is ordered from the
            OUTPUT layer back to the first layer. Gradients are summed (not
            averaged) over the batch.
        """
        # Output delta a - y corresponds to sigmoid output + cross-entropy loss.
        deltas = [self.activations[-1] - y]
        dJdWeights = [np.dot(self.activations[-2].T, deltas[-1])]
        dJdBiases = [np.sum(deltas[-1], axis=0)]
        for layer in range(len(self.weights) - 2, -1, -1):
            hidden = self.activations[layer + 1]
            # sigmoid'(z) expressed through the activation: a * (1 - a)
            delta = np.dot(deltas[-1], self.weights[layer + 1].T) * (hidden * (1 - hidden))
            deltas.append(delta)
            dJdWeights.append(np.dot(self.activations[layer].T, delta))
            dJdBiases.append(np.sum(delta, axis=0))
        # NOTE(review): the L2 derivative term is not added to dJdWeights, so
        # apply_l2_regularization currently only affects the reported loss.
        return deltas, dJdWeights, dJdBiases

    def learn_using_stochastic_gradient_descent(self, learning_rate, x, y, mini_batch_size):
        """Run one epoch of mini-batch gradient descent.

        Rows of ``x`` and ``y`` are shuffled together via a single index
        permutation drawn from NumPy's global RNG (so the seed given to the
        constructor makes runs reproducible); the inputs are not modified.

        Args:
            learning_rate: step size applied to the summed batch gradients.
            x: 2-D array of inputs, one sample per row.
            y: 2-D array of targets, one sample per row.
            mini_batch_size: number of rows per update step.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        # random.shuffle must not be used here: its swap idiom operates on row
        # *views* of a 2-D ndarray and silently duplicates/loses rows.
        order = np.random.permutation(x.shape[0])
        x_train = x[order]
        y_train = y[order]
        for start in range(0, x_train.shape[0], mini_batch_size):
            batch_x = x_train[start:start + mini_batch_size]
            batch_y = y_train[start:start + mini_batch_size]
            self.forward(batch_x)
            _, dJdWeights, dJdBiases = self.backprop(batch_x, batch_y)
            # backprop returns gradients output-layer first; parameters are
            # stored input-layer first, hence the reversal.
            for j, (grad_w, grad_b) in enumerate(zip(reversed(dJdWeights), reversed(dJdBiases))):
                self.weights[j] = self.weights[j] - learning_rate * grad_w
                self.biases[j] = self.biases[j] - learning_rate * grad_b

    def dropout(self):
        """Apply dropout to the hidden layers, if enabled via ``apply_dropout``.

        While ``self.test_time`` is false, one keep-mask per hidden layer is
        drawn (shared by all rows of the batch), stored in
        ``self.dropout_hidden_mask`` (a list, first hidden layer first) and
        multiplied into the stored hidden activations; this requires a prior
        ``forward`` call. While ``self.test_time`` is true, the weights leaving
        each hidden layer are scaled by the keep probability instead; this
        rescales the weights on every call.

        NOTE(review): ``forward`` does not call this method; the caller must.
        """
        if not self.do_dropout:
            return
        hidden_layers = range(1, len(self.layers_size) - 1)
        if not self.test_time:
            self.dropout_hidden_mask = []
            for i in hidden_layers:
                mask = np.random.rand(self.layers_size[i]) < self.dropout_rate
                self.dropout_hidden_mask.append(mask)
                self.activations[i] = self.activations[i] * mask
        else:
            for i in hidden_layers:
                self.weights[i] = self.weights[i] * self.dropout_rate

    def apply_dropout(self, dropout_rate):
        """Enable dropout; ``dropout_rate`` is the probability of dropping a unit.

        ``self.dropout_rate`` stores the complementary KEEP probability.
        """
        self.do_dropout = True
        self.dropout_rate = 1 - dropout_rate

    def apply_l2_regularization(self, regularization_rate):
        """Enable the L2 penalty with the given coefficient."""
        self.regularize = True
        self.do_l2 = True
        self.regularization_rate = regularization_rate

    def __derivative_regularization_term(self):
        """Return the L2 gradient contribution for each weight matrix.

        The list is ordered like ``self.weights``; every entry is 0 when L2
        regularization is disabled. Uses ``self.training_size`` from the last
        ``forward`` call.
        """
        if not (self.regularize and self.do_l2):
            return [0] * len(self.weights)
        scale = self.regularization_rate / self.training_size
        return [scale * w for w in self.weights]

    def __loss_regularization_term(self):
        """Return the L2 penalty ``rate / (2 * m) * sum(||W||_F ** 2)``.

        ``m`` is ``self.training_size`` from the last ``forward`` call. Returns
        0 when L2 regularization is disabled.
        """
        if not (self.regularize and self.do_l2):
            return 0
        squared_norms = sum(np.sum(np.square(w)) for w in self.weights)
        return self.regularization_rate / (2 * self.training_size) * squared_norms

    @staticmethod
    def accuracy(yHat_test, y_test, threshold=None):
        """Return the fraction of correctly classified rows.

        Args:
            yHat_test: predictions, one row per sample.
            y_test: 2-D targets. With more than one column the classes are
                compared by ``argmax``; with a single column the prediction is
                ``yHat_test > threshold``.
            threshold: decision threshold for the single-output case;
                defaults to 0.5 when ``None``.
        """
        yHat_test = np.asarray(yHat_test)
        y_test = np.asarray(y_test)
        if y_test.shape[1] > 1:
            hits = np.argmax(yHat_test, axis=1) == np.argmax(y_test, axis=1)
        else:
            if threshold is None:
                threshold = 0.5
            predicted = (yHat_test.ravel() > threshold).astype(int)
            hits = predicted == y_test.ravel().astype(int)
        return int(np.sum(hits)) / y_test.shape[0]