# Neural Network Constructor

This notebook works through the construction of a two-layer neural network. (It will eventually be imported into its own python module)

In [74]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [75]:
%autoreload 2

In [76]:
import numpy as np
import HW06_utils as ut

import gradients as grad
import activationfns as af

In [86]:

class NeuralNet:
    """
    Train and store a neural network, based on supplied training data. 
    Use this network to predict classifications.
    """

    def __init__(self,nlayers=3,unitsperlayer=None,actfns=[af.sigmoid,af.sigmoid],Gradients=None,verbose=False):
        """
        Initialize the neural network
        - nlayers:       the number of layers in the neural network (includes input and output layers)
        - unitsperlayer: a list specifying (in order) the number of units in all sequential layers except input
        - actfns:        a list specifying (in order) the activation function used by all sequential layers except input
        - Gradient:      a class providing optimized gradient calculations for the given sequence of  
                         activation functions
        - verbose:       a boolean for descriptive output
        """
        if unitsperlayer == None:
            unitsperlayer = 3*np.ones(nlayers)
        elif nlayers == len(unitsperlayer)+1:    
            self.nlayers = nlayers-1
            self.unitsperlayer = unitsperlayer 
        elif nlayers > len(unitsperlayer)+1:
            print('ERROR: The number of units per layer were not given for at least one layer.')
        elif nlayers < len(unitsperlayer)+1:
            print('ERROR: More layers were given units than were specified by input "nlayers".')
        if nlayers == len(actfns)+1:
            self.actfns = actfns
        elif nlayers > len(actfns)+1:
            print('ERROR: The activation function was not given for at least one layer.')
        elif nlayers < len(actfns)+1:
            print('ERROR: More activation functions were provided than specified by input "nlayers".')
        if Gradients == None:
            print('ERROR: A gradient generator class must be included.')
        self.gradients = Gradients
        self.weight_matrices = []
        
    
    def initialize_weights(self,shape,mu=0,var=1):
        """
        Initialize weight matrix from normal distribution.
        - shape: tuple specifying desired shape of weight matrix
        - mu:    mean value of normal distribution
        - var:   variance of normal distribution
        """
        weight_matrix = np.random.normal(loc=mu,scale=np.sqrt(var),size=shape)
        
        return weight_matrix
    
    
    def weight_matrix_shape(self,n):
        """
        Create weight matrix with the proper number of rows and columns for this layer
        - n: the layer which will employ an activation function on the product of the 
             weight matrix and values
        """
        if n != 0 and n != range(self.nlayers)[-1]:
            WM_nrows = self.unitsperlayer[n]-1
            WM_ncols = self.unitsperlayer[n-1]
        elif n == 0:
            WM_nrows = self.unitsperlayer[n]-1
            WM_ncols = len(data[0])+1
        else:
            WM_nrows = self.unitsperlayer[n]
            WM_ncols = self.unitsperlayer[n-1]
        return WM_nrows,WM_ncols
    
    
    def forward(self,data):
        """
        Perform forward pass through neural network by multiplying data by weights
        and enforcing a nonlinear activation function for each layer.
        - data:           Nxd numpy array with N sample points and d features
        - weightmatrices: ordered list of sequential weight matrices corresponding to layers
        - actfns:         ordered list of sequential activation functions corresponding to layers
                         (functions are defined in activationfuncs.py)
        Returns layeroutputs, a list of the outputs from each layer. The last entry
        is an CxN numpy array with hypotheses for each sample N_i being in class C_j.
        """
        H = data.T
        layeroutputs = []
        for i in range(self.nlayers):
            W = self.weight_matrices[i]
            actfn = self.actfns[i]
            H = actfn(np.dot(W,H))
            # If the layer is not the output layer, add a fictitious unit for bias terms
            if i != self.nlayers-1:
                fictu = np.array([np.ones_like(H[0])])
                H = np.concatenate((H,fictu),axis=0)
            layeroutputs.append(H)
        return layeroutputs
    
    
    def backward(self,layeroutputs,labelrange,gradients=None):
        """
        Perform backward pass through neural network by computing gradients of 
        input weight matrices with respect to the loss function comparing hypotheses 
        to true values. Classes for gradients are provided in gradients.py module 
        (a unique gradient class is required for neural networks with different 
        numbers of layers and/or different activation functions)
        """
        if gradients == None:
            Gradients = self.gradients
        gradients = Gradients.calculate(self.weight_matrices,layeroutputs,labelrange)
        
        return gradients
    
    
    def classify_outputs(self,finaloutputs):
        """
        Convert final outputs into classifications
        -finaloutputs: a CxN numpy array with hypotheses for each sample N_i being in
                       class C_j.
        Returns a 1D, length-N array with values corresponding to point classifications
        """
        if len(finaloutputs) == 1:
            classifications = np.around(finaloutputs[0]).astype(int)
        if len(finaloutputs) > 1:
            # Add one for 1-indexing in classification labels
            classifications = (np.argmax(finaloutputs,axis=0)+np.ones(len(finaloutputs[0]))).astype(int)
        return classifications
    
    
    def stoch_grad_descent_prep(self,layeroutputs,classifications,labels):
        """
        For stochastic gradient descent, choose one misclassified point from the data set
        (index i) for performing backprop algorithm and reduce datasets accordingly     
        - layeroutputs: a list of the outputs from each layer
        - predictions:  1D, length-N numpy array with predictions for the N sample points
        - labels:       1D, length-N numpy array with true labels for the N sample points
        """
        diffclass = True
        tested_i = []
        while diffclass:
            i = np.random.randint(len(labels))
            if i not in tested_i:
                tested_i.append(i)
            if len(tested_i) == len(labels):
                print('All points classified correctly')
                return True,True
            if labels[i]!=classifications[i]:
                # Improperly classified point, so use it for gradient descent
                layeroutputs_i = [layeroutput[:,i] for layeroutput in layeroutputs]
                return layeroutputs_i,i
    
    
    def train(self,data,labels,epsilon=0.01):
        """
        Train the neural network on input data
        - data:   Nxd numppy array with N sample points and d features
        - labels: 1D, length-N numpy array with labels for the N sample points
        """
        # Ensure labels are integers
        labels = labels.astype(int)
       
        # Add fictitious unit for bias terms
        fictu = np.array([np.ones(len(data))]).T
        data = np.concatenate((data,fictu),axis=1)
    
        # Initialize Weights
        for n in range(self.nlayers):
            WM_nrows,WM_ncols = self.weight_matrix_shape(n)
            # Variance of weight matrix determined by fan-in (eta), the number of units in the previous layer 
            # (or the number of data features when initializing the first weight matrix)
            eta = WM_ncols
            weight_matrix = self.initialize_weights((WM_nrows,WM_ncols),mu=0,var=(1/eta))
            print(weight_matrix)
            self.weight_matrices.append(weight_matrix)
        
        # Execute gradient class overhead before beginning training loop
        self.gradients.prepare(data,labels,self.unitsperlayer[-1])
        
        # Begin loop
        layeroutputs = self.forward(data)
        classifications = self.classify_outputs(layeroutputs[-1])
        trainAccs = [ut.score_accuracy(classifications,labels)]
        counter=0
        while counter < 1000:
            layeroutputs_i,label_i = self.stoch_grad_descent_prep(layeroutputs,classifications,labels)
            if layeroutputs_i == True:
                break
            gradients = self.backward(layeroutputs_i,[label_i,label_i+1])

            for n in range(self.nlayers):
                self.weight_matrices[n]=self.weight_matrices[n]-epsilon*gradients[n]
            layeroutputs = self.forward(data)
            
            classifications = self.classify_outputs(layeroutputs[-1])
            if counter%1 == 0:
                print(counter)
                print(layeroutputs[-1])
                print('rounded',classifications)
            trainAccs.append(ut.score_accuracy(classifications,labels))
            counter+=1
        print(trainAccs)
        
        
    def predict(self,testdata):
        """
        Predict classfications for unlabeled data points using the previously 
        trained neural network.
        - testdata: Nxd numpy array with N sample points and d features
                    *Note, dimension d must match that used for the data array in NeuralNet.train*
        Returns a 1D, length-N numpy array of predictions (one prediction per point)
        """
        npoints = len(testdata)
        predictions = np.empty(npoints)
        layeroutputs = self.forward(data)
        predictions = classify_outputs(layeroutputs[-1])

        return predictions.astype(int)
    

### Tests

In [87]:
data = np.array([[2,2,2,5],[1,1,1,2],[10,3,1,6],[2,2,2,5],[2,1,5,3],[2,2,2,6],[8,1,2,1]])
data = data-np.mean(data,axis=0)
labels = np.array([1,1,2,1,1,1,2])

In [88]:
classifier = NeuralNet(nlayers=3,unitsperlayer=[3,2],actfns=[af.tanh,af.sigmoid],Gradients=grad.tanhsig2layer())

In [89]:
classifier.train(data,labels)

[[-0.6287949   0.2859587   0.42179369  0.3511788   0.2810423 ]
 [-0.55797678 -0.19553882  0.24463676 -0.14781208  0.31511919]]
[[-1.53104875  0.25296366  0.73007487]
 [ 0.17738619 -0.17652865 -0.65354257]]
0
[[ 0.3745752   0.51620623  0.87816082  0.3745752   0.37759347  0.36210644
   0.88151179]
 [ 0.34701742  0.3270297   0.34648701  0.34701742  0.34066926  0.35036845
   0.34221693]]
rounded [1 1 1 1 1 1 1]
1
[[ 0.37616265  0.51815094  0.87531385  0.37616265  0.379599    0.36358251
   0.87883157]
 [ 0.3459014   0.32615363  0.35092501  0.3459014   0.33927613  0.34929722
   0.34648498]]
rounded [1 1 1 1 1 1 1]
2
[[ 0.37774738  0.52008633  0.87241952  0.37774738  0.38160191  0.36505624
   0.87610819]
 [ 0.34479495  0.3252893   0.3553583   0.34479495  0.33789564  0.34823491
   0.35074861]]
rounded [1 1 1 1 1 1 1]
3
[[ 0.37931243  0.5211523   0.86957083  0.37931243  0.38337263  0.36664522
   0.87329761]
 [ 0.34369639  0.32461779  0.35967215  0.34369639  0.33665583  0.3470992
   0.35507879]]