In [1]:
'''
Comparing single layer MLP with deep MLP (using TensorFlow)
'''
import numpy as np
import pickle
from scipy.optimize import minimize
from scipy.io import loadmat
from math import sqrt
import time
import matplotlib.pyplot as plt

In [2]:
# Do not change this
def initializeWeights(n_in, n_out):
    """
    # Draw a random initial weight matrix for one layer of the network.
    # Entries are sampled uniformly from [-epsilon, epsilon), where
    # epsilon = sqrt(6) / sqrt(n_in + n_out + 1).

    # Input:
    # n_in: number of nodes feeding into the layer (bias not counted)
    # n_out: number of nodes the layer produces

    # Output:
    # matrix of shape (n_out x (n_in + 1)); the extra column holds the bias weights"""
    bound = sqrt(6) / sqrt(n_in + n_out + 1)
    # np.random.rand yields values in [0, 1); stretch to [0, 2*bound) and
    # shift down by bound to centre the interval on zero.
    unit_samples = np.random.rand(n_out, n_in + 1)
    return unit_samples * 2 * bound - bound

In [3]:
# Replace this with your sigmoid implementation
def sigmoid(z):
    """Apply the logistic function 1 / (1 + exp(-z)) element-wise.

    # Input:
    # z: scalar or numpy array

    # Output:
    # value(s) of the logistic function, same shape as z
    """
    return 1.0 / (np.exp(-z) + 1.0)

In [4]:
# Replace this with your nnObjFunction implementation
def nnObjFunction(params, *args):
    """Regularized cross-entropy objective and gradient of a one-hidden-layer
    sigmoid network, in the (value, gradient) form expected by
    scipy.optimize.minimize(..., jac=True).

    # Input:
    # params: 1-D array holding w1 (n_hidden x (n_input + 1)) followed by
    #         w2 (n_class x (n_hidden + 1)), each flattened row by row
    # args: (n_input, n_hidden, n_class, training_data, training_label, lambdaval)
    #     n_input, n_hidden, n_class: layer sizes, bias units not counted
    #     training_data: (num_samples x n_input) feature matrix
    #     training_label: num_samples class ids, each in [0, n_class)
    #     lambdaval: L2 regularization strength

    # Output:
    # obj_val: scalar objective (mean cross-entropy + L2 penalty)
    # obj_grad: 1-D gradient with the same layout as params
    """
    n_input, n_hidden, n_class, training_data, training_label, lambdaval = args

    split = n_hidden * (n_input + 1)
    w1 = params[:split].reshape((n_hidden, n_input + 1))
    w2 = params[split:].reshape((n_class, n_hidden + 1))

    num_samples = training_data.shape[0]

    # 1-of-K encode the labels. The width follows n_class (it used to be
    # hard-coded to 2, which broke every configuration except binary).
    label = np.asarray(training_label).astype(int)
    one_hot = np.zeros((num_samples, n_class))
    one_hot[np.arange(num_samples), label] = 1

    # Feed forward. The bias input is appended as the LAST column, so the last
    # column of w1 / w2 holds the bias weights.
    data_with_bias = np.column_stack((training_data, np.ones(num_samples)))
    hidden_output = sigmoid(np.dot(data_with_bias, w1.T))
    hidden_with_bias = np.column_stack((hidden_output, np.ones(num_samples)))
    final_input = np.dot(hidden_with_bias, w2.T)
    final_output = sigmoid(final_input)

    # Back-propagation.
    delta = final_output - one_hot
    grad_w2 = np.dot(delta.T, hidden_with_bias)
    back = (1 - hidden_with_bias) * hidden_with_bias * np.dot(delta, w2)
    # The hidden bias unit has no incoming weights, so its column is dropped
    # before forming the w1 gradient.
    grad_w1 = np.dot(back[:, :n_hidden].T, data_with_bias)

    # Cross-entropy computed from the pre-activations:
    #   log(sigmoid(a))     = -logaddexp(0, -a)
    #   log(1 - sigmoid(a)) = -logaddexp(0,  a)
    # This stays finite when the output saturates to exactly 0 or 1, where
    # np.log(final_output) would give -inf and turn the objective into inf/nan.
    log_out = -np.logaddexp(0, -final_input)
    log_one_minus_out = -np.logaddexp(0, final_input)
    data_loss = -np.sum(one_hot * log_out + (1 - one_hot) * log_one_minus_out) / num_samples

    # L2 penalty over every weight (bias weights included).
    reg_loss = (lambdaval / (2 * num_samples)) * (np.sum(np.square(w1)) + np.sum(np.square(w2)))
    obj_val = data_loss + reg_loss

    grad_w1 = grad_w1 + lambdaval * w1
    grad_w2 = grad_w2 + lambdaval * w2

    # Flatten in the same order as params and apply the 1/num_samples factor.
    obj_grad = np.concatenate((grad_w1.flatten(), grad_w2.flatten()), 0) / num_samples

    return (obj_val, obj_grad)

In [5]:
# Replace this with your nnPredict implementation
def nnPredict(w1,w2,data):
    """Predict a class index for every row of data with the trained network.

    # Input:
    # w1: (n_hidden x (n_input + 1)) input-to-hidden weights, bias weights in the last column
    # w2: (n_class x (n_hidden + 1)) hidden-to-output weights, bias weights in the last column
    # data: (num_samples x n_input) feature matrix

    # Output:
    # labels: 1-D integer array of length num_samples; entry i is the index of
    #         the largest output unit for sample i
    """
    num_items = data.shape[0]

    # The bias input must be 1, exactly as in nnObjFunction. Appending zeros
    # here would multiply every learned bias weight by 0 and silently drop it.
    data_with_bias = np.column_stack((data, np.ones(num_items)))
    hidden_output = sigmoid(np.dot(data_with_bias, w1.T))

    # Second layer, again with a bias input of 1 appended as the last column.
    hidden_with_bias = np.column_stack((hidden_output, np.ones(num_items)))
    final_output = sigmoid(np.dot(hidden_with_bias, w2.T))

    # Most activated output unit per row.
    labels = np.argmax(final_output, axis=1)

    return labels

In [6]:
# Do not change this
def preprocess():
    """Load 'face_all.pickle' from the working directory and split it into
    training, validation and test sets.

    The pickled object is indexed with the keys 'Features' and 'Labels'.
    Features are divided by 255; labels are taken from the first row of
    'Labels'. Rows [0, 21100) form the training set, [21100, 23765) the
    validation set and [23765, end) the test set.

    # Output:
    # train_x, train_y, valid_x, valid_y, test_x, test_y
    """
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only point this at a trusted data file.
    # The context manager closes the file handle, which was previously leaked.
    with open('face_all.pickle', 'rb') as pickle_file:
        pickle_obj = pickle.load(pickle_file)

    features = pickle_obj['Features']
    labels = pickle_obj['Labels'][0]

    train_x = features[0:21100] / 255
    valid_x = features[21100:23765] / 255
    test_x = features[23765:] / 255

    train_y = labels[0:21100]
    valid_y = labels[21100:23765]
    test_y = labels[23765:]
    return train_x, train_y, valid_x, valid_y, test_x, test_y

In [None]:
"""**************Neural Network Script Starts here********************************"""
train_data, train_label, validation_data, validation_label, test_data, test_label = preprocess()

# Train Neural Network
# number of nodes in the input layer (not including bias unit)
n_input = train_data.shape[1]
# number of nodes in the hidden layer (not including bias unit)
n_hidden = 256
# number of nodes in the output layer
n_class = 2

# time.clock() was removed in Python 3.8 and raises AttributeError there.
# perf_counter() is a documented replacement; it reports elapsed wall-clock
# seconds (time.clock() reported CPU time on Unix).
start = time.perf_counter()

# initialize the weights with random matrices
initial_w1 = initializeWeights(n_input, n_hidden)
initial_w2 = initializeWeights(n_hidden, n_class)
# unroll the 2 weight matrices into a single 1-D vector
initialWeights = np.concatenate((initial_w1.flatten(), initial_w2.flatten()),0)
# regularization hyper-parameter
lambdaval = 10
args = (n_input, n_hidden, n_class, train_data, train_label, lambdaval)

# Train with conjugate gradient; jac=True tells minimize that nnObjFunction
# returns (objective, gradient) together.
opts = {'maxiter' :50}    # Preferred value.

nn_params = minimize(nnObjFunction, initialWeights, jac=True, args=args,method='CG', options=opts)
params = nn_params.get('x')
# Reshape the optimized 1-D vector back into the w1 and w2 matrices
w1 = params[0:n_hidden * (n_input + 1)].reshape( (n_hidden, (n_input + 1)))
w2 = params[(n_hidden * (n_input + 1)):].reshape((n_class, (n_hidden + 1)))

# Report accuracy on the training, validation and test sets, in that order.
for split_name, split_data, split_label in (
    ('Training', train_data, train_label),
    ('Validation', validation_data, validation_label),
    ('Test', test_data, test_label),
):
    predicted_label = nnPredict(w1,w2,split_data)
    print('\n ' + split_name + ' set Accuracy:' + str(100*np.mean((predicted_label == split_label).astype(float))) + '%')

elapsed = (time.perf_counter() - start)
print("Time used:",elapsed)