# CS280 Programming Assignment 2
__Implementing the Backpropagation Algorithm__<br>
<br>
Compiler: Python 3.6.5<br>
OS: Windows 7

## Import training and validation data

In [61]:
import numpy as np

In [62]:
# Read the four comma-separated files (training/validation features and labels)
# into NumPy arrays, in the same order as the names on the left-hand side.
train_data, train_labels, validation_data, validation_labels = [
    np.genfromtxt(csv_path, delimiter=',')
    for csv_path in ('training_set.csv',
                     'training_labels.csv',
                     'validation_set.csv',
                     'validation_labels.csv')
]


In [63]:
def labels_to_onehotvector(labels, num_classes=None):
    """Convert 1-based integer class labels into a one-hot matrix.

    Parameters
    ----------
    labels : array-like of shape (n_samples,)
        Class labels. Each value is truncated to an integer, and label ``k``
        sets column ``k - 1`` (there is no class 0).
    num_classes : int, optional
        Number of columns in the result. When omitted, it is the number of
        distinct labels, widened if necessary so that the largest label
        still has a column (labels with gaps such as ``[1, 3]`` therefore
        no longer index out of bounds).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_samples, num_classes)`` with exactly one
        ``1`` per row.

    Raises
    ------
    ValueError
        If any label is smaller than 1 or larger than ``num_classes``.
    """
    label_values = np.asarray(labels).reshape(-1)
    column_indices = label_values.astype(int) - 1  # -1 because classes start at 1
    num_samples = column_indices.size

    if num_classes is None:
        num_classes = len(np.unique(label_values))
        if num_samples:
            num_classes = max(num_classes, int(column_indices.max()) + 1)

    # A negative index would silently wrap around to the last columns, so
    # reject out-of-range labels explicitly instead.
    if num_samples and (column_indices.min() < 0 or column_indices.max() >= num_classes):
        raise ValueError('labels must lie in the range 1..%d' % num_classes)

    onehotvector = np.zeros((num_samples, num_classes))
    onehotvector[np.arange(num_samples), column_indices] = 1
    return onehotvector

In [64]:
def onehotvectors_to_labels(onehotvectors):
    """Convert rows of class scores (or one-hot vectors) to 1-based labels.

    Parameters
    ----------
    onehotvectors : array-like of shape (n_samples, n_classes)
        One row of scores per sample.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_samples,)``. Each entry is the position of
        the row's largest value plus one, because there is no class 0.
        When several entries tie for the maximum (including an all-zero
        row), the first one wins.
    """
    scores = np.asarray(onehotvectors)
    if scores.shape[0] == 0:
        return np.zeros(0)
    # argmax picks a single winner per row, so ties and all-zero rows yield a
    # label instead of failing the way a "row / max == 1" comparison does.
    return np.argmax(scores, axis=1).astype(float) + 1

In [65]:
# Replace both integer label vectors with their one-hot encodings.
train_labels, validation_labels = [
    labels_to_onehotvector(label_column)
    for label_column in (train_labels, validation_labels)
]

In [66]:
# Display the shapes of the four arrays as a sanity check.
tuple(array.shape for array in (train_data, train_labels, validation_data, validation_labels))

((7674, 354), (7674, 8), (1910, 354), (1910, 8))

In [67]:
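# Commented-out toy dataset (eight 3-bit inputs with 3-bit targets), apparently
# kept for quick debugging; uncommenting it would replace the CSV training data.
#train_data = np.array([[0, 0, 0],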
#                       [0, 0, 1],
#                       [0, 1, 0],
#                       [0, 1, 1],
#                       [1, 0, 0],
#                       [1, 0, 1],
#                       [1, 1, 0],
#                       [1, 1, 1]])
#train_labels = np.array([[0, 0, 0],
#                       [1, 1, 0],
#                       [1, 0, 1],
#                       [0, 1, 1],
#                       [0, 1, 1],
#                       [1, 0, 0],
#                       [1, 1, 0],
#                       [0, 0, 0]])

## Define the Neural network
Define the architecture of the neural network: an input layer, two hidden layers, and an output layer.

In [68]:
# Hidden-layer widths are fixed by hand; the input and output widths are taken
# from the number of columns in the training data and the one-hot labels.
NUM_HIDDEN1_NEURONS, NUM_HIDDEN2_NEURONS = 7, 5
NUM_INPUT, NUM_OUTPUT = train_data.shape[1], train_labels.shape[1]

Define the Learning Rate, LR:

In [69]:
# Learning rate: scales every weight and bias update in the training loop.
LR = 0.1

Declare the matrices for the weights and biases, and then initialize them with random numbers:

In [70]:
def init_neurons(num_input, num_hidden1_neurons, num_hidden2_neurons, num_output):
    """Allocate the network's parameters and input/target placeholders.

    Weights and biases are drawn uniformly from [-0.1, 0.1), layer by layer
    (weights before biases). Each weight matrix has shape
    (neurons in this layer, neurons in the previous layer) and each bias is a
    column vector with one row per neuron.

    Returns the tuple
    ``(x_in, w_h1, b_h1, w_h2, b_h2, w_out, b_out, d_out)`` where ``x_in`` and
    ``d_out`` are zero column vectors sized for one input sample and one
    target, respectively.
    """
    INIT_RANGE = 0.1

    def random_block(rows, cols):
        # One uniform draw of the requested shape within the init range.
        return np.random.uniform(low=-INIT_RANGE, high=INIT_RANGE, size=(rows, cols))

    layer_widths = (num_input, num_hidden1_neurons, num_hidden2_neurons, num_output)

    parameters = []
    for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
        parameters.append(random_block(fan_out, fan_in))  # layer weights
        parameters.append(random_block(fan_out, 1))       # layer biases
    w_h1, b_h1, w_h2, b_h2, w_out, b_out = parameters

    x_in = np.zeros((num_input, 1))
    d_out = np.zeros((num_output, 1))

    return x_in, w_h1, b_h1, w_h2, b_h2, w_out, b_out, d_out

Training phase: define the forward pass, batch evaluation, and the backpropagation training loop.

In [71]:
def predict(input_data, w_h1, b_h1, w_h2, b_h2, w_out, b_out):
    """Run the forward pass through both hidden layers and the output layer.

    Every layer computes ``sigmoid(W . previous + b)`` with the logistic
    sigmoid applied element-wise.

    Returns
    -------
    tuple
        ``(out, y_h1, y_h2)``: the output-layer activations followed by the
        activations of hidden layer 1 and hidden layer 2.
    """
    def logistic(pre_activation):
        # Element-wise logistic sigmoid.
        return 1/(1 + np.exp(-pre_activation))

    y_h1 = logistic(np.dot(w_h1, input_data) + b_h1)  # hidden layer 1
    y_h2 = logistic(np.dot(w_h2, y_h1) + b_h2)        # hidden layer 2
    out = logistic(np.dot(w_out, y_h2) + b_out)       # output layer

    return out, y_h1, y_h2

In [72]:
def predict_batch(batch_data, w_h1, b_h1, w_h2, b_h2, w_out, b_out, labels=None):
    """Classify every row of ``batch_data`` and optionally score the result.

    Parameters
    ----------
    batch_data : array-like of shape (n_samples, n_features)
        One sample per row.
    w_h1, b_h1, w_h2, b_h2, w_out, b_out : numpy.ndarray
        Network parameters, passed straight to ``predict``.
    labels : array-like of shape (n_samples, n_classes), optional
        Target vectors, one row per sample. When given, the error is computed.

    Returns
    -------
    tuple
        ``(total_error, predictions)``. ``total_error`` is the sum of squared
        output errors averaged over the samples (``0.0`` when ``labels`` is
        None or the batch is empty). ``predictions`` holds the 1-based
        predicted class of each sample.
    """
    batch_data = np.asarray(batch_data)
    num_samples = batch_data.shape[0]
    if num_samples == 0:
        # Nothing to predict; also avoids dividing by zero below.
        return 0.0, np.zeros(0)

    # One forward pass for the whole batch: samples become columns, and the
    # (neurons, 1) bias vectors broadcast across them.
    inputs = batch_data.reshape(num_samples, -1).T
    out, _, _ = predict(inputs, w_h1, b_h1, w_h2, b_h2, w_out, b_out)
    onehotvector_predictions = out.T  # back to one row per sample

    total_error = 0.0
    if labels is not None:
        targets = np.asarray(labels)[:num_samples].reshape(num_samples, -1)
        error = targets - onehotvector_predictions
        total_error = np.sum(error*error) / num_samples

    predictions = onehotvectors_to_labels(onehotvector_predictions)

    return total_error, predictions
    

In [73]:
def train_fourlayer_neural_net(train_data, train_labels,num_input, num_hidden1_neurons,
                               num_hidden2_neurons, num_output, validation_data=None, validation_labels=None,
                               max_epoch=30000, learning_rate=None,
                               err_report_period=100, err_termination_cond=0.001):
    """Train the two-hidden-layer sigmoid network with per-sample backpropagation.

    Each epoch visits the training samples in a fresh random order and updates
    the weights after every sample. Training stops early once the epoch's mean
    squared training error drops below ``err_termination_cond``.

    Parameters
    ----------
    train_data : numpy.ndarray of shape (n_samples, num_input)
    train_labels : numpy.ndarray of shape (n_samples, num_output)
        Target vectors, one row per sample.
    num_input, num_hidden1_neurons, num_hidden2_neurons, num_output : int
        Layer widths, forwarded to ``init_neurons``.
    validation_data, validation_labels : numpy.ndarray, optional
        When ``validation_data`` is given, it is evaluated with
        ``predict_batch`` after every epoch.
    max_epoch : int, optional
        Upper bound on the number of epochs.
    learning_rate : float, optional
        Step size of the updates; defaults to the notebook-level ``LR``.
    err_report_period : int, optional
        Print a progress report every this many epochs.
    err_termination_cond : float, optional
        Training-error threshold for early stopping.

    Returns
    -------
    tuple
        ``(w_h1, b_h1, w_h2, b_h2, w_out, b_out, total_error,
        total_validation_error, epochs, training_ending_epoch)``.
        The two error arrays have shape ``(max_epoch, 1)``; entries after
        ``training_ending_epoch`` stay zero. ``epochs`` is
        ``range(0, max_epoch)``. ``training_ending_epoch`` is the index of the
        last epoch that ran.

    Raises
    ------
    ValueError
        If ``max_epoch`` is smaller than 1 or ``train_data`` has no rows.
    """
    if max_epoch < 1:
        raise ValueError('max_epoch must be at least 1')
    num_samples = train_data.shape[0]
    if num_samples == 0:
        raise ValueError('train_data must contain at least one sample')
    if learning_rate is None:
        learning_rate = LR

    # The x_in / d_out placeholders returned by init_neurons are not needed:
    # both are rebuilt from the data for every sample.
    _, w_h1, b_h1, w_h2, b_h2, w_out, b_out, _ = init_neurons(num_input,
                                                              num_hidden1_neurons,
                                                              num_hidden2_neurons,
                                                              num_output)
    # Initialize
    total_error = np.zeros((max_epoch, 1))
    total_validation_error = np.zeros((max_epoch, 1))
    # Index of the last epoch that runs. Starting at max_epoch - 1 (not
    # max_epoch) keeps the final report in bounds when the error threshold is
    # never reached.
    training_ending_epoch = max_epoch - 1
    epochs = range(0, max_epoch)

    for epoch_index in epochs:
        for train_index in np.random.permutation(num_samples):
            # READ DATA
            x_in = train_data[train_index].reshape(-1, 1)

            ##### FORWARD PASS #####
            out, y_h1, y_h2 = predict(x_in, w_h1, b_h1, w_h2, b_h2, w_out, b_out)

            ##### BACK PROPAGATION #####
            d_out = train_labels[train_index].reshape(-1, 1)
            error = d_out - out
            # Each delta is the layer's error signal times the sigmoid
            # derivative y*(1-y); all deltas use the pre-update weights.
            delta_out = error*out*(1-out)
            delta_h2 = (y_h2*(1-y_h2))*(np.dot(np.transpose(w_out), delta_out))
            delta_h1 = (y_h1*(1-y_h1))*(np.dot(np.transpose(w_h2), delta_h2))

            ## Update the weights and biases
            # (n, 1) * (1, m) broadcasts to the outer product delta . y^T
            w_out = w_out + learning_rate*delta_out*np.transpose(y_h2)
            b_out = b_out + learning_rate*delta_out

            w_h2 = w_h2 + learning_rate*delta_h2*np.transpose(y_h1)
            b_h2 = b_h2 + learning_rate*delta_h2

            w_h1 = w_h1 + learning_rate*delta_h1*np.transpose(x_in)
            b_h1 = b_h1 + learning_rate*delta_h1

            total_error[epoch_index] = total_error[epoch_index] + np.sum(error*error)

        total_error[epoch_index] *= (1/num_samples)

        if validation_data is not None:
            total_validation_error[epoch_index], predictions = predict_batch(validation_data,
                                                                             w_h1, b_h1, w_h2,
                                                                             b_h2, w_out, b_out, validation_labels)

        if epoch_index % err_report_period == 0:
            print('\nEPOCH %d\ttraining error=%10.12f'%(epoch_index, total_error[epoch_index, 0]))
            if validation_data is not None:
                print('\tvalidation_error=%10.12f'%(total_validation_error[epoch_index, 0]))

        if total_error[epoch_index, 0] < err_termination_cond:
            training_ending_epoch = epoch_index
            break

    print('\n\n--\nTRAINING ENDED AT EPOCH %d WITH training_error=%10.12f'%(training_ending_epoch, total_error[training_ending_epoch, 0]))
    if validation_data is not None:
        print('\nTRAINING ENDED AT EPOCH %d WITH validation_error=%10.12f'%(training_ending_epoch, total_validation_error[training_ending_epoch, 0]))

    return w_h1, b_h1, w_h2, b_h2, w_out, b_out, total_error, total_validation_error, epochs, training_ending_epoch
        

        

In [None]:
# Train the network on the full training set, evaluating on the validation set
# after every epoch.
(w_h1, b_h1, w_h2, b_h2, w_out, b_out,
 total_training_error, total_validation_error,
 epochs, training_ending_epoch) = train_fourlayer_neural_net(train_data, train_labels,
                                                             NUM_INPUT, NUM_HIDDEN1_NEURONS,
                                                             NUM_HIDDEN2_NEURONS, NUM_OUTPUT,
                                                             validation_data, validation_labels)

# Audible "training finished" notification. It is a convenience only, so a
# missing playsound package or sound file must not fail the cell after a long
# training run.
try:
    from playsound import playsound
    playsound('Victory.mp3')
except Exception as notify_error:
    print('Skipping completion sound: %s' % notify_error)


EPOCH 0	training error=0.880278460612
	validation_error=0.874214044785

EPOCH 100	training error=0.057552246594
	validation_error=0.069517414022

EPOCH 200	training error=0.036203742644
	validation_error=0.062006356848

EPOCH 300	training error=0.022736654561
	validation_error=0.031819651174

EPOCH 400	training error=0.015545649932
	validation_error=0.022870492138

EPOCH 500	training error=0.013092698825
	validation_error=0.018381199190

EPOCH 600	training error=0.009410036688
	validation_error=0.017668636368

EPOCH 700	training error=0.008813032687
	validation_error=0.015699748504

EPOCH 800	training error=0.008643960304
	validation_error=0.015793321088

EPOCH 900	training error=0.006569856341
	validation_error=0.014819690042

EPOCH 1000	training error=0.007422528568
	validation_error=0.015144376973

EPOCH 1100	training error=0.007258958067
	validation_error=0.015488149416

EPOCH 1200	training error=0.006203076653
	validation_error=0.017255230152

EPOCH 1300	training error=0.00629329

In [None]:
import matplotlib.pyplot as plt

# Plot only the epochs that actually ran; later entries of the error arrays
# are still zero and would flatten the curves.
last_epoch = min(training_ending_epoch, len(total_training_error) - 1)
epoch_axis = np.arange(last_epoch + 1)
training_curve = total_training_error[:last_epoch + 1]
validation_curve = total_validation_error[:last_epoch + 1]

# fig.add_axes expects [left, bottom, width, height] in figure fractions, not
# data limits, so create the axes normally and set the limits explicitly.
fig, axes = plt.subplots(figsize=(6, 3))
axes.plot(epoch_axis, training_curve, label='training error')
axes.plot(epoch_axis, validation_curve, label='validation error')
axes.set_xlim(0, max(last_epoch, 1))
axes.set_ylim(0, max(np.max(training_curve), np.max(validation_curve)) or 1.0)
axes.set_xlabel('epoch')
axes.set_ylabel('mean squared error')
axes.set_title('Training and validation error per epoch')
axes.legend()
plt.show()


#plt.plot(epochs, total_training_error, epochs, total_validation_error)
#plt.axis([0, training_ending_epoch, 0, max(np.max(total_training_error), np.max(total_validation_error))])
#plt.show()