# CS280 Programming Assignment 2
__Implementing the Backpropagation Algorithm__<br>
<br>
Compiler: Python 3.6.5<br>
OS: Windows 7

# Classification via ANN
Let's implement an artificial neural network with:
* four layers (2 hidden layers)
* MSE as the error function
* Sigmoid function as the activation function

## Import training and validation data
The training and validation sets used here were generated from __data.csv__ and __data_labels.csv__ by the scripts in the notebook __Handling_Imbalanced_Data.ipynb__.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import time

In [None]:
# Read the training split (features first, then labels) from comma-separated files.
train_data, train_labels = (
    np.genfromtxt(csv_name, delimiter=',')
    for csv_name in ('training_set.csv', 'training_labels.csv')
)

# Read the validation split in the same order.
validation_data, validation_labels = (
    np.genfromtxt(csv_name, delimiter=',')
    for csv_name in ('validation_set.csv', 'validation_labels.csv')
)


##  Convert labels to one-hot vector format
For classification tasks such as this, it is convenient to represent the target labels as a one-hot vector. In this case, the data contains 8 unique target labels, so the one-hot vector encoded labels would look like:<br>
> Class Label &ensp;&ensp;&ensp; One-Hot-Vector Encoded Format<br>
> 1 &ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;[1 0 0 0 0 0 0 0]<br>
> 2 &ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;[0 1 0 0 0 0 0 0]<br>
> ....... <br>
> 7 &ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;[0 0 0 0 0 0 1 0]<br>
> 8 &ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;&ensp;[0 0 0 0 0 0 0 1]<br>

This is so that the predictions of our model would come out as a vector of probability/confidence values ranging from 0 to 1 for each unique class label.<br>
<br>
To that end, we define two utility functions for converting from one format to another:

In [None]:
def labels_to_onehotvector(labels, num_classes=None):
    """Convert 1-based class labels into one-hot encoded rows.

    Label ``k`` sets column ``k - 1`` of its row to 1; every other entry is 0.

    Parameters
    ----------
    labels : array-like
        Class labels, one per sample. Values are truncated to integers and
        must be >= 1 (there is no class 0).
    num_classes : int, optional
        Number of columns of the result. When omitted, the largest label
        present is used, so a label set with gaps (e.g. ``[1, 3]``) still
        gets a column for every class up to the largest one.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(labels), num_classes)``.

    Raises
    ------
    ValueError
        If a label is smaller than 1 or larger than ``num_classes``.
    """
    class_indices = np.asarray(labels).reshape(-1).astype(int) - 1
    num_samples = class_indices.size

    if num_samples == 0:
        # Nothing to encode: keep the requested width (0 when unspecified).
        return np.zeros((0, 0 if num_classes is None else int(num_classes)))

    if num_classes is None:
        # Sizing by the largest label (rather than by the count of distinct
        # labels) keeps every column index valid even when a class is absent.
        num_classes = int(class_indices.max()) + 1

    if class_indices.min() < 0 or class_indices.max() >= num_classes:
        # A negative index would silently wrap around to the last columns.
        raise ValueError('labels must lie in the range 1..%d' % num_classes)

    onehotvector = np.zeros((num_samples, int(num_classes)))
    onehotvector[np.arange(num_samples), class_indices] = 1
    return onehotvector

In [None]:
def onehotvectors_to_labels(onehotvectors):
    """Convert rows of per-class scores back into 1-based class labels.

    Parameters
    ----------
    onehotvectors : array-like of shape (n_samples, n_classes)
        One row of scores (or an exact one-hot vector) per sample.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_samples,)`` holding, for each row, the
        position of its largest entry plus one (there is no class 0).
        When several entries tie for the maximum, the first one wins.
    """
    onehotvectors = np.asarray(onehotvectors)
    labels = np.zeros(onehotvectors.shape[0])
    if onehotvectors.size == 0:
        # argmax is undefined on an empty axis; there is nothing to decode.
        return labels

    # argmax always yields exactly one index per row, so tied maxima and
    # all-zero rows are handled without any division or array-to-scalar
    # conversion.
    labels[:] = np.argmax(onehotvectors, axis=1) + 1  # +1 because there is no class 0
    return labels

.... And use them to convert our labels to one-hot-vector format.

In [None]:
# Swap the integer class labels for their one-hot encodings (training first, then validation).
train_labels, validation_labels = (
    labels_to_onehotvector(label_column)
    for label_column in (train_labels, validation_labels)
)

In [None]:
# Display the shapes of the four arrays as a sanity check.
tuple(array.shape for array in (train_data, train_labels, validation_data, validation_labels))

## Define the Neural Network
We define a couple of functions to be used for defining, training, and using the neural network

#### init_neurons()
Allocates the input and desired-output vectors and randomly initializes the weight and bias matrices of both hidden layers and of the output layer, drawing every value from a uniform distribution between (-INIT_RANGE) and (+INIT_RANGE).

In [None]:
def init_neurons(num_input, num_hidden1_neurons, num_hidden2_neurons, num_output):
    """Allocate the network's vectors and randomly initialise its parameters.

    Every weight and bias is drawn with ``np.random.uniform`` between
    ``-INIT_RANGE`` and ``+INIT_RANGE``.

    Returns
    -------
    tuple
        ``(x_in, w_h1, b_h1, w_h2, b_h2, w_out, b_out, d_out)`` where
        ``x_in`` is a zero column of length ``num_input``, ``d_out`` is a
        zero column of length ``num_output``, each weight matrix has shape
        ``(layer_size, previous_layer_size)`` and each bias is a column of
        length ``layer_size``.
    """
    INIT_RANGE = 0.1

    def draw(rows, cols):
        # One uniformly distributed matrix of the requested shape.
        return np.random.uniform(low=-INIT_RANGE, high=INIT_RANGE, size=(rows, cols))

    layer_sizes = (num_input, num_hidden1_neurons, num_hidden2_neurons, num_output)

    # Walk the layers front to back, drawing each layer's weights before its biases.
    parameters = []
    for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
        parameters.append(draw(fan_out, fan_in))
        parameters.append(draw(fan_out, 1))
    w_h1, b_h1, w_h2, b_h2, w_out, b_out = parameters

    x_in = np.zeros((num_input, 1))
    d_out = np.zeros((num_output, 1))

    return x_in, w_h1, b_h1, w_h2, b_h2, w_out, b_out, d_out

#### predict()
Perform forward-pass for a single data point

In [None]:
def predict(input_data, w_h1, b_h1, w_h2, b_h2, w_out, b_out):
    """Run one forward pass through both hidden layers and the output layer.

    Each layer computes ``sigmoid(np.dot(weights, previous) + biases)``.

    Returns
    -------
    tuple
        ``(out, y_h1, y_h2)``: the output-layer activations followed by the
        activations of the first and second hidden layers.
    """
    layers = ((w_h1, b_h1), (w_h2, b_h2), (w_out, b_out))

    activations = []
    signal = input_data
    for weights, biases in layers:
        # Affine transform followed by the logistic sigmoid.
        signal = 1/(1 + np.exp(-(np.dot(weights, signal) + biases)))
        activations.append(signal)

    y_h1, y_h2, out = activations
    return out, y_h1, y_h2

#### predict_batch()
Performs a forward pass for every data point in a batch and returns the predicted class labels.<br>
Also computes the mean squared prediction error over the batch if *labels* is given.

In [None]:
def predict_batch(batch_data, w_h1, b_h1, w_h2, b_h2, w_out, b_out, labels=None):
    """Classify every row of ``batch_data`` and optionally score the predictions.

    Parameters
    ----------
    batch_data : numpy.ndarray
        One sample per row; each row is reshaped into a column and passed
        to ``predict``.
    w_h1, b_h1, w_h2, b_h2, w_out, b_out : numpy.ndarray
        Network parameters, forwarded unchanged to ``predict``.
    labels : numpy.ndarray, optional
        One-hot targets, one row per sample. When given, the mean squared
        error against the network outputs is computed.

    Returns
    -------
    tuple
        ``(total_error, predictions)``. ``total_error`` is the per-sample
        mean of the squared output errors, averaged over the batch (0.0
        when ``labels`` is None or the batch is empty). ``predictions`` is
        the result of ``onehotvectors_to_labels`` on the network outputs.
    """
    num_samples = batch_data.shape[0]
    # The number of classes equals the number of neurons in the output layer.
    num_classes = b_out.shape[0]
    onehotvector_predictions = np.zeros((num_samples, num_classes))

    total_error = 0.0
    for index, sample in enumerate(batch_data):
        out, _, _ = predict(sample.reshape(-1, 1), w_h1, b_h1, w_h2, b_h2, w_out, b_out)
        onehotvector_predictions[index] = out.reshape(-1,)

        if labels is not None:
            error = labels[index].reshape(-1, 1) - out
            total_error = total_error + (np.sum(error*error))/(error.shape[0])

    # Average over the batch; an empty batch would otherwise divide by zero.
    if num_samples:
        total_error *= (1/num_samples)

    predictions = onehotvectors_to_labels(onehotvector_predictions)

    return total_error, predictions
    

#### ann_fit()
Train the artificial neural network with the training data

In [None]:
def ann_fit(train_data, train_labels, num_input, num_hidden1_neurons, num_hidden2_neurons, num_output,
            LR=0.1, validation_period=10, validation_data=None, validation_labels=None, max_epoch=None):
    """Train the network with per-sample backpropagation on an MSE objective.

    Every epoch visits the training samples in a fresh random order and
    updates all weights and biases after each sample.

    Parameters
    ----------
    train_data : numpy.ndarray
        Training samples, one per row.
    train_labels : numpy.ndarray
        One-hot targets, one row per training sample.
    num_input, num_hidden1_neurons, num_hidden2_neurons, num_output : int
        Layer sizes, forwarded to ``init_neurons``.
    LR : float
        Initial learning rate. It is adjusted after each epoch according to
        the epoch's training error (see the schedule in the body).
    validation_period : int
        The validation set is evaluated on every epoch whose index is a
        multiple of this value.
    validation_data, validation_labels : numpy.ndarray, optional
        Validation samples and their one-hot targets. When
        ``validation_data`` is None no validation is performed.
    max_epoch : int, optional
        Maximum number of epochs. When omitted, the module-level
        ``MAX_EPOCH`` is used.

    Returns
    -------
    tuple
        ``(w_h1, b_h1, w_h2, b_h2, w_out, b_out, total_error,
        total_validation_error, epochs, training_ending_epoch)``.
        ``total_error`` has shape ``(max_epoch, 1)``;
        ``total_validation_error`` has one row per scheduled validation
        epoch, i.e. as many rows as ``range(0, max_epoch, validation_period)``
        has elements; ``epochs`` is ``range(0, max_epoch)``.

    Raises
    ------
    ValueError
        If ``max_epoch`` or ``validation_period`` is smaller than 1, or if
        ``train_data`` has no rows.
    """
    if max_epoch is None:
        # Backward compatible default: the notebook-level setting.
        max_epoch = MAX_EPOCH
    if max_epoch < 1:
        raise ValueError('max_epoch must be at least 1')
    if validation_period < 1:
        raise ValueError('validation_period must be at least 1')
    num_samples = train_data.shape[0]
    if num_samples == 0:
        raise ValueError('train_data must contain at least one sample')

    # Take note of the start time of training for training duration measurement purposes
    train_time_start = time.time()
    report_time_start = time.time()

    # Print error report every __ epochs
    ERR_REPORT_PERIOD = 100

    # Stop training if the current total training error goes below this value
    ERR_TERMINATION_COND = 0.001

    # Initialize the weights and biases; the placeholder input/output vectors
    # returned by init_neurons are not needed here.
    _, w_h1, b_h1, w_h2, b_h2, w_out, b_out, _ = init_neurons(num_input,
                                                              num_hidden1_neurons,
                                                              num_hidden2_neurons,
                                                              num_output)

    # Initialize the error vectors. The validation vector gets one slot per
    # scheduled validation epoch, derived from the same validation_period that
    # is used for indexing below (last index + 1).
    total_error = np.zeros((max_epoch, 1))
    num_validation_slots = int((max_epoch - 1)/validation_period) + 1
    total_validation_error = np.zeros((num_validation_slots, 1))

    has_validation = validation_data is not None
    # Most recently computed validation error, used for reporting.
    last_validation_error = 0.0

    # Initialize the epoch value at which training will have ended
    training_ending_epoch = max_epoch - 1

    # Generate a vector for the epoch numbers
    epochs = range(0, max_epoch)

    for epoch_index in epochs:
        epoch_error_sum = 0.0
        for train_index in np.random.permutation(num_samples):

            # READ DATA
            x_in = train_data[train_index].reshape(-1, 1)
            d_out = train_labels[train_index].reshape(-1, 1)

            ##### FORWARD PASS #####
            out, y_h1, y_h2 = predict(x_in, w_h1, b_h1, w_h2, b_h2, w_out, b_out)

            ##### BACK PROPAGATION #####
            # All deltas are computed with the pre-update weights.
            error = d_out - out
            delta_out = error*out*(1-out)
            delta_h2 = (y_h2*(1-y_h2))*(np.dot(np.transpose(w_out), delta_out))
            delta_h1 = (y_h1*(1-y_h1))*(np.dot(np.transpose(w_h2), delta_h2))

            ## Update the weights and biases
            # (column delta) * (row activation) broadcasts to the outer product.
            w_out = w_out + LR*delta_out*np.transpose(y_h2)
            b_out = b_out + LR*delta_out

            w_h2 = w_h2 + LR*delta_h2*np.transpose(y_h1)
            b_h2 = b_h2 + LR*delta_h2

            w_h1 = w_h1 + LR*delta_h1*np.transpose(x_in)
            b_h1 = b_h1 + LR*delta_h1

            # Accumulate this sample's mean squared output error
            epoch_error_sum = epoch_error_sum + (np.sum(error*error))/error.shape[0]

        # .... then take the average for all training data samples processed (Mean Square Error)
        total_error[epoch_index] = epoch_error_sum*(1/num_samples)
        epoch_error = total_error[epoch_index, 0]

        # Process the validation data set according to the validation period set
        current_valerror_index = int(epoch_index/validation_period)
        if has_validation and epoch_index % validation_period == 0:
            last_validation_error, _ = predict_batch(validation_data,
                                                     w_h1, b_h1, w_h2,
                                                     b_h2, w_out, b_out, validation_labels)
            total_validation_error[current_valerror_index] = last_validation_error

        # Print an error report according to the error reporting period
        if epoch_index % ERR_REPORT_PERIOD == 0:
            report_time_end = time.time()
            print('\nEPOCH %d (duration: %3.4f seconds)'%(epoch_index, report_time_end - report_time_start))
            print('\ttraining error=%10.12f'%(epoch_error))
            if has_validation:
                print('\tvalidation_error=%10.12f'% (last_validation_error))
            report_time_start = time.time()

        # Update the learning rate LR as the training error gets closer to the target.
        # Once LR has decayed to 0.00001 or below it is left alone. The comparisons
        # are strict, so an error of exactly 0.006 or 0.002 leaves LR unchanged.
        if LR > 0.00001:
            if 0.006 < epoch_error < 0.01:
                LR = 0.05
            elif 0.002 < epoch_error < 0.006:
                LR = LR - 0.0001
            elif epoch_error < 0.002:
                LR = 0.02

        # Check conditions for early stopping.
        # If stopping would be early, compute the validation error using the latest weights and biases
        if epoch_error < ERR_TERMINATION_COND:
            training_ending_epoch = epoch_index
            if has_validation:
                last_validation_error, _ = predict_batch(validation_data,
                                                         w_h1, b_h1, w_h2,
                                                         b_h2, w_out, b_out, validation_labels)
                # Record it in the slot after the last scheduled one, provided
                # that slot exists (it does not when stopping near max_epoch).
                next_slot = current_valerror_index + 1
                if next_slot < num_validation_slots:
                    total_validation_error[next_slot] = last_validation_error
            break

    train_time_end = time.time()

    print('\n\n--\nTRAINING ENDED AT EPOCH %d (%3.3f s) WITH training_error=%10.12f'%(training_ending_epoch,
                                                                                      train_time_end-train_time_start,
                                                                                      total_error[training_ending_epoch, 0]))
    if has_validation:
        print('\nTRAINING ENDED AT EPOCH %d WITH validation_error=%10.12f'%(training_ending_epoch,
                                                                            last_validation_error))

    return w_h1, b_h1, w_h2, b_h2, w_out, b_out, total_error, total_validation_error, epochs, training_ending_epoch
        

        

## Train the ANN


In [None]:
# Define number of neurons per layer
# Layer sizes: the input width follows the feature count of the training data,
# the output width follows the one-hot label width.
NUM_INPUT, NUM_OUTPUT = train_data.shape[1], train_labels.shape[1]
NUM_HIDDEN1_NEURONS, NUM_HIDDEN2_NEURONS = 11, 9

# Learning rate at the start of training
LR = 0.1

# Number of epochs between evaluations of the validation data during training
VALIDATION_PERIOD = 10

# Upper bound on the number of epochs when the stopping criterion is never met
MAX_EPOCH = 30_000

In [None]:
# Train the network and keep the learned parameters plus the error histories.
(w_h1, b_h1, w_h2, b_h2, w_out, b_out,
 total_training_error, total_validation_error,
 epochs, training_ending_epoch) = ann_fit(
    train_data, train_labels,
    NUM_INPUT, NUM_HIDDEN1_NEURONS, NUM_HIDDEN2_NEURONS, NUM_OUTPUT,
    LR=LR,
    validation_period=VALIDATION_PERIOD,
    validation_data=validation_data,
    validation_labels=validation_labels,
)
### Optionally play a sound when training is done!
# from playsound import playsound
#playsound('Victory.mp3')

#### Plot the training error and the validation error

In [None]:
# Draw the per-epoch training error and the periodically sampled validation error.
plt.figure(figsize=(8, 6), dpi=80)
plt.plot(epochs, total_training_error)
plt.plot(range(0, MAX_EPOCH, VALIDATION_PERIOD), total_validation_error)

# Show only the epochs that were actually trained; the y-axis spans up to the largest recorded error.
plt.axis([
    0, training_ending_epoch,
    0, max(np.max(total_training_error), np.max(total_validation_error)),
])
plt.legend(['training error', 'validation error'])
plt.title(
    'ANN errors vs. Epochs for NUM_HIDDEN1_NEURONS=%d and NUM_HIDDEN2_NEURONS=%d'
    % (NUM_HIDDEN1_NEURONS, NUM_HIDDEN2_NEURONS)
)
plt.grid(which='both')

# Save the figure under a name that encodes the hidden layer sizes, then display it.
plt.savefig('ANNerrors_%d_%d.png' % (NUM_HIDDEN1_NEURONS, NUM_HIDDEN2_NEURONS))
plt.show()

#### Since our test set is not labelled, let's measure instead the model's accuracy on the validation data set like so:

In [None]:
# Reload the integer class labels: validation_labels was replaced by its
# one-hot encoding earlier in this notebook, while predict_batch returns
# integer class labels.
validation_labels = np.genfromtxt('validation_labels.csv', delimiter=',')

# No labels are passed, so the returned error is not meaningful and is discarded.
_, predicted_validation_labels = predict_batch(validation_data, w_h1, b_h1, w_h2, b_h2, w_out, b_out)

from sklearn import metrics
# Report the fraction of validation samples whose predicted label matches the true label.
print("ANN Classification Accuracy:",metrics.accuracy_score(validation_labels, predicted_validation_labels))

## Use the newly trained ANN on the test data

In [None]:
# Load the test samples from CSV.
test_data = np.genfromtxt('test_set.csv', delimiter=',')

# Predict a class label for every test sample with the trained network;
# no labels are passed, so the returned error is discarded.
_, predicted_labels = predict_batch(test_data, w_h1, b_h1, w_h2, b_h2, w_out, b_out)

## Commented-out to avoid overwriting the best result!
#np.savetxt('predicted_ann.csv', predicted_labels, delimiter=',\n', fmt='%d')

In [None]:
# Display the shapes of the test inputs and of their predicted labels.
tuple(array.shape for array in (test_data, predicted_labels))

# Classification via SVM Algorithm
Compiler: Python 3.6.5 <br>
OS: Windows 7<br>

In [None]:
from sklearn import svm

In [None]:
# Re-read the training split (features first, then integer labels) from CSV.
train_data, train_labels = (
    np.genfromtxt(csv_name, delimiter=',')
    for csv_name in ('training_set.csv', 'training_labels.csv')
)

# Re-read the validation split in the same order.
validation_data, validation_labels = (
    np.genfromtxt(csv_name, delimiter=',')
    for csv_name in ('validation_set.csv', 'validation_labels.csv')
)

In [None]:
# Create a support vector classifier with a linear kernel.
svm_classifier = svm.SVC(kernel='linear')
# Time only the call to fit().
train_time_start = time.time()
svm_classifier.fit(train_data, train_labels)
train_time_end = time.time()

print('SVM Training Duration: %3.3f s'%(train_time_end - train_time_start))

In [None]:
# Predict a label for every validation sample with the fitted SVM.
predicted_validation_labels = svm_classifier.predict(validation_data)

from sklearn import metrics
# Report the fraction of validation samples whose predicted label matches the true label.
print("SVM Classification Accuracy:",metrics.accuracy_score(validation_labels, predicted_validation_labels))

In [None]:
# Predict labels for the test samples; test_data is the array loaded in the ANN section above.
predicted_labels_svm = svm_classifier.predict(test_data)

In [None]:
## Commented-out to avoid overwriting the submitted csv
#np.savetxt('predicted_svm.csv', predicted_labels_svm, delimiter=',\n', fmt='%d')