Design and train (with code or manually) a neural network that achieves the best possible accuracy on the attached training_data.csv file. Discuss your design choices. In particular, describe your architecture (i.e., depth = number of layers and width = number of units per layer), training method, and hyperparameters. State the number of trainable parameters in your network, and report the final accuracy on the test set, located in file test_data.csv. Submit your code on GitHub Classroom.

In [12]:
import sys
sys.path.insert(0, '..')
from IPython.display import Image
%matplotlib inline
import torch
import numpy as np
import pandas as pd 
import os
import matplotlib.pyplot as plt
import torch.nn as nn
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split

In [None]:
# Load the training CSV into a NumPy array. The last column is treated as the
# label and every column before it as a feature.
df = pd.read_csv("training_data.csv")
data = np.array(df)
# The `-1:` slice (rather than `-1`) keeps y two-dimensional: one column,
# one row per example.
x, y = data[:, :-1], data[:, -1:]

# Hold out 1% of the rows as a validation set; the rest is used for training.
x_train, x_valid, y_train, y_valid = train_test_split(
    x,
    y,
    test_size=0.01,
)


In [None]:
# Create neural network class 
class NeuralNetMLP:
    """Multilayer perceptron with a single hidden layer, written in NumPy.

    Architecture: ``num_features -> num_hidden (ReLU) -> num_classes (ReLU)``.

    Trainable parameters:
    ``num_hidden * num_features + num_hidden`` for the hidden layer plus
    ``num_classes * num_hidden + num_classes`` for the output layer.

    ``forward`` relies on the module-level ``relu`` helper.
    """

    def __init__(self, num_features, num_hidden, num_classes, random_seed=123):
        """Initialise weights from N(0, 0.1^2) and biases to zero.

        Args:
            num_features: Number of input features per example.
            num_hidden: Number of units in the hidden layer.
            num_classes: Number of units in the output layer.
            random_seed: Seed for the weight initialisation, so that runs are
                reproducible.
        """
        super().__init__()
        self.num_classes = num_classes

        # A single seeded generator is used for both layers so the whole
        # initialisation is determined by `random_seed`.
        rng = np.random.RandomState(random_seed)

        # Hidden layer: weights are stored as [num_hidden, num_features].
        self.weight_h = rng.normal(loc=0.0, scale=0.1, size=(num_hidden, num_features))
        self.bias_h = np.zeros(num_hidden)

        # Output layer: weights are stored as [num_classes, num_hidden].
        self.weight_out = rng.normal(loc=0.0, scale=0.1, size=(num_classes, num_hidden))
        self.bias_out = np.zeros(num_classes)

    def forward(self, x):
        """Run a forward pass.

        Args:
            x: Input array of shape [n_examples, num_features].

        Returns:
            Tuple ``(a_h, a_out)``: the hidden activations
            [n_examples, num_hidden] and the output activations
            [n_examples, num_classes].
        """
        # Hidden layer: net input, then ReLU activation.
        z_h = np.dot(x, self.weight_h.T) + self.bias_h
        a_h = relu(z_h)

        # Output layer: net input, then ReLU activation.
        # NOTE(review): a ReLU output cannot go below zero and has zero
        # gradient once a unit outputs 0 -- confirm this is the intended
        # output activation for the task.
        z_out = np.dot(a_h, self.weight_out.T) + self.bias_out
        a_out = relu(z_out)
        return a_h, a_out

    def backward(self, x, a_h, a_out, y):
        """Compute the gradients of the squared-error loss by backpropagation.

        The loss differentiated here is the squared error summed over the
        output units and averaged over the examples.

        Args:
            x: Inputs, shape [n_examples, num_features].
            a_h: Hidden activations returned by ``forward``.
            a_out: Output activations returned by ``forward``.
            y: Targets. They are subtracted from ``a_out`` directly, so they
                must have the same shape as ``a_out`` or broadcast against it
                (for example a single column when ``num_classes == 1``, or
                one-hot rows otherwise).

        Returns:
            Tuple ``(d_loss__dw_out, d_loss__db_out, d_loss__d_w_h,
            d_loss__d_b_h)`` with the same shapes as ``weight_out``,
            ``bias_out``, ``weight_h`` and ``bias_h`` respectively.
        """
        # Part 1: dLoss/dOutWeights

        # d loss / d output activation
        d_loss__d_a_out = 2. * (a_out - y) / y.shape[0]

        # d output activation / d output z.
        # The forward pass uses ReLU, whose derivative is 1 where the unit is
        # active and 0 elsewhere. Because a = relu(z), a > 0 exactly when
        # z > 0, so the activation alone is enough to recover the derivative.
        d_a_out__d_z_out = a_out > 0

        # delta_out = dLoss/dOutAct * dOutAct/dOutNet, [n_examples, n_classes]
        delta_out = d_loss__d_a_out * d_a_out__d_z_out

        # d output z / d output w is just the hidden activation.
        d_z_out__dw_out = a_h

        # [n_classes, n_hidden]
        d_loss__dw_out = np.dot(delta_out.T, d_z_out__dw_out)
        # Summing over examples gives one bias gradient per output unit.
        d_loss__db_out = np.sum(delta_out, axis=0)

        # Part 2: dLoss/dHiddenWeights
        # = DeltaOut * dOutNet/dHiddenAct * dHiddenAct/dHiddenNet * dHiddenNet/dWeight

        # [n_classes, n_hidden]
        d_z_out__a_h = self.weight_out

        # [n_examples, n_hidden]
        d_loss__a_h = np.dot(delta_out, d_z_out__a_h)

        # ReLU derivative of the hidden layer, [n_examples, n_hidden]
        d_a_h__d_z_h = a_h > 0

        # Error signal at the hidden net input, shared by the weight and bias
        # gradients below.
        delta_h = d_loss__a_h * d_a_h__d_z_h

        # [n_examples, n_features]
        d_z_h__d_w_h = x

        # [n_hidden, n_features]
        d_loss__d_w_h = np.dot(delta_h.T, d_z_h__d_w_h)
        d_loss__d_b_h = np.sum(delta_h, axis=0)

        return (d_loss__dw_out, d_loss__db_out,
                d_loss__d_w_h, d_loss__d_b_h)


# ReLU activation function
def relu(x):
    """Apply the rectified linear unit, ``max(0, x)``, element-wise.

    Args:
        x: A scalar or a NumPy array of any shape.

    Returns:
        A value of the same shape as ``x`` with every negative entry
        replaced by 0.
    """
    # The builtin max() cannot compare 0 against a multi-element array (it
    # raises ValueError), so the element-wise NumPy version is required.
    return np.maximum(0, x)

In [None]:
# Training loop: runs minibatch gradient descent on the network
def train(model, x_train, y_train, x_valid, y_valid, num_epochs, learning_rate=0.1):
    """Train ``model`` with minibatch gradient descent and log each epoch.

    Depends on ``minibatch_generator``, ``minibatch_size`` and
    ``compute_mse_and_acc``, none of which are defined in this part of the
    file.
    NOTE(review): confirm they are defined at module level before calling.

    Args:
        model: Network exposing ``forward``, ``backward`` and the attributes
            ``weight_h``, ``bias_h``, ``weight_out`` and ``bias_out``.
        x_train: Training inputs.
        y_train: Training targets.
        x_valid: Validation inputs.
        y_valid: Validation targets.
        num_epochs: Number of full passes over the training data.
        learning_rate: Step size applied to every gradient.

    Returns:
        Tuple ``(epoch_loss, epoch_train_acc, epoch_valid_acc)``: three lists
        with one entry per epoch, holding the training MSE, the training
        accuracy in percent and the validation accuracy in percent.
    """
    # One entry is appended to each list at the end of every epoch.
    epoch_loss = []
    epoch_train_acc = []
    epoch_valid_acc = []

    for e in range(num_epochs):

        # Iterate over minibatches of the training data passed in by the
        # caller (not over module-level globals).
        minibatch_gen = minibatch_generator(
            x_train, y_train, minibatch_size)

        for x_train_mini, y_train_mini in minibatch_gen:

            #### Compute outputs ####
            a_h, a_out = model.forward(x_train_mini)

            #### Compute gradients ####
            d_loss__d_w_out, d_loss__d_b_out, d_loss__d_w_h, d_loss__d_b_h = \
                model.backward(x_train_mini, a_h, a_out, y_train_mini)

            #### Update weights ####
            model.weight_h -= learning_rate * d_loss__d_w_h
            model.bias_h -= learning_rate * d_loss__d_b_h
            model.weight_out -= learning_rate * d_loss__d_w_out
            model.bias_out -= learning_rate * d_loss__d_b_out

        #### Epoch Logging ####
        train_mse, train_acc = compute_mse_and_acc(model, x_train, y_train)
        # Only the accuracy is reported for the validation set.
        _, valid_acc = compute_mse_and_acc(model, x_valid, y_valid)
        # Convert the accuracies to percentages for storage and display.
        train_acc, valid_acc = train_acc*100, valid_acc*100
        epoch_train_acc.append(train_acc)
        epoch_valid_acc.append(valid_acc)
        epoch_loss.append(train_mse)
        print(f'Epoch: {e+1:03d}/{num_epochs:03d} '
              f'| Train MSE: {train_mse:.2f} '
              f'| Train Acc: {train_acc:.2f}% '
              f'| Valid Acc: {valid_acc:.2f}%')

    return epoch_loss, epoch_train_acc, epoch_valid_acc