In [None]:
import matplotlib.pyplot as plt
import numpy as np
from dispkernel import dispKernel

# Default figure size (width, height in inches) for every plot in this notebook.
plt.rcParams.update({'figure.figsize': (12, 5)})

In [None]:
# Read the four comma-separated files (training/validation inputs and labels)
# from the working directory, in the same order as the names on the left.
traindata, trainlabel, validdata, validlabel = [
    np.loadtxt(csv_path, delimiter=',')
    for csv_path in ('traindata.csv', 'trainlabel.csv', 'validdata.csv', 'validlabel.csv')
]

In [None]:
# Activation and Loss Functions
def linear(X, with_grad=True):
    """Identity activation.

    Returns ``(X, dZ)``: the input unchanged, plus its derivative (an array
    of ones shaped like ``X``) when ``with_grad`` is true, otherwise ``None``.
    """
    derivative = np.ones_like(X) if with_grad else None
    return X, derivative

def sigmoid(X, with_grad=True):
    """Logistic sigmoid activation, evaluated in a numerically stable way.

    Args:
        X: pre-activation value(s); scalar or array-like.
        with_grad: when true, also return the elementwise derivative.

    Returns:
        ``(Z, dZ)`` where ``Z = 1 / (1 + exp(-X))`` and ``dZ`` is the
        derivative of ``Z`` with respect to ``X`` (``None`` when
        ``with_grad`` is false).
    """
    X = np.asarray(X)
    # exp(-|X|) lies in (0, 1], so it cannot overflow. The naive exp(-X)
    # overflows to inf for large negative X and turns the gradient into
    # inf / inf = nan.
    E = np.exp(-np.abs(X))
    # Two algebraically equal forms of the sigmoid, chosen by the sign of X.
    Z = np.where(X >= 0, 1 / (1 + E), E / (1 + E))
    if with_grad:
        # The derivative e^{-x} / (1 + e^{-x})^2 is an even function of x,
        # so it can be evaluated with |x|.
        dZ = E / (1 + E) ** 2
        return Z, dZ
    return Z, None

def ReLU(X, with_grad=True):
    """Rectified linear unit activation.

    Args:
        X: pre-activation value(s) as a NumPy array or scalar.
        with_grad: when true, also return the elementwise derivative.

    Returns:
        ``(Z, dZ)`` where ``Z`` is ``X`` with negative entries replaced by 0
        and ``dZ`` is 1 where ``X > 0`` and 0 elsewhere (``None`` when
        ``with_grad`` is false).
    """
    # np.maximum is the elementwise maximum; np.max(X, 0) would instead
    # reduce X along axis 0 and discard the per-sample outputs.
    Z = np.maximum(X, 0)
    if with_grad:
        dZ = np.where(X > 0, 1, 0)
        return Z, dZ
    return Z, None

def mse_loss(Z, label, with_grad=True):
    """Squared-error loss between predictions ``Z`` and targets ``label``.

    Note that the squared errors are summed over all elements, not averaged.

    Returns ``(L, dLdZ)``: the scalar loss and, when ``with_grad`` is true,
    its elementwise gradient ``2 * (Z - label)``; otherwise ``None``.
    """
    residual = Z - label
    loss = np.sum(residual ** 2)
    gradient = 2 * residual if with_grad else None
    return loss, gradient

In [None]:
# Single Neuron Classifier, with selectable activation function
class Model:
    """Single neuron computing ``Z = activation(weights . X + bias)``.

    Args:
        activation: callable ``activation(Y, with_grad)`` returning
            ``(Z, dZ)``, where ``dZ`` is the derivative of ``Z`` with
            respect to ``Y`` (or ``None`` when ``with_grad`` is false).
        n_inputs: number of input features, i.e. the length of the weight
            vector. Defaults to 9.
    """

    def __init__(self, activation, n_inputs=9):
        self.activation = activation
        # Weights are drawn before the bias so that a given NumPy seed
        # always yields the same initial parameters.
        self.weights = np.random.random_sample(n_inputs)
        self.bias = np.random.random_sample()

    def forward(self, X, with_grad=True):
        """Evaluate the neuron on ``X``.

        Args:
            X: inputs whose first axis has length ``n_inputs``; a 2-D array
                holds one sample per column.
            with_grad: when true, also return the output's derivatives with
                respect to the weights and the bias.

        Returns:
            ``(Z, dZdW, dZdb)``; the two derivatives are ``None`` when
            ``with_grad`` is false.
        """
        Y = self.weights.dot(X) + self.bias
        Z, dZ = self.activation(Y, with_grad)
        if with_grad:
            # Chain rule: dY/dW = X and dY/db = 1.
            dYdW = X
            dYdb = 1
            dZdW = dZ * dYdW
            dZdb = dZ * dYdb
            return Z, dZdW, dZdb
        return Z, None, None

In [None]:
def train(traindata, trainlabel, validdata, validlabel, activation, lr, epochs, debug=False):
    """Fit a single-neuron ``Model`` with full-batch gradient descent.

    Args:
        traindata: training inputs, one sample per row (transposed before
            the forward pass).
        trainlabel: target value for each training sample.
        validdata: validation inputs, laid out like ``traindata``.
        validlabel: target value for each validation sample.
        activation: activation function handed to ``Model``.
        lr: learning rate applied to the gradients summed over all samples.
        epochs: number of full passes over the training set.
        debug: when true, print the metrics after every epoch.

    Returns:
        ``(neuron, train_loss, train_acc, val_loss, val_acc)``: the trained
        model and four lists holding one value per epoch.

    Accuracy counts an output ``>= 0.5`` as class 1. Training metrics are
    measured before the epoch's parameter update, validation metrics after.
    """
    neuron = Model(activation)
    train_loss = []
    train_acc = []
    val_loss = []
    val_acc = []

    for i in range(1, epochs + 1):
        Z, dZdW, dZdb = neuron.forward(traindata.T, with_grad=True)
        L, dLdZ = mse_loss(Z, trainlabel, with_grad=True)
        train_loss.append(L)
        train_acc.append(((Z >= 0.5) == trainlabel).sum() / trainlabel.size)

        # Chain rule, then sum the per-sample gradients over the batch.
        dLdW = np.sum(dLdZ * dZdW, axis=1)
        dLdb = np.sum(dLdZ * dZdb, axis=0)

        neuron.weights -= dLdW * lr
        neuron.bias -= dLdb * lr

        val_out, _, _ = neuron.forward(validdata.T, with_grad=False)
        val_loss.append(mse_loss(val_out, validlabel, with_grad=False)[0])
        val_acc.append(((val_out >= 0.5) == validlabel).sum() / validlabel.size)

        if debug:
            # '.2f' fixes the precision at two decimals; the previous '2f'
            # only set a minimum field width and printed six decimals.
            print('epoch {}, train loss {:.2f} acc {:.2f} validation loss {:.2f} acc {:.2f}'.format(
                i, L, train_acc[-1], val_loss[-1], val_acc[-1]))

    return neuron, train_loss, train_acc, val_loss, val_acc

def plot_history(hyperparameters, train_loss, train_acc, val_loss, val_acc):
    """Plot the loss and accuracy curves of one run side by side.

    ``hyperparameters`` must provide the keys ``'activation'`` (a function,
    whose ``__name__`` is shown), ``'epochs'`` and ``'lr'``; they are used
    for the figure title. The remaining arguments are per-epoch sequences.
    """
    title = 'Single Neuron Classifier, {} activation function, {} epochs, learning rate = {}'.format(
        hyperparameters['activation'].__name__,
        hyperparameters['epochs'],
        hyperparameters['lr'],
    )
    plt.suptitle(title)

    # (y-axis label, training curve, validation curve) for each panel, left to right.
    panels = (
        ('loss', train_loss, val_loss),
        ('accuracy', train_acc, val_acc),
    )
    for position, (metric, train_curve, val_curve) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.plot(train_curve, label='training')
        plt.plot(val_curve, label='validation')
        plt.legend()
    plt.show()

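Each of the hyperparameter cells here configures the training loop for one of the scenarios described in the assignment. Every cell overwrites `hyperparameters`, so run only the cell for the scenario you want before running the training cell.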

In [None]:
# Scenario: learning rate too low
hyperparameters = dict(
    activation=linear,
    lr=1e-7,
    epochs=1000,
    seed=0,
)

In [None]:
# Scenario: learning rate too high
hyperparameters = dict(
    activation=linear,
    lr=0.002,
    epochs=100,
    seed=0,
)

In [None]:
# Scenario: well-chosen learning rate
hyperparameters = dict(
    activation=linear,
    lr=0.001,
    epochs=100,
    seed=0,
)

In [None]:
# Scenario: linear activation
hyperparameters = dict(
    activation=linear,
    lr=0.0005,
    epochs=100,
    seed=0,
)

In [None]:
# Scenario: ReLU activation
hyperparameters = dict(
    activation=ReLU,
    lr=0.0001,
    epochs=100,
    seed=0,
)

In [None]:
# Scenario: sigmoid activation
hyperparameters = dict(
    activation=sigmoid,
    lr=0.005,
    epochs=100,
    seed=0,
)

In [None]:
# Epoch-count experiment (5, 10, 25, 100); set 'epochs' to the value under test
hyperparameters = dict(
    activation=linear,
    lr=0.001,
    epochs=5,
    seed=0,
)

In [None]:
# Learning-rate experiment (0.05, 0.005, 0.001, 0.0005, 0.00005, 0.000005);
# set 'lr' to the value under test
hyperparameters = dict(
    activation=linear,
    lr=0.05,
    epochs=25,
    seed=0,
)

In [None]:
# Activation-function experiment; set 'activation' to the function under test
hyperparameters = dict(
    activation=linear,
    lr=0.001,
    epochs=100,
    seed=0,
)

In [None]:
# Random-seed experiment; set 'seed' to the value under test
hyperparameters = dict(
    activation=linear,
    lr=0.001,
    epochs=25,
    seed=2,
)

In [None]:
# Best configuration found
hyperparameters = dict(
    activation=sigmoid,
    lr=0.1,
    epochs=10,
    seed=0,
)

This cell runs the training loop with the defined hyperparameters, and plots the training curves.

In [None]:
# Seed NumPy's global RNG first: train() builds a Model whose initial weights
# and bias are drawn from it.
np.random.seed(hyperparameters['seed'])
neuron, train_loss, train_acc, val_loss, val_acc = train(
    traindata,
    trainlabel,
    validdata,
    validlabel,
    activation=hyperparameters['activation'],
    lr=hyperparameters['lr'],
    epochs=hyperparameters['epochs'],
)
plot_history(hyperparameters, train_loss, train_acc, val_loss, val_acc)
# Training and validation accuracy recorded for the last epoch.
print(train_acc[-1], val_acc[-1])

In [None]:
# Visualise the learned weights with the external dispKernel helper.
# NOTE(review): presumably draws the 9 weights as a 3x3 grid; confirm against dispkernel.
dispKernel(neuron.weights, 3, 3)

In [None]:
# Print the trained neuron's weight vector followed by its bias.
print(neuron.weights, neuron.bias)