## NumPy implementation of a simple 2-layer neural network

The first-layer neurons are activated by ReLU, followed by a softmax in the next layer.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pt
import seaborn as sns
%matplotlib inline

In [None]:
# Load the training and test splits from their CSV files.
train = pd.read_csv('data/simplenn_train.csv')
test = pd.read_csv('data/simplenn_test.csv')

In [None]:
# Stack the training rows first, then the test rows, under a fresh 0..n-1 index
# (row-wise concatenation is pd.concat's default axis).
data = pd.concat([train, test], ignore_index=True)

#### Need to normalize the training data, as the scales of the features are different

In [None]:
from sklearn.preprocessing import StandardScaler

# Identifier and label columns that must not be fed to the network as features.
non_feature_cols = ["sample_no", "class_label", "class_label_binary"]

# `data` holds the training rows followed by the test rows, so its first
# len(train) rows are exactly the training split. Only those rows are used to
# fit the scaler and to build the training set: the test rows must stay unseen
# until evaluation, otherwise the reported test performance is meaningless.
# len(train) is the same whether `train` is still the loaded DataFrame or has
# already been rebound to the scaled array below, so this cell can be re-run.
train_rows = data.iloc[:len(train)]
data_prc = train_rows.drop(non_feature_cols, axis=1)

SS = StandardScaler()
SSModel = SS.fit(data_prc)  # per-feature mean/std learned from training rows only
sc_data = SSModel.transform(data_prc)
train = sc_data  # scaled training features (numpy array)
target = train_rows["class_label"]  # labels aligned row-for-row with `train`

In [None]:
#batch size = 1
def batch_data(X, Y):
    """Yield the samples of X one at a time together with their labels.

    Args:
        X: 2-D array of samples, one row per sample.
        Y: labels for the rows of X (pandas Series, list or array); the
            labels are shifted down by one before being yielded.

    Yields:
        (x, y): x is row i of X copied into a column vector of shape
        (n_features, 1); y is Y[i] - 1.
    """
    # Series.as_matrix() was removed in pandas 1.0; np.asarray gives the same
    # underlying values and also accepts plain lists and arrays.
    y = np.asarray(Y) - 1
    for i in range(X.shape[0]):
        # reshape(-1, 1) infers the feature count instead of hard-coding 178.
        yield np.array(X[i]).reshape(-1, 1), y[i]

In [None]:
# Network dimensions and optimizer step size.
input_size = 178  # length of each input column vector
hidden_size = 100  # number of neurons in the hidden (relu) layer
output_size = 5  # number of softmax outputs
learning_rate = 1e-1

# Trainable parameters: small random weights, zero biases.
W = 0.1 * np.random.randn(hidden_size, input_size)  # input -> hidden weights
b = np.zeros((hidden_size, 1))  # hidden bias
Wy = 0.1 * np.random.randn(output_size, hidden_size)  # hidden -> output weights
by = np.zeros((output_size, 1))  # output bias

def lossFunc(x, y):
    """Run one forward and backward pass of the 2-layer network on one sample.

    Reads the module-level parameters W, b (input -> hidden) and Wy, by
    (hidden -> output); none of them is modified here.

    Args:
        x: input column vector of shape (input_size, 1).
        y: integer index of the true class (row of the output vector).

    Returns:
        (loss, dWy, dby, dW, db): the cross-entropy loss as a length-1 array,
        followed by the gradients of the loss with respect to Wy, by, W and b,
        each clipped element-wise to [-5, 5].
    """
    # forward pass
    h = np.dot(W, x) + b
    ah = np.maximum(h, 0)  # relu
    logits = np.dot(Wy, ah) + by
    # Subtract the largest logit before exponentiating: softmax is invariant
    # to this shift, and it keeps np.exp from overflowing to inf (which would
    # turn the loss and every gradient into NaN).
    shifted = logits - np.max(logits)
    log_softmax = shifted - np.log(np.sum(np.exp(shifted)))
    softmax = np.exp(log_softmax)
    # Taking the loss from the log-probabilities avoids log(0) when the true
    # class probability underflows.
    loss = -log_softmax[y]

    # backward pass
    dy = np.copy(softmax)
    dy[y] -= 1  # gradient of cross-entropy w.r.t. the logits
    dWy = np.dot(dy, ah.T)
    dby = dy
    dah = np.dot(Wy.T, dy)
    dh = dah * (h > 0)  # relu passes gradient only where it was active
    dW = np.dot(dh, x.T)
    db = dh
    for dparam in (dWy, dby, dW, db):
        np.clip(dparam, -5, 5, out=dparam)  # guard against exploding gradients
    return loss, dWy, dby, dW, db

# Running (exponentially smoothed) loss, started at the loss of a uniform guess.
smooth_loss = -np.log(1.0/output_size)
# Adagrad memory: running sum of squared gradients, one buffer per parameter.
mWy, mby, mW, mb = (np.zeros_like(p) for p in (Wy, by, W, b))
n = 0  # number of single-sample updates performed so far
for epoch in range(20):
    for x, y in batch_data(train, target):
        loss, dWy, dby, dW, db = lossFunc(x, y)
        smooth_loss = 0.999 * smooth_loss + 0.001 * loss
        if not n % 100:
            print("smooth loss {}".format(smooth_loss)) # print progress
        # Adagrad step: scale each gradient by the root of its accumulated
        # squared history, updating parameters and memories in place.
        params = (Wy, by, W, b)
        grads = (dWy, dby, dW, db)
        memories = (mWy, mby, mW, mb)
        for param, dparam, mem in zip(params, grads, memories):
            mem += np.square(dparam)
            param -= learning_rate * dparam / np.sqrt(mem + 1e-8)
        n += 1

In [None]:
# Keep only the feature columns of the test split (drop the id and label columns).
test_proc = test.drop(columns=["sample_no","class_label","class_label_binary"])

In [None]:
# Scale the test features with the already-fitted scaler (SSModel) so they are
# on the same scale as the inputs the network was trained on.
sc_test = SSModel.transform(test_proc)

In [None]:
# Display the shape of the scaled test matrix as a quick sanity check.
sc_test.shape

In [None]:
# Forward-only pass over the test samples, collecting the zero-based true
# labels and the index of the largest output logit as the prediction.
y_true, y_pred = [], []
for sample, label in batch_data(sc_test, test["class_label"]):
    hidden = np.maximum(W.dot(sample) + b, 0)  # relu hidden activations
    scores = Wy.dot(hidden) + by
    # argmax of the logits equals argmax of the softmax, so softmax is skipped
    y_true.append(label)
    y_pred.append(np.argmax(scores))

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the zero-based true labels against the network's
# argmax predictions collected above.
confusion_matrix(y_true, y_pred)