In [614]:
import numpy as np
import time
import pandas as pd
import matplotlib.pyplot as plt

In [615]:
# Training Data
x_train = np.array(pd.read_csv('training_set.csv', header=None).values)
y_train = np.array(pd.read_csv('training_labels_bin.csv', header=None).values)
x_val = np.array(pd.read_csv('validation_set.csv', header=None).values)
y_val = np.array(pd.read_csv('validation_labels_bin.csv', header=None).values)

In [616]:
N = len(x_train)
print(N)
print(len(y_train))
print(len(x_val))
print(len(y_val))

8442
8442
1048
1048


In [617]:
num_feats = x_train.shape[1]
n_out = y_train.shape[1]
print(num_feats)
print(n_out)

354
3


In [618]:
# add room for bias
bias_train = np.ones((x_train.shape[0], 1))
bias_val = np.ones((x_val.shape[0], 1))
x_train = np.append(x_train, bias_train, axis = 1)
x_val = np.append(x_val, bias_val, axis = 1)

In [619]:
# initialize the hidden layers
W1 = np.random.uniform(-1, 1, num_feats*num_feats).reshape((num_feats, num_feats))
W2 = np.random.uniform(-1, 1, num_feats*3).reshape((num_feats, 3))

In [620]:
def sigmoid(x):
  return 1/(1+np.exp(-x))

def grad_sigmoid(x):
  return sigmoid(x)*(1-sigmoid(x))

def squared_error(guess, correct):
  return sum((guess-correct)**2)

def grad_squared_error(guess, correct):
  grad_vec = np.zeros(len(guess))
  for j in range(len(guess)):
    grad_vec[j] = 2*(guess[j]-correct[j])
  return np.reshape(grad_vec, (1, len(grad_vec)))

# fully connected, 2 hidden layers, vector of size 3 output
# we assume an input of 1x355 with the last entry a 1
class Computation_Graph:
  def __init__(self, w1, w2):
    # put the bias into the first weight matrix so the addition is a simply result of matrix multiplication
    # initialize a bias
    bias = np.random.uniform(-1, 1, 354)
    self.w1 = np.append(w1, np.array([bias]), axis = 0)
    self.w2 = w2
    self.a1 = np.zeros((1, 354))
    self.h1 = np.zeros((1, 354))
    self.a2 = np.zeros((1, 3))
    self.h2 = np.zeros((1, 3))
    return
  
  def forward_pass(self, src, sink):
    src = np.reshape(src, (1, 355))
    sink = np.reshape(sink, (1, 3))
    self.a1 = np.matmul(src, self.w1)
    self.h1 = sigmoid(self.a1)
    self.a2 = np.matmul(self.h1, self.w2)
    self.h2 = sigmoid(self.a2)
    self.J = squared_error(self.h2[0], sink[0])

  def backprop(self, src, sink, lr):
    sink = np.reshape(sink, (1, 3))
    src = np.reshape(src, (1, 355))
    g = grad_squared_error(self.h2[0], sink[0])
    g = g*grad_sigmoid(self.a2)
    grad_w2 = np.matmul(self.h1.T, g)
    g = np.matmul(g, self.w2.T)
    g = g*grad_sigmoid(self.a1)
    grad_w1 = np.matmul(src.T, g)
    g = np.matmul(g, self.w1.T)

    self.w1 -= lr*grad_w1
    self.w2 -= lr*grad_w2

In [621]:
# hyperparameters (you may change these)
eta = 0.1 # intial learning rate
gamma = 0.1 # multiplier for the learning rate
stepsize = 200 # epochs before changing learning rate
threshold = 0.08 # stopping criterion
test_interval = 10 # number of epoch before validating
max_epoch = 300

In [622]:
cg = Computation_Graph(W1, W2)

for epoch in range(0, max_epoch):
    
    order = np.random.permutation(N) # shuffle data
    
    sse = 0
    for n in range(0, N):
        idx = order[n]

        # get a sample (batch size=1)
        x_in = x_train[idx]
        y = y_train[idx]

        cg.forward_pass(x_in, y)
        cg.backprop(x_in, y, eta)
    
        sse += cg.J

    train_mse = sse/len(x_train)
    print(train_mse)

0.6655351445206026
0.30058756578536894
0.23410873277580604
0.1957214358657516
0.17526389214508992
0.16114179303090012
0.14788945272006745
0.13931013343580503
0.1293305642631005
0.1224735646750579
0.11294764317114678
0.11052969199514404


KeyboardInterrupt: ignored

In [623]:
cg.forward_pass(x_val[0], y_val[0])

In [625]:
cg.h2

array([[0.10244769, 0.99541194, 0.99609892]])

In [626]:
y_val[0]

array([0, 1, 1])

In [630]:
f = np.array([[2,1,2], [1,2], [1,2,2,3,3,2]])

  """Entry point for launching an IPython kernel.


In [633]:
f[2]

[1, 2, 2, 3, 3, 2]

In [None]:
if epoch % test_interval == 0: 
        # [ ] test on validation set here

        # if termination condition is satisfied, exit
        if val_mse < threshold:
            break

    if epoch % stepsize == 0 and epoch != 0:
        eta = eta*gamma
        print('Changed learning rate to lr=' + str(eta))