Design a Boolean classifier (outputs +1/-1) which is (approximately) accurate on the training set. This means that the total number of misclassifications that the classifier makes on the training set should be (near) zero. Use a classifier that is a neural network with one output and one layer of hidden units.

In [66]:
from os import error
import math
import numpy as np
from sklearn.preprocessing import normalize
# Load data
x = np.genfromtxt('./a2-train-data.txt', delimiter=' ', dtype=np.float128)
y = np.genfromtxt('./a2-train-label.txt', delimiter=' ', dtype=np.float128)

rows, cols = x.shape
x = x/np.max(x)

# Number of hidden units:
units = 500

# Xavier init bounds:
xg = 1/math.sqrt(cols+units)
xg2 = 1/math.sqrt(units+1)

# Draw weights randomly from Unif: (100 units in my hidden layer, 1000 weights for each unit)
w = np.zeros((units, cols))
for i in range(units):
  w[i] = np.random.uniform(-1*xg, xg, (cols)) # each row is different neuron weight
# Transpose so it's 1000 by units
w = w.reshape((cols, units))

wend = np.zeros((units))
for i in range(units):
  wend[i] = np.random.uniform(-1*xg2, xg2)

# Training response vector:
yhat = np.zeros((rows))

# Learning rate
c = 0.001

# Acc
acc = 0.95

print(x.shape, w.shape)

(900, 1000) (1000, 500)


Functions

In [67]:
def tanhP(x):
  return 1-np.tanh(x)**2
def loss(y, yhat):
  return (1/2)*(y-yhat)**2
def sign(y):
  return 1 if y > 0 else -1

Forward function (finish first before running back)

In [68]:
# Calculate yhat, forward pass
def forwpass(training, weights):
  return np.tanh(np.matmul(training, weights)) #also activated


Backwards function for gradient
update w like so <- w- learnRate * slope of error (dE(w)/dw)

In [69]:
def backprop(weights, finalweights, y, yhat, learningRate, inputs):
  dFinal = -1*(y-yhat)*(1-yhat**2)
  dHidden = dFinal* finalweights*(1-(forwpass(inputs, weights))**2) #500
  #forwp is a horizontal vector
  # weight updates:
  dHidden = dHidden.reshape((1, units))
  finalweights -= learningRate*dFinal* forwpass(inputs, weights)
  weights -= learningRate*np.dot(inputs[:,np.newaxis], dHidden)


Learning loop

In [70]:
while True:
  error = 0
  for i in range(rows):
    temp = forwpass(x[i], w)
    yhat[i] = forwpass(temp, wend)
    error += loss(y[i], yhat[i])
    backprop(w, wend, y[i], yhat[i], c, x[i])
  print("Error %:", error/rows)
  if error/rows < 1-acc:
    break

Error %: 0.49826270185299176995
Error %: 0.47215333341879836933
Error %: 0.44421517237077491272
Error %: 0.41192711498970721254
Error %: 0.37470908781644361355
Error %: 0.33405107735474178854
Error %: 0.2928780144574375839
Error %: 0.25414425110121557645
Error %: 0.21968234006649481181
Error %: 0.19002783018112655187
Error %: 0.16488994979195090065
Error %: 0.14365635497174237605
Error %: 0.12568373454600483994
Error %: 0.110408724949516818746
Error %: 0.09736899540649979087
Error %: 0.086192478840624911664
Error %: 0.076579963215015240814
Error %: 0.06828894518619510838
Error %: 0.061120244610845335236
Error %: 0.054907469373431773982
Error %: 0.04950930734819099615


In [71]:
for i in range(yhat.shape [0]):
  yhat[i] = sign(yhat[i])

misclassifications = 0
for i in range(y.shape [0]):
  misclassifications += 1 if y[i] != yhat[i] else 0

print("Misclassifications:", misclassifications)

Misclassifications: 1


Apply on unseen data

In [79]:
xtest = np.genfromtxt('/content/a2-test-data.txt', delimiter=' ')
ytest = np.genfromtxt('/content/a2-test-label.txt', delimiter=', ', deletechars = "[]")
# I can't gen from txt properly for the first and last elements
ytest[0] = 1
ytest[-1] = 1
# Apply:
xtest = xtest/np.max(x)
# use sign function and then compare misclassifications
temp3 = forwpass(xtest, w)
ytesthat = forwpass(temp3, wend)
for i in range(ytesthat.shape [0]):
  ytesthat[i] = sign(ytesthat[i])

misclassifications = 0
for i in range(ytest.shape [0]):
  misclassifications += 1 if ytest[i] != ytesthat[i] else 0

print("Misclassifications:", misclassifications)
print(ytesthat)
print(ytest)

Misclassifications: 22
[ 1. -1. -1.  1.  1.  1.  1. -1. -1. -1.  1.  1. -1.  1. -1.  1. -1. -1.
 -1. -1.  1. -1. -1.  1.  1.  1. -1. -1. -1. -1. -1. -1.  1. -1.  1. -1.
  1.  1.  1.  1. -1. -1.  1.  1.  1. -1.  1.  1.  1.  1.  1.  1.  1.  1.
 -1.  1.  1.  1.  1. -1.  1. -1.  1. -1. -1. -1. -1. -1. -1.  1.  1.  1.
  1. -1. -1.  1.  1. -1.  1.  1. -1. -1.  1. -1.  1.  1.  1. -1.  1.  1.
 -1. -1.  1.  1.  1. -1.  1. -1. -1.  1.]
[ 1.  1. -1.  1. -1.  1.  1. -1.  1. -1.  1.  1. -1.  1. -1.  1.  1. -1.
 -1.  1.  1. -1. -1.  1.  1. -1. -1. -1.  1. -1. -1. -1.  1. -1.  1.  1.
  1. -1.  1.  1. -1.  1. -1.  1.  1. -1.  1.  1.  1.  1. -1. -1.  1.  1.
 -1.  1.  1.  1. -1. -1.  1. -1. -1. -1. -1. -1.  1. -1.  1.  1.  1.  1.
  1. -1. -1. -1.  1. -1. -1.  1. -1. -1.  1.  1.  1. -1.  1. -1.  1.  1.
 -1. -1.  1. -1.  1. -1.  1. -1. -1.  1.]


In [84]:
# Write out to txt
#write 500
# write wend
wend = wend.reshape((units))
np.savetxt('wend.txt', wend, delimiter=' ')
# write w
w = w.reshape((units, cols))
np.savetxt('w.txt', w, delimiter=' ')

(500,)
