## NN by Hand

Implement a simple feedforward neural network with backpropagation and gradient descent.

In [None]:
import numpy as np

In [None]:
from sklearn.datasets import load_iris

In [None]:
def BuildDataset():
  """Build shuffled train/test splits from iris targets 0 and 1.

  Only the first two feature columns are used.  Each column is
  standardized (zero mean, unit standard deviation) over every sample
  returned by load_iris() before the rows with target 0 and target 1
  are selected.

  Returns:
    Tuple (xtrn, ytrn, xtst, ytst) of feature and 0/1 label arrays; the
    training and test splits are shuffled independently.
  """
  iris = load_iris()
  features = iris["data"][:, :2]
  target = iris["target"]

  # Standardize per column, then stack the target-0 rows above the
  # target-1 rows
  centered = features - features.mean(axis=0)
  features = centered / features.std(axis=0)
  stacked = np.vstack((features[target == 0], features[target == 1]))

  # NOTE(review): the fixed slices and label counts below assume 50 rows
  # per class (35 train + 15 test each) — confirm against the dataset
  train_x = np.vstack((stacked[:35], stacked[50:85]))
  train_y = np.array([0] * 35 + [1] * 35)
  test_x = np.vstack((stacked[35:50], stacked[85:]))
  test_y = np.array([0] * 15 + [1] * 15)

  # Shuffle each split: argsort of uniform draws yields a random ordering
  order = np.argsort(np.random.random(70))
  train_x, train_y = train_x[order], train_y[order]
  order = np.argsort(np.random.random(30))
  test_x, test_y = test_x[order], test_y[order]

  return train_x, train_y, test_x, test_y

In [None]:
# Build the shuffled train/test splits once for interactive inspection
xtrn, ytrn, xtst, ytst = BuildDataset()

In [None]:
# Display the training feature matrix
xtrn

array([[-0.53717756,  0.78880759],
       [-1.02184904,  0.78880759],
       [-1.26418478, -0.13197948],
       [-0.17367395, -0.59237301],
       [-0.53717756, -0.13197948],
       [-1.14301691,  0.09821729],
       [-0.90068117,  1.01900435],
       [-1.02184904,  0.78880759],
       [-1.26418478,  0.09821729],
       [-0.41600969, -1.74335684],
       [-0.53717756,  1.47939788],
       [-0.17367395,  3.09077525],
       [-0.41600969, -1.51316008],
       [ 0.67450115,  0.32841405],
       [ 0.91683689, -0.13197948],
       [-0.7795133 , -0.82256978],
       [ 1.15917263, -0.59237301],
       [ 0.31099753, -0.59237301],
       [-1.02184904, -0.13197948],
       [-0.90068117,  1.01900435],
       [-0.05250608, -0.82256978],
       [-0.41600969,  2.63038172],
       [-0.53717756,  1.93979142],
       [-0.29484182, -0.13197948],
       [-1.87002413, -0.13197948],
       [ 0.18982966, -0.82256978],
       [-0.90068117,  1.70959465],
       [ 0.31099753, -0.36217625],
       [ 0.4321654 ,

In [None]:
# Peek at the first five training samples
xtrn[0:5]

array([[-0.53717756,  0.78880759],
       [-1.02184904,  0.78880759],
       [-1.26418478, -0.13197948],
       [-0.17367395, -0.59237301],
       [-0.53717756, -0.13197948]])

In [None]:
# Sigmoid
def sigmoid(x):
  """Return the logistic function 1 / (1 + e^-x), applied element-wise."""
  denominator = 1.0 + np.exp(-x)
  return 1.0 / denominator

In [None]:
# Display the training feature matrix
xtrn

array([[-0.53717756,  0.78880759],
       [-1.02184904,  0.78880759],
       [-1.26418478, -0.13197948],
       [-0.17367395, -0.59237301],
       [-0.53717756, -0.13197948],
       [-1.14301691,  0.09821729],
       [-0.90068117,  1.01900435],
       [-1.02184904,  0.78880759],
       [-1.26418478,  0.09821729],
       [-0.41600969, -1.74335684],
       [-0.53717756,  1.47939788],
       [-0.17367395,  3.09077525],
       [-0.41600969, -1.51316008],
       [ 0.67450115,  0.32841405],
       [ 0.91683689, -0.13197948],
       [-0.7795133 , -0.82256978],
       [ 1.15917263, -0.59237301],
       [ 0.31099753, -0.59237301],
       [-1.02184904, -0.13197948],
       [-0.90068117,  1.01900435],
       [-0.05250608, -0.82256978],
       [-0.41600969,  2.63038172],
       [-0.53717756,  1.93979142],
       [-0.29484182, -0.13197948],
       [-1.87002413, -0.13197948],
       [ 0.18982966, -0.82256978],
       [-0.90068117,  1.70959465],
       [ 0.31099753, -0.36217625],
       [ 0.4321654 ,

In [None]:
# Number of training samples (rows of xtrn)
xtrn.shape[0]

70

In [None]:
# Forward
def Forward(net, x):
  """Pass every sample in x through the network and collect the outputs.

  Args:
    net: dict holding the weights "w0".."w5" and the biases "b0".."b2".
    x: 2-D array; columns 0 and 1 of each row are the two input features.

  Returns:
    1-D array with one output value per row of x.  The output unit is a
    plain weighted sum of the two sigmoid hidden units (no squashing).
  """
  n_samples = x.shape[0]
  out = np.zeros(n_samples)

  for k in range(n_samples):
    f0, f1 = x[k, 0], x[k, 1]

    # Hidden layer: two sigmoid units
    h0 = sigmoid(net["w0"] * f0 + net["w2"] * f1 + net["b0"])
    h1 = sigmoid(net["w1"] * f0 + net["w3"] * f1 + net["b1"])

    # Output unit
    out[k] = net["w4"] * h0 + net["w5"] * h1 + net["b2"]

  return out

In [None]:
# Evaluate
def Evaluate(net, x, y):
  """Score the network on labelled samples with a 0.5 decision threshold.

  Args:
    net: network parameter dict accepted by Forward().
    x: feature array passed to Forward().
    y: sequence of true labels, one per sample.

  Returns:
    Tuple (tn, fp, fn, tp, pred): the four confusion counts followed by
    the list of predicted classes (0 or 1) in sample order.
  """
  scores = Forward(net, x)
  tn = fp = fn = tp = 0
  pred = []

  for i, label in enumerate(y):
    if scores[i] < 0.5:
      # Predicted class 0
      pred.append(0)
      if label == 0:
        tn += 1
      elif label == 1:
        fn += 1
      else:
        tp += 1
    else:
      # Predicted class 1
      pred.append(1)
      if label == 0:
        fp += 1
      else:
        tp += 1

  return tn, fp, fn, tp, pred

In [None]:
# Gradient Descent
def GradientDescent(net, x, y, epochs, eta):
  """Train the network in place with full-batch gradient descent.

  Each epoch runs a forward and backward pass for every sample,
  accumulates the gradient of the squared error 0.5 * (a2 - y)^2 for
  each parameter, and then moves every parameter by eta times the mean
  gradient.

  Args:
    net: dict of weights "w0".."w5" and biases "b0".."b2"; updated in place.
    x: 2-D feature array; columns 0 and 1 are the two inputs.
    y: target values, one per row of x.
    epochs: number of passes over the training set.
    eta: learning rate.

  Returns:
    The same net dict, holding the trained parameters.
  """
  param_names = ("b2", "w4", "w5", "b1", "w1", "w3", "b0", "w0", "w2")

  for _ in range(epochs):
    # Gradient accumulators, reset at the start of every epoch
    grads = dict.fromkeys(param_names, 0.0)

    for k in range(len(y)):
      x0, x1 = x[k, 0], x[k, 1]

      # Forward pass
      a0 = sigmoid(net["w0"] * x0 + net["w2"] * x1 + net["b0"])
      a1 = sigmoid(net["w1"] * x0 + net["w3"] * x1 + net["b1"])
      a2 = net["w4"] * a0 + net["w5"] * a1 + net["b2"]

      # Backward pass: output error, then the error reaching each
      # hidden unit through its output weight and sigmoid derivative
      err = a2 - y[k]
      back1 = err * net["w5"] * a1 * (1 - a1)
      back0 = err * net["w4"] * a0 * (1 - a0)

      grads["b2"] += err
      grads["w4"] += err * a0
      grads["w5"] += err * a1
      grads["b1"] += back1
      grads["w1"] += back1 * x0
      grads["w3"] += back1 * x1
      grads["b0"] += back0
      grads["w0"] += back0 * x0
      grads["w2"] += back0 * x1

    # Step every parameter by the mean gradient over the training set
    m = len(y)
    for name in param_names:
      net[name] = net[name] - eta * grads[name] / m

  # Training done, return the updated network
  return net


In [None]:
# main
def main():
  """Train the hand-built network on the iris subset and report results.

  Prints the test-set confusion counts measured before and after
  training.
  """
  epochs = 1000  # training epochs
  eta = 0.01     # learning rate

  xtrn, ytrn, xtst, ytst = BuildDataset()

  # Biases start at zero; weights start as tiny random values
  net = {"b2": 0.0, "b1": 0.0, "b0": 0.0}
  for name in ("w5", "w4", "w3", "w2", "w1", "w0"):
    net[name] = 0.0001 * (np.random.random() - 0.5)

  # Confusion counts on the test set before and after training
  before = Evaluate(net, xtst, ytst)[:4]
  net = GradientDescent(net, xtrn, ytrn, epochs, eta)
  after = Evaluate(net, xtst, ytst)[:4]

  print()
  print("Train for %d epochs, learning rate %0.5f" % (epochs, eta))
  print()
  for heading, (tn, fp, fn, tp) in (("Before training:", before),
                                    ("After training:", after)):
    print(heading)
    print("   TN:%3d  FP:%3d" % (tn, fp))
    print("   FN:%3d  TP:%3d" % (fn, tp))
    print()


In [None]:
# Train and evaluate the hand-written network; prints the test-set
# confusion counts before and after training
main()


Train for 1000 epochs, learning rate 0.01000

Before training:
   TN: 15  FP:  0
   FN: 15  TP:  0

After training:
   TN: 12  FP:  3
   FN:  0  TP: 15



## Neural Network

In [None]:
import numpy as np

In [None]:
# Activation Function and Derivative
def sigmoid(x):
  """Logistic activation 1 / (1 + e^-x), computed element-wise."""
  exp_neg = np.exp(-x)
  return 1.0 / (1.0 + exp_neg)

In [None]:
def sigmoid_prime(x):
  """Derivative of the sigmoid at x: sigmoid(x) * (1 - sigmoid(x))."""
  s = sigmoid(x)
  return s * (1.0 - s)

In [None]:
# Loss Function and Derivative
def mse(y_true, y_pred):
  """Return the mean over all elements of half the squared error."""
  diff = y_true - y_pred
  half_squared = 0.5 * diff**2
  return half_squared.mean()

In [None]:
def mse_prime(y_true, y_pred):
  """Return the element-wise error y_pred - y_true.

  This is the derivative of 0.5 * (y_true - y_pred)**2 with respect to
  y_pred; it is not divided by the number of elements.
  """
  gradient = y_pred - y_true
  return gradient

In [None]:
# Activation Layer

class ActivationLayer:
  """Layer that applies the sigmoid activation; it has no parameters."""

  def forward(self, input_data):
    """Remember the input for the backward pass and return sigmoid(input)."""
    self.input = input_data
    activated = sigmoid(input_data)
    return activated

  def backward(self, output_error):
    """Scale the incoming error by the sigmoid derivative at the stored input."""
    slope = sigmoid_prime(self.input)
    return slope * output_error

  def step(self, eta):
    """Do nothing: there are no weights or biases to update."""
    return None


In [None]:
# Fully Connected Layer

class FullyConnectedLayer():
  """Fully connected layer that accumulates gradients over a minibatch.

  forward() computes input . weights + bias.  Every backward() call adds
  that sample's weight and bias gradients to running totals, and step()
  applies the mean of those totals and resets them.
  """

  def __init__(self, input_size, output_size):
    """Create a layer mapping input_size features to output_size outputs.

    Args:
      input_size: number of input features.
      output_size: number of output units.
    """
    # for accumulating error over a minibatch
    self.delta_w = np.zeros((input_size, output_size))
    self.delta_b = np.zeros((1, output_size))
    self.passes = 0

    # initialize the weights and biases uniformly in [-0.5, 0.5)
    self.weights = np.random.rand(input_size, output_size) - 0.5
    self.bias = np.random.rand(1, output_size) - 0.5

  def forward(self, input_data):
    """Store the input for backward() and return input . weights + bias."""
    self.input = input_data
    return np.dot(self.input, self.weights) + self.bias

  def backward(self, output_error):
    """Accumulate this sample's gradients and return the error for the inputs.

    Args:
      output_error: error with respect to this layer's output.

    Returns:
      The error with respect to this layer's input, computed with the
      current (not yet updated) weights.
    """
    input_error = np.dot(output_error, self.weights.T)

    # accumulate the error over the minibatch
    self.delta_w += np.dot(self.input.T, output_error)
    self.delta_b += output_error
    self.passes += 1
    return input_error

  def step(self, eta):
    """Apply the mean accumulated gradient, then reset the accumulators.

    Args:
      eta: learning rate.
    """
    # Nothing accumulated since the last step: dividing by zero passes
    # would overwrite the weights and biases with NaN, so leave them as is.
    if self.passes == 0:
      return

    # Update the weights and biases by the mean error
    # over the minibatch
    self.weights -= eta * self.delta_w / self.passes
    self.bias -= eta * self.delta_b / self.passes

    # reset for the next batch
    self.delta_w = np.zeros(self.weights.shape)
    self.delta_b = np.zeros(self.bias.shape)
    self.passes = 0

In [None]:
# Network

class Network:
  """Sequential stack of layers trained by minibatch gradient descent.

  Each layer must provide forward(input), backward(error) and step(eta).
  """

  def __init__(self, verbose=True):
    """Create an empty network.

    Args:
      verbose: when true, fit() prints the mean minibatch loss every
        10th minibatch.
    """
    self.verbose = verbose
    self.layers = []

  def add(self, layer):
    """Append a layer to the end of the network."""
    self.layers.append(layer)

  def predict(self, input_data):
    """Run every sample through all layers.

    Args:
      input_data: samples, iterated along the first axis.

    Returns:
      List with the final layer's output for each sample, in order.
    """
    result = []
    for sample in input_data:
      output = sample
      for layer in self.layers:
        output = layer.forward(output)
      result.append(output)
    return result

  def fit(self, x_train, y_train, minibatches, learning_rate, batch_size=64):
    """Train the network on randomly drawn minibatches.

    Args:
      x_train: training samples, indexed along the first axis.
      y_train: training targets, aligned with x_train.
      minibatches: number of minibatches (parameter updates) to run.
      learning_rate: step size handed to every layer's step().
      batch_size: samples per minibatch; capped at the number of
        training samples.

    Raises:
      ValueError: if x_train holds no samples or batch_size is below 1.
    """
    n_samples = x_train.shape[0]
    if n_samples == 0:
      raise ValueError("x_train must contain at least one sample")
    if batch_size < 1:
      raise ValueError("batch_size must be at least 1")

    # A batch cannot hold more samples than exist; without this cap the
    # per-sample loop would index past the end of the selected batch.
    batch_size = min(batch_size, n_samples)

    for i in range(minibatches):
      err = 0

      # select a random minibatch (without replacement)
      idx = np.argsort(np.random.random(n_samples))[:batch_size]
      x_batch = x_train[idx]
      y_batch = y_train[idx]

      for sample, target in zip(x_batch, y_batch):
        # forward propagation
        output = sample
        for layer in self.layers:
          output = layer.forward(output)

        # accumulate loss
        err += mse(target, output)

        # backward propagation
        error = mse_prime(target, output)
        for layer in reversed(self.layers):
          error = layer.backward(error)

      # Update weights and biases
      for layer in self.layers:
        layer.step(learning_rate)

      # report mean loss over minibatch
      if (self.verbose) and ((i%10) == 0):
        err /= batch_size
        print('minibatch %5d/%d  error=%0.9f' % (i, minibatches, err))

## Application on MNIST Data Set

In [None]:
import numpy as  np

In [None]:
# Load, reshape and scale the data
# Training images, read from a NumPy .npy file given by a relative path
x_train = np.load("train_images_small.npy")

In [None]:
# Load the test images, the training targets and the test labels
# NOTE(review): "train_labels_vector" presumably holds one vector per sample
# matching the 10 network outputs, while the test labels are used as
# integer row indices into the confusion matrix — confirm against the files
x_test = np.load("test_images_small.npy")
y_train = np.load("train_labels_vector.npy")
y_test = np.load("test_labels.npy")

In [None]:
# Number of training images
x_train.shape[0]

60000

In [None]:
# Flatten every training image into a 1 x 196 (14 * 14) row vector
x_train = x_train.reshape(x_train.shape[0], 1, 14 * 14)


In [None]:
# Confirm the shape after flattening
print(x_train.shape)

(60000, 1, 196)


In [None]:
# Inspect the first training sample before scaling
print(x_train[0])

[[  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   5.   9.  66.  50. 105.  94.   0.   0.
    0.   0.   0.  12. 139. 189. 232. 253. 253. 143. 158.  75.   0.   0.
    0.   0.   0.   5. 177. 217. 241.  98. 171.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   4.  74. 197.   1.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   3. 180. 114.  27.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.  20. 181. 220.  51.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   4. 149. 236.  16.   0.   0.   0.
    0.   0.   0.   0.   0.   0.  47. 165. 236. 223.   1.   0.   0.   0.
    0.   0.   0.   0.  22. 151. 245. 239. 134.  20.   0.   0.   0.   0.
    0.   0.  57. 167. 245. 251. 148.  22.   0.   0.   0.   0.   0.   0.
    0.   0.  97. 127.  87.  37.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   

In [None]:
# Scale the training pixel values in place by dividing by 255
# NOTE(review): in-place true division needs a floating-point array; the
# sample printed before this cell shows float values — confirm the dtype
x_train /= 255

In [None]:
# Inspect the first training sample after scaling
print(x_train[0])

[[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.01960784 0.03529412
  0.25882354 0.19607843 0.4117647  0.36862746 0.         0.
  0.         0.         0.         0.04705882 0.54509807 0.7411765
  0.9098039  0.99215686 0.99215686 0.56078434 0.61960787 0.29411766
  0.         0.         0.         0.         0.         0.01960784
  0.69411767 0.8509804  0.94509804 0.38431373 0.67058825 0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.01568628 0.2901961  0.77254903 0.00392157
  0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.01176471
  0.7058824  0.44705883 0.10588235 0.         0.     

In [None]:
# Flatten every test image into a 1 x 196 (14 * 14) row vector,
# the same layout used for the training images
x_test = x_test.reshape(x_test.shape[0], 1, 14 * 14)


In [None]:
# Scale the test pixel values in place by dividing by 255,
# the same scaling applied to the training images
x_test /= 255

In [None]:
# Build the Network using sigmoid activation
# Three fully connected layers (196 -> 100 -> 50 -> 10), each followed by
# an ActivationLayer
net = Network()
net.add(FullyConnectedLayer(14 * 14, 100))
net.add(ActivationLayer())
net.add(FullyConnectedLayer(100, 50))
net.add(ActivationLayer())
net.add(FullyConnectedLayer(50, 10))
net.add(ActivationLayer())

In [None]:
# Loss and Train
# Run 40000 randomly drawn minibatches (fit's default batch_size of 64)
# with learning rate 1.0; the mean loss is printed every 10th minibatch
net.fit(x_train, y_train, minibatches=40000, learning_rate=1.0)

minibatch     0/40000  error=0.127456974
minibatch    10/40000  error=0.044327957
minibatch    20/40000  error=0.044805849
minibatch    30/40000  error=0.043615938
minibatch    40/40000  error=0.043738225
minibatch    50/40000  error=0.043174584
minibatch    60/40000  error=0.042575446
minibatch    70/40000  error=0.040804467
minibatch    80/40000  error=0.041798142
minibatch    90/40000  error=0.039120405
minibatch   100/40000  error=0.039357197
minibatch   110/40000  error=0.040360153
minibatch   120/40000  error=0.036889298
minibatch   130/40000  error=0.035987725
minibatch   140/40000  error=0.035617853
minibatch   150/40000  error=0.036079982
minibatch   160/40000  error=0.031669548
minibatch   170/40000  error=0.035882630
minibatch   180/40000  error=0.032416326
minibatch   190/40000  error=0.032581279
minibatch   200/40000  error=0.031784853
minibatch   210/40000  error=0.030651498
minibatch   220/40000  error=0.028771215
minibatch   230/40000  error=0.028057483
minibatch   240/

In [None]:
# Build the confusion matrix from the test predictions
# Rows are indexed by the true label, columns by the predicted class
# (the index of the largest network output)
out = net.predict(x_test)
cm = np.zeros((10, 10), dtype="uint32")
for i in range(len(y_test)):
  cm[y_test[i], np.argmax(out[i])] += 1

In [None]:
# Print the confusion matrix and the overall accuracy
# (sum of the diagonal divided by the sum of all entries)
print()
print(np.array2string(cm))
print()
print("accuracy = %0.7f" % (np.diag(cm).sum() / cm.sum(),))
print()


[[   0    0    0    0    0  980    0    0    0    0]
 [   0    0    0    0    0 1135    0    0    0    0]
 [   0    0    0    0    0 1032    0    0    0    0]
 [   0    0    0    0    0 1010    0    0    0    0]
 [   0    0    0    0    0  982    0    0    0    0]
 [   0    0    0    0    0  892    0    0    0    0]
 [   0    0    0    0    0  958    0    0    0    0]
 [   0    0    0    0    0 1028    0    0    0    0]
 [   0    0    0    0    0  974    0    0    0    0]
 [   0    0    0    0    0 1009    0    0    0    0]]

accuracy = 0.0892000



In [None]:
import numpy as np
# Scratch check: a 10 x 10 zero matrix with the same dtype as the
# confusion matrix
tm = np.zeros((10, 10), dtype="uint32")

In [None]:
# Show the freshly created matrix
print(tm)

[[0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]]


In [None]:
# Read a single cell by (row, column) index
tm[0, 0]

0

In [None]:
# Increment one cell in place, as the confusion-matrix loop does
tm[0, 0] += 1

In [None]:
# Show the cell after the increment
print(tm[0, 0])

1
