Assignment 3: Neural Network By Hand!
In this assignment, you will code a little neural network from scratch! Don’t worry, it will be extremely guided. The two most important points being that you learn and you have fun.

For programmers already fluent in Python

If you have had some programming experience already, please stick to the code template below for grading purposes. Then, feel free to append to your notebook a more elaborate code. And check the bonuses at the end of this assignment!

We want our neural network to solve the XOR problem. This is how the data look like:



In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
pwd

'/content'

In [3]:
! ls -la

total 20
drwxr-xr-x 1 root root 4096 Jul 12 12:26 .
drwxr-xr-x 1 root root 4096 Jul 12 12:23 ..
drwxr-xr-x 4 root root 4096 Jul 10 13:34 .config
drwx------ 6 root root 4096 Jul 12 12:26 drive
drwxr-xr-x 1 root root 4096 Jul 10 13:35 sample_data


In [None]:
import numpy as np
import pandas as pd
import random
import math
np.random.seed(42)

import matplotlib.pyplot as plt
FONTSIZE = 16
params = {
         'figure.figsize' : (6,6),
         'axes.labelsize' : FONTSIZE,
         'axes.titlesize' : FONTSIZE+2,
         'legend.fontsize': FONTSIZE,
         'xtick.labelsize': FONTSIZE,
         'ytick.labelsize': FONTSIZE,
         'xtick.color'    : 'black',
         'ytick.color'    : 'black',
         'axes.facecolor' : 'white',
         'axes.edgecolor' : 'black',
         'axes.titlepad'  :  20,
         'axes.labelpad'  :  10}
plt.rcParams.update(params)

XNAME = 'x1'; XLABEL = r'$x_1$'
YNAME = 'x2'; YLABEL = r'$x_2$'
RANGE = (-6, 6); STEP = 0.1

def predict(output_node, boundary_value):
  output_node.reshape(-1, 1, 1) # a list (m, 1, 1)
  predictions = np.array(output_node > boundary_value, dtype=int)
  return predictions

def plot_cost_vs_iter(train_costs, test_costs, title="Cost evolution"):

  fig, ax = plt.subplots(figsize=(8, 6))
  iters = np.arange(1,len(train_costs)+1)
  ax.plot(iters, train_costs, color='red', lw=1, label='Training set')
  ax.plot(iters, test_costs, color='blue', lw=1, label='Testing set')
  ax.set_xlabel("Number of iterations"); ax.set_xlim(1, iters[-1])
  ax.set_ylabel("Cost")
  ax.legend(loc="upper right", frameon=False)
  ax.set_title(title)
  plt.show()


def get_decision_surface(weights, biases, boundary=0.5, range=RANGE, step=STEP):

  # Create a grid of points spanning the parameter space:
  x1v, x2v = np.meshgrid(np.arange(range[0], range[1]+step, step),
                         np.arange(range[0], range[1]+step, step))

  # Stack it so that it is shaped like X_train: (m,2)
  X_grid = np.c_[x1v.ravel(), x2v.ravel()].reshape(-1,2)

  # Feedforward on all grid points and get binary predictions:
  output = feedforward(X_grid, weights, biases)[-1] # getting only output node
  Ypred_grid = predict(output, boundary)

  return (x1v, x2v, Ypred_grid.reshape(x1v.shape))


def plot_scatter(sig, bkg, ds=None, xname=XNAME, xlabel=XLABEL, yname=YNAME, ylabel=YLABEL, range=RANGE, step=STEP, title="Scatter plot"):

  fig, ax = plt.subplots()

  # Decision surface
  if ds:
    (xx, yy, Z) = ds # unpack contour data
    cs = plt.contourf(xx, yy, Z, levels=[0,0.5,1], colors=['orange','dodgerblue'], alpha=0.3)

  # Scatter signal and background:
  ax.scatter(sig[xname], sig[yname], marker='o', s=10, c='dodgerblue', alpha=1, label='Positive class')
  ax.scatter(bkg[xname], bkg[yname], marker='o', s=10, c='orange',     alpha=1, label='Negative class')

  # Axes, legend and plot:
  ax.set_xlim(range); ax.set_xlabel(xlabel)
  ax.set_ylim(range); ax.set_ylabel(ylabel)

  ax.legend(bbox_to_anchor=(1.04, 0.5), loc="center left", frameon=False)
  ax.set_title(title)
  plt.show()

In [5]:
import pandas as pd

In [6]:
# train and test
train = pd.read_csv('/content/drive/MyDrive/Easy_ML/ml_a3_data_train.csv')
test = pd.read_csv('/content/drive/MyDrive/Easy_ML/ml_a3_data_train.csv')

In [7]:
test.head(5)

Unnamed: 0,x1,x2,class
0,-0.136282,3.244889,0
1,-5.305888,5.629232,0
2,-4.147647,-0.226319,1
3,-2.170794,3.946986,0
4,-5.555908,1.155239,0


In [8]:
train.head(5)

Unnamed: 0,x1,x2,class
0,-0.136282,3.244889,0
1,-5.305888,5.629232,0
2,-4.147647,-0.226319,1
3,-2.170794,3.946986,0
4,-5.555908,1.155239,0


In [14]:
# create a sig and bkg dataframes that collect the real signal and value samples
sig = train[train['class'] > 0.5]
bkg = train[train['class'] < 0.5]

In [15]:
sig.head()

Unnamed: 0,x1,x2,class
2,-4.147647,-0.226319,1
6,0.332413,5.915578,1
9,4.031761,2.35169,1
10,-4.655533,-1.225733,1
11,-0.341086,-1.057909,1


In [16]:
bkg.head()

Unnamed: 0,x1,x2,class
0,-0.136282,3.244889,0
1,-5.305888,5.629232,0
3,-2.170794,3.946986,0
4,-5.555908,1.155239,0
5,-2.775872,0.499611,0


In [18]:
inputs = ['x1', 'x2']
X_train =  train[inputs].to_numpy()
y_train = train['class'].to_numpy()

X_test = test[inputs].to_numpy()
y_test = test['class'].to_numpy()

In [21]:
# a_Lminus1 and b are row vectors, W is a matrix
# @ is a function of matrix manipulation
def z(a_Lminus1, w, b):
  return a_Lminus1 @ W + b

In [23]:
# activation functions and derivatives
def tanh(z):
  return np.tanh(z)

def sigmoid(z):
  return 1/(1 +np.exp(-z))

def sigmoid_prime(z):
  return sigmoid(z)*(1-sigmoid(z))

def tanh_prime(z):
  return 1 - tanh(z)**2

In [33]:
# Cross-entropy cost function
def cross_entropy_cost(y_preds, y_vals):
    m = len(y_vals)
    epsilon = 1e-9
    summand = y_vals * np.log(y_preds + epsilon) + (1 - y_vals) * np.log(1 - y_preds + epsilon)
    return - (1/m) * summand.sum()

In [34]:
def L_prime(y_preds, y_obs):
  epsilon = 1e-9
  return - (y_obs / (y_preds + epsilon) - (1 - y_obs) / (1 - y_preds + epsilon))

In [32]:
# pseudocede
# def feedforward(all my weights matrices, all my biases, )


def feedforward(input_X, weights, biases):

  W1, W2, W3 = weights ; b1, b2, b3 = biases

  m  = len(input_X)
  a0 = input_X.reshape((m, -1, 1))

  # First layer
  #...
  #...
  z1 = z(a0, W1, b1)
  a1 = tanh(z1)


  # Second layer
  #...
  #...
  z2 = z(a1, W2, b2)
  a2 = tanh(z2)

  # Third layer
  #...
  #...
  z3 = z(a2, W3, b3)
  a3 = tanh(z3)




  nodes = [a0, z1, a1, z2, a2, z3, a3]

  return nodes