# Neural Network with a Single Hidden Layer

In [35]:
import numpy as np
import matplotlib.pyplot as plt

In [36]:
# Sigmoid function

def sigmoid(z):
  """Return the logistic sigmoid 1 / (1 + exp(-z)), applied element-wise.

  Args:
    z: scalar or NumPy array.

  Returns:
    A value (or array shaped like z) strictly between 0 and 1 for finite z.
  """
  denominator = 1 + np.exp(-z)
  return 1 / denominator

In [37]:
# Layer size

def layer_size(X, Y, n_h=4):
  """Derive the layer sizes of the network from the data arrays.

  Args:
    X: input array; its first axis is taken as the number of input features.
    Y: label array; its first axis is taken as the number of output units.
    n_h: number of hidden units. Defaults to 4, the value that was previously
      hard-coded, so existing two-argument calls behave exactly as before.

  Returns:
    Tuple (n_x, n_h, n_y).
  """
  n_x = X.shape[0]
  n_y = Y.shape[0]

  return (n_x, n_h, n_y)

In [38]:
# Initialize parameters

def init_params(n_x, n_h, n_y):
  """Create the initial weights and biases for both layers.

  Weights are drawn from a standard normal distribution and scaled by 0.01;
  biases start at zero. Draws come from NumPy's global random state, so seed
  it beforehand (np.random.seed) for reproducible results.

  Args:
    n_x: number of input features.
    n_h: number of hidden units.
    n_y: number of output units.

  Returns:
    Dict with "W1" (n_h, n_x), "b1" (n_h, 1), "W2" (n_y, n_h), "b2" (n_y, 1).
  """
  weight_scale = 0.01

  # Dict entries are evaluated top to bottom, so W1 is drawn before W2.
  return {
      "W1": np.random.randn(n_h, n_x) * weight_scale,
      "b1": np.zeros((n_h, 1)),
      "W2": np.random.randn(n_y, n_h) * weight_scale,
      "b2": np.zeros((n_y, 1)),
  }

In [39]:
# Forward Propagation

def forward_propagation(X, params):
  """Run one forward pass through the two-layer network.

  Args:
    X: input array. It is left-multiplied by W1, so its first axis must match
      W1's second axis (features along rows, samples along columns).
    params: dict with keys "W1", "b1", "W2", "b2".

  Returns:
    Tuple (A2, cache). A2 is the output activation; cache is a dict holding
    the intermediate values "Z1", "A1", "Z2" and "A2" for backpropagation.
  """
  W1 = params["W1"]
  b1 = params["b1"]
  W2 = params["W2"]
  b2 = params["b2"]

  # Hidden layer: affine transform followed by tanh.
  Z1 = np.dot(W1, X) + b1
  A1 = np.tanh(Z1)

  # Output layer: logistic sigmoid, so A2 lies in (0, 1). The cross-entropy
  # cost takes log(A2) and log(1 - A2) and predictions are thresholded at 0.5,
  # both of which require a probability rather than a tanh value in (-1, 1).
  Z2 = np.dot(W2, A1) + b2
  A2 = 1 / (1 + np.exp(-Z2))

  # "A1" must hold the hidden activation (not the bias b1): the backward pass
  # uses it for dW2 and for the tanh derivative.
  cache = {"Z1": Z1,
           "A1": A1,
           "Z2": Z2,
           "A2": A2}

  return A2, cache

In [40]:
# Cost function
def cost_computation(A2, Y):
  """Compute the mean binary cross-entropy between predictions and labels.

  Args:
    A2: predicted probabilities, same shape as Y.
    Y: labels (0 or 1); the number of samples is read from Y.shape[1].

  Returns:
    The cost as a Python float.
  """
  m = Y.shape[1]

  # Keep probabilities strictly inside (0, 1). A saturated output of exactly
  # 0 or 1 would otherwise give log(0) = -inf and a NaN cost via 0 * -inf.
  eps = 1e-15
  probs = np.clip(np.asarray(A2, dtype=float), eps, 1 - eps)

  log_likelihood = np.multiply(np.log(probs), Y) + np.multiply(np.log(1 - probs), 1 - Y)
  cost = (-1 / m) * np.sum(log_likelihood)

  return float(np.squeeze(cost))

In [41]:
# Backward propagation

def backward_propagation(params, cache, X, Y):
  """Compute the gradients of the cost with respect to all parameters.

  Args:
    params: dict of parameters; only "W2" is read here.
    cache: dict from the forward pass; "A1" (hidden activation) and "A2"
      (output activation) are read.
    X: input array with samples along axis 1.
    Y: label array with the same shape as A2.

  Returns:
    Dict with "dW1", "db1", "dW2", "db2", each shaped like the parameter it
    corresponds to.
  """
  m = X.shape[1]
  inv_m = 1 / m

  W2 = params["W2"]

  A1 = cache["A1"]
  A2 = cache["A2"]

  # Output-layer error. A2 - Y is the gradient w.r.t. Z2 for a sigmoid output
  # paired with the cross-entropy cost.
  dZ2 = A2 - Y
  dW2 = inv_m * np.dot(dZ2, A1.T)
  db2 = inv_m * np.sum(dZ2, axis=1, keepdims=True)

  # Propagate through the hidden layer; 1 - A1**2 is the derivative of tanh.
  dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))
  dW1 = inv_m * np.dot(dZ1, X.T)
  db1 = inv_m * np.sum(dZ1, axis=1, keepdims=True)

  grads = {"dW1": dW1,
           "db1": db1,
           "dW2": dW2,
           "db2": db2}

  return grads


In [76]:
# update parameters

def update_parameters(params, grads, alpha=1.2):
  """Apply one gradient-descent step to every parameter.

  Args:
    params: dict with keys "W1", "b1", "W2", "b2".
    grads: dict with the matching gradients "dW1", "db1", "dW2", "db2".
    alpha: learning rate.

  Returns:
    A new dict with the updated "W1", "b1", "W2", "b2". The input arrays are
    not modified in place.
  """
  # Each parameter is updated from its own gradient ("d" + name). Looping over
  # the names avoids the copy-paste slip where b1 was computed from W1, which
  # replaced the (n_h, 1) bias with a weight-shaped array.
  return {name: params[name] - alpha * grads["d" + name]
          for name in ("W1", "b1", "W2", "b2")}

In [43]:
# Predict
def predict(params, X):
  """Classify each column of X by thresholding the network output at 0.5.

  Args:
    params: parameter dict passed through to forward_propagation.
    X: input array passed through to forward_propagation.

  Returns:
    Boolean array shaped like the network output; True where the output
    activation exceeds 0.5.
  """
  output_activation, _ = forward_propagation(X, params)
  return output_activation > 0.5

In [77]:
# final model
def nn_model(X, Y, n_h, num_iterations):
  """Train the single-hidden-layer network with batch gradient descent.

  Args:
    X: input array with features along axis 0 and samples along axis 1.
    Y: label array with one column per sample.
    n_h: number of hidden units.
    num_iterations: number of gradient-descent steps to run.

  Returns:
    Dict of learned parameters "W1", "b1", "W2", "b2".

  Side effects:
    Prints the cost every 1000 iterations and the training accuracy once
    after the final iteration.
  """
  # Only the input and output sizes come from the data; the hidden size is the
  # caller's n_h.
  n_x, _, n_y = layer_size(X, Y)

  # initialize parameters
  params = init_params(n_x, n_h, n_y)

  for i in range(num_iterations):

    # forward propagation
    A2, cache = forward_propagation(X, params)

    # cost function
    cost = cost_computation(A2, Y)

    # backward propagation
    grads = backward_propagation(params, cache, X, Y)

    # update parameters
    params = update_parameters(params, grads)

    if i % 1000 == 0:
      print("cost after iteration %i: %f" % (i, cost))

  # Training accuracy of the final parameters. This is reported once, after
  # the loop: the per-iteration version referenced an undefined name
  # (`predictions`) and would have printed one line per iteration. For 0/1
  # labels the fraction of matching entries equals the original
  # dot-product formula.
  predictions = predict(params, X)
  accuracy = float(np.mean(predictions == Y) * 100)
  print('Accuracy: %d' % accuracy + '%')

  return params

**Training on the digits dataset**

In [78]:
# Load the digits dataset that ships with scikit-learn; the arrays used below
# are digits.data (inputs) and digits.target (labels).
from sklearn.datasets import load_digits
digits = load_digits()

In [79]:
# Show the array shapes as a sanity check before splitting.
print(f"Input shape: {digits.data.shape}")
print(f"Target data shape: {digits.target.shape}")

Input shape: (1797, 64)
Target data shape: (1797,)


In [80]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the samples for testing; the fixed random_state makes the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.25,
    random_state=0,
)

In [81]:
# Flatten each sample, transpose so samples run along the columns (the layout
# the network functions index with shape[1]), and scale the pixel values.
# NOTE(review): scikit-learn documents the digits pixels as integers in 0..16,
# so dividing by 255 leaves the features in roughly [0, 0.063] - confirm the
# divisor is intended, and keep it consistent with however x_test is scaled.
# NOTE(review): this cell rebinds x_train / y_train, so running it twice
# rescales and re-transposes the data; re-run the split cell first.
x_train = x_train.reshape(x_train.shape[0], -1).T / 255

# Labels become a single row, one column per sample.
# NOTE(review): digits.target presumably holds the multi-class digit labels,
# while the cost and predict functions treat Y as binary - confirm the labels
# are binarised before training.
y_train = y_train.reshape(1, y_train.shape[0])

print(f"x_train shape: {x_train.shape}")
print(f"y_train shape: {y_train.shape}")

x_train shape: (64, 1347)
y_train shape: (1, 1347)
