<a href="https://colab.research.google.com/github/solmvz/MLActivities/blob/main/MyNeuralNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [41]:
import pandas as pd
import numpy as np
from copy import deepcopy as copy
from sklearn.metrics import accuracy_score
# Print NumPy floats with 3 digits of precision; this only changes how arrays
# are displayed, not the values stored in them.
np.set_printoptions(precision=3)


In [37]:
class MyNeuralNet(object):
    """
    Fully connected network with one hidden layer and sigmoid activations on
    both layers, trained by batch gradient descent on the cross-entropy cost.

    Examples are stored column-wise: X has shape (n0, m), so that
    np.dot(w1, X) with w1 of shape (n1, n0) yields one column per example.
    """

    def __init__(self, X, y, layers, m, lr=0.01, factor=0.01):
        """
        Store the training data and hyper-parameters.

        Arguments:
        X - input matrix of shape (n0, m), one column per example,
        y - target values, same shape as the network output (n2, m),
        layers - tuple (n0, n1, n2) with the number of units per layer,
        m - number of examples; used as the divisor when averaging the cost
            and the gradients,
        lr - learning rate of the gradient descent update,
        factor - scale applied to the random initial weights (determines how
            small the initial parameters are).
        """
        self.X = X
        self.layers = layers
        self.y = y
        self.lr = lr
        self.factor = factor
        self.m = m

    @staticmethod
    def sigmoid(z):
        """Element-wise logistic function 1 / (1 + exp(-z))."""
        # exp(-z) overflows to inf for very negative z, but 1 / (1 + inf) is
        # still the correct limit 0.0, so only the overflow warning is silenced.
        with np.errstate(over="ignore"):
            return 1 / (1 + np.exp(-z))

    def parameters_init(self):
        """
        Create the initial weights and biases.

        Returns:
        parameters -- python dictionary containing initial parameter values:
        w1 - weight matrix of shape (n1, n0),
        b1 - bias vector of shape (n1, 1),
        w2 - weight matrix of shape (n2, n1),
        b2 - bias vector of shape (n2, 1), where,
        n0 - number of the neurons at the input,
        n1 - number of neurons at the hidden layer, and
        n2 - number of units at the last/output layer.
        """
        # num of neurons in each layer
        n0, n1, n2 = self.layers

        # Fixed seed so that every call yields the same initial weights.
        # Note that this reseeds NumPy's global random state.
        np.random.seed(3)

        # Weights are small random values scaled by `factor`; biases start at 0.
        w1 = np.random.randn(n1, n0) * self.factor
        b1 = np.zeros((n1, 1))
        w2 = np.random.randn(n2, n1) * self.factor
        b2 = np.zeros((n2, 1))

        return {"w1": w1, "b1": b1, "w2": w2, "b2": b2}

    def forward_propagation(self, parameters, X=None):
        """
        Run one forward pass.

        Arguments:
        parameters - dictionary with "w1", "b1", "w2" and "b2",
        X - optional input matrix of shape (n0, number of examples); defaults
            to the data stored on the instance. backward_propagation always
            uses the stored data, so pass X only for inference.

        Returns:
        yhat - model output on one forward pass for all the examples,
        layer_outputs - a dictionary containing model outputs at each layer
            ("z1", "f1", "z2" and "yhat").
        """
        if X is None:
            X = self.X

        w1, b1 = parameters["w1"], parameters["b1"]
        w2, b2 = parameters["w2"], parameters["b2"]

        # hidden layer
        z1 = np.dot(w1, X) + b1
        f1 = self.sigmoid(z1)

        # output layer
        z2 = np.dot(w2, f1) + b2
        yhat = self.sigmoid(z2)

        # One row per output unit and one column per example; a mis-shaped
        # bias would silently broadcast to something else.
        assert yhat.shape == (w2.shape[0], X.shape[1])

        layer_outputs = {"z1": z1, "f1": f1, "z2": z2, "yhat": yhat}
        return yhat, layer_outputs

    def compute_cost(self, yhat, y):
        """
        Computes the cross-entropy cost of approximation.

        Arguments:
        yhat - output of forward propagation, one column per example,
        y - array of true values with the same shape as yhat.

        Returns:
        the cost as a Python float (sum of the per-example losses divided
        by self.m).
        """
        # Keep predictions strictly inside (0, 1): a saturated sigmoid returns
        # exactly 0.0 or 1.0, and 0 * log(0) would turn the whole cost into NaN.
        eps = 1e-15
        yhat = np.clip(yhat, eps, 1 - eps)

        cost = -np.sum(y * np.log(yhat) + (1 - y) * np.log(1 - yhat)) / self.m

        # squeeze and make sure cost is a plain scalar
        return float(np.squeeze(cost))

    def backward_propagation(self, parameters, layers_output):
        """
        Compute the gradients of the cost with respect to every parameter.

        Arguments:
        parameters -- python dictionary containing our parameters
        layers_output -- the dictionary returned by forward_propagation
            (only "f1" and "yhat" are read here).

        Returns:
        gradients -- python dictionary with "dw1", "db1", "dw2" and "db2".
        """
        X = self.X
        w2 = parameters["w2"]
        f1 = layers_output["f1"]
        yhat = layers_output["yhat"]

        # output layer
        dz2 = yhat - self.y
        dw2 = np.dot(dz2, f1.T) / self.m
        # Sum over the examples (axis=1); keepdims=True keeps the result as a
        # column vector so it has the same shape as the bias it updates.
        db2 = 1 / self.m * np.sum(dz2, axis=1, keepdims=True)

        # hidden layer; (1 - f1) * f1 is the derivative of the sigmoid
        dz1 = np.dot(w2.T, dz2) * (1 - f1) * f1
        dw1 = np.dot(dz1, X.T) / self.m
        db1 = 1 / self.m * np.sum(dz1, axis=1, keepdims=True)

        return {"dw1": dw1, "db1": db1, "dw2": dw2, "db2": db2}

    def update_parameters(self, parameters, gradients):
        """
        Apply one gradient descent step: parameter - lr * gradient.

        Arguments:
        parameters -- python dictionary containing your parameters
        gradients -- python dictionary containing your gradients

        Returns:
        parameters -- new python dictionary containing the updated parameters;
            the input dictionaries and their arrays are left untouched.
        """
        # `a - lr * b` already allocates a fresh array, so no defensive copies
        # of the inputs are needed.
        return {
            # hidden-input layer
            "w1": parameters["w1"] - self.lr * gradients["dw1"],
            "b1": parameters["b1"] - self.lr * gradients["db1"],
            # output-hidden layer
            "w2": parameters["w2"] - self.lr * gradients["dw2"],
            "b2": parameters["b2"] - self.lr * gradients["db2"],
        }


# Training the model

In [39]:
def train_model(n_iter, X, y, layers, n_examples, lr, factor, log_every=1000):
    """
    Train a MyNeuralNet with batch gradient descent and print progress.

    Arguments:
        n_iter - index of the last training iteration; the loop runs
            iterations 0..n_iter inclusive, i.e. n_iter + 1 updates,
        X - the input data (feature data),
        y - target/true values,
        layers - a tuple with the # neurons for each layer
        n_examples - number of training examples, passed to MyNeuralNet as m,
        lr - the learning rate,
        factor -  user-defined value that determines how big initialized parameters are
        log_every - print cost and accuracy every `log_every` iterations
            (and always on the last one); must be a positive integer.

    Returns:
        the parameter dictionary after the last update.
    """
    s = MyNeuralNet(X, y, layers, n_examples, lr, factor)
    parameters = s.parameters_init()

    # The labels never change, so flatten them once instead of per iteration.
    y_true = np.squeeze(y)

    for step in range(n_iter + 1):
        # forward pass, gradients, then one gradient descent step
        y_hat, layers_output = s.forward_propagation(parameters)
        gradients = s.backward_propagation(parameters, layers_output)
        parameters = s.update_parameters(parameters, gradients)

        # Cost and accuracy are only reported, never used for training, so
        # they are computed on logging iterations only. Both are based on
        # y_hat, i.e. on the parameters as they were before this update.
        if step % log_every == 0 or step == n_iter:
            cost = round(s.compute_cost(y_hat, y), 3)
            # threshold the outputs at 0.5 to get class labels
            predictions = np.where(y_hat >= 0.5, 1, 0)
            accuracy = round(accuracy_score(y_true, np.squeeze(predictions)), 3)
            print("iteration", step, "cost:-->", cost, "accuracy-->", accuracy)

    return parameters


In [45]:
# Load the marks dataset; the first 300 rows are used for training.
df = pd.read_csv("https://kipronokoech.github.io/assets/datasets/marks.csv")
df_train = df.iloc[:300]

# The network expects one column per example, hence the transpose for the
# features and the single-row reshape for the labels.
X = df_train.drop(columns=["y"]).to_numpy().T
y = df_train["y"].to_numpy().reshape(1, -1)

# Architecture: one input per feature, 4 hidden neurons, one output per label row.
n0, n1, n2 = X.shape[0], 4, y.shape[0]
layers = (n0, n1, n2)

n_examples = X.shape[1]

parameters = train_model(4500, X, y, layers, n_examples, lr=0.2, factor=0.01)

print(parameters)

iteration 0 cost:--> 0.691 accuracy--> 0.67
iteration 1000 cost:--> 0.278 accuracy--> 0.88
iteration 2000 cost:--> 0.215 accuracy--> 0.947
iteration 3000 cost:--> 0.452 accuracy--> 0.823
iteration 4000 cost:--> 0.206 accuracy--> 0.897
iteration 4500 cost:--> 0.129 accuracy--> 0.933
{'w1': array([[-1.195e-03, -3.654e-01,  1.147e+00],
       [-1.305e-01, -1.780e-01,  1.073e+00],
       [-3.036e-01,  3.260e-04,  1.036e+00],
       [-2.828e-01, -2.660e-02,  1.040e+00]]), 'b1': array([[-7.243],
       [-7.145],
       [-6.857],
       [-6.82 ]]), 'w2': array([[2.845, 2.782, 2.574, 2.565]]), 'b2': array([[-7.372]])}


# Testing the model

In [46]:
# Every row after the first 300 is held out for testing.
df_test = df.iloc[300:]
print(f"Testing on {len(df_test)} data points")

# Same layout as the training data: one column per example.
X_test = df_test.drop(columns=["y"]).to_numpy().T  # feature matrix
y_test = df_test["y"].to_numpy().reshape(1, -1)

# Layer sizes: number of features, 4 hidden neurons, one output row.
n0, n1, n2 = X_test.shape[0], 4, y_test.shape[0]
n_examples = X_test.shape[1]
layers = (n0, n1, n2)

# The network object is only needed for its forward pass on the test data.
s = MyNeuralNet(X=X_test, y=y_test, layers=layers, m=n_examples, lr=0.2, factor=0.01)

# `parameters` are the ones returned by the last training iteration.
y_hat, layers_output = s.forward_propagation(parameters)

# Outputs of at least 0.5 become class 1, everything else class 0.
predictions = np.where(y_hat >= 0.5, 1, 0)

# Fraction of test examples classified correctly (displayed as the cell output).
accuracy_score(y_true=np.squeeze(y_test), y_pred=np.squeeze(predictions), normalize=True)


Testing on 95 data points


0.8421052631578947