# Building a Neural Network from Scratch 

In [180]:
import numpy as np
import pandas as pd

# Input Data

In [181]:
data = pd.read_csv("data.csv")
data_leatures = data[["gre", "gpa", "rank"]]
data_lables = data["admit"]
y = data_lables 
X = data_leatures.as_matrix()
X = X[:1]
y  = y[:1]
X.shape
layer_x_activation = {
    'relu': lambda x: np.maximum(0, x),
    'sigmoid': lambda x: 1/(1+np.exp(-x))
}

# Compute First Hidden Layer Values

## Initialize Weights for the first hidden layer

In [182]:
layer_1_NumberOfHiddenNeurons = 4

In [183]:
NumberOfRowsInW1 = X.shape[1] # number of data points we have per input ([gre, gpa, rank] = 3)
NumberOfColsInW1 = layer_1_NumberOfHiddenNeurons
W1 = np.random.rand(NumberOfRowsInW1, NumberOfColsInW1) * 0.01 # row x col

In [184]:
W1

array([[ 0.00406243,  0.00935036,  0.00542352,  0.00837482],
       [ 0.00788238,  0.00975496,  0.00696914,  0.0015549 ],
       [ 0.00702277,  0.00970429,  0.00760143,  0.00876302]])

## Compute Sums


 First_hidden_layer_sums $=\sum_{i} X_iW_{ij}$


In [185]:
hidden_later_1_sums = np.dot(X, W1)
hidden_later_1_sums

array([[ 1.59324648,  3.61746456,  2.10890068,  3.21433324]])

![title](images/nn_layer1.jpg)

## First Hidden Layer Activation function


Use the ReLu actication function in the first Layer.



In [186]:
hidden_later_1_activation = layer_x_activation['sigmoid'](hidden_later_1_sums)

![title](images/nn_layer2.jpg)

In [187]:
hidden_later_1_activation

array([[ 0.83107237,  0.97385144,  0.89176527,  0.96137011]])

In [188]:
hidden_later_1_output = hidden_later_1_activation
hidden_later_1_output.shape

(1, 4)

# Compute Second Hidden Layer Values

## Initialize Weights for the second hidden layer

In [189]:
layer_2_NumberOfHiddenNeurons = 3

In [190]:
NumberOfRowsInW2 = hidden_later_1_output.shape[1] # Number of rows = number of data points
NumberOfColsInW2 = layer_2_NumberOfHiddenNeurons # number of neurons in hidden layer 2
W2 = np.random.rand(NumberOfRowsInW2, NumberOfColsInW2)

In [191]:
W2

array([[ 0.24929869,  0.76472739,  0.64165642],
       [ 0.67370924,  0.30807162,  0.51744161],
       [ 0.00883316,  0.13400266,  0.31187144],
       [ 0.65990124,  0.18891927,  0.08680288]])

## Compute Sums

In [192]:
hidden_later_2_sums = np.dot(hidden_later_1_output, W2)

In [193]:
hidden_later_2_sums

array([[ 1.50556441,  1.23668006,  1.39873999]])

## Second Hidden Layer Activation

In [194]:
hidden_later_2_activation = layer_x_activation['sigmoid'](hidden_later_2_sums)

In [195]:
hidden_later_2_output = hidden_later_2_activation
hidden_later_2_output

array([[ 0.81840292,  0.7749856 ,  0.80198387]])

![title](images/nn_full_layer2.jpg)

# Compute Third Hidden Layer Values 

In [196]:
layer_3_NumberOfHiddenNeurons = 1

In [197]:
NumberOfRowsInW3 = hidden_later_2_output.shape[1] # Number of rows = number of data points
NumberOfColsInW3 = layer_3_NumberOfHiddenNeurons # number of neurons in hidden layer 2
W3 = np.random.rand(NumberOfRowsInW3, NumberOfColsInW3)
hidden_later_3_sums = layer_x_activation['sigmoid'](np.dot(hidden_later_2_output, W3))

In [198]:
y_hat = hidden_later_3_sums[:, 0]

![title](images/nn_full_layer3.jpg)

# Predictions ($\hat y$)

$\hat y=\sigma(Wx+b)$

In [199]:
y_hat

array([ 0.74361435])

# Backpropagation

backpropagation will consist of:

- Doing a feedforward operation.
- Comparing the output of the model with the desired output.
- Calculating the error.
- Running the feedforward operation backwards (backpropagation) to spread the error to each of the weights.
- Use this to update the weights, and get a better model.
- Continue this until we have a model that is good.


## Error Function 

$E(W)=-\frac{1}{m}\sum_{i=1}^{m}y_iln(\hat y_i) + (1-y_i)ln(1-\hat y_i)$

## Gradient of the error function

$\nabla E = (\frac{\partial{E}}{\partial{w_1}}, \frac{\partial{E}}{\partial{w_2}}, \frac{\partial{E}}{\partial{w_3}}+ ... + \frac{\partial{E}}{\partial{b}})$



## Error in the output layer 

$E_{output} = (y_k - \hat y_k)\sigma^{\prime}(a_k)$


## Error in the hidden layers 


$E_{j} = \sum{[w_{jk}E_{k}]\sigma^{\prime}(h_j)}$


![title](images/hidden_error.jpg)


# Calculate the error 

In [200]:
E = (y - y_hat)

## Compute error gradient in output unit


$E^{(3)} = (y-y_{hat})\sigma^{\prime}(\sum_{i}(h_{2i}w^{(3)}_{1i}))$

$E^{(3)} = (y-y_{hat})\sigma(\sum_{i}(h_{2i}w^{(3)}_{1i}))(1-\sigma(\sum_{i}(h_{2i}w^{(3)}_{1i})))$

$E^{(3)} = (y-y_{hat})\sigma(h)(1-\sigma(h))$

![title](images/error_output.jpg)


In [248]:
E_3 = ((y - y_hat)*y_hat*(1-y_hat))[0]

In [249]:
E_3

-0.14177159990633648

### Compute $E^{2}$


$E^{2} = \sum[w^{3}E^{3}]\sigma^{\prime (2)}(...)$


$
w^{3}
=
\begin{bmatrix}
    w^{3}_{11} \\
    w^{3}_{12} \\
    w^{3}_{13}
\end{bmatrix}
$


$
E^{3}
=
\begin{bmatrix}
    E^3
\end{bmatrix}
$

$
\sigma^{\prime (2)}(...) = \sigma^{(2)}(...)(1-\sigma^{(2)}(...))
$

$\sigma^{(2)}(...)=X^{2}(1-X^{2})$

$
\begin{bmatrix}
    w^{3}_{11} \\
    w^{3}_{12} \\
    w^{3}_{13}
\end{bmatrix}
\begin{bmatrix}
    E^3
\end{bmatrix}
X^{2}(1-X^{2})
$


In [271]:
sigma_2_prime = hidden_later_2_output * (1-hidden_later_2_output)
sigma_2_prime

array([[ 0.14861958,  0.17438292,  0.15880574]])

In [284]:
E_2  = (np.dot(E_3, W3).T*sigma_2_prime).T

In [285]:
E_2 

array([[-0.00721922],
       [-0.01375461],
       [-0.0099171 ]])

## Compute error gradient in hidden layer 2

In [144]:
E_layer2 = np.dot(E_output[:, None], W3.T)*hidden_later_2_output*(1-hidden_later_2_output)

In [145]:
E_layer2

array([[-0.00359111, -0.01917735, -0.00640235]])

![title](images/error_layer_2.jpg)

In [233]:
E_layer2

array([[-0.00359111, -0.01917735, -0.00640235]])

## Compute error gradient in hidden layer 1

In [146]:
E_layer1 = np.dot(E_layer2, W2.T)*hidden_later_1_output*(1-hidden_later_1_output)

In [147]:
E_layer1

array([[-0.00012288, -0.00077438, -0.00361205, -0.00040351]])

![title](images/error_layer_3.jpg)

# Compute Errors 

## Compute errors at the layer 3 (output layer) 



In [247]:
E_3

-0.14177159990633648