# Creating a Shallow Neural Network

[**1. Initialization**](#1.-Initialization)

[**2. Model Building**](#2.-Model-Building)

[**3. Hyperparameter Tunning**](#3.-Hyperparameter-Tunning)

[**4. Predicting and Evaluation**](#4.-Predicting-and-Evaluation)


## 1. Initialization

### 1.1. Package imports

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import sklearn.linear_model

%matplotlib inline

### 1.2. Loading Data

**m**: total number of samples  
**n_x**: number of features  
**X**: input data  
**Y**: labels

In [31]:
# Read the wine-quality table; the last column is used as the label.
data = pd.read_csv("winequality-red.csv")

shape_data = data.shape
# m = number of rows (samples); n_x = number of ALL columns, label included.
m, n_x = shape_data[0], shape_data[1]

# Features: every column except the last, laid out as (features, samples).
X = np.array(data.iloc[:, :n_x - 1]).reshape(m, n_x - 1).T
# Labels: the last column, laid out as a single row (1, samples).
Y = np.array(data.iloc[:, n_x - 1]).reshape(m, 1).T

shape_X = X.shape
shape_Y = Y.shape

print('The shape of X is: ' + str(shape_X))
print('The shape of Y is: ' + str(shape_Y))
print('Total number of examples: %d' % m)

The shape of X is: (11, 1599)
The shape of Y is: (1, 1599)
Total number of examples: 1599


### 1.3. Dividing Data into Train, Cross Validation and Test sets

**train**: training set  
**cv**: cross validation set  
**test**: test set  

In [56]:
# Split sizes: 60% train, 20% cross validation, remainder test.
m_train = int(m * 0.6)
m_cv = int(m * 0.2)
m_test = m - (m_train + m_cv)

# Samples are columns, so the three sets are consecutive column ranges.
X_train = X[:, :m_train]
X_cv = X[:, m_train:m_train + m_cv]
X_test = X[:, m_train + m_cv:]

Y_train = Y[:, :m_train]
Y_cv = Y[:, m_train:m_train + m_cv]
Y_test = Y[:, m_train + m_cv:]

## 2. Model Building

### 2.1. Defining neural network structure

**n_x**: the size of the input layer (this is the number of features)  
**n_h**: the size of the hidden layer (this is initially set to 4, but will be tuned in the next section)  
**n_y**: the size of the output layer (based on the data, Y has 6 unique values from 3 to 8 inclusive)

In [58]:
# Input size must equal the number of rows of X (one row per feature).
# The earlier n_x counted every CSV column, label included, which is one
# more than X actually has.
n_x = X.shape[0]
# Hidden layer size; starting value.
n_h = 4
# NOTE(review): Y is loaded with shape (1, m) and the text above lists 6
# distinct label values (3..8), so an 8-unit output layer does not line up
# with the labels as-is -- confirm the intended label encoding.
n_y = 8

### 2.2. Initializing the model's parameters

**parameters**: python dictionary containing:  
*W1*: weight matrix of shape (n_h, n_x)  
*b1*: bias vector of shape (n_h, 1)  
*W2*: weight matrix of shape (n_y, n_h)  
*b2*: bias vector of shape (n_y, 1)

In [59]:
def initialize_parameters(n_x, n_h, n_y):
    """Create the initial weights and biases of the two-layer network.

    Args:
        n_x: size of the input layer.
        n_h: size of the hidden layer.
        n_y: size of the output layer.

    Returns:
        dict with "W1" (n_h, n_x) and "W2" (n_y, n_h) drawn from a standard
        normal scaled by 0.01, and "b1" (n_h, 1), "b2" (n_y, 1) set to zero.
    """
    scale = 0.01

    # Dict entries are evaluated top to bottom, so W1 is drawn before W2.
    parameters = {
        "W1": np.random.randn(n_h, n_x) * scale,
        "b1": np.zeros((n_h, 1)),
        "W2": np.random.randn(n_y, n_h) * scale,
        "b2": np.zeros((n_y, 1)),
    }

    expected_shapes = {
        "W1": (n_h, n_x),
        "b1": (n_h, 1),
        "W2": (n_y, n_h),
        "b2": (n_y, 1),
    }
    for name, shape in expected_shapes.items():
        assert parameters[name].shape == shape

    return parameters

### 2.3. Defining Sigmoid function

In [None]:
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + e^(-z)), applied element-wise.

    Args:
        z: a scalar or NumPy array.

    Returns:
        Value(s) in (0, 1) with the same shape as z.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

### 2.4. Implementing forward propagation

**Z1**: W1\*X+b1  
**A1**: The tanh output of the first layer activation  
**Z2**: W2\*A1+b2  
**A2**: The sigmoid output of the second layer activation  
**tmp**: a dictionary containing "Z1", "A1", "Z2" and "A2"

In [60]:
def forward_propagation(X, parameters):
    """Run one forward pass through the two-layer network.

    Args:
        X: input data with one column per example.
        parameters: dict holding "W1", "b1", "W2" and "b2".

    Returns:
        A2: sigmoid activation of the output layer, one row per output unit
            and one column per example.
        tmp: dict containing "Z1", "A1", "Z2" and "A2", consumed by
            backward_propagation.
    """
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    # Hidden layer uses tanh, output layer uses sigmoid.
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    # The output has as many rows as W2 (output units), not necessarily 1.
    assert A2.shape == (W2.shape[0], X.shape[1])

    # Bound to the name that is actually returned; the previous code built
    # this dict under a different name and then returned an undefined `tmp`.
    tmp = {"Z1": Z1,
           "A1": A1,
           "Z2": Z2,
           "A2": A2}

    return A2, tmp

### 2.5. Computing Cost

**cost**: cross-entropy cost

In [63]:
def compute_cost(A2, Y):
    """Compute the cross-entropy cost averaged over the examples.

    Args:
        A2: output activations, broadcast-compatible with Y.
        Y: labels; Y.shape[1] is taken as the number of examples.

    Returns:
        The cost as a Python float.
    """
    m = Y.shape[1]

    # Per-entry log-likelihood of the labels under the predictions.
    log_likelihood = Y * np.log(A2) + (1 - Y) * np.log(1 - A2)
    total = (-1 / m) * np.sum(log_likelihood)

    # Collapse any leftover array wrapper to a plain float.
    cost = float(np.squeeze(total))
    assert isinstance(cost, float)

    return cost

### 2.6. Implementing backward propagation

**grads**: python dictionary containing gradients with respect to different parameters

In [66]:
def backward_propagation(parameters, tmp, X, Y):
    """Compute the gradients of the cost for the two-layer network.

    Args:
        parameters: dict of weights; only "W2" is read here.
        tmp: dict from the forward pass; "A1" and "A2" are read.
        X: input data, one column per example.
        Y: labels, broadcast-compatible with A2.

    Returns:
        dict with "dW1", "db1", "dW2" and "db2".
    """
    m = X.shape[1]

    # W1 is not needed for any gradient below, so it is not looked up.
    W2 = parameters["W2"]

    A1 = tmp["A1"]
    A2 = tmp["A2"]

    # Output layer: sigmoid + cross-entropy gives the error A2 - Y.
    dZ2 = A2 - Y
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)

    # Hidden layer: A1 is a tanh activation, whose derivative is 1 - A1^2.
    dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))
    dW1 = (1 / m) * np.dot(dZ1, X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}

    return grads

### 2.7. Parameters update

In [67]:
def update_parameters(parameters, grads, learning_rate = 1.2):
    """Apply one gradient-descent step to every parameter.

    Args:
        parameters: dict holding "W1", "b1", "W2" and "b2".
        grads: dict holding "dW1", "db1", "dW2" and "db2".
        learning_rate: step size applied to each gradient.

    Returns:
        A new dict with the updated "W1", "b1", "W2" and "b2"; the input
        dict is not modified.
    """
    # Each parameter `name` is paired with its gradient stored under "d" + name.
    return {
        name: parameters[name] - learning_rate * grads["d" + name]
        for name in ("W1", "b1", "W2", "b2")
    }

### 2.8. Integrating the Model

In [68]:
def nn_model(X, Y, n_h, num_iterations = 10000, print_cost=False):
    """Train the two-layer network with batch gradient descent.

    Args:
        X: input data, one row per feature and one column per example.
        Y: labels, one column per example.
        n_h: size of the hidden layer.
        num_iterations: number of gradient-descent steps.
        print_cost: if True, print the cost every 1000 iterations.

    Returns:
        dict with the learned "W1", "b1", "W2" and "b2".
    """
    # Size the layers from the arrays actually passed in rather than from
    # notebook-level globals, so the weights always match X and Y.
    n_x = X.shape[0]
    n_y = Y.shape[0]

    parameters = initialize_parameters(n_x, n_h, n_y)

    for i in range(num_iterations):
        A2, tmp = forward_propagation(X, parameters)
        # compute_cost takes only the predictions and the labels.
        cost = compute_cost(A2, Y)
        grads = backward_propagation(parameters, tmp, X, Y)
        # Uses update_parameters' default learning rate.
        parameters = update_parameters(parameters, grads)

        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters

## 3. Hyperparameter Tunning

## 4. Predicting and Evaluation