In [2]:
import numpy as np

# 1. Initialize parameters

In [3]:
def initialize_parameters(number_of_layers, layers_dims):
    """
    Build the initial weights and biases for every layer of the network.
        - Weights are standard-normal draws scaled by 0.01.
        - Biases are all zeros.

    The global NumPy random generator is re-seeded with 1 on every call, so
    repeated calls with the same arguments return identical weights.

    Arguments:
    number_of_layers -- integer L; number of layers that own parameters (hidden layers plus output layer)
    layers_dims -- sequence of unit counts indexed by layer, from the input layer (index 0) up to layer L

    Returns:
    params -- dictionary holding, for each layer l = 1, ..., L:
                'W<l>' - weight matrix of shape (layers_dims[l], layers_dims[l-1])
                'b<l>' - bias vector of shape (layers_dims[l], 1)
    """

    np.random.seed(1)

    params = {}
    for layer in range(1, number_of_layers + 1):
        n_units, n_inputs = layers_dims[layer], layers_dims[layer - 1]
        params.update({
            f"W{layer}": 0.01 * np.random.randn(n_units, n_inputs),
            f"b{layer}": np.zeros((n_units, 1)),
        })

    return params

# 2. Activation functions

In [4]:
def sigmoid(Z):
    """
    Compute Sigmoid function for matrix Z in a numerically stable way.

    Arguments:
    Z -- 2D numpy array, containing linear transformation of previous layer post-activation matrix.
        Z = WA_prev + b
        shape: (size of current layer, number of examples)  --> (layer_dims[l], m)

    Returns:
    A -- 2D numpy array; Sigmoid function of Z:
        A = 1 / (1 + e^(-Z))
        shape is the same as Z: (layer_dims[l], m)
    """

    # 1 / (1 + e^(-Z)) == exp(-log(1 + e^(-Z))).
    # np.logaddexp(0, -Z) evaluates log(e^0 + e^(-Z)) without ever forming
    # e^(-Z) on its own, so very negative Z no longer overflows.
    A = np.exp(-np.logaddexp(0, np.negative(Z)))

    return A

In [5]:
def tanh(Z):
    """
    Compute Tanh function for matrix Z.

    Arguments:
    Z -- 2D numpy array, containing linear transformation of previous layer post-activation matrix.
        Z = WA_prev + b
        shape: (size of current layer, number of examples)  --> (layer_dims[l], m)

    Returns:
    A -- 2D numpy array; Tanh function of Z:
        A = (e^Z - e^(-Z)) / (e^Z + e^(-Z))
        shape is the same as Z: (layer_dims[l], m)
    """

    # Evaluating the quotient of exponentials directly overflows to inf / inf = NaN
    # once |Z| is large; np.tanh saturates cleanly at -1 / +1 instead.
    A = np.tanh(Z)

    return A

In [6]:
def relu(Z):
    """
    Apply the rectified linear unit element-wise to Z.

    Arguments:
    Z -- 2D numpy array with the pre-activation values of a layer,
        Z = WA_prev + b
        shape: (size of current layer, number of examples)  --> (layer_dims[l], m)

    Returns:
    A -- 2D numpy array with every negative entry of Z replaced by zero:
        A = max(0, Z)
        shape is the same as Z: (layer_dims[l], m)
    """

    floor = 0
    return np.maximum(floor, Z)

In [7]:
def leaky_relu(Z, alpha=0.01):
    """
    Compute Leaky ReLU function for matrix Z.

    Arguments:
    Z -- 2D numpy array, containing linear transformation of previous layer post-activation matrix.
        Z = WA_prev + b
        shape: (size of current layer, number of examples)  --> (layer_dims[l], m)
    alpha -- slope applied to negative entries of Z; the default 0.01 is the value
        that used to be hard-coded

    Returns:
    A -- 2D numpy array; Leaky ReLU function of Z:
        A = {
            alpha * Z   if Z < 0
            Z           if Z >= 0
        }
        shape is the same as Z: (layer_dims[l], m)
    """

    # Selecting per element stays correct for any alpha, whereas
    # max(alpha * Z, Z) is only right while alpha <= 1.
    A = np.where(Z >= 0, Z, alpha * Z)

    return A

# 3. Derivatives

In [8]:
def d_sigmoid(Z):
    """
    Evaluate the derivative of the sigmoid activation at every entry of Z.

    Arguments:
    Z -- 2D numpy array with the pre-activation values of a layer,
        Z = WA_prev + b
        shape: (size of current layer, number of examples)  --> (layer_dims[l], m)

    Returns:
    g_prim -- 2D numpy array; with g = sigmoid:
        g_prim = g(Z) * (1 - g(Z))
        shape is the same as Z: (layer_dims[l], m)
    """

    # Evaluate the activation once and reuse it for both factors.
    activation = sigmoid(Z)

    return activation * (1 - activation)

In [9]:
def d_tanh(Z):
    """
    Compute derivative of tanh function wrt matrix Z.

    Arguments:
    Z -- 2D numpy array, containing linear transformation of previous layer post-activation matrix.
        Z = WA_prev + b
        shape: (size of current layer, number of examples)  --> (layer_dims[l], m)

    Returns:
    g_prim -- 2D numpy array; derivative of tanh function of Z:
        g_prim = 1 - tanh(Z)^2
        shape is the same as Z: (layer_dims[l], m)
    """

    # np.tanh saturates at -1 / +1 for large |Z|, so the derivative goes to 0
    # there instead of becoming NaN through an overflowing exponential quotient.
    g_prim = 1 - np.square(np.tanh(Z))

    return g_prim

In [10]:
def d_relu(Z):
    """
    Evaluate the derivative of the ReLU activation at every entry of Z.

    Arguments:
    Z -- 2D numpy array with the pre-activation values of a layer,
        Z = WA_prev + b
        shape: (size of current layer, number of examples)  --> (layer_dims[l], m)

    Returns:
    g_prim -- 2D integer numpy array:
        g_prim = {
            0 if Z < 0
            1 if Z >= 0
        }
        shape is the same as Z: (layer_dims[l], m)
    """

    # Boolean mask of the entries where ReLU passes its input through
    # (Z == 0 is counted as slope 1).
    passes_through = Z >= 0

    # Multiplying the mask by 1 converts True / False into 1 / 0.
    return passes_through * 1

In [11]:
def d_leaky_relu(Z, alpha=0.01):
    """
    Compute derivative of Leaky ReLU function wrt matrix Z.

    Arguments:
    Z -- 2D numpy array, containing linear transformation of previous layer post-activation matrix.
        Z = WA_prev + b
        shape: (size of current layer, number of examples)  --> (layer_dims[l], m)
    alpha -- slope of the activation for negative entries of Z; the default 0.01 is
        the value that used to be hard-coded

    Returns:
    g_prim -- 2D numpy array; derivative of Leaky ReLU function of Z:
        g_prim = {
            alpha  if Z < 0
            1      if Z >= 0
        }
        shape is the same as Z: (layer_dims[l], m)
    """

    # Pick the slope per element: 1 where the input passes through unchanged
    # (Z == 0 included), alpha everywhere else.
    g_prim = np.where(Z >= 0, 1.0, alpha)

    return g_prim

# 4. Forward

In [12]:
def forward_propagation(X, number_of_layers, layers_dims, params):
    """
    Run one full forward pass: ReLU on layers 1, ..., L-1 and sigmoid on layer L.

    Arguments:
    X -- 2D numpy array with the input data; stored as the layer-0 activation 'A0'
        (it is multiplied by W1, so its first dimension must match W1's second dimension)
    number_of_layers -- integer L; number of layers that own parameters (hidden layers plus output layer)
    layers_dims -- not read by this function; accepted so callers can unpack an
        (number_of_layers, layers_dims) pair into the call
    params -- dictionary with weights 'W<l>' and biases 'b<l>' for l = 1, ..., L

    Returns:
    Z_dict -- dictionary with the linear outputs 'Z<l>' = W<l> A<l-1> + b<l> for l = 1, ..., L
    A_dict -- dictionary with the activations 'A<l>' for l = 0, ..., L ('A0' is X)
    """

    Z_dict = {}
    A_dict = {"A0": X}

    def linear_step(layer):
        # Affine transform of the previous layer's activations.
        return np.dot(params[f"W{layer}"], A_dict[f"A{layer - 1}"]) + params[f"b{layer}"]

    # Hidden layers 1, ..., L-1 use ReLU.
    for layer in range(1, number_of_layers):
        pre_activation = linear_step(layer)
        Z_dict[f"Z{layer}"] = pre_activation
        A_dict[f"A{layer}"] = relu(pre_activation)

    # Output layer L uses sigmoid.
    output_layer = number_of_layers
    pre_activation = linear_step(output_layer)
    Z_dict[f"Z{output_layer}"] = pre_activation
    A_dict[f"A{output_layer}"] = sigmoid(pre_activation)

    return Z_dict, A_dict

# 5. Cost function

In [13]:
def compute_cost(Y, AL):
    """
    Computes cost function - function to measure how good is the fit.

    The cross-entropy is summed over every output unit and averaged over the
    examples, so it also yields a single number when Y has more than one row.

    Arguments:
    Y -- array of true labels, shape (number of output units, number of examples)
    AL -- array of predicted probabilities, same shape as Y

    Returns:
    cost -- 0-dimensional numpy array (scalar); cross-entropy cost
    """

    m = Y.shape[1]

    # Keep probabilities strictly inside (0, 1): a saturated sigmoid returns exactly
    # 0.0 or 1.0, and log(0) would turn the cost into inf or NaN.
    eps = 1e-15
    AL_safe = np.clip(AL, eps, 1 - eps)

    # Element-wise form instead of np.dot(Y, log(AL).T): the dot product would
    # produce an (n_y, n_y) matrix rather than a scalar when Y has several rows.
    cost = -np.sum(Y * np.log(AL_safe) + (1 - Y) * np.log(1 - AL_safe)) / m

    cost = np.squeeze(cost)    # Return a 0-d array, the same type callers received before.

    return cost

# 6. Backward

In [14]:
def backward_propagation(Y, Z_dict, A_dict, number_of_layers, params):
    """
    Compute the gradients of the cross-entropy cost wrt every weight and bias,
    for a network with ReLU hidden layers and a sigmoid output layer.

    Arguments:
    Y -- array of true labels, shape (number of output units, number of examples)
    Z_dict -- dictionary with the linear outputs 'Z<l>' for l = 1, ..., L
    A_dict -- dictionary with the activations 'A<l>' for l = 0, ..., L,
        where 'A<L>' is the sigmoid output of the network
    number_of_layers -- integer L; number of layers that own parameters
    params -- dictionary with the weights 'W<l>' (biases are not needed here)

    Returns:
    grads -- dictionary of gradients 'dW<l>' and 'db<l>' for l = 1, ..., L,
        each with the same shape as the matching parameter
    """
    L = number_of_layers
    grads = {}
    m = Y.shape[1]

    # --- Output layer (sigmoid + cross-entropy) ---
    AL = A_dict["A"+str(L)]
    A_prev = A_dict["A"+str(L-1)]

    # dAL * sigmoid'(ZL) simplifies to AL - Y. Using the closed form avoids the
    # divisions Y / AL and (1 - Y) / (1 - AL), which become 0/0 or x/0 (NaN / inf)
    # as soon as the sigmoid saturates to exactly 0.0 or 1.0.
    dZL = AL - Y
    grads["dW" + str(L)] = 1/m * np.dot(dZL, A_prev.T)
    grads["db" + str(L)] = 1/m * np.sum(dZL, axis=1, keepdims=True)

    # Gradient wrt the previous activations is only needed if a hidden layer exists.
    if L > 1:
        dA_prev = np.dot(params["W"+str(L)].T, dZL)

    # --- Hidden layers L-1, ..., 1 (ReLU) ---
    for l in reversed(range(1, L)):
        Zl = Z_dict["Z"+str(l)]
        A_prev = A_dict["A"+str(l-1)]

        dZl = np.multiply(dA_prev, d_relu(Zl))    # ReLU activation
        grads["dW" + str(l)] = 1/m * np.dot(dZl, A_prev.T)
        grads["db" + str(l)] = 1/m * np.sum(dZl, axis=1, keepdims=True)

        # Nothing consumes the gradient wrt the input layer, so skip it for l == 1.
        if l > 1:
            dA_prev = np.dot(params["W"+str(l)].T, dZl)

    return grads

# 7. Update parameters

In [15]:
def update_parameters(params, grads, learning_rate):
    """
    Take one gradient-descent step on every weight and bias.

    Arguments:
    params -- dictionary with 'W<l>' and 'b<l>' for l = 1, ..., L; its entries are
        replaced in place with the updated arrays
    grads -- dictionary with the matching gradients 'dW<l>' and 'db<l>'
    learning_rate -- step size multiplying each gradient

    Returns:
    params -- the same dictionary object that was passed in, now holding
        parameter - learning_rate * gradient for every entry
    """

    # Each layer contributes exactly two entries (W and b).
    layer_count = len(params) // 2

    for layer in range(1, layer_count + 1):
        for prefix in ("W", "b"):
            key = f"{prefix}{layer}"
            params[key] = params[key] - learning_rate * grads["d" + key]

    return params

# 8. Complete model

In [16]:
def neural_network(X_train, Y_train, learning_rate, n_epochs, hidden_layers=None):
    """
    Build and train a fully connected network with batch gradient descent.

    Arguments:
    X_train -- 2D numpy array of inputs, shape (number of features, number of examples)
    Y_train -- 2D numpy array of labels, shape (number of output units, number of examples)
    learning_rate -- step size used by update_parameters
    n_epochs -- number of full-batch gradient-descent iterations
    hidden_layers -- optional sequence with the number of units of each hidden layer.
        When None (default) the architecture is read interactively with input(),
        as before; passing it makes the run reproducible without user interaction.

    Returns:
    params -- dictionary with the trained weights 'W<l>' and biases 'b<l>'
    architecture -- tuple (number_of_layers, layers_dims), where layers_dims lists the
        unit counts from the input layer to the output layer

    Raises:
    ValueError -- if the requested number of layers is smaller than 1, or if an
        interactive answer cannot be parsed as an integer
    """

    # Get dimensions of input data
    n_x = X_train.shape[0]
    n_y = Y_train.shape[0]

    if hidden_layers is None:
        # Get the architecture from the user
        print("Determine the architecture of your neural network.")
        print("--------------------------------------------------")

        number_of_layers = int(
            input("Specify number of layers in your neural network (number of hidden layers plus output layer): "))

        hidden_units = [
            int(input(f"Specify number of units in a layer {i}: "))
            for i in range(1, number_of_layers)
        ]
    else:
        hidden_units = [int(units) for units in hidden_layers]
        number_of_layers = len(hidden_units) + 1

    # The output layer always exists, so fewer than one layer cannot be trained;
    # fail here with a clear message instead of a KeyError deep inside the forward pass.
    if number_of_layers < 1:
        raise ValueError("number of layers must be at least 1 (the output layer)")

    layers_dims = [n_x, *hidden_units, n_y]
    architecture = (number_of_layers, layers_dims)

    print("--------------------------------------------------")
    print(f"Your Neural Network has {number_of_layers} layers (hidden and output).")
    print(f"Input layer has {layers_dims[0]} units")
    print(f"Hidden layers have {layers_dims[1:-1]} units respectively.")
    print(f"Output layer has {layers_dims[-1]} units.")
    # --------------------
    # Train neural network
    # --------------------

    # Initialize parameters
    params = initialize_parameters(*architecture)

    cost = None            # stays None when n_epochs == 0
    last_reported = None   # iteration number of the most recent progress line

    for i in range(n_epochs):
        # Forward propagation
        Z_dict, A_dict = forward_propagation(X_train, number_of_layers, layers_dims, params)
        # Compute cost (evaluated with the parameters from before this iteration's update)
        cost = compute_cost(Y_train, A_dict["A"+str(number_of_layers)])
        # Backpropagation - Compute gradients
        grads = backward_propagation(Y_train, Z_dict, A_dict, number_of_layers, params)
        # Update parameters
        params = update_parameters(params, grads, learning_rate)

        if i%100 == 0:
            print("Cost after iteration "+str(i+1)+": ", cost)
            last_reported = i + 1

    # Report the final cost once: skip it when no iteration ran, or when the
    # loop has already printed the line for the last iteration.
    if n_epochs > 0 and last_reported != n_epochs:
        print("Cost after iteration "+str(n_epochs)+": ", cost)

    return params, architecture

---

# Run Model

In [17]:
# Read the four comma-separated data files from the working directory.
X_train, X_test, Y_train, Y_test = (
    np.genfromtxt(csv_name, delimiter=',')
    for csv_name in ('x_train.csv', 'x_test.csv', 'y_train.csv', 'y_test.csv')
)

In [18]:
# Shapes as loaded from disk, before any reshaping.
print(*(dataset.shape for dataset in (X_train, X_test, Y_train, Y_test)))

(1000, 2) (1000, 2) (1000,) (1000,)


In [19]:
# Flatten everything after the first axis, then transpose so that each
# column holds one example. Run this cell once per load: a second run
# transposes the arrays back.
X_train, X_test, Y_train, Y_test = (
    dataset.reshape(dataset.shape[0], -1).T
    for dataset in (X_train, X_test, Y_train, Y_test)
)

In [20]:
# Shapes after the reshape cell: examples are now along the second axis.
print(*(dataset.shape for dataset in (X_train, X_test, Y_train, Y_test)))

(2, 1000) (2, 1000) (1, 1000) (1, 1000)


In [21]:
# Number of input features (rows) and number of training examples (columns).
n_x, m = X_train.shape[:2]

In [22]:
# Train the network. neural_network() asks on standard input for the number of
# layers and the size of each hidden layer before training starts.
new_params, architecture = neural_network(X_train, Y_train, learning_rate=0.01, n_epochs=601)

Determine the architecture of your neural network.
--------------------------------------------------


Specify number of layers in your neural network (number of hidden layers plus output layer):  3
Specify number of units in a layer 1:  8
Specify number of units in a layer 2:  4


--------------------------------------------------
Your Neural Network has 3 layers (hidden and output).
Input layer has 2 units
Hidden layers have [8, 4] units respectively.
Output layer has 1 units.
Cost after iteration 1:  0.6931595464996017
Cost after iteration 101:  0.36102888051418824
Cost after iteration 201:  0.2651528561784696
Cost after iteration 301:  0.02376653365386409
Cost after iteration 401:  0.015670695007657522
Cost after iteration 501:  0.012638561017945964
Cost after iteration 601:  0.01101069175768086
Cost after iteration 601:  0.01101069175768086


In [23]:
# Predict: run the trained parameters forward on the test inputs and take the
# output-layer activations as the predicted probabilities of label 1.
number_of_layers = architecture[0]
Z_predicted, A_predicted = forward_propagation(X_test, *architecture, new_params)
Y_hat = A_predicted[f"A{number_of_layers}"]

# Signed mean of (true label - predicted probability of label 1) over the test examples.
# Positive and negative differences cancel, so this shows bias rather than error size.
(Y_test - Y_hat).sum() / Y_test.shape[1]

0.0020524524596953974

---