In [1]:
import numpy as np
import matplotlib.pyplot as plt


Matplotlib is building the font cache; this may take a moment.


In [87]:
def init_params(layer_dims):
    np.random.seed(3)
    params = {'W': {}, 'b': {}}

    for l in range(len(layer_dims)):
        params['W'][l] = np.random.randn(layer_dims[l], layer_dims[l-1])*0.01
        params['b'][l] = np.zeros((layer_dims[l], 1))

    return params

In [32]:
# Z (linear hypothesis) - Z = W*X + b , 
# W - weight matrix, b- bias vector, X- Input 

def sigmoid(Z):
    A = 1/(1+np.exp(np.dot(-1, Z)))
    # cache = (Z)

    return A

In [48]:
def forward_prop(X, params):

    A = X # input to first layer i.e. training data
    caches = []
    L = len(params['W'])
    for l in range(1, L+1):
        A_prev = A

        # Linear Hypothesis
        weight = params['W'+str(l)]
        bias = params['b'+str(l)]
        Z = np.dot(weight, A_prev) + bias

        # Storing the linear cache
        linear_cache = (A_prev, weight, bias)

        # Applying sigmoid on linear hypothesis
        A = sigmoid(Z)
        activation_cache = (Z)

         # storing the both linear and activation cache
        cache = (linear_cache, activation_cache)
        caches.append(cache)

    return A, caches

In [50]:
def cost_function(A, Y):
    m = Y.shape[1]

    cost = (-1/m)*(np.dot(np.log(A), Y.T) + np.dot(np.log(1-A), 1-Y.T))

    return cost

In [5]:
def one_layer_backward(dA, cache):
    linear_cache, activation_cache = cache

    Z = activation_cache
    dZ = dA*sigmoid(Z)*(1-sigmoid(Z)) # The derivative of the sigmoid function

    A_prev, W, b = linear_cache
    m = A_prev.shape[1]

    dW = (1/m)*np.dot(dZ, A_prev.T)
    db = (1/m)*np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db

In [80]:
def backprop(AL, Y, caches):
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)

    dAL = -(np.divide(Y, AL) - np.divide(1-Y, 1-AL))

    current_cache = caches[L-1]
    grads['dA'+str(L-1)], grads['dW'+str(L-1)], grads['db'+str(L-1)] = one_layer_backward(dAL, current_cache)

    for l in reversed(range(1, L-1)):

        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = one_layer_backward(grads["dA" + str(l+1)], current_cache)
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads


In [81]:
def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2

    for l in range(1, L):
        parameters['W'+str(l+1)] = parameters['W'+str(l+1)] - learning_rate*grads['W'+str(l+1)]
        parameters['b'+str(l+1)] = parameters['b'+str(l+1)] -  learning_rate*grads['b'+str(l+1)]

    return parameters

In [92]:
def train(X, Y, layer_dims, epochs, lr):
    params = init_params(layer_dims)
    print("params init-ed")
    print(params['W'])
    cost_history = []

    # for i in range(epochs):
    #     print(f'epoch: {i}')
    #     Y_hat, caches = forward_prop(X, params)
    #     print(f'epoch {i} forward')
    #     cost = cost_function(Y_hat, Y)
    #     cost_history.append(cost)
    #     print(f'epoch {i} cost')
    #     grads = backprop(Y_hat, Y, caches)
    #     print(f'epoch {i} back')

    #     params = update_parameters(params, grads, lr)
    #     print(f'epoch {i} udpate')


    return params, cost_history


In [83]:
import pandas as pd
chicago_taxi_dataset = pd.read_csv("https://download.mlcc.google.com/mledu-datasets/chicago_taxi_train.csv")

In [93]:
sample_set = chicago_taxi_dataset.sample(frac=0.1)

input = sample_set.iloc[:, 5:6].values
Y = sample_set.iloc[:, 11:12].values
layer_dims = [len(input),len(input),len(input)]

output = train(input, Y, layer_dims, 5, 0.01)


params init-ed
{0: array([[ 0.01788628,  0.0043651 ,  0.00096497, ..., -0.01105802,
        -0.00973133, -0.00869474],
       [-0.01077872, -0.00025493,  0.00303501, ...,  0.00025722,
         0.00808777,  0.00197194],
       [-0.00724444,  0.00356433,  0.00557131, ...,  0.0157029 ,
         0.00427096, -0.00701972],
       ...,
       [-0.02031105,  0.00427488,  0.01216303, ...,  0.00013409,
         0.00550045,  0.01804406],
       [ 0.00234855,  0.00456082,  0.01224881, ..., -0.00775281,
        -0.00497009,  0.01289139],
       [ 0.00923398, -0.00279362, -0.00048861, ..., -0.00705207,
        -0.00500258, -0.00712836]]), 1: array([[ 0.00332398,  0.00336965,  0.00224342, ...,  0.01431808,
         0.00562085,  0.00519799],
       [-0.01808816,  0.01445452, -0.00982747, ...,  0.00024788,
         0.00048003,  0.00292163],
       [-0.00145554,  0.01003835, -0.00649487, ...,  0.01021231,
         0.01129222,  0.01105009],
       ...,
       [-0.0110136 , -0.00491309,  0.01109839, ..., 