<a href="https://colab.research.google.com/github/thapaliya123/cat_dog_predictions/blob/master/reg_utils.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#import necessary libraries
import os
import numpy as np
from matplotlib import  pyplot as plt

import pickle

#loading_feature_and_labels_from_created_pickle_file
def load_datasets(feature_path, labels_path):
    """
    Load the pickled features and labels and rescale the features.

    Arguments:
    feature_path -- path of the pickle file that holds the feature array
    labels_path -- path of the pickle file that holds the labels

    Returns:
    X_train -- the unpickled features divided by 255.0
               (presumably 8-bit pixel intensities -- TODO confirm)
    Y_train -- the unpickled labels, returned unchanged
    """
    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # only point this at files that were produced by a trusted source.
    # The context managers make sure both files are closed even on error.
    with open(feature_path, "rb") as feature_file:
        X_train = pickle.load(feature_file)
    X_train = X_train / 255.0

    with open(labels_path, "rb") as labels_file:
        Y_train = pickle.load(labels_file)

    return X_train, Y_train

In [0]:
#initializing the parameters
def initialize_paramters_deep(layers_dims):
    """
    Create the initial weights and biases of a fully connected network.

    Arguments:
    layers_dims -- sequence with the number of units of every layer,
                   input layer first

    Returns:
    parameters -- dictionary with the keys "W1", "b1", ..., one pair per
                  layer after the input layer, where
                  Wl has shape (layers_dims[l], layers_dims[l-1]) and
                  bl is a zero vector of shape (layers_dims[l], 1)
    """
    # Fixed seed: every call produces the same weights (and resets NumPy's
    # global random state as a side effect).
    np.random.seed(3)
    parameters = {}

    consecutive_sizes = zip(layers_dims[:-1], layers_dims[1:])
    for idx, (fan_in, fan_out) in enumerate(consecutive_sizes, start=1):
        # He initialization: standard normal draws scaled by sqrt(2 / fan_in).
        he_scale = np.sqrt(2 / fan_in)
        parameters["W" + str(idx)] = np.random.randn(fan_out, fan_in) * he_scale
        parameters["b" + str(idx)] = np.zeros((fan_out, 1))

    return parameters

In [0]:
#forward_propagation_module

#implementation_of_linear_activation
def linear_forward(A, W, b):
    """
    Compute the linear (pre-activation) part of one layer: Z = W.A + b.

    Arguments:
    A -- activations of the previous layer (or the input data),
         shape (size_of_prev_layer, no_of_examples)
    W -- weight matrix, shape (size_of_current_layer, size_of_prev_layer)
    b -- bias vector, shape (size_of_current_layer, 1)

    Returns:
    Z -- pre-activation values, shape (size_of_current_layer, no_of_examples)
    cache -- the tuple (A, W, b), kept for the backward pass
    """
    pre_activation = np.dot(W, A) + b

    # Sanity check: one row per unit of this layer, one column per example.
    expected_shape = (W.shape[0], A.shape[1])
    assert pre_activation.shape == expected_shape

    return pre_activation, (A, W, b)

In [0]:
#implementation_of_sigmoid_activation_function
def sigmoid(Z):
    """
    Apply the logistic sigmoid element-wise.

    Arguments:
    Z -- numpy array of any shape

    Returns:
    A -- sigmoid(Z), same shape as Z
    cache -- Z itself, kept because the backward pass needs it
    """
    activation = 1 / (1 + np.exp(-Z))
    return activation, Z


In [0]:
#implementation_relu_activation_function

def relu(Z):
    """
    Apply the rectified linear unit element-wise: max(0, Z).

    Arguments:
    Z -- output of the linear layer, any shape

    Returns:
    A -- relu(Z), same shape as Z
    cache -- Z itself, kept because the backward pass needs it
    """
    rectified = np.maximum(0, Z)
    return rectified, Z

In [0]:
#implement_linear_activation_forward

def linear_activation_forward(A_prev, W, b, activation, keep_prob, seed=1):
    """
    Implement forward propagation for one LINEAR -> ACTIVATION layer,
    with optional inverted dropout on relu layers.

    Arguments:
    A_prev -- activations from the previous layer (or the input data)
    W -- weight matrix of the current layer,
         shape (size_of_current_layer, size_of_previous_layer)
    b -- bias vector, shape (size_of_current_layer, 1)
    activation -- "sigmoid" or "relu"
    keep_prob -- probability of keeping a unit on a relu layer; 1 disables
                 dropout. Ignored for sigmoid layers.
    seed -- value passed to np.random.seed before the dropout mask is drawn.
            Defaults to 1, which reproduces the historical behaviour (the
            same mask on every call for a given shape). Pass None to leave
            NumPy's global random state untouched.

    Returns:
    A -- output of the activation (after dropout, if it was applied)
    cache -- (linear_cache, activation_cache) when no dropout was applied, or
             (linear_cache, D, activation_cache) for a relu layer with
             keep_prob < 1, where D is the 0/1 dropout mask

    Raises:
    ValueError -- if activation is not "sigmoid"/"relu", or if a relu layer
                  receives a keep_prob outside (0, 1]
    """
    # Fail early with a clear message instead of falling through and hitting
    # an unbound local variable at the return statement.
    if activation not in ('sigmoid', 'relu'):
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got {!r}".format(activation))
    if activation == 'relu' and not (0 < keep_prob <= 1):
        raise ValueError(
            "keep_prob must be in the interval (0, 1], got {!r}".format(keep_prob))

    if seed is not None:
        np.random.seed(seed)

    Z, linear_cache = linear_forward(A_prev, W, b)

    if activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)
        return A, (linear_cache, activation_cache)

    A, activation_cache = relu(Z)
    if keep_prob == 1:
        return A, (linear_cache, activation_cache)

    # Inverted dropout: zero the units whose random draw is >= keep_prob and
    # rescale the survivors by 1/keep_prob.
    D = np.random.rand(A.shape[0], A.shape[1])
    D = (D < keep_prob).astype(int)
    A = A * D / keep_prob
    return A, (linear_cache, D, activation_cache)
  

In [0]:
def L_model_forward(X, parameters, keep_prob=1):
    """
    Run the full forward pass: [LINEAR -> RELU] * (L-1) -> LINEAR -> SIGMOID.

    Arguments:
    X -- data, numpy array of shape (input_size, number_of_examples)
    parameters -- dictionary with "W1", "b1", ..., "WL", "bL"
                  (as produced by initialize_paramters_deep)
    keep_prob -- forwarded unchanged to every layer; default 1

    Returns:
    AL -- output of the final sigmoid layer, shape (1, number_of_examples)
    caches -- list with one cache per layer, in layer order:
              indices 0..L-2 come from the relu layers,
              index L-1 comes from the sigmoid layer
    """
    num_layers = len(parameters) // 2  # each layer contributes one W and one b
    caches = []
    activation = X

    # Hidden layers: LINEAR -> RELU.
    for idx in range(1, num_layers):
        weights = parameters["W" + str(idx)]
        bias = parameters["b" + str(idx)]
        activation, layer_cache = linear_activation_forward(
            activation, weights, bias, 'relu', keep_prob)
        caches.append(layer_cache)

    # Output layer: LINEAR -> SIGMOID.
    out_weights = parameters["W" + str(num_layers)]
    out_bias = parameters["b" + str(num_layers)]
    AL, output_cache = linear_activation_forward(
        activation, out_weights, out_bias, 'sigmoid', keep_prob)
    caches.append(output_cache)

    # The network is expected to end in a single output unit.
    assert AL.shape == (1, X.shape[1])

    return AL, caches

In [0]:
#compute_cost

def compute_cost(AL, Y):
    """
    Compute the binary cross-entropy cost.

    Arguments:
    AL -- probability vector of the label predictions,
          shape (1, number_of_examples)
    Y -- true labels (0 if cat, 1 if dog), shape (1, number_of_examples)

    Returns:
    cost -- cross-entropy cost averaged over the examples (scalar)
    """
    m = Y.shape[1]

    probs = np.asarray(AL)
    if not np.issubdtype(probs.dtype, np.floating):
        probs = probs.astype(float)

    # A prediction of exactly 0 or 1 would produce log(0) = -inf and then
    # 0 * -inf = nan in the sum below. Clamp to the closest representable
    # values so the cost stays finite; any probability strictly between the
    # smallest positive normal float and 1 is left untouched.
    limits = np.finfo(probs.dtype)
    probs = np.clip(probs, limits.tiny, 1 - limits.epsneg)

    logprobs = np.multiply(np.log(probs), Y) + np.multiply(np.log(1 - probs), 1 - Y)
    cost = -(1 / m) * np.sum(logprobs)

    assert cost.shape == ()

    return cost

In [0]:
#Regularizations
def compute_cost_with_regularization(AL, Y, parameters, lambd):
    """
    Compute the cross-entropy cost plus an L2 penalty on all weight matrices.

    Arguments:
    AL -- post-activation output of forward propagation,
          shape (output_size, number_of_examples)
    Y -- "true" labels vector, shape (output_size, number_of_examples)
    parameters -- dictionary with "W1", "b1", ..., "WL", "bL"
    lambd -- L2 regularization strength

    Returns:
    cost -- cross-entropy cost + sum over layers of
            (lambd / (2 * m)) * sum(Wl ** 2), with m = number of examples
    """
    m = Y.shape[1]
    num_layers = len(parameters) // 2  # each layer contributes one W and one b

    cross_entropy_cost = compute_cost(AL, Y)

    # Only the weights are penalised; the biases are left out.
    l2_penalty = sum(
        (lambd / (2 * m)) * np.sum(np.square(parameters["W" + str(idx)]))
        for idx in range(1, num_layers + 1)
    )

    return cross_entropy_cost + l2_penalty



In [0]:
#implements_relu_backward_function

def relu_backward(dA, cache):
    """
    Backward pass of a single RELU unit.

    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- the pre-activation 'Z' saved during the forward pass

    Returns:
    dZ -- gradient of the cost with respect to Z
    """
    Z = cache

    # Work on a copy so the caller's dA is left untouched.
    grad = np.array(dA, copy=True)

    # RELU passes no gradient through where its input was not positive.
    inactive = Z <= 0
    grad[inactive] = 0

    assert grad.shape == Z.shape

    return grad

In [0]:
def sigmoid_backward(dA, cache):
    """
    Backward pass of a single sigmoid unit.

    Arguments:
    dA -- post-activation gradient, of any shape
    cache -- the pre-activation "Z" saved during the forward pass

    Returns:
    dZ -- gradient of the cost with respect to Z, i.e.
          dA * sigmoid(Z) * (1 - sigmoid(Z))
    """
    Z = cache

    # Recompute the activation from Z; its derivative is sig * (1 - sig).
    sig = 1 / (1 + np.exp(-Z))
    complement = 1 - sig
    grad = dA * sig * complement

    assert grad.shape == Z.shape
    return grad

In [0]:
#backward_propagation

#linear_backward

def linear_backward(dZ, cache, lambd):
    """
    Backward pass through the linear part of one layer (layer l).

    Arguments:
    dZ -- gradient of the cost with respect to the linear output of layer l
    cache -- tuple (A_prev, W, b) saved by the forward pass of this layer
    lambd -- L2 regularization strength; 0 means no regularization term

    Returns:
    dA_prev -- gradient with respect to the previous layer's activation,
               same shape as A_prev
    dW -- gradient with respect to W, same shape as W
    db -- gradient with respect to b, same shape as b
    """
    A_prev, W, b = cache
    m = A_prev.shape[1]  # number of examples

    dW = (1. / m) * np.dot(dZ, A_prev.T)
    if lambd != 0:
        # Derivative of the L2 penalty (lambd / (2m)) * sum(W ** 2).
        dW = dW + (lambd * W) / m

    db = (1 / m) * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    # Every gradient must have the shape of the quantity it belongs to.
    assert dA_prev.shape == A_prev.shape
    assert dW.shape == W.shape
    assert db.shape == b.shape

    return dA_prev, dW, db

In [0]:
#linear_activation_backward

def linear_activation_backward(dA, cache, activation, lambd, keep_prob):
    """
    Implement the backward propagation for one LINEAR -> ACTIVATION layer.

    Arguments:
    dA -- post-activation gradient for the current layer l
    cache -- values stored by the forward pass of this layer:
             (linear_cache, activation_cache), or
             (linear_cache, D, activation_cache) for a relu layer that was
             run with keep_prob < 1 (D is the dropout mask)
    activation -- the activation used in this layer: "sigmoid" or "relu"
    lambd -- L2 regularization strength, forwarded to linear_backward
    keep_prob -- keep probability used in the forward pass; for relu layers
                 it only selects which cache layout to unpack

    Returns:
    dA_prev -- gradient of the cost with respect to the activation of the
               previous layer (l-1), same shape as A_prev
    dW -- gradient of the cost with respect to W (layer l), same shape as W
    db -- gradient of the cost with respect to b (layer l), same shape as b

    Raises:
    ValueError -- if activation is not "sigmoid"/"relu", or if a relu layer
                  receives a keep_prob outside (0, 1]
    """
    # Fail early with a clear message instead of falling through and hitting
    # an unbound local variable at the return statement.
    if activation not in ("relu", "sigmoid"):
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got {!r}".format(activation))

    if activation == "relu":
        if not (0 < keep_prob <= 1):
            raise ValueError(
                "keep_prob must be in the interval (0, 1], got {!r}".format(keep_prob))

        if keep_prob == 1:
            linear_cache, activation_cache = cache
        else:
            # The dropout mask is not applied here: it has the shape of this
            # layer's activation, so it belongs on dA, and L_model_backward
            # applies it before calling this function.
            linear_cache, _, activation_cache = cache
        dZ = relu_backward(dA, activation_cache)
    else:
        linear_cache, activation_cache = cache
        dZ = sigmoid_backward(dA, activation_cache)

    return linear_backward(dZ, linear_cache, lambd)

In [0]:
#backpropagate_through_network
def L_model_backward(AL, Y, caches, lambd=0, keep_prob=1):
    """
    Implement the backward propagation for the
    [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID network, with optional L2
    regularization and inverted dropout.

    Arguments:
    AL -- probability vector, output of the forward propagation (L_model_forward())
    Y -- true "label" vector (0 if cat, 1 if dog); reshaped to AL's shape
    caches -- list of caches containing:
                every cache of linear_activation_forward() with "relu"
                (it's caches[l], for l in range(L-1) i.e l = 0...L-2)
                the cache of linear_activation_forward() with "sigmoid"
                (it's caches[L-1])
    lambd -- L2 regularization strength; 0 (default) disables the L2 term
    keep_prob -- keep probability that was used in the forward pass;
                 1 (default) means no dropout was applied

    Returns:
    grads -- A dictionary with the gradients
             grads["dA" + str(l)] = ...
             grads["dW" + str(l)] = ...
             grads["db" + str(l)] = ...
             With keep_prob < 1, grads["dA" + str(l)] for the hidden layers
             holds the gradient after the dropout mask and the 1/keep_prob
             scaling have been applied.
    """
    grads = {}
    L = len(caches)  # the number of layers
    Y = np.array(Y).reshape(AL.shape)  # after this line, Y is the same shape as AL

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Lth layer (SIGMOID -> LINEAR) gradients.
    current_cache = caches[L-1]
    grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, 'sigmoid', lambd, keep_prob)

    # Loop from l=L-2 to l=0: lth layer (RELU -> LINEAR) gradients.
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_current = grads["dA" + str(l + 1)]

        if keep_prob < 1:
            # The forward pass computed A = relu(Z) * D / keep_prob, so the
            # gradient has to go through the same mask AND the same
            # 1/keep_prob scaling. Applying only the mask would shrink every
            # hidden-layer gradient by a factor of keep_prob.
            _, D, _ = current_cache
            dA_current = dA_current * D / keep_prob
            grads["dA" + str(l + 1)] = dA_current

        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dA_current, current_cache, 'relu', lambd, keep_prob)
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads

In [0]:
#predictions for binary classifier

def predict(parameters, X):
    """
    Predict the class of every example with the learned parameters.

    Arguments:
    parameters -- python dictionary containing the learned parameters
    X -- input data of size (n_x, m)

    Return:
    predictions -- array of shape (1, m) holding 0 (cat) where the network
                   output is <= 0.5 and 1 (dog) otherwise
    """
    AL, _ = L_model_forward(X, parameters)

    # Threshold the single output row in place: <= 0.5 -> 0, anything else -> 1.
    AL[0, :] = np.where(AL[0, :] <= 0.5, 0, 1)

    return AL