In [None]:
import pandas as pd
import numpy as np

In [None]:
def split(X, Y, val_split = 0.1, test_split = 0.2):
  '''
  Partition X and Y column-wise into train, validation and test sets.

  X, Y: pandas DataFrames, sliced by column position with .iloc
  val_split: fraction of the columns of X given to the validation set
  test_split: fraction that, together with val_split, sizes the train set;
              the test set receives whatever columns are left over

  returns:
  X_tr, Y_tr, X_val, Y_val, X_ts, Y_ts
  (validation and test column labels are reset to 0..size-1,
   train column labels are left untouched)
  '''
  n_cols = X.shape[1]
  n_train = int(n_cols * (1 - (val_split + test_split)))
  n_val = int(n_cols * val_split)
  n_test = n_cols - n_train - n_val          ### remainder goes to the test set
  val_stop = n_train + n_val                 ### first column position of the test set

  X_tr, Y_tr = X.iloc[:, :n_train], Y.iloc[:, :n_train]

  X_val, Y_val = X.iloc[:, n_train:val_stop], Y.iloc[:, n_train:val_stop]
  for part in (X_val, Y_val):
    part.columns = list(range(n_val))        ### relabel columns 0 .. n_val-1

  X_ts, Y_ts = X.iloc[:, val_stop:], Y.iloc[:, val_stop:]
  for part in (X_ts, Y_ts):
    part.columns = list(range(n_test))       ### relabel columns 0 .. n_test-1

  return X_tr, Y_tr, X_val, Y_val, X_ts, Y_ts

In [None]:
def initialize_params(dims, seed):
  '''
  Build the initial weights and biases of a fully connected network.

  dims: number of neurons in each layer, input layer first
  seed: value handed to np.random.seed before any weight is drawn

  returns:
  parameters: python dict with keys "W1", "b1", "W2", "b2", ...
              (one W/b pair for every entry of dims after the first)
              Wl --- np.random.randn sample of shape (dims[l], dims[l-1])
              bl --- zeros of shape (dims[l], 1)
  '''
  np.random.seed(seed)
  parameters = {}

  for layer in range(1, len(dims)):
    fan_out, fan_in = dims[layer], dims[layer-1]
    suffix = str(layer)
    parameters["W" + suffix] = np.random.randn(fan_out, fan_in)
    parameters["b" + suffix] = np.zeros((fan_out, 1))

  return parameters

In [None]:
def linear_forward(A, W, b):
  '''
  Linear (pre-activation) step of one layer.

  A: activations coming from the previous layer
  W: weight matrix of shape (size of current layer, size of previous layer)
  b: bias of shape (size of current layer, 1)

  returns:
  Z: np.dot(W, A) + b --- the input of the activation function
  cache: the tuple (A, W, b) --- kept for backpropagation
  '''
  weighted_sum = np.dot(W, A)
  pre_activation = weighted_sum + b
  return pre_activation, (A, W, b)

In [None]:
def sigmoid(Z):
  '''
  Element-wise logistic sigmoid.

  Z: array of any shape

  returns:
  A: 1/(1 + exp(-Z)), same shape as Z
  cache: Z itself --- reused during backprop
  '''
  denominator = 1 + np.exp(-Z)
  return 1/denominator, Z

In [None]:
def relu(Z):
  '''
  Element-wise rectified linear unit.

  Z: array of any shape

  returns:
  A: np.maximum(0, Z), same shape as Z
  cache: Z itself --- reused during backprop
  '''
  rectified = np.maximum(0, Z)
  return rectified, Z

In [None]:
def linear_activation_forward(A_prev, W, b, activation):
  '''
  Forward pass through one layer: linear step followed by its activation.

  A_prev: activations from the previous layer
  W: weight matrix
  b: bias matrix
  activation: either "relu" or "sigmoid"

  returns:
  A: activation values of this layer
  cache: (linear_cache, activation_cache) = ((A_prev, W, b), Z) ---- helpful in backprop

  raises:
  ValueError: if activation is neither "sigmoid" nor "relu"
  '''
  ### reject unknown names up front instead of failing later on an unbound local
  if activation not in ("sigmoid", "relu"):
    raise ValueError('activation must be "sigmoid" or "relu", got ' + repr(activation))

  Z, linear_cache = linear_forward(A_prev, W, b)       ### Z = W . A_prev + b, linear_cache = (A_prev, W, b)

  if activation == "sigmoid":
    A, activation_cache = sigmoid(Z)                   ### A = sigmoid(Z), activation_cache = Z
  else:
    A, activation_cache = relu(Z)                      ### A = relu(Z),    activation_cache = Z

  cache = (linear_cache, activation_cache)
  return A, cache

In [None]:
def l_layer_forward(X, parameters):
  '''
  Full forward pass: relu on every layer except the last, sigmoid on the last.

  X: input, used as the activation of the input layer
  parameters: dict holding "W1", "b1", ..., "WL", "bL" (L = len(parameters)//2)

  returns:
  AL: activation value of the last layer
  caches: list with one cache per layer, in layer order:
          entries 0 .. L-2 come from the relu layers
          entry L-1 comes from the sigmoid layer
  '''
  n_layers = len(parameters)//2
  caches = []
  activation = X

  ### hidden layers 1 .. n_layers-1 all use relu
  for idx in range(1, n_layers):
    weights, bias = parameters["W"+str(idx)], parameters["b"+str(idx)]
    activation, layer_cache = linear_activation_forward(activation, weights, bias, "relu")
    caches.append(layer_cache)

  ### output layer uses sigmoid
  out_weights, out_bias = parameters["W"+str(n_layers)], parameters["b"+str(n_layers)]
  AL, output_cache = linear_activation_forward(activation, out_weights, out_bias, "sigmoid")
  caches.append(output_cache)

  return AL, caches

In [None]:
def compute_cost(AL, Y):
  '''
  Binary cross-entropy averaged over all examples.

  AL: activations of last layer of shape (1, number of examples)
  Y: actual output labels of shape (1, number of examples)

  returns:
  cost: scalar cross_entropy loss,
        -1/m * sum(Y*log(AL + eps) + (1-Y)*log(1 - AL + eps))
  '''
  AL = np.asarray(AL)
  Y = np.asarray(Y)               ### also accepts pandas objects / nested lists
  m = Y.shape[1]
  epsilon = 1e-5                  ### keeps log() finite when AL is exactly 0 or 1
  log_likelihood = np.multiply(Y, np.log(AL+epsilon)) + np.multiply((1-Y), np.log(1-AL+epsilon))

  ### sum over the examples so that a single number comes back
  cost = -1/m * np.sum(log_likelihood)

  cost = np.squeeze(cost)
  return cost

In [None]:
def linear_backward(dZ, cache):
  '''
  Backward pass through the linear part of one layer.

  dZ: gradient of cost wrt Z of the current layer
  cache: the tuple (A_prev, W, b) stored during the forward pass

  returns:
  dA_prev: np.dot(W.T, dZ) ---- gradient of cost wrt the previous layer's activation
  dW: 1/m * np.dot(dZ, A_prev.T) ---- gradient of cost wrt W
  db: 1/m * row-wise sum of dZ (keepdims) ---- gradient of cost wrt b
  where m = A_prev.shape[1]
  '''
  A_prev, W, _ = cache                  ### b itself is not needed for the gradients
  inv_m = 1/A_prev.shape[1]

  grad_W = inv_m * np.dot(dZ, A_prev.T)
  grad_b = inv_m * np.sum(dZ, axis=1, keepdims = True)
  grad_A_prev = np.dot(W.T, dZ)

  return grad_A_prev, grad_W, grad_b

In [None]:
def sigmoid_backward(dA, cache):
  '''
  Backward pass through a sigmoid activation.

  dA: gradient of cost wrt the activation ---- dA = dCost/dA
  cache: 'Z' that we stored earlier

  returns
  dZ: gradient of cost wrt to Z ----- dZ = dCost/dZ = dCost/dA * dA/dZ

  for sigmoid: dA/dZ = A*(1-A), with A = 1/(1 + exp(-Z))
  '''
  Z = cache
  ### the denominator 1 + exp(-Z) is always greater than 1, so no epsilon is
  ### needed; adding one would make s differ from the sigmoid and bias dZ
  s = 1/(1+np.exp(-Z))
  dZ = dA * s * (1-s)
  return dZ

In [None]:
def relu_backward(dA, cache):
  '''
  Backward pass through a relu activation.

  dA: gradient of cost wrt the activation ---- dA = dCost/dA
  cache: the 'Z' stored during the forward pass

  returns
  dZ: copy of dA with every entry zeroed where Z <= 0 (dA itself is not modified)
  '''
  inactive = cache <= 0                 ### positions where relu passed nothing through
  dZ = np.array(dA, copy=True)
  dZ[inactive] = 0
  return dZ

In [None]:
def linear_activation_backward(dA, cache, activation):
  '''
  Backward pass through one layer: activation gradient, then linear gradient.

  dA: gradient of cost wrt the activation of the current layer
  cache: (linear_cache, activation_cache) = ((A_prev, W, b), Z)
  activation: either "sigmoid" or "relu"

  returns:
  dA_prev: gradient of cost wrt A_prev (layer l-1)
  dW: gradient of cost wrt W --- current layer
  db: gradient of cost wrt b --- current layer

  raises:
  ValueError: if activation is neither "sigmoid" nor "relu"
  '''
  ### reject unknown names up front instead of failing later on an unbound dZ
  if activation not in ("sigmoid", "relu"):
    raise ValueError('activation must be "sigmoid" or "relu", got ' + repr(activation))

  linear_cache, activation_cache = cache
  if activation == "sigmoid":
    dZ = sigmoid_backward(dA, activation_cache)
  else:
    dZ = relu_backward(dA, activation_cache)

  dA_prev, dW, db = linear_backward(dZ, linear_cache)

  return dA_prev, dW, db

In [None]:
def l_layer_backward(AL, Y, caches):
  '''
  Full backward pass. Backprop is started from dAL = dCost/dAL.

  AL: activation of last layer
  Y: actual output labels (array-like; reshaped to AL.shape)
  caches: list of caches containing:
          every linear_activation_cache with relu function --- indexed from 0 to L-2 (because we have L-1 relu layers)
          linear_activation_cache with sigmoid function ---- indexed at L-1

  returns:
  grads: dict with keys
         "dA0" .. "dA(L-1)", "dW1" .. "dWL", "db1" .. "dbL"
         where L = len(caches)
  '''
  grads = {}
  L = len(caches)
  Y = np.asarray(Y).reshape(AL.shape)   ### asarray so pandas labels work as well as ndarrays
  epsilon = 1e-3                        ### keeps both denominators non-zero when AL is exactly 0 or 1
  dAL = -(np.divide(Y, AL+epsilon) - np.divide((1-Y), (1-AL+epsilon)))

  ### output (sigmoid) layer
  current_cache = caches[L-1]
  grads["dA"+str(L-1)], grads["dW"+str(L)], grads["db"+str(L)] = linear_activation_backward(dAL, current_cache, "sigmoid")

  ### relu layers, walked from the last hidden layer back to the first
  for l in reversed(range(L-1)):
    current_cache = caches[l]
    dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA"+str(l+1)], current_cache, "relu")
    grads["dA"+str(l)] = dA_prev_temp
    grads["dW"+str(l+1)] = dW_temp
    grads["db"+str(l+1)] = db_temp

  return grads

In [None]:
def update_params(parameters, grads, learning_rate):
  '''
  One gradient-descent step on every weight and bias.

  parameters: dict holding "W1", "b1", ..., "WL", "bL" (L = len(parameters)//2)
  grads: dict holding the matching "dW1", "db1", ..., "dWL", "dbL"
  learning_rate: step size multiplied into every gradient

  returns:
  parameters: the same dict object, with each entry rebound to
              value - learning_rate * gradient
  '''
  n_layers = len(parameters)//2
  for layer in range(1, n_layers + 1):
    for kind in ("W", "b"):
      key = kind + str(layer)
      parameters[key] = parameters[key] - learning_rate * grads["d" + key]

  return parameters

In [1]:
def predict(X, Y, parameters):
  '''
  Predict 0/1 labels by thresholding the network output at 0.5.

  X: data set that we want to predict labels of (one column per example)
  Y: actual labels of data set (not used by the computation)
  parameters: final parameters obtained after training

  returns:
  predicted labels as a pandas DataFrame of shape (1, X.shape[1]),
  1.0 where the first row of the last-layer activation is >= 0.5, else 0.0
  '''
  m = X.shape[1]

  A_L, _ = l_layer_forward(X, parameters)

  ### vectorised threshold on the first output row instead of a per-example loop
  Y_pred = (np.asarray(A_L)[0, :m] >= 0.5).astype(float).reshape(1, m)

  return pd.DataFrame(Y_pred)

In [None]:
def confusion_matrix(Y, Y_pred):
  '''
  Cross-tabulate actual against predicted labels.

  Y: DataFrame of actual labels (a copy is taken, Y itself is not modified);
     once Y_pred is stored under column label 1 the frame must have exactly two columns
  Y_pred: predicted labels

  returns
  conf_mat: pd.crosstab table with rows named 'Actual' and columns named 'Predicted'
  '''
  paired = Y.copy()
  paired[1] = Y_pred                               ### predictions become the second column
  paired.columns = ['Actual', 'Predicted']
  actual, predicted = paired['Actual'], paired['Predicted']
  return pd.crosstab(actual, predicted, rownames = ['Actual'], colnames = ['Predicted'])

In [None]:
def performance_measure(confusion_matrix):
  '''
  returns accuracy, specificity and sensitivity

  confusion_matrix: 2x2 table of counts with the actual labels (0, 1) on the rows
                    and the predicted labels (0, 1) on the columns, e.g. the
                    pd.crosstab DataFrame with rows 'Actual' and columns 'Predicted';
                    any other array-like is read positionally in the same layout

  accuracy    = (TN + TP) / total
  specificity = TN / (TN + FP)   --- share of actual 0s predicted as 0
  sensitivity = TP / (TP + FN)   --- share of actual 1s predicted as 1
  A ratio whose denominator is zero is returned as nan.
  '''
  if isinstance(confusion_matrix, pd.DataFrame):
    ### a label that never occurs is missing from a crosstab; restore it as zeros
    table = confusion_matrix.reindex(index=[0, 1], columns=[0, 1], fill_value=0)
    counts = table.to_numpy(dtype=float)
  else:
    counts = np.asarray(confusion_matrix, dtype=float)

  ### rows are actual labels, columns are predicted labels
  tn, fp = counts[0, 0], counts[0, 1]
  fn, tp = counts[1, 0], counts[1, 1]

  def ratio(numerator, denominator):
    return numerator / denominator if denominator else float('nan')

  accuracy = ratio(tn + tp, counts.sum())
  specificity = ratio(tn, tn + fp)
  sensitivity = ratio(tp, tp + fn)
  return accuracy, specificity, sensitivity

In [None]:
def normalize(X, min_features, max_features):
  '''
  Min-max scale every row (feature) of X.

  X: data with one row per feature and one column per example
  min_features: per-feature minimum, looked up as min_features[i] for row i
  max_features: per-feature maximum, looked up as max_features[i] for row i

  returns:
  DataFrame of the same shape as X (default 0..n-1 row and column labels) holding
  (X[i] - min_features[i]) / (max_features[i] - min_features[i])
  A feature whose max equals its min is divided by 1 instead of 0, so a
  constant feature maps to 0 rather than nan/inf.
  '''
  n = X.shape[0]
  lows = np.array([min_features[i] for i in range(n)], dtype=float).reshape(n, 1)
  highs = np.array([max_features[i] for i in range(n)], dtype=float).reshape(n, 1)

  span = highs - lows
  span[span == 0] = 1.0                 ### avoid 0/0 for constant features

  ### one broadcasted expression instead of a row-by-row .iloc loop
  values = np.asarray(X, dtype=float)
  return pd.DataFrame((values - lows) / span)