<a href="https://colab.research.google.com/github/samyarsworld/COSMOS-Ecommerce/blob/master/GradDescent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

In [2]:
import requests
from pathlib import Path

# Download helper functions from Learn PyTorch repo (if not already downloaded)
if Path("helper_functions.py").is_file():
  print("helper_functions.py already exists, skipping download")
else:
  # Note: you need the "raw" GitHub URL for this to work
  request = requests.get("https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/helper_functions.py")
  with open("helper_functions.py", "wb") as f:
    f.write(request.content)

Downloading helper_functions.py


In [3]:
def sigmoid(Z):
  return 1 / (1 + np.exp(-Z))

def relu(Z):
  return np.maximum(0, Z)

In [4]:
def initilize_parameters(layer_dims):
  state_dict = {}
  for l in range(1, len(layer_dims)):
    state_dict["W" + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * 0.01
    state_dict["b" + str(l)] = np.zeros((layer_dims[l], 1))
  return state_dict

In [5]:
def forward(A_prev, W, b, activation):
  Z = np.dot(W, A_prev) + b
  cache = (A_prev, W, b, Z)
  A = sigmoid(Z) if activation == "sigmoid" else relu(Z)
  return A, cache

In [6]:
def model(X, state_dict):
  L = len(state_dict) // 2
  A = X
  caches = []
  for l in range(1, L):
    A, cache = forward(A, state_dict["W" + str(l)], state_dict["b" + str(l)], "relu")
    caches.append(cache)
  A, cache = forward(A, state_dict["W" + str(L)], state_dict["b" + str(L)], "sigmoid")
  caches.append(cache)

  return A, caches

In [7]:
def cost(y, yhat):
  m = len(y)
  return - (1 / m) * np.sum(np.multiply(y, np.log(yhat)) + np.multiply((1 - y), np.log(1 - yhat)))

In [8]:
def backward(dA, cache, activation):
  A_prev, W, b, Z = cache

  if activation == "sigmoid":
    dZ = np.dot(dA, (A_prev * (1 - A_prev)))
  else:
    dZ = np.multiply(dA, (Z > 0))

  m = A_prev.shape[1]
  dW = 1 / m * np.dot(dZ, A_prev.T)
  db = 1 / m * np.sum(dZ, axis=1, keepdims = True)
  print(W, dZ)
  dA_prev = np.dot(W.T, dZ)
  return dA_prev, dW, db

In [9]:
def backprop(AL, Y, caches):
  grads = {}
  L = len(caches)
  dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
  dA_prev, dW, db = backward(dAL, caches[L - 1], "sigmoid")
  grads["dA" + str(L - 1)] = dA_prev
  grads["dW" + str(L)] = dW
  grads["db" + str(L)] = db

  for l in range(L - 2, -1, -1):
    dA_prev, dW, db = backward(grads["dA" + str(l + 1)], caches[l], "relu")
    grads["dA" + str(l)] = dA_prev
    grads["dW" + str(l + 1)] = dW
    grads["db" + str(l + 1)] = db

  return grads

In [10]:
def update(state_dict, grads, lr=0.01):
  L = len(layer_dims) - 1
  for l in range(1, L + 1):
    state_dict["W" + str(l)] = state_dict["W" + str(l)] - lr * grads["dW" + str(l)]
    state_dict["b" + str(l)] = state_dict["b" + str(l)] - lr * grads["db" + str(l)]
  return state_dict

In [11]:
X = np.array([[0.1, 0.2, -0.1], [0.16, 0.6, -0.3]])
Y = np.array([1, 0, 1])
layer_dims = [len(X),4,3,1]
state_dict = initilize_parameters(layer_dims)

# Start
A, caches = model(X, state_dict)
loss = cost(Y, A)
loss
grads = backprop(A, Y, caches)
state_dict = update(state_dict, grads, 0.01)

[[ 0.00644114 -0.00265501  0.00372686]] [[-5.86452165e-05 -2.31316239e-04 -2.56039388e-05]]
[[-0.00327448  0.00590931  0.00347938  0.00037465]
 [-0.00388711 -0.01888726 -0.01387702 -0.00088784]
 [ 0.00531247 -0.00717529 -0.00323239  0.01669087]] [[-3.77741918e-07 -1.48993976e-06 -0.00000000e+00]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
 [-0.00000000e+00 -0.00000000e+00 -9.54222819e-08]]
[[ 0.00530998 -0.00980276]
 [-0.01058299  0.03440102]
 [ 0.00848747 -0.00039595]
 [ 0.00103299  0.00467294]] [[ 0.00000000e+00  0.00000000e+00 -5.06927621e-10]
 [-2.23219249e-09 -8.80450963e-09  0.00000000e+00]
 [-1.31430735e-09 -5.18406532e-09  0.00000000e+00]
 [-1.41520390e-10 -5.58203488e-10 -0.00000000e+00]]
