<a href="https://colab.research.google.com/github/supulkalhara/Data_Science/blob/main/SEM5_Deep_Learning_Ass_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [437]:
# Mount Google Drive at /content/drive; the CSV files read later in this
# notebook are addressed through /content/drive/MyDrive/...
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [438]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from math import e
from tqdm import tqdm
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score

## Methods

In [439]:
# methods for W and b initialization
def default__init__():
  """Randomly initialise the parameters of the 14-100-40-4 network.

  Weights use He initialisation (standard normal scaled by sqrt(2 / fan_in))
  and biases start at zero.

  Each weight matrix has shape (n_out, n_in) so that it can be applied as
  ``W @ A`` to activations stored one sample per column. This is the same
  layout that file__init__ returns.

  Returns:
    Tuple (W1, b1, W2, b2, W3, b3) with shapes
    (100, 14), (100, 1), (40, 100), (40, 1), (4, 40), (4, 1).
  """
  W1 = np.random.randn(100, 14) * np.sqrt(2/14)
  b1 = np.zeros((100, 1))
  W2 = np.random.randn(40, 100) * np.sqrt(2/100)
  b2 = np.zeros((40, 1))
  W3 = np.random.randn(4, 40) * np.sqrt(2/40)
  b3 = np.zeros((4, 1))

  return W1, b1, W2, b2, W3, b3

def file__init__(W, b):
  """Build the network parameters from the stacked weight and bias tables.

  W holds the three weight matrices stacked row-wise (rows 0-13, 14-113 and
  114-153); b holds one bias vector per row (rows 0, 1 and 2). Unused
  trailing cells are NaN and are dropped before conversion.

  Args:
    W: DataFrame of stacked weights, one row per input unit of each layer.
    b: DataFrame with one row of biases per layer.

  Returns:
    Tuple (W1, b1, W2, b2, W3, b3); each weight matrix is transposed to
    (n_out, n_in) and each bias is reshaped to a column vector.
  """
  # (row slice of W for the layer, number of output units of the layer)
  layer_specs = (
      (slice(0, 14), 100),
      (slice(14, 114), 40),
      (slice(114, 154), 4),
  )

  loaded = []
  for layer_idx, (row_span, n_out) in enumerate(layer_specs):
    weight_block = W.iloc[row_span].dropna(axis=1)
    loaded.append(weight_block.to_numpy().T)

    bias_row = b.iloc[layer_idx].dropna()
    loaded.append(bias_row.to_numpy().reshape(n_out, 1))

  return tuple(loaded)

In [440]:
# methods for ReLU
def ReLU(Z):
  """Element-wise rectified linear unit: negative entries of Z become 0."""
  lower_bound = 0
  return np.maximum(lower_bound, Z)

def backward_ReLU(dA, Z):
  """Back-propagate a gradient through ReLU.

  Args:
    dA: gradient of the cost with respect to the ReLU output.
    Z: pre-activation values that were fed to ReLU in the forward pass.

  Returns:
    A copy of dA in which entries where Z <= 0 are set to zero; dA itself
    is left untouched.
  """
  inactive = Z <= 0
  grad = np.copy(dA)
  grad[inactive] = 0
  return grad

In [441]:
def softmax(z):
  """Column-wise, numerically stable softmax.

  Each column of z is treated as the logits of one sample, matching the
  (n_classes, n_samples) layout used by forward_propagation, and is
  normalised independently so that every column sums to 1.

  Args:
    z: array of logits, shape (n_classes, n_samples) or (n_classes,).

  Returns:
    Array of the same shape as z holding the class probabilities.
  """
  z = np.asarray(z)
  if z.ndim == 0:
    # A single logit is trivially a distribution over one class.
    return np.exp(z - z)

  # Subtracting the per-column maximum leaves the result unchanged
  # mathematically but keeps exp() from overflowing on large logits.
  shifted = z - np.max(z, axis=0, keepdims=True)
  exp_z = np.exp(shifted)
  return exp_z / np.sum(exp_z, axis=0, keepdims=True)

In [442]:
# methods for forward and backwards propagations
def forward_propagation(X, params):
  """Run the forward pass through the three layers of the network.

  Args:
    X: input activations; multiplied on the left by W1.
    params: tuple (W1, b1, W2, b2, W3, b3).

  Returns:
    Tuple (X, Z1, A1, Z2, A2, Z3, A3): the input followed by the
    pre-activation and activation of every layer. The first two layers use
    ReLU and the last one uses softmax.
  """
  W1, b1, W2, b2, W3, b3 = params

  # (weights, bias, activation function) for each layer, in order
  layers = (
      (W1, b1, ReLU),
      (W2, b2, ReLU),
      (W3, b3, softmax),
  )

  cache = [X]
  activation = X
  for weights, bias, activate in layers:
    pre_activation = (weights @ activation) + bias
    activation = activate(pre_activation)
    cache.extend((pre_activation, activation))

  return tuple(cache)

def backward_propagation(params, values, Y):
  """Compute the gradients of the cost for every weight and bias.

  Args:
    params: tuple (W1, b1, W2, b2, W3, b3); only W2 and W3 are read.
    values: tuple (A0, Z1, A1, Z2, A2, Z3, A3) as returned by
      forward_propagation.
    Y: target values, same shape as A3.

  Returns:
    Tuple (dW1, db1, dW2, db2, dW3, db3), each averaged over the
    A3.shape[1] columns (samples).
  """
  _, _, W2, _, W3, _ = params
  A0, Z1, A1, Z2, A2, _, A3 = values
  inv_m = 1. / A3.shape[1]

  def layer_grads(dZ, A_prev):
    # Weight and bias gradients of one layer, averaged over the samples.
    dW = inv_m * np.dot(dZ, A_prev.T)
    db = inv_m * np.sum(dZ, axis=1, keepdims=True)
    return dW, db

  # output layer: the error signal is (prediction - target)
  delta3 = A3 - Y
  dW3, db3 = layer_grads(delta3, A2)

  # second hidden layer
  delta2 = backward_ReLU(W3.T @ delta3, Z2)
  dW2, db2 = layer_grads(delta2, A1)

  # first hidden layer
  delta1 = backward_ReLU(W2.T @ delta2, Z1)
  dW1, db1 = layer_grads(delta1, A0)

  return dW1, db1, dW2, db2, dW3, db3

In [443]:
# method for calculating cost
def find_cost(A, Y):
  """Mean cross-entropy between predictions A and targets Y.

  Args:
    A: predicted probabilities, one column per sample.
    Y: target values with the same shape as A.

  Returns:
    The cross-entropy summed over rows and averaged over the Y.shape[1]
    columns. 1e-20 is added to A before the log so that a zero probability
    does not produce -inf.
  """
  n_samples = Y.shape[1]
  weighted_log = Y * np.log(A+1e-20)
  per_sample_loss = -np.sum(weighted_log, axis=0)
  return np.sum(per_sample_loss) / n_samples

In [444]:
# method for updating parameters
def update_params(params, learning_rate, grads):
  """Apply one gradient-descent step to every parameter.

  Args:
    params: tuple (W1, b1, W2, b2, W3, b3).
    learning_rate: step size multiplying each gradient.
    grads: tuple (dW1, db1, dW2, db2, dW3, db3), in the same order.

  Returns:
    Tuple of the six updated parameters; the inputs are not modified.
  """
  W1, b1, W2, b2, W3, b3 = params
  dW1, db1, dW2, db2, dW3, db3 = grads

  param_grad_pairs = (
      (W1, dW1), (b1, db1),
      (W2, dW2), (b2, db2),
      (W3, dW3), (b3, db3),
  )
  return tuple(
      value - learning_rate*gradient
      for value, gradient in param_grad_pairs
  )

In [445]:
class Model:
  """Three-layer fully connected classifier trained by gradient descent.

  Attributes are documented inline: ``params`` holds the current tuple
  (W1, b1, W2, b2, W3, b3).
  """

  # model and parameters initialization
  def __init__(self, weights="", biases="", random=True):
    """Create the model.

    Args:
      weights: table of stacked weights, passed to file__init__ when
        random is False.
      biases: table of biases, passed to file__init__ when random is False.
      random: if True, ignore weights/biases and initialise randomly.
    """
    if random:
      self.params = default__init__()
    else:
      self.params = file__init__(weights, biases)

  # gradient descent
  def grad_dec(self, X, Y, n_iterations=100, learning_rate=0.001):
    """Run n_iterations steps of gradient descent on (X, Y).

    The updated parameters are stored back on self.params after every step,
    so each iteration starts from the result of the previous one.

    Args:
      X: inputs, one sample per column.
      Y: targets, one sample per column.
      n_iterations: number of gradient steps to take.
      learning_rate: step size for each update.

    Returns:
      Tuple (params, grads, cost): the parameters after the last update,
      and the gradients and cost computed in the last iteration (before
      that iteration's update). grads and cost are None when
      n_iterations is 0.
    """
    grads, cost = None, None
    for _ in range(n_iterations):
      values = forward_propagation(X, self.params)
      cost = find_cost(values[-1], Y)
      grads = backward_propagation(self.params, values, Y)
      # Persist the step; otherwise every iteration would restart from the
      # same parameters and extra iterations would have no effect.
      self.params = update_params(self.params, learning_rate, grads)

    return self.params, grads, cost

  def predict(self, X, params=None):
    """Return the output-layer activations for X.

    Args:
      X: inputs, one sample per column.
      params: parameter tuple to use; defaults to the model's own
        self.params when omitted.
    """
    if params is None:
      params = self.params
    values = forward_propagation(X, params)
    return values[-1]

---

## Setup: sample inputs (`__init__`)

In [446]:
# One training sample as column vectors: 14 input features and a 4-entry
# target.
X = np.array([-1, 1, 1, 1, -1, -1, 1, -1, 1, 1, -1, -1, 1, 1])[:, np.newaxis]
Y = np.array([0, 0, 0, 1])[:, np.newaxis]

In [447]:
# Two training samples, written one sample per row and then transposed so
# that each COLUMN is one sample, which is the layout forward_propagation
# expects. A reshape(14, 2) / reshape(4, 2) here would interleave the values
# of the two samples instead of transposing them.
X_1 = np.array([[-1, 1, 1, 1, -1, -1, 1, -1, 1, 1, -1, -1, 1, 1], [-1, -1, 1, -1, -1, -1, 1, -1, 1, 1, -1, -1, 1, -1]]).T
Y_1 = np.array([[0, 0, 0, 1], [1, 0, 0, 0]]).T

## Task_1/a

In [448]:
# Load the Task 1/a weight table. header=None makes pandas treat every row as
# data and label the columns 0..N; column 0 holds a text description of the
# row (see the preview) rather than a weight.
w = pd.read_csv('/content/drive/MyDrive/Datasets/SEM5_Deep_Learning_Ass_1/Task_1/a/w.csv', header=None)
w.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,100
0,weights btw layer0 to layer1,0.471435,-1.190976,1.432707,-0.312652,-0.720589,0.887163,0.859588,-0.636523,0.015696,...,0.079842,-0.399965,-1.027851,-0.584718,0.816594,-0.081947,-0.344766,0.528288,-1.068989,-0.511881
1,weights btw layer0 to layer1,0.291205,0.566534,0.503592,0.285296,0.484288,1.363482,-0.781105,-0.468018,1.224574,...,0.209395,-0.592886,-1.473116,-0.896581,1.104352,-0.43155,-0.161137,0.889157,0.288377,-1.051539
2,weights btw layer0 to layer1,-0.319561,-0.619993,0.156998,-0.571455,1.057633,-0.791489,-0.524627,0.071878,1.910759,...,0.386254,0.822775,-0.68379,1.057203,0.03188,1.343182,-0.05054,-0.36401,-1.553342,-0.319298
3,weights btw layer0 to layer1,0.527046,0.711112,-0.217545,2.637791,-1.742138,-0.094435,1.431184,0.592758,0.170297,...,0.393892,-0.950026,0.332507,0.528944,-1.120521,0.048264,0.061988,-1.027516,-0.238335,1.932178
4,weights btw layer0 to layer1,-0.226632,-0.923831,0.355839,-1.270064,-0.195472,-0.463419,0.989415,1.388647,1.087714,...,0.725714,0.916976,-0.56389,-1.52218,-0.014279,-0.246721,-0.165329,0.119114,-2.07498,-1.002755


In [449]:
# Load the Task 1/a bias table: one row per layer, with column 0 holding a
# text description. Shorter rows are padded with NaN (see the preview).
b = pd.read_csv('/content/drive/MyDrive/Datasets/SEM5_Deep_Learning_Ass_1/Task_1/a/b.csv', header=None)
b.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,100
0,bias for layer1,-0.635155,-0.68102,-1.501586,-0.368925,-0.352999,0.127384,0.517531,0.747837,-0.809378,...,0.156078,0.413614,1.211116,-0.176121,0.942438,0.633436,-0.11388,0.457997,-0.615561,0.385286
1,bias for layer2,0.665451,1.426556,1.158077,-0.496191,-0.01035,0.617206,0.336297,-0.358238,-0.995016,...,,,,,,,,,,
2,bias for layer3,1.508889,2.038297,0.945609,1.193867,,,,,,...,,,,,,,,,,


In [450]:
# Remove the text-description column (label 0) so only numeric values remain.
# Rebinding instead of mutating in place, together with errors="ignore",
# keeps this cell safe to re-run: a second run finds no column 0 and is a
# no-op instead of raising KeyError.
w = w.drop(columns=0, errors="ignore")
b = b.drop(columns=0, errors="ignore")

In [451]:
# Build the network from the weights and biases loaded above instead of
# random initialisation.
model = Model(weights=w, biases=b, random=False)

In [452]:
# Take a single gradient-descent step on the one-sample problem and report
# the cost measured during that step.
output = model.grad_dec(X, Y, n_iterations=1)
params, grads, cost = output
print(cost)

46.051701859880914


In [453]:
# Output-layer activations for the sample X, using the parameters returned
# by grad_dec.
model.predict(X, params)

array([[8.12732385e-139],
       [6.25933025e-104],
       [1.00000000e+000],
       [4.40270759e-042]])

In [454]:
# Load the reference gradients supplied for Task 1/a; they are compared with
# the gradients computed by the model further down.
true_dw = pd.read_csv('/content/drive/MyDrive/Datasets/SEM5_Deep_Learning_Ass_1/Task_1/a/true-dw.csv', header=None)
true_db = pd.read_csv('/content/drive/MyDrive/Datasets/SEM5_Deep_Learning_Ass_1/Task_1/a/true-db.csv', header=None)

In [455]:
# Preview the first rows of the reference weight gradients.
true_dw.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,-0.0,-5.302999,-0.0,-1.029493,-0.0,-0.0,8.007869,0.62155,8.262644,-0.0,...,4.95448,8.113829,-5.106945,-10.410107,-0.0,-1.82363,-4.100732,-4.915877,-2.378255,8.752061
1,0.0,5.302999,0.0,1.029493,0.0,0.0,-8.007869,-0.62155,-8.262644,0.0,...,-4.95448,-8.113829,5.106945,10.410107,0.0,1.82363,4.100732,4.915877,2.378255,-8.752061
2,0.0,5.302999,0.0,1.029493,0.0,0.0,-8.007869,-0.62155,-8.262644,0.0,...,-4.95448,-8.113829,5.106945,10.410107,0.0,1.82363,4.100732,4.915877,2.378255,-8.752061
3,0.0,5.302999,0.0,1.029493,0.0,0.0,-8.007869,-0.62155,-8.262644,0.0,...,-4.95448,-8.113829,5.106945,10.410107,0.0,1.82363,4.100732,4.915877,2.378255,-8.752061
4,-0.0,-5.302999,-0.0,-1.029493,-0.0,-0.0,8.007869,0.62155,8.262644,-0.0,...,4.95448,8.113829,-5.106945,-10.410107,-0.0,-1.82363,-4.100732,-4.915877,-2.378255,8.752061


In [456]:
# Split the stacked reference tables into per-layer arrays. The weight
# gradients occupy row blocks 0-13, 14-113 and 114-153 of true_dw; the bias
# gradients are rows 0, 1 and 2 of true_db. NaN padding is dropped first.
dW1, dW2, dW3 = (
    true_dw.iloc[start:stop].dropna(axis=1).to_numpy()
    for start, stop in ((0, 14), (14, 114), (114, 154))
)
db1, db2, db3 = (
    true_db.iloc[row].dropna().to_numpy().reshape(size, 1)
    for row, size in enumerate((100, 40, 4))
)

In [460]:
# Computed gradient for W2 (index 2 of the grads tuple), transposed so its
# layout matches the reference dW2 array for a visual comparison.
grads[2].T

array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [-2.30953918,  0.        ,  2.72487712, ..., -0.72915438,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [-1.6515336 ,  0.        ,  1.94853855, ..., -0.52141266,
         0.        ,  0.        ],
       [-0.31289287,  0.        ,  0.36916223, ..., -0.09878473,
         0.        ,  0.        ],
       [-0.12763654,  0.        ,  0.15059016, ..., -0.04029667,
         0.        ,  0.        ]])

In [461]:
# Reference gradient for W2 taken from true-dw.csv.
dW2

array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [-2.30953918,  0.        ,  2.72487712, ..., -0.72915438,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [-1.6515336 ,  0.        ,  1.94853855, ..., -0.52141266,
         0.        ,  0.        ],
       [-0.31289287,  0.        ,  0.36916223, ..., -0.09878473,
         0.        ,  0.        ],
       [-0.12763654,  0.        ,  0.15059016, ..., -0.04029667,
         0.        ,  0.        ]])

---

## Task_1/b

---

## Task_2

---