<a href="https://colab.research.google.com/github/mayuri-lalwani/Deep-Learning/blob/main/Assignment%203/%20Homework_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [46]:
import numpy as np
import matplotlib.pyplot as plt

Prepare Data

In [47]:
# 3 Dimension non-linear input data

n = 200
d = 3
x = np.random.uniform(-1, 1, (n, d))

weights_true = np.array([[2, 1,-1],[3,0,-1],]).T
bias_true = np.array([0.5,0.1])

y_true = (x ** 2) @ weights_true + x @ weights_true + bias_true
print(f'x: {x.shape}, weights: {weights_true.shape}, bias: {bias_true.shape}, y: {y_true.shape}')


x: (200, 3), weights: (3, 2), bias: (2,), y: (200, 2)


In [48]:
# Let's use gradient descent to learn the weights and bias that minimizes the loss function.
# For this, we need the gradient of the loss function and the gradients of the linear function.

class MSE:
  def __call__(self, y_pred, y_true):
    self.y_pred = y_pred
    self.y_true = y_true
    return ((y_pred - y_true) ** 2).mean()

  def backward(self):
    n = self.y_true.shape[0]
    self.gradient = 2. * (self.y_pred - self.y_true) / n
    # print('MSE backward', self.y_pred.shape, self.y_true.shape, self.gradient.shape)
    return self.gradient


class Linear:
  def __init__(self, input_dim: int, num_hidden: int = 1):
    self.weights = np.random.randn(input_dim, num_hidden) * np.sqrt(2. / input_dim)
    self.bias = np.zeros(num_hidden)
  
  def __call__(self, x):
    self.x = x
    output = x @ self.weights + self.bias
    #output = (x ** 2) @ weights_true + x @ weights_true + bias_true
    return output

  def backward(self, gradient):
    self.weights_gradient = self.x.T @ gradient
    self.bias_gradient = gradient.sum(axis=0)
    self.x_gradient = gradient @ self.weights.T
    return self.x_gradient

  def update(self, lr):
    self.weights = self.weights - lr * self.weights_gradient
    self.bias = self.bias - lr * self.bias_gradient

Adding non-linearity: Relu

In [49]:
# In order to learn non-linear functions, we need non-linearities in our model.

class Relu:
    def __call__(self, input_):
        self.input_ = input_
        self.output = np.clip(self.input_, 0, None)
        return self.output
    
    def backward(self, output_gradient):
      # import pdb; pdb.set_trace()  # By the way, this is how you can debug
      self.input_gradient = (self.input_ > 0) * output_gradient
      return self.input_gradient


relu = Relu()
input_ = np.expand_dims(np.array([1, 0.5, 0, -0.5, -1]), -1)
print(relu(input_))
print(relu.backward(input_))

[[1. ]
 [0.5]
 [0. ]
 [0. ]
 [0. ]]
[[ 1. ]
 [ 0.5]
 [ 0. ]
 [-0. ]
 [-0. ]]


Train our new non linear model

In [50]:
class Model:
  def __init__(self, input_dim, num_hidden, output_dim):
    self.linear1 = Linear(input_dim, num_hidden)
    self.relu1 = Relu()
    self.relu2 = Relu()
    self.linear2 = Linear(num_hidden, output_dim)
  
  def __call__(self, x):
    l1 = self.linear1(x)
    r1 = self.relu1(l1)
    r2 = self.relu2(r1)
    l2 = self.linear2(r2)
    return l2
  
  def backward(self, output_gradient):
    linear2_gradient = self.linear2.backward(output_gradient)
    relu2_gradient = self.relu2.backward(linear2_gradient)
    relu1_gradient = self.relu1.backward(relu2_gradient)
    linear1_gradient = self.linear1.backward(relu1_gradient)
    # print('Model backward', linear2_gradient.shape, relu_gradient.shape, linear1_gradient.shape)
    # import pdb; pdb.set_trace()
    return linear1_gradient

  def update(self, lr):
    self.linear2.update(lr)
    self.linear1.update(lr)

loss = MSE()
model = Model(d, 10,output_dim=2)
y_pred = model(x)
loss_value = loss(y_pred, y_true)
loss_gradient = loss.backward()
print(loss_value)
model.backward(loss_gradient)
#plot_3d(x, y_true, y_pred)

4.465571848314308


array([[ 7.44818433e-03, -4.14124519e-03,  5.45782577e-03],
       [-4.43299380e-04, -1.92181334e-03,  2.19222544e-03],
       [ 1.32073014e-02,  2.31484808e-03, -5.12966698e-03],
       [ 2.29105456e-02,  1.41184995e-02, -1.01177182e-02],
       [ 1.75787516e-02, -8.93761105e-03, -7.56682882e-03],
       [ 1.52848809e-03, -4.39602663e-03,  4.13828194e-03],
       [ 9.14760689e-03, -8.28661670e-03,  2.18992502e-03],
       [ 9.22604974e-03, -1.11282836e-02, -1.10699120e-02],
       [ 4.74483818e-03, -5.61318367e-03,  6.00900773e-03],
       [-9.81402759e-03, -9.12874777e-03,  5.22259929e-03],
       [-5.02541728e-03, -7.68882280e-03,  6.41634431e-03],
       [ 6.06002590e-02, -2.83565769e-02, -2.05895726e-02],
       [ 5.28948139e-03, -5.40175180e-03, -1.93839686e-02],
       [ 3.80490308e-03,  9.12259642e-04, -1.08160155e-03],
       [ 1.07482679e-02, -3.57848349e-02, -3.48729562e-02],
       [-2.20578871e-02, -1.12353784e-02,  8.07155097e-03],
       [ 6.13314524e-02, -2.83109484e-02

In [51]:
from typing import Callable

def fit(x: np.ndarray, y: np.ndarray, model: Callable, loss: Callable, lr: float, num_epochs: int):
  for epoch in range(num_epochs):
    y_pred = model(x)
    loss_value = loss(y_pred, y)
    print(f'Epoch {epoch}, loss {loss_value}')
    gradient_from_loss = loss.backward()
    model.backward(gradient_from_loss)
    model.update(lr)


Initializing non-linear and loss for three dimensional data<br/>
3-d input and 2-d output with 2 hidden layers, 10 neurons for hidden layers

In [52]:

loss = MSE()
nonlinear = Model(3,20,2)
y_pred = nonlinear(x)
print(x.shape, weights_true.shape, y_true.shape, y_pred.shape)
print(loss(y_pred,y_true))

(200, 3) (3, 2) (200, 2) (200, 2)
5.118961022288858


In [54]:
fit(x, y_true, model=nonlinear, loss=loss, lr=0.1, num_epochs=400)
y_pred = nonlinear(x)

Epoch 0, loss 0.10812060268520675
Epoch 1, loss 0.1037469567658415
Epoch 2, loss 0.09964683845657962
Epoch 3, loss 0.09581019281681184
Epoch 4, loss 0.09218438083417499
Epoch 5, loss 0.08874491184508213
Epoch 6, loss 0.08543219122424343
Epoch 7, loss 0.08235346485267586
Epoch 8, loss 0.07952291702503936
Epoch 9, loss 0.07690210024246197
Epoch 10, loss 0.07449454243178634
Epoch 11, loss 0.07223129589624784
Epoch 12, loss 0.07012572158026914
Epoch 13, loss 0.0681492170662241
Epoch 14, loss 0.0662843917331307
Epoch 15, loss 0.06450545294747034
Epoch 16, loss 0.06284004776197315
Epoch 17, loss 0.06128352034268616
Epoch 18, loss 0.059828595041792824
Epoch 19, loss 0.058437018801955565
Epoch 20, loss 0.05711878727854556
Epoch 21, loss 0.05586255485042511
Epoch 22, loss 0.05466219218072915
Epoch 23, loss 0.053535347937308526
Epoch 24, loss 0.052457854595245094
Epoch 25, loss 0.05142257288619349
Epoch 26, loss 0.05043243984446047
Epoch 27, loss 0.04949764129178183
Epoch 28, loss 0.048616857121

In [55]:
from sklearn.manifold import TSNE
view = TSNE(n_components=2, random_state=123).fit_transform(y_pred)

In [56]:
plt.figure(figsize=(20,10))
plt.scatter(view[:,0], view[:,1], c=labels, alpha=0.5)
plt.xlabel('t-SNE-1')
plt.ylabel('t-SNE-2')

NameError: ignored

<Figure size 1440x720 with 0 Axes>