<a href="https://colab.research.google.com/github/sayan0506/Deep-Neural-Network-with-Pytorch-/blob/main/Pytorch_for_Deep_Learning_Course(Basics).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch

In [None]:
# PyTorch is a deep learning framework, and tensors are its core data structure.
# Build a 0-dimensional (scalar) tensor that stores 4.0 as float32.
t1 = torch.tensor(4.0, dtype=torch.float32)

In [None]:
# we can inspect the tensor's details
# dtype is the data type of the values stored in the tensor
t1.dtype

torch.float32

In [None]:
# type of tensor: type is a method, so it has to be called to get the type string
# (without the parentheses the cell only shows the bound function object)
t1.type()

<function Tensor.type>

In [None]:
# creating a tensor from a list
t2 = torch.tensor([1,2,3])
print(t2)

tensor([1, 2, 3])


In [None]:
# creating tensor from matrix
t3 = torch.tensor([[1,2],[3,4],[5,6]], dtype = torch.float32)
print(t3)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])


In [None]:
# checking the shape and the number of dimensions of the tensor
# a tensor must have a regular shape: if the rows have different lengths, creating it throws an error
# i.e. the number of elements in each row must be the same
print('shape: ', t3.shape)
print(t3.ndimension())

shape:  torch.Size([3, 2])
2


# Tensor operation and gradient

In [None]:
# Three scalar (0-d) float32 tensors: one input and two trainable parameters.
# requires_grad=True asks PyTorch to track operations on w and b so that
# backpropagation can later compute their gradients; x is left untracked.
x = torch.tensor(3, dtype=torch.float32)
w, b = (
  torch.tensor(value, dtype=torch.float32, requires_grad=True)
  for value in (4, 5)
)

In [None]:
# linear regression(univariant)
y = w * x + b
print(y)

tensor(17., grad_fn=<AddBackward0>)


In [None]:
# compute derivatives
y.backward()

In [None]:
# x was created without requires_grad=True, so no gradient is stored for it (None)
print('dy/dx:', x.grad)
print('dy/dw:', w.grad)
# read .grad directly, like the lines above; the legacy .data accessor is not needed
print('dy/db:', b.grad)

dy/dx: None
dy/dw: tensor(3.)
dy/db: tensor(1.)


# Interoperability with Numpy

In [None]:
# we can easily convert pytorch to numpy or vice-versa
# torch to numpy
print(y.detach().numpy())

17.0


In [None]:
# numpy to torch
import numpy as np

print(torch.from_numpy(np.array([1,2,3])))

tensor([1, 2, 3])


y.backward() works for any complex function, which is differentiable

In [None]:
# create random weights and biases tensors
w = torch.randn(2,3, requires_grad=True)
b = torch.randn(2, requires_grad= True)

In [None]:
print(w)
print(b)

tensor([[-1.2110, -0.3444,  1.2513],
        [-3.2996,  0.2650,  0.9479]], requires_grad=True)
tensor([-0.7569, -0.8342], requires_grad=True)


In [None]:
# Linear regression model: computes x @ w^T + b.
# w.t() is the transpose of w, and torch.matmul is the function form of the
# @ matrix-multiplication operator for torch tensors.
def model(x):
  weighted_sum = torch.matmul(x, w.t())
  return weighted_sum + b


In [None]:
# create inputs and targets tensors for the model
inputs = np.array([[73, 67, 43],
                   [91, 88, 64],
                   [87, 134, 58]], dtype = np.float32)
targets = np.array([[56, 70],
                    [81, 101],
                    [119, 133]], dtype = np.float32)

In [None]:
# converting to pytorch tensors
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

In [None]:
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.]])


In [None]:
# our data consists of 3 datapoints, each with 3 input features and 2 target values

In [None]:
# for model training creating a w and b
w = torch.randn(2,3, requires_grad=True)
b = torch.randn(2, requires_grad= True)
print(w)
print(b)

tensor([[ 0.7570, -0.4780, -0.1573],
        [-0.1857,  0.4784, -0.3220]], requires_grad=True)
tensor([-0.3627,  0.2528], requires_grad=True)


In [None]:
# run the model on the inputs and inspect its predictions (not the inputs again)
preds = model(inputs)
print(preds)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.]])


In [None]:
def mse(t1, t2):
  """Return the mean squared error: the mean of the element-wise squared differences of t1 and t2."""
  residual = t1 - t2
  return residual.pow(2).mean()
print(mse(targets, preds))

tensor(7667.0249, grad_fn=<MeanBackward0>)


In [None]:
# so loss is too high, thus we need to tune the parameters to reduce the loss

In [None]:
loss = mse(targets, preds)
loss.backward()
print(w)
print(w.grad)

tensor([[ 0.7570, -0.4780, -0.1573],
        [-0.1857,  0.4784, -0.3220]], requires_grad=True)
tensor([[-6603.6880, -8443.8613, -4398.8945],
        [-7501.6909, -8896.4922, -4985.0059]])


In [None]:
print(b.grad)

tensor([-77.0540, -88.2445])


In [None]:
# setting grads to zero

w.grad.zero_()
b.grad.zero_()
print(w.grad, b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]]) tensor([0., 0.])


In [None]:
# adjusting weights and biases with gradient descent
# the previous cell reset w.grad and b.grad to zero, so recompute the loss and
# backpropagate first; otherwise the update below would only subtract zeros
preds = model(inputs)
loss = mse(targets, preds)
loss.backward()

# the raw gradients are far too large to subtract from w directly, so we scale
# them by a small learning rate and take only a small step against the gradient
# torch.no_grad() tells PyTorch not to track the in-place parameter updates
with torch.no_grad():
  w -= w.grad * 1e-5
  b -= b.grad * 1e-5
  # reset the gradients, because backward() accumulates into .grad
  w.grad.zero_()
  b.grad.zero_()

In [None]:
# in IPython/Jupyter, put ? before (or after) a name for instant help about it
?torch

In [None]:
# indexing a tensor gives a 0-d tensor; to get the stored value as a plain Python number instead, we use tensor.item()
inputs[0][0].item() 

73.0

In [None]:
# repeat the gradient-descent step for many epochs to reduce the loss
for epoch in range(10000):
  # forward pass and loss for the current parameters
  preds = model(inputs)
  loss = mse(preds, targets)
  # backward pass: fills w.grad and b.grad
  loss.backward()
  # update the parameters in place without gradient tracking, then clear the grads
  with torch.no_grad():
    w.sub_(1e-05 * w.grad)
    b.sub_(1e-05 * b.grad)
    w.grad.zero_()
    b.grad.zero_()

In [None]:
print(mse(preds, targets))

tensor(2.2123, grad_fn=<MeanBackward0>)


# Using nn modules in pytorch

In [None]:
import torch.nn as nn 

In [None]:
# inputs
# for pytorch linear model, default type is float32

inputs = np.array([[73, 67, 43], [91,88,64],[87,134,58],[102,43,37],
                   [69, 96, 70], [87,134,58], [102,43,37],[91,88,64]], dtype = 'float32')
# targets
targets = np.array([[56,70],[81,101],[119,133],[22,37],[103,119],
                    [119, 133],[22,37],[81,101]], dtype = 'float32')

In [None]:
# converting to pytorch tensors
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

In [None]:
print(inputs[0])

tensor([73., 67., 43.])


# Dataset and dataloader

In [None]:
# the dataset or Tensordataset helps to access the rows of pytorch tensor or the dataset in a more efficient way
from torch.utils.data import TensorDataset

In [None]:
# creating instance of the TensorDataset class
train_ds = TensorDataset(inputs, targets)
train_ds[0:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]), tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [None]:
train_ds[0]

(tensor([73., 67., 43.]), tensor([56., 70.]))

In [None]:
# so with the help of TensorDataset the (input, target) pair is created
# we passed inputs(x), targets(y) in the constructor of the TensorDataset class

In [None]:
# a list of row indices fetches several rows at once; use the indexing syntax
# instead of calling the __getitem__ dunder method directly
train_ds[[0, 1]]

(tensor([[73., 67., 43.],
         [91., 88., 64.]]), tensor([[ 56.,  70.],
         [ 81., 101.]]))

In [None]:
# TensorDataset wraps the tensors passed to its constructor; indexing it returns the matching (input, target) rows as a tuple
# TensorDataset helped to create the dataset for training,
# whereas the DataLoader helps to access the datapoints from the dataset
# we can create batches of data and so on; transforms can also be applied to the data

In [None]:
from torch.utils.data import DataLoader

In [None]:
# define data loader
batch_size = 5
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

In [None]:
# the DataLoader yields mini-batches of batch_size = 5 samples and, as shuffle=True, shuffles the samples while loading the data
for i, (xb, yb) in enumerate(train_dl):
  print(xb)
  print(yb)
  print(i)
  break
  # the loop stops after the first mini-batch (index 0); the printed output shows that it holds 5 samples

tensor([[ 87., 134.,  58.],
        [ 91.,  88.,  64.],
        [ 91.,  88.,  64.],
        [ 69.,  96.,  70.],
        [102.,  43.,  37.]])
tensor([[119., 133.],
        [ 81., 101.],
        [ 81., 101.],
        [103., 119.],
        [ 22.,  37.]])
0


# Defining linear regression and gradient descent using PyTorch's built-in **nn** module

In [None]:
# define model
# nn.Linear creates a linear regression model with in_features = 3 inputs and out_features = 2 outputs
# so the weight and bias matrices are created with matching shapes
 
model = nn.Linear(in_features = 3, out_features=2)

In [None]:
print(model.weight)

Parameter containing:
tensor([[ 0.5175, -0.4879, -0.0941],
        [-0.0574, -0.3247, -0.2902]], requires_grad=True)


In [None]:
print(model.bias)

Parameter containing:
tensor([-0.5000, -0.4349], requires_grad=True)


In [None]:
# we can see that nn.Linear creates a neural network having a single layer with two output nodes
# that is why the weight matrix printed above has shape (2, 3), i.e. (out_features, in_features), and the bias has 2 entries

In [None]:
preds = model(inputs)
print(preds)
print(model.__call__(inputs))
# model(inputs) and model.__call__(inputs) print the same predictions: the model object is callable through its __call__() method
# in Python, to make the objects of a class callable,
# define a method named __call__() in the class
# and write the desired functionality inside it
# calling the object like a function then runs that __call__() method
# this is the procedure to make any object callable

tensor([[  0.5482, -38.8626],
        [ -2.3566, -52.8100],
        [-26.3035, -65.7750],
        [ 27.8289, -30.9943],
        [-18.2091, -55.8852],
        [-26.3035, -65.7750],
        [ 27.8289, -30.9943],
        [ -2.3566, -52.8100]], grad_fn=<AddmmBackward>)
tensor([[  0.5482, -38.8626],
        [ -2.3566, -52.8100],
        [-26.3035, -65.7750],
        [ 27.8289, -30.9943],
        [-18.2091, -55.8852],
        [-26.3035, -65.7750],
        [ 27.8289, -30.9943],
        [ -2.3566, -52.8100]], grad_fn=<AddmmBackward>)


# Loss function

In [None]:
# using a built-in loss function instead of defining the loss function manually

In [None]:
import torch.nn.functional as F

In [None]:
# define the loss
loss_fn = F.mse_loss
loss = loss_fn(model(inputs), targets)
print(loss)

tensor(15748.6045, grad_fn=<MseLossBackward>)


# Optimizer

In [None]:
# we are implementing the Stochastic gradient descent optimizer, using the built-in function
opt = torch.optim.SGD(params = model.parameters(), lr = 1e-05)
# we pass the model parameters which helps to optimize the model
# we also pass the learning rate
# we call it stochastic because each update uses a randomly sampled mini-batch instead of the whole dataset

In [None]:
# create a fit method to implement the model training
def fit(epochs, model, loss_fn, train_dl, opt):
  """Train `model` with mini-batch gradient descent.

  Args:
    epochs: number of full passes over `train_dl`.
    model: callable that maps a batch of inputs to predictions.
    loss_fn: callable taking (predictions, targets) and returning a scalar loss tensor.
    train_dl: iterable yielding (inputs, targets) batches.
    opt: optimizer for the model parameters; must provide step() and zero_grad().

  Prints the loss of the last batch of every 10th epoch. As that value comes
  from a single mini-batch, it can fluctuate from one report to the next.
  """
  # clear gradients left over from any backward() call made before training,
  # so they cannot leak into the first parameter update
  opt.zero_grad()

  # train the model for the given number of epochs
  for epoch in range(epochs):
    # stays None when train_dl yields no batches, so there is nothing to report
    loss = None

    # fetching the batches from the dataloader
    for xb, yb in train_dl:
      # predicting the output
      pred = model(xb)

      # obtain loss
      loss = loss_fn(pred, yb)

      # backprop
      loss.backward()

      # update parameters, (w - lr * w.grad)
      opt.step()

      # reset grads to zero, because backward() accumulates into .grad
      opt.zero_grad()

    # print the progress
    if loss is not None and (epoch + 1) % 10 == 0:
      print(loss.item())



In [None]:
fit(100, model, loss_fn, train_dl, opt)

1014.05615234375
248.6985321044922
552.512939453125
460.6492614746094
360.4355773925781
288.8053894042969
207.04139709472656
31.735305786132812
126.95266723632812
89.17733001708984
