In [1]:
import torch
import numpy as np

In [4]:
# 1d tensor with 3 elements
x = torch.empty(3)
print(x)

tensor([2.0500e-35, 0.0000e+00, 5.0447e-44])


In [5]:
# 2d tensor
x = torch.empty(3, 2)
print(x)

tensor([[2.0500e-35, 0.0000e+00],
        [3.3631e-44, 0.0000e+00],
        [       nan, 0.0000e+00]])


In [7]:
#creating tensor with random values
x = torch.randn(2,2)
print(x)

tensor([[ 0.7261,  0.4863],
        [-0.3610, -0.2736]])


In [10]:
x = torch.zeros(2,2)
print(x)
print(x.dtype)

tensor([[0., 0.],
        [0., 0.]])
torch.float32


In [11]:
# we can make changes to the dtype of tensors in pytorch
x = torch.ones(2,2, dtype=torch.int)
print(x)
print(x.dtype)

tensor([[1, 1],
        [1, 1]], dtype=torch.int32)
torch.int32


In [15]:
#Basic operations

x = torch.rand(2,2)
y = torch.rand(2,2)

z = x+y

print(z)
z = torch.mul(x,y)
print(z)

#inplace operation in _

y.add_(x)

tensor([[1.0905, 1.2392],
        [0.9625, 1.1074]])
tensor([[0.1178, 0.3583],
        [0.1598, 0.2638]])


tensor([[1.0905, 1.2392],
        [0.9625, 1.1074]])

In [18]:
#view

x = torch.rand(4,4)
print(x)
y = x.view(-1, 8)
print(y)

tensor([[0.1569, 0.5569, 0.6483, 0.3967],
        [0.3442, 0.7636, 0.2079, 0.1969],
        [0.1392, 0.7447, 0.4946, 0.8619],
        [0.2081, 0.2300, 0.3563, 0.7139]])
tensor([[0.1569, 0.5569, 0.6483, 0.3967, 0.3442, 0.7636, 0.2079, 0.1969],
        [0.1392, 0.7447, 0.4946, 0.8619, 0.2081, 0.2300, 0.3563, 0.7139]])


In [20]:
# numpy->tensor and vice versa

#Things to keep in mind tensor runs on gpu and normal nd array runs on cpu ,
#so if they share the same location then doing operation on one will effect the other same as call by reference concept

import numpy as np

a = torch.ones(2)
b = a.numpy()
print(b)

a = torch.from_numpy(b)
print(a)

[1. 1.]
tensor([1., 1.])


In [21]:
#cuda to create tensor on gpu

# if torch.cuda.is_available():
#   device = torch.device("cuda")
#   x = torch.ones(5, device=device)
#   y = torch.ones(5)
#   y = y.to(device)
#   z = x + y
#   z.numpy() # this will create error as numpy array runs on cpu and not gpu so we have to move it to cpu
#   z = z.to("cpu")


In [22]:
# requires_grad to True just let's it knows that later we might have to calculate gradient of this tensor

x = torch.ones(5, requires_grad=True)
print(x)

tensor([1., 1., 1., 1., 1.], requires_grad=True)


# Autograd for optmimzation to calculate gradient in pytorch

In [38]:
x = torch.randn(5)
print(x)
#now suppose we want to calculate gradient of some function wrt x then we have to specify in it's argument required_grad= True

x = torch.randn(3, requires_grad=True)
print(x)

#now if we perform any operation on this tensor

y = x + 2

"""
x
    +.     y 
2

In back propogation it will calculate dy/dx  and is added backward 
"""

print(y)


z = y * y * 2
print(z)

# z = z.mean()
# print(z)

z.backward() # dz/dx

print(x.grad)

tensor([ 1.0807, -1.6904,  0.0497, -0.4585, -0.8382])
tensor([ 0.4099,  0.1375, -0.4269], requires_grad=True)
tensor([2.4099, 2.1375, 1.5731], grad_fn=<AddBackward0>)
tensor([11.6154,  9.1382,  4.9490], grad_fn=<MulBackward0>)


RuntimeError: ignored

In [39]:
# to resolve this error 
# z is a scaler value so we don't need any vector but if z is a vector 
#then to apply backward we need to have vector of same size as it does jacobian multiplication

v = torch.tensor([0.1, 1.0, 0.001], dtype = torch.float32)

z.backward(v)
print(x.grad)

tensor([9.6397e-01, 8.5502e+00, 6.2922e-03])


In [47]:
# prevent pytorch from tracking the history of gradient
"""
option 1:x.requires_grad_(False)

option 2: x.detach()

option 3: with torch.no_grad()
"""
x = torch.randn(3, requires_grad = True)
#print(x)
# x.requires_grad_(False)

# y = x.detach()
# print(y)

y = x + 2
print(y)

with torch.no_grad():
  y = x + 2

print(y) 


tensor([2.5563, 2.4875, 2.9160], grad_fn=<AddBackward0>)
tensor([2.5563, 2.4875, 2.9160])


In [48]:
# there is an issue of accumulation of gradient  during training step let's see

weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
  model_output = (weights*3).sum()
  model_output.backward()

  print(weights.grad)
  

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


In [50]:
# Above weights are getting accumulated and making our dataset hampered
# to resolve this in every epoch make the gradients zero

weights = torch.ones(4, requires_grad=True)
for epoch in range(3):
  model_output = (weights*3).sum()
  model_output.backward()

  print(weights.grad)

  weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


In [52]:
# pytorch inbuilt optimizers

"""

weights = torch.ones(4, requires_grad=True)

optimizer = torch.optim.SGD(weights, lr=0.01)

optimizer.step()

optimizer.zero_grad()

 Same thing should be done with inbuild optimizers as well
"""

'\n\nweights = torch.ones(4, requires_grad=True)\n\noptimizer = torch.optim.SGD(weights, lr=0.01)\n\noptimizer.step()\n\noptimizer.zero_grad()\n\n Same thing should be done with inbuild optimizers as well\n'

In [57]:
"""
Backpropogation

x -> a(x) -> b(y) -> z

to calculate dz/dx we apply chain rule i.e dz/dy * dy/dx

kind of calculating gradient from back to front to get the final gradient we are interested in


######### COMPUTATIONAL GRAPH #########

x
    (f = x * y) -------> z
y

here we want to compute local gradients.  dz/dy = d(x*y)/ dy = x and dz/dx = d(x*y)/ dx = y


3 step process:

1. Forward Pass
2. Compute local gradients
3. Do backpropogation to update the weights

"""


x = torch.tensor(1.0)
y= torch.tensor(2.0)

w = torch.tensor(1.0, requires_grad = True)

# forward pass to compute the loss

y_hat = w*x

loss = (y_hat - y)**2

print(loss)


#backward pass

#whole gradient computation
loss.backward()

print(w.grad)


## update the weights and again do forward and backward pass

tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)


In [5]:
#gradient descent using autograd
x = np.array([1,2,3,4], dtype=np.float32)
y = np.array([2,4,6,8], dtype=np.float32)

w = 0.0

# model prediction
def forward(x):
  return w*x

# loss MSE
def loss(y, y_predicted):
  return ((y_predicted-y)**2).mean()

# gradient
# MSE = 1/N *(W*x-y)**2
# dJ/dw = 1/N 2x(w*x-y)

def gradient(x, y, y_predicted):
  return np.dot(2*x, y_predicted-y).mean()



print(f'Prediction before training: f(5) = {forward(5):.3f}')


#training

lr = 0.01
n_iters = 14

for epoch in range(n_iters):
  #prediction
  y_pred = forward(x)

  #loss 
  l = loss(y, y_pred)

  #gradient
  dw = gradient(x, y, y_pred)

  #update weights

  w = w - lr*dw

  if epoch % 1==0:
    print(f'epoch{epoch+1}: w={w:.3f}, loss={l:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')

Prediction before training: f(5) = 0.000
epoch1: w=1.200, loss=30.00000000
epoch2: w=1.680, loss=4.79999924
epoch3: w=1.872, loss=0.76800019
epoch4: w=1.949, loss=0.12288000
epoch5: w=1.980, loss=0.01966083
epoch6: w=1.992, loss=0.00314574
epoch7: w=1.997, loss=0.00050331
epoch8: w=1.999, loss=0.00008053
epoch9: w=1.999, loss=0.00001288
epoch10: w=2.000, loss=0.00000206
epoch11: w=2.000, loss=0.00000033
epoch12: w=2.000, loss=0.00000005
epoch13: w=2.000, loss=0.00000001
epoch14: w=2.000, loss=0.00000000
Prediction after training: f(5) = 10.000


In [25]:
# doing same thing with pytorch
#gradient descent using autograd
x = torch.tensor([1,2,3,4], dtype=torch.float32)
y = torch.tensor([2,4,6,8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)


# model prediction
def forward(x):
  return w*x

# loss MSE
def loss(y, y_predicted):
  return ((y_predicted-y)**2).mean()

# gradient
# MSE = 1/N *(W*x-y)**2
# dJ/dw = 1/N 2x(w*x-y)

# def gradient(x, y, y_predicted):
#   return np.dot(2*x, y_predicted-y).mean()



print(f'Prediction before training: f(5) = {forward(5):.3f}')


#training

lr = 0.01
n_iters = 70


for epoch in range(n_iters):
  #prediction
  y_pred = forward(x)

  #loss 
  l = loss(y, y_pred)

  #gradient
  # dw = gradient(x, y, y_pred)
  
  #pytorch for gradient
  l.backward() # gradient of loss wrt to w

  #update weights
  with torch.no_grad():
    w -= lr * w.grad
  
  # empty the gradient for avoiding it's accumulation
  w.grad.zero_()

  if epoch % 1==0:
    print(f'epoch{epoch+1}: w={w:.3f}, loss={l:.8f}')



print(f'Prediction after training: f(5) = {forward(5):.3f}')


Prediction before training: f(5) = 0.000
epoch1: w=0.300, loss=30.00000000
epoch2: w=0.555, loss=21.67499924
epoch3: w=0.772, loss=15.66018772
epoch4: w=0.956, loss=11.31448650
epoch5: w=1.113, loss=8.17471695
epoch6: w=1.246, loss=5.90623236
epoch7: w=1.359, loss=4.26725292
epoch8: w=1.455, loss=3.08308983
epoch9: w=1.537, loss=2.22753215
epoch10: w=1.606, loss=1.60939169
epoch11: w=1.665, loss=1.16278565
epoch12: w=1.716, loss=0.84011245
epoch13: w=1.758, loss=0.60698116
epoch14: w=1.794, loss=0.43854395
epoch15: w=1.825, loss=0.31684780
epoch16: w=1.851, loss=0.22892261
epoch17: w=1.874, loss=0.16539653
epoch18: w=1.893, loss=0.11949898
epoch19: w=1.909, loss=0.08633806
epoch20: w=1.922, loss=0.06237914
epoch21: w=1.934, loss=0.04506890
epoch22: w=1.944, loss=0.03256231
epoch23: w=1.952, loss=0.02352631
epoch24: w=1.960, loss=0.01699772
epoch25: w=1.966, loss=0.01228084
epoch26: w=1.971, loss=0.00887291
epoch27: w=1.975, loss=0.00641066
epoch28: w=1.979, loss=0.00463169
epoch29: w=1

Making the whole model pipeline in pytorch

1. Design the model (input, output_size, fwd pass)
2. construct loss and optimizer
3. training loop

- forward pass: compute prediction
- backward pass : gradients
- update weights 

In [44]:
import torch.nn as nn

# doing same thing with pytorch
#gradient descent using autograd
x = torch.tensor([[1],[2],[3],[4]], dtype=torch.float32)
y = torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)

# w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)


# model prediction
# def forward(x):
#   return w*x

n_samples, n_features = x.shape
print(n_samples, n_features)

x_test = torch.tensor([5], dtype=torch.float32)
input_size = n_features
output_size = n_features

model = nn.Linear(input_size, output_size)

# loss MSE
# def loss(y, y_predicted):
#   return ((y_predicted-y)**2).mean()

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

# gradient
# MSE = 1/N *(W*x-y)**2
# dJ/dw = 1/N 2x(w*x-y)

# def gradient(x, y, y_predicted):
#   return np.dot(2*x, y_predicted-y).mean()



print(f'Prediction before training: f(5) = {model(x_test).item():.3f}')


#training

lr = 0.01
n_iters = 10


for epoch in range(n_iters):
  #prediction
  y_pred = forward(x)

  #loss 
  l = loss(y, y_pred)

  #gradient
  # dw = gradient(x, y, y_pred)
  
  #pytorch for gradient
  l.backward() # gradient of loss wrt to w


  optimizer.step()

  #update weights
  # with torch.no_grad():
  #   w -= lr * w.grad
  
  # empty the gradient for avoiding it's accumulation
  # w.grad.zero_()

  optimizer.zero_grad()

  if epoch % 1==0:
    [w,b] = model.parameters()
    print(f'epoch{epoch+1}: w={w[0][0].item():.3f}, loss={l:.8f}')



print(f'Prediction after training: f(5) = {model(x_test).item():.3f}')


4 1
Prediction before training: f(5) = 5.056
epoch1: w=0.950, loss=8.45141983
epoch2: w=1.107, loss=8.27035809
epoch3: w=1.241, loss=5.97533464
epoch4: w=1.355, loss=4.31717920
epoch5: w=1.452, loss=3.11916208
epoch6: w=1.534, loss=2.25359511
epoch7: w=1.604, loss=1.62822247
epoch8: w=1.663, loss=1.17639065
epoch9: w=1.714, loss=0.84994221
epoch10: w=1.757, loss=0.61408287
Prediction after training: f(5) = 9.091


# Creating a custom model now on the given framework

In [45]:
import torch.nn as nn

# doing same thing with pytorch

x = torch.tensor([[1],[2],[3],[4]], dtype=torch.float32)
y = torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)


n_samples, n_features = x.shape
print(n_samples, n_features)

x_test = torch.tensor([5], dtype=torch.float32)
input_size = n_features
output_size = n_features

#model = nn.Linear(input_size, output_size)
# it's more in depth custom implementation
class LinearRegression(nn.Module):

  def __init__(self, input_dimension, output_dimension):
    super(LinearRegression, self).__init__()
    #define layers
    self.lin = nn.Linear(input_dimension, output_dimension)

  def forward(self, x):
    return self.lin(x)

model = LinearRegression(input_size, output_size)

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

print(f'Prediction before training: f(5) = {model(x_test).item():.3f}')


#training

lr = 0.01
n_iters = 10


for epoch in range(n_iters):
  #prediction
  y_pred = forward(x)

  #loss 
  l = loss(y, y_pred)

  #gradient
  # dw = gradient(x, y, y_pred)
  
  #pytorch for gradient
  l.backward() # gradient of loss wrt to w


  optimizer.step()

  #update weights
  # with torch.no_grad():
  #   w -= lr * w.grad
  
  # empty the gradient for avoiding it's accumulation
  # w.grad.zero_()

  optimizer.zero_grad()

  if epoch % 1==0:
    [w,b] = model.parameters()
    print(f'epoch{epoch+1}: w={w[0][0].item():.3f}, loss={l:.8f}')



print(f'Prediction after training: f(5) = {model(x_test).item():.3f}')


4 1
Prediction before training: f(5) = 1.891
epoch1: w=0.560, loss=0.44367492
epoch2: w=0.776, loss=15.55922222
epoch3: w=0.959, loss=11.24153709
epoch4: w=1.115, loss=8.12201118
epoch5: w=1.248, loss=5.86815357
epoch6: w=1.361, loss=4.23974037
epoch7: w=1.457, loss=3.06321263
epoch8: w=1.538, loss=2.21317077
epoch9: w=1.608, loss=1.59901571
epoch10: w=1.666, loss=1.15528870
Prediction after training: f(5) = 7.425
