## Introduction to tensors

In [5]:
import torch
import torch.nn as nn


In [17]:
x=torch.tensor(4.)
y=torch.tensor([1,2,3.])
z=torch.tensor([[1,2,3],[4,5,6.0]])

In [19]:
print(x.shape)
print(y.shape)
print(z.shape)


torch.Size([])
torch.Size([3])
torch.Size([2, 3])


## Tensor Operations and Gradient

In [59]:
x=torch.tensor(2.)
w=torch.tensor(3.,requires_grad=True)
b=torch.tensor(4.,requires_grad=True)# requires_grad=True only for floating point tensors
y=w*x+b
print(y)

tensor(10., grad_fn=<AddBackward0>)


In [61]:

#What makes PyTorch unique is that we can automatically compute the derivative of y w.r.t. the tensors that have requires_grad set to True i.e. w and b. This feature of PyTorch is called autograd (automatic gradients).

In [63]:

y.backward()

In [67]:
print(x.grad,"dy/dx") #required_grad=False
print(w.grad)
print(b.grad)


None dy/dx
tensor(2.)
tensor(1.)


In [69]:
#Note:
#Leaf Tensor: A tensor that is created directly and requires gradients.
#Non-leaf Tensor: A tensor that results from an operation on other tensors.

In [71]:
#retain_grad
x=torch.tensor(2.,requires_grad=True)
y=x*3 #non leaf tensor
z=y+x #non leaf tensor

z.backward()
print(x.grad)
print(y.grad)



tensor(4.)
None


  print(y.grad)


In [73]:
#retain_grad
x=torch.tensor(2.,requires_grad=True)
y=x*3 #non leaf tensor
z=y+x #non leaf tensor
y.retain_grad()  #enable gradient storage for y
z.retain_grad()
z.backward()
print(x.grad)
print(y.grad)
print(z.grad)



tensor(4.)
tensor(1.)
tensor(1.)


## Interoperability with Numpy

In [76]:
import numpy as np


In [78]:
x=np.array([1,2,3,4.])

In [82]:
x.dtype

dtype('float64')

In [84]:
#converting np array to tensor
x=torch.from_numpy(x)

In [86]:
x.dtype

torch.float64

In [88]:
#converting tensor to np array
x=x.numpy()

In [90]:
x.dtype

dtype('float64')

why pytorch since numpy is already there
1.autograd
2.gpu support


## Linear regression model from scratch

#### Train Data

In [95]:
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In [99]:
#converting into tensors
inputs=torch.from_numpy(inputs)
targets=torch.from_numpy(targets)

In [102]:
w=torch.randn(2,3,requires_grad=True)
b=torch.randn(2,requires_grad=True)

#### Model

In [108]:

def model(x):
   return x@w.t()+b

In [118]:
preds=model(inputs)
print(preds)
print(targets)

tensor([[-116.7470, -212.8531],
        [-158.5101, -278.8947],
        [-203.5486, -289.4684],
        [ -88.8564, -246.7457],
        [-169.6721, -249.3427]], grad_fn=<AddBackward0>)
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


#### Loss

In [114]:

def mse_loss(t1,t2):
    diff=t1-t2
    return torch.sum(diff*diff)/diff.numel()
    

In [126]:
loss=mse_loss(preds,targets)

#### Compute grads and adjust w and b

In [128]:
loss.backward()


In [130]:
print(w)
print(w.grad)

tensor([[-0.1089, -1.0932, -0.8022],
        [-1.8227, -0.5261, -1.0564]], requires_grad=True)
tensor([[-18517.8809, -21363.1602, -12930.6641],
        [-29266.2324, -31310.9258, -19452.3398]])


A key insight from calculus is that the gradient indicates the rate of change of the loss, or the slope of the loss function w.r.t. the weights and biases.

If a gradient element is positive:

increasing the element's value slightly will increase the loss.
decreasing the element's value slightly will decrease the loss.

If a gradient element is negative:

increasing the element's value slightly will decrease the loss.
decreasing the element's value slightly will increase the loss.



In [142]:
w.grad.zero_()
b.grad.zero_()

tensor([0., 0.])

In [144]:
for i in range(100):
    preds=model(inputs)
    loss=mse_loss(preds,targets)
    loss.backward()
    with torch.no_grad():
        w-=w.grad*1e-4
        b-=b.grad*1e-4
        w.grad.zero_()
        b.grad.zero_()
    if(i%10==0):
        print("epoch",i,loss)

epoch 0 tensor(89687.6875, grad_fn=<DivBackward0>)
epoch 10 tensor(1092.9558, grad_fn=<DivBackward0>)
epoch 20 tensor(114.9611, grad_fn=<DivBackward0>)
epoch 30 tensor(56.1026, grad_fn=<DivBackward0>)
epoch 40 tensor(37.5847, grad_fn=<DivBackward0>)
epoch 50 tensor(28.8260, grad_fn=<DivBackward0>)
epoch 60 tensor(23.3318, grad_fn=<DivBackward0>)
epoch 70 tensor(19.2545, grad_fn=<DivBackward0>)
epoch 80 tensor(16.0034, grad_fn=<DivBackward0>)
epoch 90 tensor(13.3431, grad_fn=<DivBackward0>)


In [146]:
print(model(inputs))
print(targets)

tensor([[ 57.2380,  70.7701],
        [ 80.3922,  97.9595],
        [122.6998, 138.3851],
        [ 22.3406,  38.5136],
        [ 97.8212, 113.7257]], grad_fn=<AddBackward0>)
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


## Linear Regression using Pytorch built-ins

In [19]:
import torch.nn as nn
import numpy as np


In [21]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], [91, 88, 64], [87, 134, 58], 
                   [102, 43, 37], [69, 96, 70], [73, 67, 43], 
                   [91, 88, 64], [87, 134, 58], [102, 43, 37], 
                   [69, 96, 70], [73, 67, 43], [91, 88, 64], 
                   [87, 134, 58], [102, 43, 37], [69, 96, 70]], 
                  dtype='float32')

# Targets (apples, oranges)
targets = np.array([[56, 70], [81, 101], [119, 133], 
                    [22, 37], [103, 119], [56, 70], 
                    [81, 101], [119, 133], [22, 37], 
                    [103, 119], [56, 70], [81, 101], 
                    [119, 133], [22, 37], [103, 119]], 
                   dtype='float32')

inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

#### Dataset and DataLoader

In [24]:
from torch.utils.data import TensorDataset

In [26]:
train_ds=TensorDataset(inputs,targets)

In [28]:
train_ds[:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [30]:
from torch.utils.data import DataLoader


In [32]:
batch_size=5
train_dl=DataLoader(train_ds,batch_size,shuffle=True)

In [34]:
for xb, yb in train_dl:
    print(xb)
    print(yb)
    break

tensor([[102.,  43.,  37.],
        [ 87., 134.,  58.],
        [ 87., 134.,  58.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.]])
tensor([[ 22.,  37.],
        [119., 133.],
        [119., 133.],
        [119., 133.],
        [ 22.,  37.]])


#### nn.Linear

In [36]:

model=nn.Linear(3,2) #nn.linear takes no  of inputs and no of outputs
print(model.bias)
print(model.weight)

Parameter containing:
tensor([0.4833, 0.1308], requires_grad=True)
Parameter containing:
tensor([[-0.0437, -0.1376, -0.3278],
        [ 0.2117,  0.3588,  0.4039]], requires_grad=True)


In [38]:
list(model.parameters())

[Parameter containing:
 tensor([[-0.0437, -0.1376, -0.3278],
         [ 0.2117,  0.3588,  0.4039]], requires_grad=True),
 Parameter containing:
 tensor([0.4833, 0.1308], requires_grad=True)]

In [42]:
preds=model(inputs)
preds

tensor([[-26.0203,  56.9950],
        [-36.5800,  76.8229],
        [-40.7670,  90.0587],
        [-22.0191,  52.1002],
        [-38.6859,  77.4587],
        [-26.0203,  56.9950],
        [-36.5800,  76.8229],
        [-40.7670,  90.0587],
        [-22.0191,  52.1002],
        [-38.6859,  77.4587],
        [-26.0203,  56.9950],
        [-36.5800,  76.8229],
        [-40.7670,  90.0587],
        [-22.0191,  52.1002],
        [-38.6859,  77.4587]], grad_fn=<AddmmBackward0>)

#### loss function

In [48]:

#mse loss
import torch.nn.functional as F
loss=F.mse_loss(preds,targets)
print(loss)

tensor(7264.1772, grad_fn=<MseLossBackward0>)


#### optimizer 

In [56]:
opt=torch.optim.SGD(model.parameters(),lr=1e-5)

## Train the model

In [59]:
def fit(num_epochs,model,loss_fn,opt):
    for epoch in range(num_epochs):
        for inputs,labels in train_dl:
            preds=model(inputs)
            loss=loss_fn(preds,labels)
            loss.backward()
            opt.step()
            opt.zero_grad()
        if(epoch+1)%10==0:
            print('epoch {}/{} ,loss:{}'.format(epoch+1,num_epochs,loss.item()))

In [61]:
fit(100,model,F.mse_loss,opt)

epoch 10/100 ,loss:234.9389190673828
epoch 20/100 ,loss:244.00439453125
epoch 30/100 ,loss:174.8479766845703
epoch 40/100 ,loss:171.16860961914062
epoch 50/100 ,loss:92.26490783691406
epoch 60/100 ,loss:67.65383911132812
epoch 70/100 ,loss:51.402191162109375
epoch 80/100 ,loss:61.560752868652344
epoch 90/100 ,loss:60.9277458190918
epoch 100/100 ,loss:49.32697677612305


In [65]:
#predictions
preds=model(inputs)
preds

tensor([[ 58.8452,  71.4282],
        [ 79.8057,  99.1861],
        [121.8324, 134.6933],
        [ 29.7186,  43.0430],
        [ 92.7711, 113.0525],
        [ 58.8452,  71.4282],
        [ 79.8057,  99.1861],
        [121.8324, 134.6933],
        [ 29.7186,  43.0430],
        [ 92.7711, 113.0525],
        [ 58.8452,  71.4282],
        [ 79.8057,  99.1861],
        [121.8324, 134.6933],
        [ 29.7186,  43.0430],
        [ 92.7711, 113.0525]], grad_fn=<AddmmBackward0>)