In [48]:
import torch 
import numpy as np

In [49]:
# Input and output
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In [50]:
#usually datasets are in numpy format, so converting them into tensors
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [51]:
# requires_grad=True lets us differentiate the variable in the future using backword() function
w = torch.randn(2,3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[ 0.4848, -1.0568, -1.5461],
        [ 0.5604,  1.1633,  1.0867]], requires_grad=True)
tensor([-1.5144,  0.7359], requires_grad=True)


In [52]:
#defining a linear model '@' multiplies the tensor matrices
def model(x):
    return x @ w.t() + b

In [53]:
preds = model(inputs)
preds # initial prediction with the randomized input 

tensor([[-103.4129,  166.3140],
        [-149.3480,  223.6507],
        [-190.6249,  268.4001],
        [ -54.7123,  148.1277],
        [-177.7455,  227.1476]], grad_fn=<AddBackward0>)

In [54]:
#defining the loss function MSE=sum(y-y_hat)^2/n
def mse(t1, t2):
    diff = t1-t2
    return torch.sum(diff*diff)/diff.numel()

In [55]:
#calculating the loss for the predicted outputs
loss = mse(preds, targets)
print(loss)

tensor(32574.1094, grad_fn=<DivBackward0>)


In [56]:
#Finding the slope downwards 
loss.backward()

In [57]:
# w.grad gives us the dy/dx for w variable 
print(w.grad)

tensor([[-17346.4551, -20538.2461, -12409.1621],
        [  9753.8311,  10110.1143,   6305.2817]])


In [58]:
# Slowly moving downwards the slope using the learning parameter
with torch.no_grad():
    w -= w.grad*1e-5
    b -= b.grad*1e-5

In [59]:
# observe the change in loss
loss = mse(preds, targets)
print(loss)

tensor(32574.1094, grad_fn=<DivBackward0>)


In [60]:
# setting the gradient vector to zero because pytorch accumulates the gradients
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


In [61]:
preds = model(inputs)

In [62]:
preds

tensor([[ -71.6513,  149.7075],
        [-107.5451,  201.8413],
        [-140.8128,  242.7085],
        [ -23.5940,  131.4973],
        [-137.3712,  206.2969]], grad_fn=<AddBackward0>)

In [63]:
loss = mse(preds, targets)

In [64]:
print(loss)

tensor(22431.2617, grad_fn=<DivBackward0>)


In [65]:
loss.backward()

In [66]:
w.grad
b.grad

tensor([-172.3949,   94.4103])

In [67]:
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    w.grad.zero_()
    b.grad.zero_()

In [68]:
print(w,b)

tensor([[ 0.7989, -0.6815, -1.3197],
        [ 0.3825,  0.9795,  0.9719]], requires_grad=True) tensor([-1.5106,  0.7338], requires_grad=True)


In [69]:
preds = model(inputs)

In [70]:
loss = mse(preds, targets)

In [71]:
loss

tensor(15590.4766, grad_fn=<DivBackward0>)

In [72]:
for i in range(0,1000):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

In [73]:
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(17.8577, grad_fn=<DivBackward0>)


In [74]:
preds, targets


(tensor([[ 57.4333,  70.4861],
         [ 78.2492, 100.1789],
         [127.2584, 133.7932],
         [ 23.6911,  37.2759],
         [ 93.2146, 118.2746]], grad_fn=<AddBackward0>),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.],
         [ 22.,  37.],
         [103., 119.]]))

In [75]:
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70], 
                   [74, 66, 43], 
                   [91, 87, 65], 
                   [88, 134, 59], 
                   [101, 44, 37], 
                   [68, 96, 71], 
                   [73, 66, 44], 
                   [92, 87, 64], 
                   [87, 135, 57], 
                   [103, 43, 36], 
                   [68, 97, 70]], 
                  dtype='float32')

targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119],
                    [57, 69], 
                    [80, 102], 
                    [118, 132], 
                    [21, 38], 
                    [104, 118], 
                    [57, 69], 
                    [82, 100], 
                    [118, 134], 
                    [20, 38], 
                    [102, 120]], 
                   dtype='float32')

inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

In [76]:
# Dataset loader
from torch.utils.data import TensorDataset

In [77]:
train_ds = TensorDataset(inputs, targets)
train_ds[0:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [78]:
from torch.utils.data import DataLoader
from torch import nn

In [79]:
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)

In [80]:
model = nn.Linear(3,2)
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[ 0.1591,  0.3752, -0.2354],
        [ 0.3384,  0.4767, -0.0783]], requires_grad=True)
Parameter containing:
tensor([ 0.0370, -0.0266], requires_grad=True)


In [81]:
list(model.parameters())

[Parameter containing:
 tensor([[ 0.1591,  0.3752, -0.2354],
         [ 0.3384,  0.4767, -0.0783]], requires_grad=True),
 Parameter containing:
 tensor([ 0.0370, -0.0266], requires_grad=True)]

In [82]:
preds = model(inputs)

In [83]:
preds

tensor([[26.6722, 53.2516],
        [32.4734, 67.7103],
        [50.5086, 88.7551],
        [23.6944, 52.0941],
        [30.5620, 63.6092],
        [26.4562, 53.1133],
        [31.8629, 67.1553],
        [50.4323, 89.0152],
        [23.9104, 52.2324],
        [30.1675, 63.1925],
        [26.0617, 52.6966],
        [32.2574, 67.5720],
        [51.1191, 89.3101],
        [24.0889, 52.5108],
        [30.7780, 63.7475]], grad_fn=<AddmmBackward>)

In [84]:
import torch.nn.functional as F

In [85]:
loss_fn = F.mse_loss

In [86]:
loss = loss_fn(model(inputs), targets)

In [87]:
loss

tensor(1975.9208, grad_fn=<MseLossBackward>)

In [88]:
opt = torch.optim.SGD(model.parameters(), lr=1e-5)

In [89]:
import sys

In [90]:
def fitting(n_epoch, model, loss_fn, opt, train_dl):
    
    min_loss = 10000000000
    
    for epoch in range(n_epoch):
        
        for x,y in train_dl:
            
            pred = model(x)
            
            loss = loss_fn(pred, y)
            
            loss.backward()
            
            opt.step()
            
            opt.zero_grad()
            
            if loss <= min_loss:
                min_loss = loss
        
        if (epoch+1)%10 == 0:
            print('loss and min loss',loss, min_loss)

In [91]:
fitting(100, model, loss_fn, opt, train_dl)

loss and min loss tensor(550.9150, grad_fn=<MseLossBackward>) tensor(34.4619, grad_fn=<MseLossBackward>)
loss and min loss tensor(248.5423, grad_fn=<MseLossBackward>) tensor(13.0725, grad_fn=<MseLossBackward>)
loss and min loss tensor(341.3757, grad_fn=<MseLossBackward>) tensor(9.6681, grad_fn=<MseLossBackward>)
loss and min loss tensor(216.6019, grad_fn=<MseLossBackward>) tensor(9.6681, grad_fn=<MseLossBackward>)
loss and min loss tensor(130.9000, grad_fn=<MseLossBackward>) tensor(9.6681, grad_fn=<MseLossBackward>)
loss and min loss tensor(91.0495, grad_fn=<MseLossBackward>) tensor(9.6681, grad_fn=<MseLossBackward>)
loss and min loss tensor(49.3795, grad_fn=<MseLossBackward>) tensor(9.6681, grad_fn=<MseLossBackward>)
loss and min loss tensor(48.3378, grad_fn=<MseLossBackward>) tensor(9.6681, grad_fn=<MseLossBackward>)
loss and min loss tensor(84.0615, grad_fn=<MseLossBackward>) tensor(9.6681, grad_fn=<MseLossBackward>)
loss and min loss tensor(94.7350, grad_fn=<MseLossBackward>) tenso

In [92]:
preds = model(inputs)

In [93]:
preds

tensor([[ 58.3231,  71.7207],
        [ 78.6731,  96.6002],
        [123.6701, 139.4445],
        [ 28.2929,  45.1386],
        [ 91.6584, 107.3477],
        [ 57.1499,  70.6955],
        [ 77.8439,  95.9150],
        [123.6304, 139.6582],
        [ 29.4661,  46.1638],
        [ 92.0024, 107.6878],
        [ 57.4939,  71.0356],
        [ 77.4998,  95.5750],
        [124.4993, 140.1297],
        [ 27.9488,  44.7985],
        [ 92.8316, 108.3729]], grad_fn=<AddmmBackward>)

In [94]:
model(torch.tensor([[75, 63, 44.]]))

tensor([[54.1661, 68.0232]], grad_fn=<AddmmBackward>)