In [1]:
import numpy as np
import torch

In [2]:
# implemnting linear regression in pyTorch
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

In [3]:
# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In [4]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [5]:
w = torch.randn(2,3,requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[-0.1612,  1.3322, -0.6382],
        [ 0.0486,  1.0041,  0.3128]], requires_grad=True)
tensor([ 1.0710, -1.2302], requires_grad=True)


In [6]:
# @ represents matrix multiplication in pytorch and .t returns transpose of matrix
def model(x):
    return x @ w.t() + b

In [7]:
preds = model(inputs)
print(preds)

tensor([[ 51.1153,  83.0381],
        [ 62.7867, 111.5670],
        [128.5402, 155.6824],
        [ 18.2981,  58.4716],
        [ 73.1609, 120.4084]], grad_fn=<AddBackward0>)


In [8]:
print(targets) # since we haven't defined a loss function, the predictions are very poor!

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [9]:
# compute loss
def mse(t1, t2):
    diff = t1-t2
    return torch.sum(diff*diff) / (diff.numel()) #number of elements
loss = mse(preds, targets)
print(loss)

tensor(260.9834, grad_fn=<DivBackward0>)


In [10]:
loss.backward()

In [11]:
print(w)
print(w.grad)

tensor([[-0.1612,  1.3322, -0.6382],
        [ 0.0486,  1.0041,  0.3128]], requires_grad=True)
tensor([[-724.0974, -735.0787, -609.6135],
        [1234.8066, 1180.2760,  689.1089]])


In [12]:
# adjust the weights by subtracting a small quantity proportional to the gradient
with torch.no_grad(): 
    #tells torch not to track, calculate, or modify gradients while updating weight and bias
    w -= w.grad*1e-5
    b -= b.grad*1e-5

In [13]:
print(w)
print(w.grad)
print(b)
print(b.grad)

tensor([[-0.1540,  1.3395, -0.6321],
        [ 0.0362,  0.9923,  0.3060]], requires_grad=True)
tensor([[-724.0974, -735.0787, -609.6135],
        [1234.8066, 1180.2760,  689.1089]])
tensor([ 1.0711, -1.2304], requires_grad=True)
tensor([-9.4198, 13.8335])


In [14]:
preds = model(inputs)
loss = mse(preds, targets)
print(loss) # already a big change in loss

tensor(216.9861, grad_fn=<DivBackward0>)


In [15]:
# Compute gradients
loss.backward()
print(w.grad)
print(b.grad)

tensor([[-1315.6941, -1328.0032, -1131.3207],
        [ 2264.7231,  2141.4233,  1242.8352]])
tensor([-17.2674,  25.2395])


In [16]:
# adjust the weights by subtracting a small quantity proportional to the gradient
with torch.no_grad(): 
    #tells torch not to track, calculate, or modify gradients while updating weight and bias
    w -= w.grad*1e-5
    b -= b.grad*1e-5
    w.grad.zero_()
    b.grad.zero_()

In [17]:
print(w)
print(b)

tensor([[-0.1408,  1.3528, -0.6208],
        [ 0.0136,  0.9708,  0.2935]], requires_grad=True)
tensor([ 1.0712, -1.2306], requires_grad=True)


In [18]:
preds = model(inputs)
loss = mse(preds, targets)
print(loss) # already a big change in loss

tensor(158.8743, grad_fn=<DivBackward0>)


In [19]:
# Train over multiple Epochs
for i in range(100):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad*1e-5 #the learning rate (a hyper parameter to tune)
        b -= b.grad*1e-5
        w.grad.zero_()
        b.grad.zero_()

In [20]:
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(99.9222, grad_fn=<DivBackward0>)


In [21]:
print(preds)

tensor([[ 58.2802,  71.3012],
        [ 73.1745,  96.9915],
        [137.4462, 139.7262],
        [ 25.5349,  43.5407],
        [ 83.8195, 108.6873]], grad_fn=<AddBackward0>)


In [22]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


# Linear regression using PyTorch built-in functions

In [23]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70], 
                   [74, 66, 43], 
                   [91, 87, 65], 
                   [88, 134, 59], 
                   [101, 44, 37], 
                   [68, 96, 71], 
                   [73, 66, 44], 
                   [92, 87, 64], 
                   [87, 135, 57], 
                   [103, 43, 36], 
                   [68, 97, 70]], 
                  dtype='float32')

# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119],
                    [57, 69], 
                    [80, 102], 
                    [118, 132], 
                    [21, 38], 
                    [104, 118], 
                    [57, 69], 
                    [82, 100], 
                    [118, 134], 
                    [20, 38], 
                    [102, 120]], 
                   dtype='float32')

inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)


In [24]:
from torch.utils.data import TensorDataset 
# TensorDataset allows us to combine features/labels and using the pytorch
# api to do the training that we have done above 
train_ds = TensorDataset(inputs,targets)
train_ds[0:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [25]:
from torch.utils.data import DataLoader
# Dataloaders can split the data into branches of predetermined sie while training. It also provides other utilities such as shuffling and random sampling of the data

batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)

In [26]:
for xb, yb in train_dl:
    print(xb)
    print(yb)
    break

tensor([[103.,  43.,  36.],
        [ 92.,  87.,  64.],
        [ 91.,  88.,  64.],
        [ 88., 134.,  59.],
        [ 69.,  96.,  70.]])
tensor([[ 20.,  38.],
        [ 82., 100.],
        [ 81., 101.],
        [118., 132.],
        [103., 119.]])


In [27]:
# we can use the nn.Linear class from pytorch to define the model 
from torch import nn
model = nn.Linear(3,2)
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[-0.4293,  0.2626, -0.3217],
        [ 0.5766, -0.0741, -0.5435]], requires_grad=True)
Parameter containing:
tensor([-0.5482,  0.2032], requires_grad=True)


In [28]:
?nn.Linear

[1;31mInit signature:[0m [0mnn[0m[1;33m.[0m[0mLinear[0m[1;33m([0m[0min_features[0m[1;33m:[0m [0mint[0m[1;33m,[0m [0mout_features[0m[1;33m:[0m [0mint[0m[1;33m,[0m [0mbias[0m[1;33m:[0m [0mbool[0m [1;33m=[0m [1;32mTrue[0m[1;33m)[0m [1;33m->[0m [1;32mNone[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m     
Applies a linear transformation to the incoming data: :math:`y = xA^T + b`

This module supports :ref:`TensorFloat32<tf32_on_ampere>`.

Args:
    in_features: size of each input sample
    out_features: size of each output sample
    bias: If set to ``False``, the layer will not learn an additive bias.
        Default: ``True``

Shape:
    - Input: :math:`(N, *, H_{in})` where :math:`*` means any number of
      additional dimensions and :math:`H_{in} = \text{in\_features}`
    - Output: :math:`(N, *, H_{out})` where all but the last dimension
      are the same shape as the input and :math:`H_{out} = \text{out\_features}`.

Attributes:
    w

In [29]:
list(model.parameters())

[Parameter containing:
 tensor([[-0.4293,  0.2626, -0.3217],
         [ 0.5766, -0.0741, -0.5435]], requires_grad=True),
 Parameter containing:
 tensor([-0.5482,  0.2032], requires_grad=True)]

In [30]:
preds = model(inputs)
preds

tensor([[-28.1227,  13.9598],
        [-37.0901,  11.3687],
        [-21.3612,   8.9132],
        [-44.9458,  35.7224],
        [-27.4745,  -5.1714],
        [-28.8147,  14.6105],
        [-37.6745,  10.8993],
        [-22.1122,   8.9464],
        [-44.2539,  35.0716],
        [-27.3669,  -6.2915],
        [-28.7071,  13.4904],
        [-37.7821,  12.0195],
        [-20.7768,   9.3826],
        [-45.0534,  36.8425],
        [-26.7826,  -5.8221]], grad_fn=<AddmmBackward>)

In [31]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 57.,  69.],
        [ 80., 102.],
        [118., 132.],
        [ 21.,  38.],
        [104., 118.],
        [ 57.,  69.],
        [ 82., 100.],
        [118., 134.],
        [ 20.,  38.],
        [102., 120.]])

In [32]:
# we can use the built in loss function too
import torch.nn.functional as F

loss_fn = F.mse_loss
loss = loss_fn(model(inputs), targets)
print(loss)

tensor(10412.9551, grad_fn=<MseLossBackward>)


In [33]:
# Instead of manually manipulating the weights and biases, we can use pysparks api
# here, we will use the SGD -> Stochastic gradient descent 

opt = torch.optim.SGD(model.parameters(), lr=1e-5) #note learning rate parameter

In [34]:
def fit(num_epochs, model, loss_fn, opt, train_dl):
    for epoch in range(num_epochs):
        for xb,yb in train_dl:
            # 1. generate predictions
            pred=model(xb)

            # 2. Calculate loss
            loss = loss_fn(pred, yb)

            # 3. Compute grads
            loss.backward()

            # 4. update parameters using gradients
            opt.step()

            #5. Reset grads to zero
            opt.zero_grad()
        #print progress
        if (epoch+1) %10 ==0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

In [35]:
fit(100, model, loss_fn, opt, train_dl)

Epoch [10/100], Loss: 531.4619
Epoch [20/100], Loss: 280.5080
Epoch [30/100], Loss: 271.6322
Epoch [40/100], Loss: 290.0078
Epoch [50/100], Loss: 251.7290
Epoch [60/100], Loss: 129.2810
Epoch [70/100], Loss: 145.3180
Epoch [80/100], Loss: 24.2143
Epoch [90/100], Loss: 40.9212
Epoch [100/100], Loss: 38.3131


In [36]:
preds = model(inputs)
preds

tensor([[ 57.8222,  72.7014],
        [ 78.9335,  95.8695],
        [124.2176, 140.3302],
        [ 25.7388,  49.8391],
        [ 93.4631, 103.3211],
        [ 56.5901,  71.7616],
        [ 78.1602,  94.9742],
        [124.1965, 140.4580],
        [ 26.9709,  50.7788],
        [ 93.9220, 103.3656],
        [ 57.0489,  71.8061],
        [ 77.7014,  94.9297],
        [124.9909, 141.2255],
        [ 25.2799,  49.7946],
        [ 94.6952, 104.2608]], grad_fn=<AddmmBackward>)

In [37]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 57.,  69.],
        [ 80., 102.],
        [118., 132.],
        [ 21.,  38.],
        [104., 118.],
        [ 57.,  69.],
        [ 82., 100.],
        [118., 134.],
        [ 20.,  38.],
        [102., 120.]])