# Using Pytorch (Linear Regression)

In [1]:
import numpy as np
import torch

In [2]:
# Build three scalar tensors.
# A tensor generalises a number, vector or matrix to any n-dimensional array.
x = torch.tensor(3.0)                      # plain value, no gradient requested
w = torch.tensor(4.0, requires_grad=True)  # gradient requested
b = torch.tensor(5.0, requires_grad=True)  # gradient requested

In [3]:
# print and see the tensors
print(x)
print(w)
print(b)

tensor(3.)
tensor(4., requires_grad=True)
tensor(5., requires_grad=True)


In [4]:
# let's combine these tensors
y = w * x + b
print(y)

tensor(17., grad_fn=<AddBackward0>)


In [5]:
# we can use torch to compute the derivative of y w.r.t. the tensors that have requires_grad set to True
y.backward() # compute the gradients; they are read back from w.grad and b.grad in the next cell

In [6]:
# display the gradients
print('dy/dw:', w.grad)
print('dy/db:', b.grad)

dy/dw: tensor(3.)
dy/db: tensor(1.)


# Problem
- A model that predicts crop yields of apples and oranges
- We want to find a set of weights and biases using a set of training data and make predictions from them

```
yield_apple = w11 * temp + w12 * rainfall + w13 * humidity + b1

yield_orange = w21 * temp + w22 * rainfall + w23 * humidity + b2
```

# Training Data
- The data are 2 matrices - the inputs and the targets

In [10]:
# Input matrix: one row per sample, columns are (temp, rainfall, humidity).
inputs = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
    ],
    dtype=np.float32,
)

In [11]:
# Target matrix: one row per sample, columns are (apples, oranges).
targets = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
    ],
    dtype=np.float32,
)

In [12]:
# Now that the inputs and targets are ready, convert them to tensors.
# torch.as_tensor is used instead of torch.from_numpy so this cell can be re-run:
# it rebinds `inputs`/`targets` to tensors, and from_numpy raises TypeError when it
# is handed a tensor, whereas as_tensor returns an existing tensor unchanged and
# still wraps a NumPy array without copying it.
inputs = torch.as_tensor(inputs)
targets = torch.as_tensor(targets)

print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


# Linear Regression
- We pick some random values to be the weights and biases of the models

In [13]:
# Seed torch's global RNG before drawing the initial parameters so that the random
# starting point is the same on every Restart & Run All. Outputs recorded before
# the seed was added will differ until the notebook is re-run.
torch.manual_seed(42)

# w holds one row of weights per target and one column per input feature;
# b holds one bias per target (see model: x @ w.t() + b).
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)

print(w)
print(b)

tensor([[ 0.9699, -0.8580, -1.2345],
        [-0.3736,  0.1180, -0.6072]], requires_grad=True)
tensor([-1.3427,  1.2633], requires_grad=True)


In [14]:
# model
def model(x):
    """Linear model: multiply ``x`` by the transposed notebook-level ``w`` and add ``b``.

    ``w`` and ``b`` are not parameters; they are looked up in the notebook
    namespace every time the function is called.
    """
    weighted_sum = torch.matmul(x, w.t())
    return weighted_sum + b

In [15]:
# predictions
preds = model(inputs)
print(preds)

tensor([[ -41.1086,  -44.2140],
        [ -67.5922,  -61.2124],
        [-103.5314,  -50.6479],
        [  15.0156,  -54.2363],
        [-103.1998,  -55.6929]], grad_fn=<AddBackward0>)


In [16]:
# print the targets (actual)
print(targets)  

# notice that the predictions are way off the actual targets
# since we use random weights and biases

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


# Using Loss Function
- We will use the MSE

In [18]:
def mse(t1, t2):
    """Mean squared error between two tensors.

    Squares the element-wise difference ``t1 - t2``, sums the squares and
    divides by the number of elements in the difference.
    """
    residual = t1 - t2
    squared = residual * residual
    return squared.sum() / squared.numel()

In [19]:
loss = mse(preds, targets)
print(loss)

tensor(23552.3047, grad_fn=<DivBackward0>)


- The above value is the loss that shows how bad the model is at predicting the target variables. Generally, the lower the loss, the better.

- We can also compute the loss gradients w.r.t the weights and biases

In [20]:
loss.backward()

In [21]:
print(w)
print(w.grad)

tensor([[ 0.9699, -0.8580, -1.2345],
        [-0.3736,  0.1180, -0.6072]], requires_grad=True)
tensor([[-10982.2500, -13899.4238,  -8256.9609],
        [-12087.2471, -13445.9062,  -8309.7246]])


In [22]:
print(b)
print(b.grad)

tensor([-1.3427,  1.2633], requires_grad=True)
tensor([-136.2833, -145.2007])


## Some Calculus Knowledge
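- The gradient is the rate of change of the loss with respect to each weight or bias element
- A positive gradient means that increasing the element's value increases the loss, and decreasing it decreases the loss
- It works the other way round for a negative gradient: increasing the element's value decreases the loss, and decreasing it increases the loss.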

In [23]:
# Zero the stored gradients in place.
# torch accumulates gradients, so they are cleared here.
w.grad.zero_()
b.grad.zero_()

print(w.grad, b.grad, sep='\n')

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


# Adjusting `w` and `b` using Gradient Descent
The workflow is as follows:
- Generate predictions
- Calculate the loss
- Compute gradients w.r.t `w` and `b`
- Adjust the weights and biases
- Reset the gradients to zero

In [24]:
# generate the predictions
preds = model(inputs)
print(preds)

tensor([[ -41.1086,  -44.2140],
        [ -67.5922,  -61.2124],
        [-103.5314,  -50.6479],
        [  15.0156,  -54.2363],
        [-103.1998,  -55.6929]], grad_fn=<AddBackward0>)


In [25]:
# compute the loss
loss = mse(preds, targets)
print(loss)

tensor(23552.3047, grad_fn=<DivBackward0>)


In [26]:
# compute the gradients
loss.backward()

In [27]:
# One gradient-descent step followed by a gradient reset.
# The in-place updates run under no_grad so that autograd does not track them.
with torch.no_grad():
    w.sub_(w.grad * 1e-5)
    w.grad.zero_()
    b.sub_(b.grad * 1e-5)
    b.grad.zero_()

In [28]:
print(w)

tensor([[ 1.0797, -0.7190, -1.1519],
        [-0.2527,  0.2524, -0.5241]], requires_grad=True)


In [29]:
# recalculate the loss
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(16467.3594, grad_fn=<DivBackward0>)


- We can see that with the new weights and biases, we can reduce the loss

## Using Multiple Epochs

In [30]:
# Repeat the predict -> loss -> backward -> update cycle for 100 epochs.
for epoch in range(100):
    # forward pass and loss
    preds = model(inputs)
    loss = mse(preds, targets)

    # backward pass: populate w.grad and b.grad
    loss.backward()

    # step against the gradient, then clear it for the next epoch
    with torch.no_grad():
        w.sub_(w.grad * 1e-5)
        w.grad.zero_()
        b.sub_(b.grad * 1e-5)
        b.grad.zero_()

In [31]:
# calculate the loss
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(610.6926, grad_fn=<DivBackward0>)


In [32]:
# see the predictions
print(preds)

tensor([[ 67.4894,  73.4702],
        [ 80.7470,  95.2535],
        [105.4114, 140.1996],
        [ 81.5402,  53.3925],
        [ 63.8350, 100.4080]], grad_fn=<AddBackward0>)


In [33]:
# see the actual targets (the predictions are now much closer than with the random weights)
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


# Perform Linear Regression Using Torch's Builtins

In [34]:
import torch.nn as nn

# Input matrix: one row per sample, three feature columns.
inputs = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
        [74, 66, 43],
        [91, 87, 65],
        [88, 134, 59],
        [101, 44, 37],
    ],
    dtype=np.float32,
)

# Target matrix: one row per sample, two target columns.
targets = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
        [57, 69],
        [80, 102],
        [118, 132],
        [21, 38],
    ],
    dtype=np.float32,
)

In [35]:
# Convert the arrays to tensors. torch.as_tensor (rather than torch.from_numpy)
# keeps this cell re-runnable: it rebinds `inputs`/`targets` to tensors, and
# from_numpy raises TypeError when given a tensor, while as_tensor returns an
# existing tensor unchanged and still wraps a NumPy array without copying it.
inputs = torch.as_tensor(inputs)
targets = torch.as_tensor(targets)

In [36]:
from torch.utils.data import TensorDataset

# Pair the inputs with the targets so both can be indexed together.
train_ds = TensorDataset(inputs, targets)
# Show the first three (input, target) rows.
train_ds[:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [37]:
# Data loader: serves the dataset in shuffled batches of up to `batch_size` rows.
batch_size = 5
train_dl = torch.utils.data.DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
# Peek at a single batch.
next(iter(train_dl))

[tensor([[ 73.,  67.,  43.],
         [ 87., 134.,  58.],
         [ 74.,  66.,  43.],
         [101.,  44.,  37.],
         [ 69.,  96.,  70.]]),
 tensor([[ 56.,  70.],
         [119., 133.],
         [ 57.,  69.],
         [ 21.,  38.],
         [103., 119.]])]

In [38]:
# Linear layer with 3 input features and 2 outputs.
# Note: this rebinds the name `model`, replacing the hand-written function of the same name.
model = nn.Linear(in_features=3, out_features=2)
print(model.weight, model.bias, sep='\n')

Parameter containing:
tensor([[-0.1982,  0.1240, -0.1796],
        [-0.0325, -0.3151,  0.1114]], requires_grad=True)
Parameter containing:
tensor([-0.5711, -0.4596], requires_grad=True)


In [39]:
# Stochastic gradient descent over the model's parameters with learning rate 1e-5.
opt = torch.optim.SGD(params=model.parameters(), lr=1e-5)

In [40]:
# import nn.functional
import torch.nn.functional as F

# define the loss function: the mse_loss function from torch.nn.functional
loss_fn = F.mse_loss

# see the loss of the current model on the full inputs/targets
# (the training call appears further down in the notebook)
loss = loss_fn(model(inputs), targets)
print(loss)

tensor(11864.3721, grad_fn=<MseLossBackward0>)


In [43]:
# train the model
def fit(num_epochs, model, loss_fn, opt):
    """Train ``model`` batch by batch and print the loss afterwards.

    Batches come from the notebook-level ``train_dl``; the loss printed at the
    end is computed on the notebook-level ``inputs`` and ``targets``. None of
    these three names are parameters of this function.

    Args:
        num_epochs: how many times to iterate over ``train_dl``.
        model: callable that maps a batch of inputs to predictions.
        loss_fn: callable ``loss_fn(predictions, targets)`` whose result
            supports ``backward()``.
        opt: optimizer providing ``step()`` and ``zero_grad()``.

    Returns:
        None; the final loss is only printed.
    """
    for _ in range(num_epochs):
        for batch_inputs, batch_targets in train_dl:
            # forward pass and loss for this batch
            batch_loss = loss_fn(model(batch_inputs), batch_targets)
            # backward pass
            batch_loss.backward()
            # apply the update, then clear gradients so they do not
            # accumulate into the next batch
            opt.step()
            opt.zero_grad()
    final_loss = loss_fn(model(inputs), targets)
    print('Training loss: ', final_loss)

In [44]:
fit(100, model, loss_fn, opt)

Training loss:  tensor(15.7548, grad_fn=<MseLossBackward0>)


In [45]:
# generate predictions
preds = model(inputs)
print(preds)

tensor([[ 57.0276,  70.6110],
        [ 78.4118,  98.3808],
        [122.0059, 134.1201],
        [ 24.8582,  41.7886],
        [ 93.4030, 112.5369],
        [ 55.8097,  69.6003],
        [ 77.7269,  98.1593],
        [122.0261, 134.5751],
        [ 26.0760,  42.7993]], grad_fn=<AddmmBackward0>)


In [46]:
# compare with real targets
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 57.,  69.],
        [ 80., 102.],
        [118., 132.],
        [ 21.,  38.]])
