In [59]:
# Import Numpy & PyTorch
import numpy as np
import torch

## Linear Regression Model using PyTorch built-ins

Let's re-implement the same model using some built-in functions and classes from PyTorch.

And now using two different targets: Apples and Oranges

In [60]:
# Imports
import torch.nn as nn

In [61]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], [91, 88, 64], [87, 134, 58], [102, 43, 37], [69, 96, 70], [73, 67, 43], [91, 88, 64], [87, 134, 58], [102, 43, 37], [69, 96, 70], [73, 67, 43], [91, 88, 64], [87, 134, 58], [102, 43, 37], [69, 96, 70]], dtype='float32')
# Targets (apples, oranges)
targets = np.array([[56, 70], [81, 101], [119, 133], [22, 37], [103, 119], 
                    [56, 70], [81, 101], [119, 133], [22, 37], [103, 119], 
                    [56, 70], [81, 101], [119, 133], [22, 37], [103, 119]], dtype='float32')

In [62]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

### Dataset and DataLoader

We'll create a `TensorDataset`, which allows access to rows from `inputs` and `targets` as tuples. We'll also create a DataLoader, to split the data into batches while training. It also provides other utilities like shuffling and sampling.

In [63]:
# Import tensor dataset & data loader
from torch.utils.data import TensorDataset, DataLoader

In [64]:
# Define dataset
from torch.utils.data import TensorDataset, DataLoader
dataset = TensorDataset(inputs, targets)
dataset[0:4]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.],
         [102.,  43.,  37.]]), tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.],
         [ 22.,  37.]]))

In [65]:
# Define data loader
batch_size = 6
dl = DataLoader(dataset, batch_size, shuffle=True)
next(iter(dl))

[tensor([[ 87., 134.,  58.],
         [ 91.,  88.,  64.],
         [ 73.,  67.,  43.],
         [ 69.,  96.,  70.],
         [ 87., 134.,  58.],
         [ 69.,  96.,  70.]]), tensor([[119., 133.],
         [ 81., 101.],
         [ 56.,  70.],
         [103., 119.],
         [119., 133.],
         [103., 119.]])]

### nn.Linear
Instead of initializing the weights & biases manually, we can define the model using `nn.Linear`.

In [66]:
# Define model
model = nn.Linear(3, 2)
print(model.weight, end="\n---------\n")
print(model.bias)

Parameter containing:
tensor([[ 0.1074,  0.3729,  0.2638],
        [-0.2969,  0.3819, -0.4918]], requires_grad=True)
---------
Parameter containing:
tensor([-0.3037,  0.1127], requires_grad=True)


### Optimizer
Instead of manually manipulating the weights & biases using gradients, we can use the optimizer `optim.SGD`.

In [67]:
# Define optimizer
opt = torch.optim.SGD(model.parameters(), lr=1e-5)

### Loss Function
Instead of defining a loss function manually, we can use the built-in loss function `mse_loss`.

In [68]:
# Import nn.functional
import torch.nn.functional as F 

In [69]:
# Define loss function
loss_fn = F.mse_loss

In [70]:
loss = loss_fn(model(inputs) , targets)
print(loss)

tensor(7004.7100, grad_fn=<MseLossBackward>)


### Train the model

We are ready to train the model now. We can define a utility function `fit` which trains the model for a given number of epochs.

In [71]:
# Define a utility function to train the model
def fit(num_epochs, model, loss_fn, opt):
    for epoch in range(num_epochs):
        for xb,yb in dl:
            # Generate predictions
            pred = model(xb)
            loss = loss_fn(pred, yb)
            # Perform gradient descent
            loss.backward()
            opt.step()
            opt.zero_grad()
    print('Training loss: ', loss_fn(model(inputs), targets))

In [72]:
# Train the model for 100 epochs
fit(100 , model , loss_fn, opt)

Training loss:  tensor(42.2733, grad_fn=<MseLossBackward>)


In [73]:
# Generate predictions
preds = model(inputs)
print('\n', preds)
#preds = model(?)
#preds


 tensor([[ 58.7020,  72.2617],
        [ 81.9182,  96.2712],
        [120.1566, 144.7002],
        [ 27.5752,  43.8031],
        [ 97.3310, 107.1942],
        [ 58.7020,  72.2617],
        [ 81.9182,  96.2712],
        [120.1566, 144.7002],
        [ 27.5752,  43.8031],
        [ 97.3310, 107.1942],
        [ 58.7020,  72.2617],
        [ 81.9182,  96.2712],
        [120.1566, 144.7002],
        [ 27.5752,  43.8031],
        [ 97.3310, 107.1942]], grad_fn=<AddmmBackward>)


In [74]:
# Compare with targets
print(targets)
#targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


Now we can define the model, optimizer and loss function exactly as before.

In [75]:
fit(100 , model , loss_fn, opt)

Training loss:  tensor(21.1775, grad_fn=<MseLossBackward>)


In [76]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], [91, 88, 64], [87, 134, 58], [102, 43, 37], [69, 96, 70], [73, 67, 43], [91, 88, 64], [87, 134, 58], [102, 43, 37], [69, 96, 70], [73, 67, 43], [91, 88, 64], [87, 134, 58], [102, 43, 37], [69, 96, 70]], dtype='float32')
# Targets (apples, oranges)
targets = np.array([[56, 70], [81, 101], [119, 133], [22, 37], [103, 119], 
                    [56, 70], [81, 101], [119, 133], [22, 37], [103, 119], 
                    [56, 70], [81, 101], [119, 133], [22, 37], [103, 119]], dtype='float32')

x_shape = inputs.shape

In [77]:
# weights and biases
weights = np.random.rand(2, 3)
biases = np.random.rand(15, 2)
print("Weights  :  ", weights, sep='\n')
print("Biases  :  ", biases, sep="\n")

Weights  :  
[[0.98936605 0.83483543 0.22719947]
 [0.01721337 0.3809413  0.40602036]]
Biases  :  
[[0.19362099 0.83139655]
 [0.38277295 0.62878917]
 [0.01459183 0.30568334]
 [0.58025494 0.91827318]
 [0.79490981 0.51945183]
 [0.2026691  0.29850829]
 [0.53139414 0.78533795]
 [0.16150794 0.39880053]
 [0.64915393 0.26922375]
 [0.5781346  0.5812849 ]
 [0.39213989 0.55009083]
 [0.72353984 0.3492637 ]
 [0.27355861 0.96837253]
 [0.46708099 0.97111864]
 [0.22310623 0.12221717]]


In [78]:
# Define the model
def model(x):
    return x @ np.transpose(weights) + biases

In [79]:
# Generate predictions
pred = model(inputs)

# Compare with targets
print("Predictions : ", pred, sep="\n")
print("\nTargets : ", targets, sep="\n")

Predictions : 
[[138.12089393  45.06991516]
 [178.42136779  61.70334333]
 [211.13495537  76.39856181]
 [145.79989625  34.07726597]
 [165.10933177  66.69896452]
 [138.12994204  44.5370269 ]
 [178.56998897  61.85989211]
 [211.28187149  76.491679  ]
 [145.86879524  33.42821654]
 [164.89255656  66.76079759]
 [138.31941283  44.78860944]
 [178.76213468  61.42381786]
 [211.39392216  77.061251  ]
 [145.6867223   34.13011143]
 [164.53752819  66.30172987]]

Targets : 
[[ 56.  70.]
 [ 81. 101.]
 [119. 133.]
 [ 22.  37.]
 [103. 119.]
 [ 56.  70.]
 [ 81. 101.]
 [119. 133.]
 [ 22.  37.]
 [103. 119.]
 [ 56.  70.]
 [ 81. 101.]
 [119. 133.]
 [ 22.  37.]
 [103. 119.]]


In [80]:
# MSE loss
def mse(t1, t2):
    diff = t1 - t2
    return np.sum(diff * diff) / len(diff)

In [81]:
# Compute loss
loss = mse(pred, targets)
print(loss)

10410.91585201959


In [82]:
# compute gradients
biases_grad = (pred - targets) * 2/x_shape[0]
weights_grad = (np.matmul(np.transpose((pred - targets)), inputs)) * 2/x_shape[0]

print("Weights gradient  :  ",weights_grad, sep="\n")
print("\nBiases gradient  :  ",biases_grad, sep="\n")

Weights gradient  :  
[[15920.42706595 15087.36280851  9616.22360809]
 [-5702.68660998 -7146.76049168 -4261.5749247 ]]

Biases gradient  :  
[[10.94945252 -3.32401131]
 [12.98951571 -5.23955422]
 [12.28466072 -7.54685842]
 [16.50665283 -0.38969787]
 [ 8.28124424 -6.9734714 ]
 [10.95065894 -3.39506308]
 [13.00933186 -5.21868105]
 [12.30424953 -7.5344428 ]
 [16.51583937 -0.47623779]
 [ 8.25234087 -6.96522699]
 [10.97592171 -3.36151874]
 [13.03495129 -5.27682428]
 [12.31918962 -7.45849987]
 [16.49156297 -0.38265181]
 [ 8.20500376 -7.02643602]]


In [83]:
# Adjust weights
weights -= weights_grad * 1e-4
biases -= biases_grad * 1e-4

In [84]:
print("Weights  :  ", weights, sep='\n')
print("\nBiases  :  ", biases, sep="\n")

Weights  :  
[[-0.60267665 -0.67390085 -0.73442289]
 [ 0.58748203  1.09561735  0.83217785]]

Biases  :  
[[0.19252604 0.83172895]
 [0.381474   0.62931312]
 [0.01336336 0.30643803]
 [0.57860427 0.91831215]
 [0.79408169 0.52014917]
 [0.20157403 0.29884779]
 [0.5300932  0.78585981]
 [0.16027752 0.39955398]
 [0.64750234 0.26927137]
 [0.57730937 0.58198142]
 [0.3910423  0.55042698]
 [0.72223635 0.34979138]
 [0.27232669 0.96911838]
 [0.46543184 0.97115691]
 [0.22228573 0.12291982]]


In [85]:
# Calculate loss
pred = model(inputs)
loss = mse(pred, targets)
print(loss)

63396.16567525911


In [86]:
# repeating same for 200 times
for i in range(200):
    pred = model(inputs)
    loss = mse(pred, targets)
    
    biases_grad = ((((inputs@np.transpose(weights)) + biases) - targets)) * 2/x_shape[0]
    weights_grad = (np.matmul(np.transpose((((inputs@np.transpose(weights)) + biases) - targets)), inputs)) * 2/x_shape[0]

    weights -= weights_grad * 1e-4
    biases -= biases_grad * 1e-4

In [87]:
# Calculate loss
pred = model(inputs)
loss = mse(pred, targets)
print(loss)

3.628399338406837e+169


In [88]:
# Print predictions
print(pred)

[[-4.46912783e+84  1.88097861e+84]
 [-5.87263960e+84  2.47169244e+84]
 [-6.94011979e+84  2.92097639e+84]
 [-4.44956874e+84  1.87274653e+84]
 [-5.62974770e+84  2.36946344e+84]
 [-4.46912783e+84  1.88097861e+84]
 [-5.87263960e+84  2.47169244e+84]
 [-6.94011979e+84  2.92097639e+84]
 [-4.44956874e+84  1.87274653e+84]
 [-5.62974770e+84  2.36946344e+84]
 [-4.46912783e+84  1.88097861e+84]
 [-5.87263960e+84  2.47169244e+84]
 [-6.94011979e+84  2.92097639e+84]
 [-4.44956874e+84  1.87274653e+84]
 [-5.62974770e+84  2.36946344e+84]]


In [89]:
# Print targets
print(targets)

[[ 56.  70.]
 [ 81. 101.]
 [119. 133.]
 [ 22.  37.]
 [103. 119.]
 [ 56.  70.]
 [ 81. 101.]
 [119. 133.]
 [ 22.  37.]
 [103. 119.]
 [ 56.  70.]
 [ 81. 101.]
 [119. 133.]
 [ 22.  37.]
 [103. 119.]]
