In [1]:
import torch
import numpy as np

In [2]:
# Linear model fitted in this notebook (one equation per crop):
#   yield_apple  = w11 * temp + w12 * rainfall + w13 * humidity + b1
#   yield_orange = w21 * temp + w22 * rainfall + w23 * humidity + b2
# Each row below holds (temp, rainfall, humidity).
inputs = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
    ],
    dtype=np.float32,
)

In [3]:
# Each row below holds (apples, oranges), aligned row-for-row with `inputs`.
targets = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
    ],
    dtype=np.float32,
)

In [4]:
# Convert inputs and targets to tensors.
# torch.as_tensor accepts NumPy arrays as well as tensors, so this cell can be
# re-run after `inputs`/`targets` already hold tensors. torch.from_numpy only
# accepts ndarrays and would raise a TypeError on the second run.
inputs = torch.as_tensor(inputs)
targets = torch.as_tensor(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [5]:
# Weights and biases
# Seed the global RNG first so the random initialisation below is the same on
# every run. Numbers captured from earlier, unseeded runs will differ.
torch.manual_seed(42)
w = torch.randn(2, 3, requires_grad=True)  # one row of weights per target column
b = torch.randn(2, requires_grad=True)     # one bias per target column
print(w)
print(b)

tensor([[-0.2274,  1.8687, -1.4486],
        [ 2.3208,  1.0258,  0.5520]], requires_grad=True)
tensor([2.3793, 0.7653], requires_grad=True)


In [6]:
# Display the input tensor
inputs

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])

In [7]:
# Forward pass written out by hand: inputs times transposed weights, plus bias
torch.matmul(inputs, w.t()) + b

tensor([[ 48.6910, 262.6494],
        [ 53.4193, 337.5576],
        [148.9791, 372.1505],
        [  5.9410, 302.0215],
        [ 64.6791, 298.0179]], grad_fn=<AddBackward0>)

In [8]:
def model(x):
    """Linear model: multiply ``x`` by the transposed weights and add the bias.

    Reads the notebook-level tensors ``w`` and ``b``. The bias is broadcast
    across the rows of the matrix product (one row per observation).
    """
    weighted = torch.matmul(x, w.t())
    return weighted + b

In [9]:
# Pass the inputs through the model and keep the result in `preds`
preds = model(inputs)
print(preds)

tensor([[ 48.6910, 262.6494],
        [ 53.4193, 337.5576],
        [148.9791, 372.1505],
        [  5.9410, 302.0215],
        [ 64.6791, 298.0179]], grad_fn=<AddBackward0>)


In [10]:
# Print the targets for comparison with the predictions
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [11]:
# Mean squared error by hand: sum of the squared element-wise differences
# divided by the number of elements (diff.numel(), 10 in this case).
diff = preds - targets
(diff * diff).sum() / diff.numel()

tensor(25598.9336, grad_fn=<DivBackward0>)

## Loss Function

In [12]:
#Mean Squared Error
def mse(t1, t2):
    """Return the mean squared error between tensors ``t1`` and ``t2``.

    The squared element-wise differences are summed and divided by the
    number of elements in the difference tensor.
    """
    residual = t1 - t2
    squared_total = (residual * residual).sum()
    return squared_total / residual.numel()

In [13]:
# Compute the loss between the current predictions and the targets
loss = mse(preds, targets)
print(loss)

tensor(25598.9336, grad_fn=<DivBackward0>)


## Compute Gradient

#### With PyTorch, we can automatically compute the gradient or derivative of the loss w.r.t. the weights and biases because they have requires_grad set to True. We'll see how this is useful in just a moment.

In [14]:
# Compute gradients of the loss; the results are stored in w.grad and b.grad
loss.backward()

#### The gradients are stored in the .grad property of the respective tensors. Note that the derivative of the loss w.r.t. the weights matrix is itself a matrix with the same dimensions.

In [15]:
# Weight matrix, followed by the derivative of the loss w.r.t. that matrix
print(w, w.grad, sep='\n')

tensor([[-0.2274,  1.8687, -1.4486],
        [ 2.3208,  1.0258,  0.5520]], requires_grad=True)
tensor([[ -943.4759,  -653.7902,  -723.4622],
        [19156.1348, 18870.4785, 11926.2783]])


In [16]:
# Bias vector, followed by the derivative of the loss w.r.t. the bias
print(b, b.grad, sep='\n')

tensor([2.3793, 0.7653], requires_grad=True)
tensor([-11.8581, 222.4794])


## Adjust weights and bias to reduce Loss

In [17]:
# Display the weights next to their gradients before applying the update
w, w.grad

(tensor([[-0.2274,  1.8687, -1.4486],
         [ 2.3208,  1.0258,  0.5520]], requires_grad=True),
 tensor([[ -943.4759,  -653.7902,  -723.4622],
         [19156.1348, 18870.4785, 11926.2783]]))

In [18]:
# Show the parameters before the update, then take one gradient-descent step
# (learning rate 1e-5) inside no_grad so the update itself is not tracked.
print(w, b)
with torch.no_grad():
    w.sub_(w.grad * 1e-5)
    b.sub_(b.grad * 1e-5)

tensor([[-0.2274,  1.8687, -1.4486],
        [ 2.3208,  1.0258,  0.5520]], requires_grad=True) tensor([2.3793, 0.7653], requires_grad=True)


In [19]:
# Display the weights and bias after the update
w,b

(tensor([[-0.2179,  1.8752, -1.4414],
         [ 2.1293,  0.8371,  0.4327]], requires_grad=True),
 tensor([2.3794, 0.7630], requires_grad=True))

#### We multiply the gradients with a very small number (10^-5 in this case) to ensure that we don't modify the weights by a very large amount. We want to take a small step in the downhill direction of the gradient, not a giant leap. This number is called the learning rate of the algorithm.

#### We use torch.no_grad to indicate to PyTorch that we shouldn't track, calculate, or modify gradients while updating the weights and biases.

In [20]:
# Recompute the predictions and the loss with the updated parameters
# to verify that the loss is actually lower
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(17702.3867, grad_fn=<DivBackward0>)


##### Before we proceed, we reset the gradients to zero by invoking the .zero_() method. We need to do this because PyTorch accumulates gradients. Otherwise, the next time we invoke .backward on the loss, the new gradient values are added to the existing gradients, which may lead to unexpected results.

In [23]:
# Zero the accumulated gradients in place, then print them to confirm
for param in (w, b):
    param.grad.zero_()
print(w.grad, b.grad, sep='\n')

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


# Training the model using Gradient Descent

In [24]:
# Step 1: generate predictions with the current weights and bias
preds = model(inputs)
print(preds)

tensor([[ 50.1290, 230.8917],
        [ 55.3163, 295.8844],
        [151.0957, 323.2788],
        [  7.4523, 269.9530],
        [ 66.4643, 258.3339]], grad_fn=<AddBackward0>)


In [25]:
# Step 2: calculate the loss of those predictions against the targets
loss = mse(preds, targets)
print(loss)

tensor(17702.3867, grad_fn=<DivBackward0>)


In [26]:
# Step 3: backpropagate, then show the gradients for the weights and the bias
loss.backward()
print(w.grad, b.grad, sep='\n')

tensor([[ -795.6608,  -497.1357,  -626.0848],
        [15881.8154, 15363.9932,  9759.9502]])
tensor([-10.1085, 183.6684])


In [27]:
# Step 4: move each parameter against its gradient (learning rate 1e-5),
# then zero that gradient so the next backward() starts from a clean slate
with torch.no_grad():
    for param in (w, b):
        param.sub_(param.grad * 1e-5)
        param.grad.zero_()

In [28]:
# Weights and bias after the second update
print(w, b, sep='\n')

tensor([[-0.2100,  1.8802, -1.4351],
        [ 1.9704,  0.6835,  0.3351]], requires_grad=True)
tensor([2.3795, 0.7612], requires_grad=True)


In [29]:
# Recalculate the loss with the updated weights and bias
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(12376.3613, grad_fn=<DivBackward0>)


# Training for multiple epochs

In [48]:
# Repeat the predict -> loss -> backward -> update -> reset cycle for 100 epochs
for epoch in range(100):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        for param in (w, b):
            param.sub_(param.grad * 1e-5)
            param.grad.zero_()

In [49]:
# Calculate the loss after the training loop
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(66.0910, grad_fn=<DivBackward0>)


In [50]:
# Predictions produced after the training loop
preds

tensor([[ 58.4366,  70.8894],
        [ 74.4872,  97.5370],
        [134.2136, 139.1536],
        [ 25.2869,  39.3087],
        [ 86.5071, 112.4957]], grad_fn=<AddBackward0>)

In [51]:
# Targets, for comparison with the predictions
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

# Linear Regression using PyTorch built-in

In [52]:
import torch.nn as nn

In [53]:
# Input rows hold (temp, rainfall, humidity); the float32 array is built
# and wrapped as a tensor in one step.
inputs = torch.from_numpy(np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
        [74, 66, 43],
        [91, 87, 65],
        [88, 134, 59],
        [101, 44, 37],
        [68, 96, 71],
        [73, 66, 44],
        [92, 87, 64],
        [87, 135, 57],
        [103, 43, 36],
        [68, 97, 70],
    ],
    dtype=np.float32,
))

# Target rows hold (apples, oranges), aligned row-for-row with `inputs`.
targets = torch.from_numpy(np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
        [57, 69],
        [80, 102],
        [118, 132],
        [21, 38],
        [104, 118],
        [57, 69],
        [82, 100],
        [118, 134],
        [20, 38],
        [102, 120],
    ],
    dtype=np.float32,
))

In [54]:
# Display the 15-row input tensor
inputs

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.],
        [ 74.,  66.,  43.],
        [ 91.,  87.,  65.],
        [ 88., 134.,  59.],
        [101.,  44.,  37.],
        [ 68.,  96.,  71.],
        [ 73.,  66.,  44.],
        [ 92.,  87.,  64.],
        [ 87., 135.,  57.],
        [103.,  43.,  36.],
        [ 68.,  97.,  70.]])

## Datasets and DataLoader

In [55]:
from torch.utils.data import TensorDataset

In [56]:
# Pair each input row with its target row, then peek at the first three pairs
train_ds = TensorDataset(inputs, targets)
train_ds[:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

The TensorDataset allows us to access a small section of the training data using the array indexing notation ([0:3] in the above code). It returns a tuple with two elements. The first element contains the input variables for the selected rows, and the second contains the targets.

We'll also create a DataLoader, which can split the data into batches of a predefined size while training. It also provides other utilities like shuffling and random sampling of the data.

In [57]:
from torch.utils.data import DataLoader

In [58]:
# Serve the dataset in shuffled mini-batches of five samples each
batch_size = 5
train_dl = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)

In [59]:
# Print only the first (inputs, targets) batch produced by the loader
for xb, yb in train_dl:
    print(xb, yb, sep='\n')
    break

tensor([[102.,  43.,  37.],
        [ 69.,  96.,  70.],
        [ 68.,  96.,  71.],
        [ 68.,  97.,  70.],
        [ 88., 134.,  59.]])
tensor([[ 22.,  37.],
        [103., 119.],
        [104., 118.],
        [102., 120.],
        [118., 132.]])


# nn.Linear
### Instead of initializing the weights & biases manually, we can define the model using the nn.Linear class from PyTorch, which does it automatically.

In [60]:
# Linear layer with 3 input features and 2 outputs; nn.Linear creates and
# initialises the weight matrix and bias vector itself
model = nn.Linear(in_features=3, out_features=2)
print(model.weight, model.bias, sep='\n')

Parameter containing:
tensor([[-0.4755,  0.4488,  0.3343],
        [-0.0903, -0.4431, -0.4009]], requires_grad=True)
Parameter containing:
tensor([ 0.4806, -0.1805], requires_grad=True)


PyTorch models also have a helpful .parameters method, which returns an iterator over all the weight and bias tensors present in the model (it is wrapped in list() below so the contents can be displayed). For our linear regression model, we have one weight matrix and one bias vector.

In [61]:
# Collect the model's parameters (weight and bias) into a list for display
list(model.parameters())

[Parameter containing:
 tensor([[-0.4755,  0.4488,  0.3343],
         [-0.0903, -0.4431, -0.4009]], requires_grad=True),
 Parameter containing:
 tensor([ 0.4806, -0.1805], requires_grad=True)]

In [62]:
# Generate predictions with the nn.Linear model before any training
preds = model(inputs)
preds

tensor([[ 10.2090, -53.7016],
        [ 18.0935, -73.0517],
        [ 38.6349, -90.6690],
        [-16.3581, -43.2804],
        [ 34.1512, -77.0151],
        [  9.2847, -53.3488],
        [ 17.9790, -73.0095],
        [ 38.4936, -91.1602],
        [-15.4338, -43.6332],
        [ 34.9610, -77.3257],
        [ 10.0945, -53.6594],
        [ 17.1692, -72.6989],
        [ 38.7494, -90.7112],
        [-17.1679, -42.9699],
        [ 35.0755, -77.3679]], grad_fn=<AddmmBackward>)

# Loss Function
### Instead of defining a loss function manually, we can use the built-in loss function mse_loss.

In [63]:
# Import nn.functional
import torch.nn.functional as F

In [64]:
# Define loss function: use the built-in mean squared error from torch.nn.functional
loss_fn = F.mse_loss

In [65]:
# Loss of the untrained model's predictions against the targets
loss = loss_fn(model(inputs), targets)
print(loss)

tensor(15912.9043, grad_fn=<MseLossBackward>)


## Optimizer

In [66]:
# Stochastic gradient descent over the model's parameters, learning rate 1e-5
opt = torch.optim.SGD(params=model.parameters(), lr=1e-5)

## Training the Model

In [67]:
# Utility function to train the model
def fit(num_epochs, model, loss_fn, opt, train_dl):
    """Train ``model`` with mini-batch gradient descent.

    Args:
        num_epochs: Number of full passes over ``train_dl``.
        model: Callable mapping a batch of inputs to predictions.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar tensor.
        opt: Optimizer holding the parameters to update; must provide
            ``step()`` and ``zero_grad()``.
        train_dl: Iterable yielding ``(xb, yb)`` batches.

    Prints the loss of the last batch seen in every 10th epoch. Nothing is
    printed for an epoch in which ``train_dl`` yielded no batches.
    """
    # Gradients accumulate across backward() calls. Discard anything left over
    # from code that ran before fit(), otherwise the first update would also
    # apply those stale gradients.
    opt.zero_grad()

    # Repeat for given number of epochs
    for epoch in range(num_epochs):
        # Stays None if the loader yields no batches, so the progress print
        # below cannot hit an undefined name.
        loss = None

        # Train with batches of data
        for xb, yb in train_dl:
            # 1. Generate predictions
            pred = model(xb)

            # 2. Calculate loss
            loss = loss_fn(pred, yb)

            # 3. Compute gradients
            loss.backward()

            # 4. Update parameters using gradients
            opt.step()

            # 5. Reset the gradients to zero
            opt.zero_grad()

        # Print the progress
        if loss is not None and (epoch + 1) % 10 == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

In [68]:
# Train the nn.Linear model for 100 epochs on the batched training data
fit(100, model, loss_fn, opt, train_dl)

Epoch [10/100], Loss: 359.4655
Epoch [20/100], Loss: 467.5421
Epoch [30/100], Loss: 215.8321
Epoch [40/100], Loss: 138.5650
Epoch [50/100], Loss: 102.1024
Epoch [60/100], Loss: 39.9891
Epoch [70/100], Loss: 43.9483
Epoch [80/100], Loss: 18.3223
Epoch [90/100], Loss: 43.1550
Epoch [100/100], Loss: 24.8045


In [69]:
# Generate predictions on the full input tensor after training
preds = model(inputs)
preds

tensor([[ 57.3986,  71.8694],
        [ 81.3312,  97.4459],
        [119.0937, 136.6812],
        [ 22.8314,  46.6983],
        [ 99.5592, 107.9231],
        [ 56.1773,  70.9225],
        [ 81.0449,  96.9149],
        [119.3170, 136.9957],
        [ 24.0527,  47.6452],
        [100.4942, 108.3391],
        [ 57.1123,  71.3385],
        [ 80.1099,  96.4990],
        [119.3801, 137.2122],
        [ 21.8964,  46.2823],
        [100.7805, 108.8700]], grad_fn=<AddmmBackward>)

In [70]:
# Display the targets to compare them with the predictions above
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 57.,  69.],
        [ 80., 102.],
        [118., 132.],
        [ 21.,  38.],
        [104., 118.],
        [ 57.,  69.],
        [ 82., 100.],
        [118., 134.],
        [ 20.,  38.],
        [102., 120.]])

Indeed, the predictions are quite close to our targets. We have trained a reasonably good model to predict crop yields for apples and oranges by looking at the average temperature, rainfall, and humidity in a region. We can use it to make predictions of crop yields for new regions by passing a batch containing a single row of input.

In [71]:
# Predict for one new observation: a batch with a single (temp, rainfall, humidity) row
model(torch.tensor([[75, 63, 44.]]))

tensor([[53.8040, 68.5485]], grad_fn=<AddmmBackward>)