In [1]:
! python --version

Python 3.10.2


In [2]:
# importing libraries
import torch 

# using cube if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"device = {device}")

device = cpu


# Gradient Descent and Linear Regression

In [3]:
# imports
import numpy as np

In [4]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In [5]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [6]:
# Weights and biases
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[-0.7633, -0.3882, -0.4263],
        [ 1.7489, -0.0851,  0.6030]], requires_grad=True)
tensor([-1.3949, -0.1030], requires_grad=True)


In [7]:
def model(x):
    # w.t() for transpose
    # b is broadcasted
    # @ matrix multipliaction
    return x @ w.t() + b

In [8]:
preds = model(inputs)
print(preds)

tensor([[-101.4544,  147.7909],
        [-132.2981,  190.1463],
        [-144.5430,  175.6177],
        [-111.7154,  196.9325],
        [-121.1693,  154.6086]], grad_fn=<AddBackward0>)


In [9]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [10]:
# loss function: MSE
def mse(t1, t2):
    diff = t1 - t2 
    return torch.sum(diff * diff) / diff.numel()

In [11]:
# compute loss
loss = mse(preds, targets)
print(loss)

tensor(25053.5684, grad_fn=<DivBackward0>)


In [12]:
loss.backward()

In [13]:
# compute gradient
print(w)
print(w.grad)

tensor([[-0.7633, -0.3882, -0.4263],
        [ 1.7489, -0.0851,  0.6030]], requires_grad=True)
tensor([[-16587.8379, -18380.8906, -11269.2871],
        [  7253.7803,   5812.6323,   3986.4609]])


In [14]:
with torch.no_grad(): # scope where no grad is calculated
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5 


In [15]:
loss = mse(preds, targets)
print(loss)

tensor(25053.5684, grad_fn=<DivBackward0>)


In [16]:
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


In [17]:
# Train for 100 epochs
for i in range(1000):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

In [18]:
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(3.4934, grad_fn=<DivBackward0>)


In [19]:
torch.abs(preds - targets)

tensor([[1.0997, 0.4497],
        [0.0299, 1.3967],
        [2.4708, 2.1584],
        [0.0653, 0.8305],
        [3.8984, 2.2172]], grad_fn=<AbsBackward0>)

In [20]:
preds, targets

(tensor([[ 57.0997,  70.4497],
         [ 81.0299,  99.6033],
         [121.4708, 135.1584],
         [ 21.9347,  37.8305],
         [ 99.1016, 116.7828]], grad_fn=<AddBackward0>),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.],
         [ 22.,  37.],
         [103., 119.]]))

In [21]:
import torch.nn as nn

In [22]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70], 
                   [74, 66, 43], 
                   [91, 87, 65], 
                   [88, 134, 59], 
                   [101, 44, 37], 
                   [68, 96, 71], 
                   [73, 66, 44], 
                   [92, 87, 64], 
                   [87, 135, 57], 
                   [103, 43, 36], 
                   [68, 97, 70]], 
                  dtype='float32')

# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119],
                    [57, 69], 
                    [80, 102], 
                    [118, 132], 
                    [21, 38], 
                    [104, 118], 
                    [57, 69], 
                    [82, 100], 
                    [118, 134], 
                    [20, 38], 
                    [102, 120]], 
                   dtype='float32')

inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

In [23]:
from torch.utils.data import TensorDataset

# define TensorDataset for loading data
train_ds = TensorDataset(inputs, targets)
train_ds[0: 3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [24]:
from torch.utils.data import DataLoader

# DataLoader for batch
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)

In [25]:
for xb, yb in train_dl:
    print(xb)
    print(yb)
    break

tensor([[ 88., 134.,  59.],
        [ 87., 134.,  58.],
        [101.,  44.,  37.],
        [ 91.,  88.,  64.],
        [ 69.,  96.,  70.]])
tensor([[118., 132.],
        [119., 133.],
        [ 21.,  38.],
        [ 81., 101.],
        [103., 119.]])


In [26]:
# Define model
model = nn.Linear(3, 2)
print(model)

Linear(in_features=3, out_features=2, bias=True)


In [27]:
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[ 0.4895, -0.3942, -0.3062],
        [ 0.5481, -0.5058,  0.2986]], requires_grad=True)
Parameter containing:
tensor([ 0.2135, -0.5437], requires_grad=True)


In [28]:
list(model.parameters())

[Parameter containing:
 tensor([[ 0.4895, -0.3942, -0.3062],
         [ 0.5481, -0.5058,  0.2986]], requires_grad=True),
 Parameter containing:
 tensor([ 0.2135, -0.5437], requires_grad=True)]

In [29]:
preds = model(inputs)
preds

tensor([[ -3.6301,  18.4135],
        [ -9.5267,  23.9262],
        [-27.7789,  -3.3245],
        [ 21.8609,  44.6554],
        [-25.2849,   9.6137],
        [ -2.7465,  19.4674],
        [ -9.4387,  24.7306],
        [-27.5956,  -2.4779],
        [ 20.9773,  43.6015],
        [-26.0806,   9.3642],
        [ -3.5421,  19.2179],
        [ -8.6431,  24.9801],
        [-27.8669,  -4.1288],
        [ 22.6565,  44.9049],
        [-26.1686,   8.5598]], grad_fn=<AddmmBackward0>)

In [30]:
# Import nn.functional
import torch.nn.functional as F

# Define loss function
loss_fn = F.mse_loss

loss = loss_fn(model(inputs), targets)
print(loss)

tensor(8874.5498, grad_fn=<MseLossBackward0>)


In [31]:
# Define optimizer
opt = torch.optim.SGD(model.parameters(), lr=1e-5)

In [32]:
# Utility function to train the model
def fit(num_epochs, model, loss_fn, opt, train_dl):
    # Repeat for the number of epochs
    for epoch in range(num_epochs):
        # Train with batches of data
        for xb, yb in train_dl:
            # 1. Genrate Prediction
            pred = model(xb)
            # 2. Calculate loss
            loss = loss_fn(pred, yb)
            # 3. Computing Gradients
            loss.backward()
            # 4. Update parameters using gradient
            opt.step()
            # 5. Reset the gradient to zero
            opt.zero_grad()
        # Print the progres
        if (epoch + 1) % 10 == 0:
            print(f"Epoch[{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")

In [36]:
fit(100, model, loss_fn, opt, train_dl)

Epoch[10/100], Loss: 12.5337
Epoch[20/100], Loss: 7.7559
Epoch[30/100], Loss: 8.8463
Epoch[40/100], Loss: 5.2823
Epoch[50/100], Loss: 9.9194
Epoch[60/100], Loss: 7.0625
Epoch[70/100], Loss: 0.9762
Epoch[80/100], Loss: 3.4159
Epoch[90/100], Loss: 7.6235
Epoch[100/100], Loss: 3.6276


In [37]:
preds = model(inputs)
with torch.no_grad():
    print(torch.abs(preds - targets))

tensor([[1.1868, 0.4079],
        [0.8272, 1.0665],
        [2.8121, 1.1043],
        [0.0353, 1.3341],
        [4.9853, 1.9947],
        [1.0943, 0.3255],
        [0.3050, 2.0900],
        [3.9225, 2.6485],
        [2.2459, 1.4165],
        [5.1819, 0.0643],
        [0.2909, 1.3845],
        [3.1084, 1.1489],
        [4.2899, 0.1277],
        [1.1613, 0.7248],
        [2.7041, 1.9123]])


In [38]:
model(torch.tensor([[75, 63, 44.]]))

tensor([[53.2120, 67.3947]], grad_fn=<AddmmBackward0>)