In [1]:
import pandas as pd
import numpy as np
import torch



In [2]:
# Toy regression data: five samples, three input features each.
X = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
    ],
    dtype=np.float32,
)

# Matching targets: two output values per sample.
y = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
    ],
    dtype=np.float32,
)

X.shape, y.shape

((5, 3), (5, 2))

In [3]:
# Turn both NumPy arrays into float tensors, then rescale the targets by
# 1/100 in place. Only y is rescaled; X keeps its original magnitudes.
# NOTE(review): X and y are rebound here, so this cell is meant to run exactly
# once after the array-definition cell — re-run from the top rather than
# re-executing it on its own.
X, y = (torch.from_numpy(arr).float() for arr in (X, y))
y.div_(100)
X.dtype, y.dtype

(torch.float32, torch.float32)

In [4]:
# Randomly initialised parameters of the linear model.
# W has one row per target and one column per input feature; b has one
# entry per target. Both are tracked by autograd.
target_dim = 2
input_dim = X.shape[1]
W = torch.randn((target_dim, input_dim), requires_grad=True)
b = torch.randn((target_dim,), requires_grad=True)

In [5]:
# Show the initial weights followed by the initial bias.
print(W, b, sep="\n")

tensor([[0.1654, 1.1967, 0.7668],
        [2.5681, 0.1412, 0.4123]], requires_grad=True)
tensor([-1.1954,  0.4474], requires_grad=True)


In [6]:
def model(x):
    """Apply the linear model to `x` using the notebook-level `W` and `b`.

    Computes ``x @ W.T + b``. `x` must be a tensor whose last dimension
    matches the number of columns of `W`.
    """
    weighted = torch.matmul(x, W.t())
    return weighted + b

In [7]:
# Predictions from the randomly initialised parameters (no training yet).
preds = model(X)
preds

tensor([[124.0277, 215.1101],
        [168.2379, 272.9599],
        [218.0219, 266.7075],
        [ 95.5029, 283.7243],
        [178.7738, 220.0639]], grad_fn=<AddBackward0>)

In [8]:
# Targets (already divided by 100) for comparison with the predictions above.
y

tensor([[0.5600, 0.7000],
        [0.8100, 1.0100],
        [1.1900, 1.3300],
        [0.2200, 0.3700],
        [1.0300, 1.1900]])

In [9]:
def mse(t1, t2):
    """Return the mean squared error between two tensors.

    The element-wise difference ``t1 - t2`` is squared, summed, and divided
    by the number of elements in that difference.
    """
    residual = t1 - t2
    squared_total = (residual * residual).sum()
    return squared_total / residual.numel()

In [10]:
# loss = mse(preds, y)
# loss

In [11]:
# loss.backward()

In [12]:
# W.grad

In [13]:
# with torch.no_grad():
#     W -= W.grad * 1e-5
#     b -= b.grad * 1e-5

In [14]:
# loss = mse(preds, y)
# loss

In [15]:
# W.grad.zero_()
# b.grad.zero_()
# print(W.grad)
# print(b.grad)

In [16]:
# Inspect the inputs, the scaled targets and the current weights before training.
X, y, W

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.],
         [102.,  43.,  37.],
         [ 69.,  96.,  70.]]), tensor([[0.5600, 0.7000],
         [0.8100, 1.0100],
         [1.1900, 1.3300],
         [0.2200, 0.3700],
         [1.0300, 1.1900]]), tensor([[0.1654, 1.1967, 0.7668],
         [2.5681, 0.1412, 0.4123]], requires_grad=True))

In [17]:
# Plain full-batch gradient descent: 200 steps with a fixed step size of 1e-5.
for step in range(200):
    preds = model(X)
    loss = mse(preds, y)
    if step % 20 == 0:
        print(loss)  # progress report on every 20th step
    loss.backward()
    # Update the parameters without recording the update in the autograd
    # graph, and reset each gradient so the next backward() starts from zero.
    with torch.no_grad():
        W -= 1e-5 * W.grad
        W.grad.zero_()
        b -= 1e-5 * b.grad
        b.grad.zero_()
    

tensor(44951.3125, grad_fn=<DivBackward0>)
tensor(960.2449, grad_fn=<DivBackward0>)
tensor(732.9926, grad_fn=<DivBackward0>)
tensor(569.3517, grad_fn=<DivBackward0>)
tensor(442.4097, grad_fn=<DivBackward0>)
tensor(343.9268, grad_fn=<DivBackward0>)
tensor(267.5171, grad_fn=<DivBackward0>)
tensor(208.2276, grad_fn=<DivBackward0>)
tensor(162.2172, grad_fn=<DivBackward0>)
tensor(126.5065, grad_fn=<DivBackward0>)


In [18]:
# Loss on the training data with the parameters left by the loop above.
mse(model(X), y)

tensor(98.7849, grad_fn=<DivBackward0>)

In [19]:
# Predictions with the updated parameters; compare against the scaled targets y.
model(X)

tensor([[-1.3580,  4.9597],
        [ 0.9897,  2.7195],
        [ 3.9040, -9.4076],
        [-9.6878, 24.5777],
        [ 6.8503, -9.8714]], grad_fn=<AddBackward0>)

In [20]:
# Weights after the gradient-descent loop.
W

tensor([[-0.1858,  0.0730,  0.1980],
        [ 0.4215, -0.2548, -0.2132]], requires_grad=True)

In [21]:
# Bias after the gradient-descent loop.
b

tensor([-1.2016,  0.4268], requires_grad=True)

In [22]:
# Recompute predictions and loss with the current parameters and keep them in
# `preds` / `loss` (same quantity as the mse(model(X), y) cell above).
preds = model(X)
loss = mse(preds, y)
print(loss)

tensor(98.7849, grad_fn=<DivBackward0>)


In [23]:
import torch.nn as nn

In [24]:
X = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70], 
                   [74, 66, 43], 
                   [91, 87, 65], 
                   [88, 134, 59], 
                   [101, 44, 37], 
                   [68, 96, 71], 
                   [73, 66, 44], 
                   [92, 87, 64], 
                   [87, 135, 57], 
                   [103, 43, 36], 
                   [68, 97, 70]], 
                  dtype='float32')

y = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119],
                    [57, 69], 
                    [80, 102], 
                    [118, 132], 
                    [21, 38], 
                    [104, 118], 
                    [57, 69], 
                    [82, 100], 
                    [118, 134], 
                    [20, 38], 
                    [102, 120]], 
                   dtype='float32')

X = torch.from_numpy(X)
y = torch.from_numpy(y)

In [25]:
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader


In [26]:
# Pair inputs with targets; indexing the dataset returns an (inputs, targets)
# tuple for the selected rows, as the first three rows below show.
train_ds = TensorDataset(X, y)
train_ds[:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]), tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [27]:
# Serve the dataset in shuffled mini-batches of five samples.
batch_size = 5
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

In [28]:
# Peek at a single mini-batch: print its inputs, then its targets, and stop.
for xb, yb in train_dl:
    print(xb, yb, sep="\n")
    break

tensor([[ 91.,  88.,  64.],
        [ 73.,  66.,  44.],
        [103.,  43.,  36.],
        [ 74.,  66.,  43.],
        [ 91.,  87.,  65.]])
tensor([[ 81., 101.],
        [ 57.,  69.],
        [ 20.,  38.],
        [ 57.,  69.],
        [ 80., 102.]])


In [29]:
# Linear layer mapping 3 input features to 2 outputs.
# NOTE: from here on `model` is this nn.Linear module, no longer the
# hand-written model() function defined earlier in the notebook.
model = nn.Linear(in_features=3, out_features=2)
print(model.weight, model.bias, sep="\n")


Parameter containing:
tensor([[-0.2769, -0.0572,  0.4659],
        [ 0.1356,  0.2986, -0.0422]], requires_grad=True)
Parameter containing:
tensor([-0.1357, -0.1884], requires_grad=True)


In [30]:
# All trainable parameters of the layer: the weight matrix, then the bias.
list(model.parameters())

[Parameter containing:
 tensor([[-0.2769, -0.0572,  0.4659],
         [ 0.1356,  0.2986, -0.0422]], requires_grad=True),
 Parameter containing:
 tensor([-0.1357, -0.1884], requires_grad=True)]

In [31]:
# Predictions of the freshly initialised nn.Linear layer on the full dataset.
preds = model(X)
preds

tensor([[-4.1506e+00,  2.7900e+01],
        [-5.5304e-01,  3.5724e+01],
        [-4.8733e+00,  4.9170e+01],
        [-1.3602e+01,  2.4919e+01],
        [ 7.8761e+00,  3.4877e+01],
        [-4.3702e+00,  2.7737e+01],
        [-2.9937e-02,  3.5384e+01],
        [-4.6843e+00,  4.9263e+01],
        [-1.3383e+01,  2.5082e+01],
        [ 8.6188e+00,  3.4699e+01],
        [-3.6275e+00,  2.7559e+01],
        [-7.7271e-01,  3.5561e+01],
        [-5.3964e+00,  4.9511e+01],
        [-1.4345e+01,  2.5096e+01],
        [ 8.0957e+00,  3.5040e+01]], grad_fn=<AddmmBackward>)

In [32]:
import torch.nn.functional as F

In [33]:
# Use the built-in mean-squared-error loss and evaluate it on the untrained
# layer to get a starting value.
loss_fn = F.mse_loss
loss = loss_fn(model(X), y)
loss

tensor(5598.3745, grad_fn=<MseLossBackward>)

In [34]:
# Stochastic gradient descent over the layer's parameters with learning rate 1e-5.
opt = torch.optim.SGD(model.parameters(), lr=1e-5)

In [35]:
def fit(num_epochs, model, loss_f, optimiser, train_loader):
    """Train `model` with mini-batch gradient descent.

    Args:
        num_epochs: Number of full passes over `train_loader`.
        model: Callable mapping a batch of inputs to predictions.
        loss_f: Callable ``loss_f(pred, target)`` returning a scalar tensor.
        optimiser: Optimiser holding the model's parameters; must provide
            ``step()`` and ``zero_grad()``.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.

    Every 10th epoch the loss of the *last batch seen in that epoch* is
    printed (it is not an average over the epoch, so it can fluctuate from
    one report to the next). Nothing is printed for an epoch in which the
    loader produced no batches.
    """
    # Start from clean gradients: anything left on the parameters by code run
    # before fit() would otherwise be added into the first update.
    optimiser.zero_grad()

    for epoch in range(num_epochs):
        # Stays None if the loader is empty, so the report below cannot hit
        # an unbound `loss`.
        loss = None

        for xb, yb in train_loader:
            pred = model(xb)
            loss = loss_f(pred, yb)
            loss.backward()
            optimiser.step()
            # Reset so gradients of the next batch do not accumulate.
            optimiser.zero_grad()

        if (epoch + 1) % 10 == 0 and loss is not None:
            print("epoch {}, loss {}".format(epoch + 1, loss.item()))

In [36]:
# Train for 100 epochs over the shuffled mini-batches; fit() reports the most
# recent batch loss on every 10th epoch.
fit(100, model, loss_fn, opt, train_dl)

epoch 10, loss 254.0002899169922
epoch 20, loss 215.2991485595703
epoch 30, loss 55.84253692626953
epoch 40, loss 74.88805389404297
epoch 50, loss 44.941749572753906
epoch 60, loss 41.134117126464844
epoch 70, loss 41.69641876220703
epoch 80, loss 43.10174560546875
epoch 90, loss 26.26836585998535
epoch 100, loss 30.720767974853516
