# Dependencies

In [24]:
import numpy as np

import torch
from torch.optim import SGD
from torch.nn import Linear, MSELoss

import matplotlib.pyplot as plt

from sklearn import datasets

# Linear Regression

## Implementation 1
<ul>
    <li style="font-family: consolas;">prediction : <span style="color: red">Manual</span></li>
    <li style="font-family: consolas;">gradient &nbsp;&nbsp;: <span style="color: red">Manual</span></li>
    <li style="font-family: consolas;">loss &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: red">Manual</span></li>
    <li style="font-family: consolas;">update &nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: red">Manual</span></li>
    
</ul>

In [25]:
# Toy dataset sampled from the target function f(x) = 2x.
train_x = np.arange(1, 6, dtype=np.float32)
train_y = 2 * train_x

# The only trainable parameter; training starts from zero.
w = 0.0

# feed-forward
def forward(x):
    """Return the prediction w * x, reading the current global weight `w`."""
    prediction = w * x
    return prediction

# MSE loss
def loss(y_pred, train_y):
    """Mean of the squared differences between `y_pred` and `train_y`."""
    squared_error = (y_pred - train_y) ** 2
    return squared_error.mean()

# backward
def gradient(x, y=None):
    """Return dL/dw of the MSE loss for the model f(x) = w * x.

    Derivation:
        L     = 1/N * sum((w*x - y) ** 2)
        dL/dw = 1/N * sum(2 * x * (w*x - y))

    Args:
        x: input samples.
        y: targets matching `x`. Defaults to the global `train_y`, so
            existing one-argument calls behave exactly as before.

    Returns:
        The gradient averaged over all samples, evaluated at the current
        global weight `w`.
    """
    if y is None:
        y = train_y
    return (2 * x * (w * x - y)).mean()

# hyper parameters
lr = 0.01
epoch = 20

for i in range(epoch):

    # forward pass on the training set
    y_pred = forward(train_x)

    # loss (used for logging only) and the hand-derived gradient
    l = loss(y_pred, train_y)
    dw = gradient(train_x)

    # Keep the pre-update weight and the applied step for the log.
    # Reconstructing the old weight afterwards as `w + lr * dw` repeats the
    # update rule inside the log line and is subject to floating-point round-off.
    w_old = w
    step = lr * dw

    # update parameters (plain gradient descent)
    w -= step

    # test on an input outside the training set; the target value is f(6) = 12
    y_pred = forward(6)

    # log
    print(f"epoch: {i+1:>2} -> f(6)={y_pred:>7.3f} | loss={l:>9.5f} | w_old= {w_old:.3f} | step= {step:.5f} | w_new= {w:.3f}")

epoch:  1 -> f(6)=  2.640 | loss= 44.00000 | w_old= 0.000 | step= -0.44000 | w_new= 0.440
epoch:  2 -> f(6)=  4.699 | loss= 26.76960 | w_old= 0.440 | step= -0.34320 | w_new= 0.783
epoch:  3 -> f(6)=  6.305 | loss= 16.28662 | w_old= 0.783 | step= -0.26770 | w_new= 1.051
epoch:  4 -> f(6)=  7.558 | loss=  9.90878 | w_old= 1.051 | step= -0.20880 | w_new= 1.260
epoch:  5 -> f(6)=  8.535 | loss=  6.02850 | w_old= 1.260 | step= -0.16287 | w_new= 1.423
epoch:  6 -> f(6)=  9.298 | loss=  3.66774 | w_old= 1.423 | step= -0.12704 | w_new= 1.550
epoch:  7 -> f(6)=  9.892 | loss=  2.23145 | w_old= 1.550 | step= -0.09909 | w_new= 1.649
epoch:  8 -> f(6)= 10.356 | loss=  1.35762 | w_old= 1.649 | step= -0.07729 | w_new= 1.726
epoch:  9 -> f(6)= 10.718 | loss=  0.82597 | w_old= 1.726 | step= -0.06029 | w_new= 1.786
epoch: 10 -> f(6)= 11.000 | loss=  0.50252 | w_old= 1.786 | step= -0.04702 | w_new= 1.833
epoch: 11 -> f(6)= 11.220 | loss=  0.30573 | w_old= 1.833 | step= -0.03668 | w_new= 1.870
epoch: 12 

## Implementation 2
<ul>
    <li style="font-family: consolas;">prediction : <span style="color: red">Manual</span></li>
    <li style="font-family: consolas;">gradient &nbsp;&nbsp;: <span style="color: cyan">torch</span></li>
    <li style="font-family: consolas;">loss &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: red">Manual</span></li>
    <li style="font-family: consolas;">update &nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: red">Manual</span></li>
</ul>

In [26]:
# Toy dataset sampled from f(x) = 2x, held in torch tensors.
train_x = torch.arange(1, 6, dtype=torch.float32)
train_y = 2 * train_x

# Scalar weight starting at zero; requires_grad=True lets autograd compute dl/dw.
w = torch.zeros((), dtype=torch.float32, requires_grad=True)

# feed-forward
def forward(x):
    """Return the prediction w * x, reading the current global weight tensor `w`."""
    prediction = w * x
    return prediction

# MSE loss
def loss(y_pred, train_y):
    """Mean of the squared differences between `y_pred` and `train_y`."""
    squared_error = (y_pred - train_y) ** 2
    return squared_error.mean()

# hyper parameters
lr = 0.01
epoch = 20

for i in range(epoch):

    # forward
    y_pred = forward(train_x)

    # backward: autograd stores dl/dw in w.grad
    # (arguments follow the declared order: predictions first, then targets)
    l = loss(y_pred, train_y)
    l.backward()

    # Capture the pre-update weight and the applied step for the log, rather
    # than reconstructing the old weight afterwards as `w + lr * w.grad`.
    w_old = w.item()
    step = (lr * w.grad).item()

    with torch.no_grad():
        # update parameters; no_grad keeps the in-place update out of the graph
        w -= lr * w.grad

        # test: inference only, so no autograd graph is needed
        y_pred = forward(6)

    # log
    print(f"epoch: {i+1:>2} -> f(6)={y_pred:>7.3f} | loss={l:>9.5f} | w_old= {w_old:.3f} | step= {step:.5f} | w_new= {w:.3f}")

    # remove previous gradients (backward() accumulates into w.grad)
    w.grad.zero_()

epoch:  1 -> f(6)=  2.640 | loss= 44.00000 | w_old= 0.000 | step= -0.44000 | w_new= 0.440
epoch:  2 -> f(6)=  4.699 | loss= 26.76960 | w_old= 0.440 | step= -0.34320 | w_new= 0.783
epoch:  3 -> f(6)=  6.305 | loss= 16.28662 | w_old= 0.783 | step= -0.26770 | w_new= 1.051
epoch:  4 -> f(6)=  7.558 | loss=  9.90878 | w_old= 1.051 | step= -0.20880 | w_new= 1.260
epoch:  5 -> f(6)=  8.535 | loss=  6.02850 | w_old= 1.260 | step= -0.16287 | w_new= 1.423
epoch:  6 -> f(6)=  9.298 | loss=  3.66774 | w_old= 1.423 | step= -0.12704 | w_new= 1.550
epoch:  7 -> f(6)=  9.892 | loss=  2.23145 | w_old= 1.550 | step= -0.09909 | w_new= 1.649
epoch:  8 -> f(6)= 10.356 | loss=  1.35762 | w_old= 1.649 | step= -0.07729 | w_new= 1.726
epoch:  9 -> f(6)= 10.718 | loss=  0.82597 | w_old= 1.726 | step= -0.06029 | w_new= 1.786
epoch: 10 -> f(6)= 11.000 | loss=  0.50252 | w_old= 1.786 | step= -0.04702 | w_new= 1.833
epoch: 11 -> f(6)= 11.220 | loss=  0.30573 | w_old= 1.833 | step= -0.03668 | w_new= 1.870
epoch: 12 

## Implementation 3
<ul>
    <li style="font-family: consolas;">prediction : <span style="color: red">Manual</span></li>
    <li style="font-family: consolas;">gradient &nbsp;&nbsp;: <span style="color: cyan">torch</span></li>
    <li style="font-family: consolas;">loss &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: cyan">torch</span></li>
    <li style="font-family: consolas;">update &nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: cyan">torch</span></li>
</ul>

In [27]:
# Toy dataset sampled from f(x) = 2x, held in torch tensors.
train_x = torch.arange(1, 6, dtype=torch.float32)
train_y = 2 * train_x

# Scalar weight starting at zero; requires_grad=True lets autograd compute dl/dw.
w = torch.zeros((), dtype=torch.float32, requires_grad=True)

# feed-forward
def forward(x):
    """Return the prediction w * x, reading the current global weight tensor `w`."""
    prediction = w * x
    return prediction

# hyper parameters
lr = 0.01
epoch = 20
loss = MSELoss()
optimizer = SGD([w], lr=lr)

for i in range(epoch):

    # forward
    y_pred = forward(train_x)

    # backward
    l = loss(y_pred, train_y)
    l.backward()

    # Capture the pre-update weight and the applied step for the log while the
    # gradient is still available. The log then neither reverses the update
    # nor depends on w.grad surviving until after the optimizer calls.
    w_old = w.item()
    step = (lr * w.grad).item()

    # update parameters, then clear the gradient for the next iteration
    optimizer.step()
    optimizer.zero_grad()

    # test: inference only, so no autograd graph is needed
    with torch.no_grad():
        y_pred = forward(6)

    # log
    print(f"epoch: {i+1:>2} -> f(6)={y_pred:>7.3f} | loss={l:>9.5f} | w_old= {w_old:.3f} | step= {step:.5f} | w_new= {w:.3f}")

epoch:  1 -> f(6)=  2.640 | loss= 44.00000 | w_old= 0.000 | step= -0.44000 | w_new= 0.440
epoch:  2 -> f(6)=  4.699 | loss= 26.76960 | w_old= 0.440 | step= -0.34320 | w_new= 0.783
epoch:  3 -> f(6)=  6.305 | loss= 16.28662 | w_old= 0.783 | step= -0.26770 | w_new= 1.051
epoch:  4 -> f(6)=  7.558 | loss=  9.90878 | w_old= 1.051 | step= -0.20880 | w_new= 1.260
epoch:  5 -> f(6)=  8.535 | loss=  6.02850 | w_old= 1.260 | step= -0.16287 | w_new= 1.423
epoch:  6 -> f(6)=  9.298 | loss=  3.66774 | w_old= 1.423 | step= -0.12704 | w_new= 1.550
epoch:  7 -> f(6)=  9.892 | loss=  2.23145 | w_old= 1.550 | step= -0.09909 | w_new= 1.649
epoch:  8 -> f(6)= 10.356 | loss=  1.35762 | w_old= 1.649 | step= -0.07729 | w_new= 1.726
epoch:  9 -> f(6)= 10.718 | loss=  0.82597 | w_old= 1.726 | step= -0.06029 | w_new= 1.786
epoch: 10 -> f(6)= 11.000 | loss=  0.50252 | w_old= 1.786 | step= -0.04702 | w_new= 1.833
epoch: 11 -> f(6)= 11.220 | loss=  0.30573 | w_old= 1.833 | step= -0.03668 | w_new= 1.870
epoch: 12 

## Implementation 4
<ul>
    <li style="font-family: consolas;">prediction : <span style="color: cyan">torch</span></li>
    <li style="font-family: consolas;">gradient &nbsp;&nbsp;: <span style="color: cyan">torch</span></li>
    <li style="font-family: consolas;">loss &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: cyan">torch</span></li>
    <li style="font-family: consolas;">update &nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: cyan">torch</span></li>
</ul>

In [28]:
# f(x) = 2x

# row: num of samples - column: num of features
train_x = torch.tensor([[1], [2], [3], [4], [5]], dtype=torch.float32)
train_y = torch.tensor([[2], [4], [6], [8], [10]], dtype=torch.float32)

# one input feature -> one output, no bias term
model = Linear(train_x.shape[1], train_y.shape[1], bias=False)

# initial weight
with torch.no_grad():
    model.weight.fill_(0.0)

# hyper parameters
lr = 0.01
epoch = 20
loss = MSELoss()
optimizer = SGD(model.parameters(), lr=lr)

# fixed test input, built once instead of on every epoch
test_x = torch.tensor([[6]], dtype=torch.float32)

for i in range(epoch):

    # forward
    y_pred = model(train_x)

    # backward
    l = loss(y_pred, train_y)
    l.backward()

    # Capture the pre-update weight and the applied step for the log.
    # Reconstructing the old weight afterwards as `weight + lr * grad` suffers
    # from round-off (it printed "-0.000" for the first epoch).
    w_old = model.weight.item()
    step = lr * model.weight.grad.item()

    # update parameters, then clear the gradient for the next iteration
    optimizer.step()
    optimizer.zero_grad()

    # test: inference only, so no autograd graph is needed
    with torch.no_grad():
        y_pred = model(test_x)

    # log
    print(f"epoch: {i+1:>2} -> f(6)={y_pred.item():>7.3f} | loss={l:>9.5f} | w_old= {w_old:>6.3f} | step= {step:.5f} | w_new= {model.weight.item():.3f}")

epoch:  1 -> f(6)=  2.640 | loss= 44.00000 | w_old= -0.000 | step= -0.44000 | w_new= 0.440
epoch:  2 -> f(6)=  4.699 | loss= 26.76960 | w_old=  0.440 | step= -0.34320 | w_new= 0.783
epoch:  3 -> f(6)=  6.305 | loss= 16.28662 | w_old=  0.783 | step= -0.26770 | w_new= 1.051
epoch:  4 -> f(6)=  7.558 | loss=  9.90878 | w_old=  1.051 | step= -0.20880 | w_new= 1.260
epoch:  5 -> f(6)=  8.535 | loss=  6.02850 | w_old=  1.260 | step= -0.16287 | w_new= 1.423
epoch:  6 -> f(6)=  9.298 | loss=  3.66774 | w_old=  1.423 | step= -0.12704 | w_new= 1.550
epoch:  7 -> f(6)=  9.892 | loss=  2.23145 | w_old=  1.550 | step= -0.09909 | w_new= 1.649
epoch:  8 -> f(6)= 10.356 | loss=  1.35762 | w_old=  1.649 | step= -0.07729 | w_new= 1.726
epoch:  9 -> f(6)= 10.718 | loss=  0.82597 | w_old=  1.726 | step= -0.06029 | w_new= 1.786
epoch: 10 -> f(6)= 11.000 | loss=  0.50252 | w_old=  1.786 | step= -0.04702 | w_new= 1.833
epoch: 11 -> f(6)= 11.220 | loss=  0.30573 | w_old=  1.833 | step= -0.03668 | w_new= 1.870

## Implementation 5 [with custom model]
<ul>
    <li style="font-family: consolas;">prediction : <span style="color: cyan">torch</span></li>
    <li style="font-family: consolas;">gradient &nbsp;&nbsp;: <span style="color: cyan">torch</span></li>
    <li style="font-family: consolas;">loss &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: cyan">torch</span></li>
    <li style="font-family: consolas;">update &nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: cyan">torch</span></li>
</ul>

In [29]:
# Toy dataset sampled from f(x) = 2x.

# row: num of samples - column: num of features
train_x = torch.arange(1, 6, dtype=torch.float32).reshape(-1, 1)
train_y = 2 * train_x

# custom model
class LinearRegression(torch.nn.Module):
    """Wraps a single bias-free `Linear` layer whose weight starts at zero."""

    def __init__(self, input_dim, output_dim):
        super().__init__()

        layer = Linear(input_dim, output_dim, bias=False)

        # initial weight: overwrite the layer's default initialisation with zeros
        torch.nn.init.zeros_(layer.weight)

        self.regressor = layer

    def forward(self, x):
        """Apply the linear layer to `x`."""
        return self.regressor(x)

model = LinearRegression(train_x.shape[1], train_y.shape[1])

# hyper parameters
lr = 0.01
epoch = 20
loss = MSELoss()
optimizer = SGD(model.parameters(), lr=lr)

# fixed test input, built once instead of on every epoch
test_x = torch.tensor([[6]], dtype=torch.float32)

# the single trainable parameter, referenced directly for logging
weight = model.regressor.weight

for i in range(epoch):

    # forward
    y_pred = model(train_x)

    # backward
    l = loss(y_pred, train_y)
    l.backward()

    # Capture the pre-update weight and the applied step for the log.
    # Reconstructing the old weight afterwards as `weight + lr * grad` suffers
    # from round-off (it printed "-0.000" for the first epoch).
    w_old = weight.item()
    step = lr * weight.grad.item()

    # update parameters, then clear the gradient for the next iteration
    optimizer.step()
    optimizer.zero_grad()

    # test: inference only, so no autograd graph is needed
    with torch.no_grad():
        y_pred = model(test_x)

    # log
    print(f"epoch: {i+1:>2} -> f(6)={y_pred.item():>7.3f} | loss={l:>9.5f} | w_old= {w_old:>6.3f} | step= {step:.5f} | w_new= {weight.item():.3f}")

epoch:  1 -> f(6)=  2.640 | loss= 44.00000 | w_old= -0.000 | step= -0.44000 | w_new= 0.440
epoch:  2 -> f(6)=  4.699 | loss= 26.76960 | w_old=  0.440 | step= -0.34320 | w_new= 0.783
epoch:  3 -> f(6)=  6.305 | loss= 16.28662 | w_old=  0.783 | step= -0.26770 | w_new= 1.051
epoch:  4 -> f(6)=  7.558 | loss=  9.90878 | w_old=  1.051 | step= -0.20880 | w_new= 1.260
epoch:  5 -> f(6)=  8.535 | loss=  6.02850 | w_old=  1.260 | step= -0.16287 | w_new= 1.423
epoch:  6 -> f(6)=  9.298 | loss=  3.66774 | w_old=  1.423 | step= -0.12704 | w_new= 1.550
epoch:  7 -> f(6)=  9.892 | loss=  2.23145 | w_old=  1.550 | step= -0.09909 | w_new= 1.649
epoch:  8 -> f(6)= 10.356 | loss=  1.35762 | w_old=  1.649 | step= -0.07729 | w_new= 1.726
epoch:  9 -> f(6)= 10.718 | loss=  0.82597 | w_old=  1.726 | step= -0.06029 | w_new= 1.786
epoch: 10 -> f(6)= 11.000 | loss=  0.50252 | w_old=  1.786 | step= -0.04702 | w_new= 1.833
epoch: 11 -> f(6)= 11.220 | loss=  0.30573 | w_old=  1.833 | step= -0.03668 | w_new= 1.870

## Plot

In [30]:
# generate artificial data: a noisy single-feature regression problem
n_samples = 100
n_features = 1
x, y = datasets.make_regression(n_samples, n_features, noise=5, random_state=42)

# convert numpy.ndarray to torch.Tensor (float32)
x_f32 = x.astype(np.float32)
y_f32 = y.astype(np.float32)
train_x = torch.from_numpy(x_f32)
# targets become a column so they line up with a (samples, 1) model output
train_y = torch.from_numpy(y_f32).view(-1, 1)

In [31]:
# custom model
class LinearRegression(torch.nn.Module):
    """Single bias-free linear layer, exposed as `self.node`."""

    def __init__(self, input_dim, output_dim) -> None:
        super().__init__()
        self.node = torch.nn.Linear(input_dim, output_dim, bias=False)

    def forward(self, x):
        """Apply the linear layer to `x`."""
        prediction = self.node(x)
        return prediction
    
# one weight per input feature, a single regression output
model = LinearRegression(input_dim=n_features, output_dim=1)

In [32]:
# plot stuff: sample the loss as a function of the weight

# Create the criterion here so this cell does not depend on a `loss` object
# left behind by a different cell.
loss = torch.nn.MSELoss()

W = torch.linspace(-100, 100, 500)
L = torch.zeros_like(W)  # one loss value per sampled weight

# Pure evaluation: nothing in the sweep needs autograd.
with torch.no_grad():
    for i, val in enumerate(W):
        model.node.weight.fill_(val)
        L[i] = loss(model(train_x), train_y)

# NOTE: the sweep leaves the model weight at the last value of W.

# snapshots collected during training (filled by the training loop)
state = []

In [33]:
# initial weight
with torch.no_grad():
    model.node.weight.fill_(-25)

# hyper parameters
epoch = 21
lr = 0.05
snapshot_every = 5  # save the model state every 5 epochs
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
loss = torch.nn.MSELoss()

# Start from an empty history so that re-running this cell does not append
# duplicate snapshots to the list used for plotting.
state = []

for i in range(epoch):

    # forward
    y_pred = model(train_x)

    # backward
    l = loss(y_pred, train_y)
    l.backward()

    # Snapshot taken BEFORE the update, so the stored weight, loss and
    # predictions all belong to the same model state.
    # Entry layout: [epoch, weight, loss, predictions]
    if i % snapshot_every == 0:
        state.append([i, model.node.weight.item(), l.item(), y_pred.detach().numpy()])

    # update parameters
    optimizer.step()
    optimizer.zero_grad()

    # log
    print(f"epoch: {i:>2} -> loss: {l.item():>10.5f}")

epoch:  0 -> loss: 3847.11401
epoch:  1 -> loss: 3240.04541
epoch:  2 -> loss: 2729.26758
epoch:  3 -> loss: 2299.50806
epoch:  4 -> loss: 1937.91528
epoch:  5 -> loss: 1633.67761
epoch:  6 -> loss: 1377.69702
epoch:  7 -> loss: 1162.31921
epoch:  8 -> loss:  981.10413
epoch:  9 -> loss:  828.63281
epoch: 10 -> loss:  700.34589
epoch: 11 -> loss:  592.40747
epoch: 12 -> loss:  501.58997
epoch: 13 -> loss:  425.17764
epoch: 14 -> loss:  360.88556
epoch: 15 -> loss:  306.79138
epoch: 16 -> loss:  261.27737
epoch: 17 -> loss:  222.98265
epoch: 18 -> loss:  190.76219
epoch: 19 -> loss:  163.65236
epoch: 20 -> loss:  140.84265


In [None]:
# plot: one row per saved snapshot.
# Left column: data and the fitted line; right column: position on the loss curve.

# Derive the row count from the snapshots themselves. `epoch // 5 + 1`
# over-counts whenever `epoch` is a multiple of 5, which made `state[row]`
# raise IndexError.
rows = len(state)

# squeeze=False keeps `axs` two-dimensional even when there is a single row.
fig, axs = plt.subplots(nrows=rows, ncols=2, figsize=(10, 4 * rows), layout='compressed', squeeze=False)

# explicit numpy views of the tensors for matplotlib
x_np, y_np = train_x.numpy(), train_y.numpy()
w_grid, loss_grid = W.numpy(), L.numpy()

for row, (snap_epoch, snap_w, snap_loss, snap_pred) in enumerate(state):
    fit_ax, loss_ax = axs[row]

    fit_ax.plot(x_np, y_np, 'ro')
    fit_ax.plot(x_np, snap_pred, 'b')
    fit_ax.set_title(f"epoch: {snap_epoch}")
    fit_ax.set_xlabel("x")
    fit_ax.set_ylabel("y")

    loss_ax.plot(snap_w, snap_loss, 'ro')
    loss_ax.plot(w_grid, loss_grid, 'b')
    loss_ax.set_title("loss function")
    loss_ax.set_xlabel("weight")
    loss_ax.set_ylabel("MSE loss")

plt.show()