📝 **Author:** Amirhossein Heydari - 📧 **Email:** amirhosseinheydari78@gmail.com - 📍 **Linktree:** [linktr.ee/mr_pylin](https://linktr.ee/mr_pylin)

---

# Dependencies

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn import datasets

# Linear Regression
<div style="display: flex; margin-top: 50px;">
    <div style="width: 20%;">
        <table style="margin-left: auto; margin-right: auto;">
            <caption>Dataset</caption>
            <tr>
                <th>#</th>
                <th><span style="color: cyan;">x<sub>1</span></th>
                <th><span style="color: #FF9999;">y</span></th>
            </tr>
            <tr>
                <th><span style="color: #99DD00">1</span></th>
                <td>1</td>
                <td>2</td>
            </tr>
            <tr>
                <th><span style="color: #99DD00">2</span></th>
                <td>2</td>
                <td>4</td>
            </tr>
            <tr>
                <th><span style="color: #99DD00">3</span></th>
                <td>3</td>
                <td>6</td>
            </tr>
            <tr>
                <th><span style="color: #99DD00">4</span></th>
                <td>4</td>
                <td>8</td>
            </tr>
            <tr>
                <th><span style="color: #99DD00">5</span></th>
                <td>5</td>
                <td>10</td>
            </tr>
            <tr>
                <th><span style="color: #99DD00">6</span></th>
                <td>6</td>
                <td>12</td>
            </tr>
        </table>
    </div>
    <div style="width: 80%;">
        <figure style="text-align: center;">
            <img src="../assets/images/original/linear-regression.svg" alt="linear-regression.svg" style="width: 100%;">
            <figcaption style="text-align: center;">Linear Regression Model</figcaption>
        </figure>
    </div>
</div>

In [2]:
dataset = torch.tensor(
    [
        [1, 2],
        [2, 4],
        [3, 6],
        [4, 8],
        [5, 10],
        [6, 12]
    ]
)

In [3]:
trainset = dataset[:4]

x_train = trainset[:, 0].type(torch.float32)
y_train = trainset[:, 1].type(torch.float32)

# log
print(f"x_train: {x_train}")
print(f"y_train: {y_train}")

x_train: tensor([1., 2., 3., 4.])
y_train: tensor([2., 4., 6., 8.])


In [4]:
testset = dataset[4:]

x_test = testset[:, 0].type(torch.float32)
y_test = testset[:, 1].type(torch.float32)

# log
print(f"x_test: {x_test}")
print(f"y_test: {y_test}")

x_test: tensor([5., 6.])
y_test: tensor([10., 12.])


In [None]:
# plot
plt.scatter(dataset[:, 0][:4], dataset[:, 1][:4], c='blue')
plt.scatter(dataset[:, 0][4:], dataset[:, 1][4:], c='red')
plt.title("f(x) = 2x")
plt.xlabel('x')
plt.ylabel('f(x)')
plt.show()

## Implementation 1
<ul>
    <li style="font-family: consolas;">feed-forward &nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: red">Manual</span></li>
    <li style="font-family: consolas;">compute gradient : <span style="color: red">Manual</span></li>
    <li style="font-family: consolas;">compute loss &nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: red">Manual</span></li>
    <li style="font-family: consolas;">update weights &nbsp;&nbsp;: <span style="color: red">Manual</span></li>
    
</ul>

In [6]:
# initial weight
w = torch.tensor([0.0])

# feed-forward
def forward(x):
    return w * x

# MSE loss
def loss(y_pred, y_train):
    return ((y_pred - y_train) ** 2).mean()

# backward
def gradient(x, y_pred, y_train):
    # MSE   = 1/N * (w*x - y) ** 2
    # dl/dw = 1/N * 2x * (w*x - y)
    return (2 * x * (y_pred - y_train)).mean()

# hyper parameters
lr = 0.01
epoch = 20

for i in range(epoch):

    # forward
    y_pred = forward(x_train)

    # backward
    l = loss(y_pred, y_train)
    dw = gradient(x_train, y_pred, y_train)

    # update parameters
    w -= lr * dw

    # test
    y_pred = forward(x_test)

    # log
    print(f"epoch: {i:>2} -> loss={l:>9.5f} | w_old= {(w + lr * dw).item():.3f} | step= {(lr * dw).item():.5f} | w_new= {w.item():.3f} | y_test= {y_pred.round(decimals=4)}")

epoch:  0 -> loss= 30.00000 | w_old= 0.000 | step= -0.30000 | w_new= 0.300 | y_test= tensor([1.5000, 1.8000])
epoch:  1 -> loss= 21.67500 | w_old= 0.300 | step= -0.25500 | w_new= 0.555 | y_test= tensor([2.7750, 3.3300])
epoch:  2 -> loss= 15.66019 | w_old= 0.555 | step= -0.21675 | w_new= 0.772 | y_test= tensor([3.8587, 4.6305])
epoch:  3 -> loss= 11.31449 | w_old= 0.772 | step= -0.18424 | w_new= 0.956 | y_test= tensor([4.7799, 5.7359])
epoch:  4 -> loss=  8.17472 | w_old= 0.956 | step= -0.15660 | w_new= 1.113 | y_test= tensor([5.5629, 6.6755])
epoch:  5 -> loss=  5.90623 | w_old= 1.113 | step= -0.13311 | w_new= 1.246 | y_test= tensor([6.2285, 7.4742])
epoch:  6 -> loss=  4.26725 | w_old= 1.246 | step= -0.11314 | w_new= 1.359 | y_test= tensor([6.7942, 8.1531])
epoch:  7 -> loss=  3.08309 | w_old= 1.359 | step= -0.09617 | w_new= 1.455 | y_test= tensor([7.2751, 8.7301])
epoch:  8 -> loss=  2.22753 | w_old= 1.455 | step= -0.08175 | w_new= 1.537 | y_test= tensor([7.6838, 9.2206])
epoch:  9 

## Implementation 2
<ul>
    <li style="font-family: consolas;">feed-forward &nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: red">Manual</span></li>
    <li style="font-family: consolas;">compute gradient : <span style="color: cyan">PyTorch</span></li>
    <li style="font-family: consolas;">compute loss &nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: red">Manual</span></li>
    <li style="font-family: consolas;">update weights &nbsp;&nbsp;: <span style="color: red">Manual</span></li>
</ul>

In [7]:
# initial weight
w = torch.tensor([0.0], requires_grad=True)

# feed-forward
def forward(x):
    return w * x

# MSE loss
def loss(y_pred, y_train):
    return ((y_pred - y_train) ** 2).mean()

# hyper parameters
lr = 0.01
epoch = 20

for i in range(epoch):

    # forward
    y_pred = forward(x_train)

    # backward
    l = loss(y_pred, y_train)
    l.backward()

    # update parameters
    with torch.no_grad():
        w -= lr * w.grad

    # test
    with torch.no_grad():
        y_pred = forward(x_test)

    # log
    print(f"epoch: {i:>2} -> loss={l:>9.5f} | w_old= {(w + lr * w.grad).item():.3f} | step= {(lr * w.grad).item():.5f} | w_new= {w.item():.3f} | y_test= {y_pred.round(decimals=4).detach()}")

    # remove previous gradients
    w.grad.zero_()

epoch:  0 -> loss= 30.00000 | w_old= 0.000 | step= -0.30000 | w_new= 0.300 | y_test= tensor([1.5000, 1.8000])
epoch:  1 -> loss= 21.67500 | w_old= 0.300 | step= -0.25500 | w_new= 0.555 | y_test= tensor([2.7750, 3.3300])
epoch:  2 -> loss= 15.66019 | w_old= 0.555 | step= -0.21675 | w_new= 0.772 | y_test= tensor([3.8587, 4.6305])
epoch:  3 -> loss= 11.31449 | w_old= 0.772 | step= -0.18424 | w_new= 0.956 | y_test= tensor([4.7799, 5.7359])
epoch:  4 -> loss=  8.17472 | w_old= 0.956 | step= -0.15660 | w_new= 1.113 | y_test= tensor([5.5629, 6.6755])
epoch:  5 -> loss=  5.90623 | w_old= 1.113 | step= -0.13311 | w_new= 1.246 | y_test= tensor([6.2285, 7.4742])
epoch:  6 -> loss=  4.26725 | w_old= 1.246 | step= -0.11314 | w_new= 1.359 | y_test= tensor([6.7942, 8.1531])
epoch:  7 -> loss=  3.08309 | w_old= 1.359 | step= -0.09617 | w_new= 1.455 | y_test= tensor([7.2751, 8.7301])
epoch:  8 -> loss=  2.22753 | w_old= 1.455 | step= -0.08175 | w_new= 1.537 | y_test= tensor([7.6838, 9.2206])
epoch:  9 

## Implementation 3
<ul>
    <li style="font-family: consolas;">feed-forward &nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: red">Manual</span></li>
    <li style="font-family: consolas;">compute gradient : <span style="color: cyan">PyTorch</span></li>
    <li style="font-family: consolas;">compute loss &nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: cyan">PyTorch</span></li>
    <li style="font-family: consolas;">update weights &nbsp;&nbsp;: <span style="color: cyan">PyTorch</span></li>
</ul>

In [8]:
# initial weight
w = torch.tensor([0.0], requires_grad=True)

# feed-forward
def forward(x):
    return w * x

# hyper parameters
lr = 0.01
epoch = 20
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD([w], lr)

for i in range(epoch):

    # forward
    y_pred = forward(x_train)

    # backward
    l = criterion(y_pred, y_train)
    l.backward()

    # update parameters
    optimizer.step()

    # test
    with torch.no_grad():
        y_pred = forward(x_test)

    # log
    print(f"epoch: {i:>2} -> loss={l:>9.5f} | w_old= {(w + lr * w.grad).item():.3f} | step= {(lr * w.grad).item():.5f} | w_new= {w.item():.3f} | y_test= {y_pred.round(decimals=4).detach()}")

    # remove previous gradients
    optimizer.zero_grad()

epoch:  0 -> loss= 30.00000 | w_old= 0.000 | step= -0.30000 | w_new= 0.300 | y_test= tensor([1.5000, 1.8000])
epoch:  1 -> loss= 21.67500 | w_old= 0.300 | step= -0.25500 | w_new= 0.555 | y_test= tensor([2.7750, 3.3300])
epoch:  2 -> loss= 15.66019 | w_old= 0.555 | step= -0.21675 | w_new= 0.772 | y_test= tensor([3.8587, 4.6305])
epoch:  3 -> loss= 11.31449 | w_old= 0.772 | step= -0.18424 | w_new= 0.956 | y_test= tensor([4.7799, 5.7359])
epoch:  4 -> loss=  8.17472 | w_old= 0.956 | step= -0.15660 | w_new= 1.113 | y_test= tensor([5.5629, 6.6755])
epoch:  5 -> loss=  5.90623 | w_old= 1.113 | step= -0.13311 | w_new= 1.246 | y_test= tensor([6.2285, 7.4742])
epoch:  6 -> loss=  4.26725 | w_old= 1.246 | step= -0.11314 | w_new= 1.359 | y_test= tensor([6.7942, 8.1531])
epoch:  7 -> loss=  3.08309 | w_old= 1.359 | step= -0.09617 | w_new= 1.455 | y_test= tensor([7.2751, 8.7301])
epoch:  8 -> loss=  2.22753 | w_old= 1.455 | step= -0.08175 | w_new= 1.537 | y_test= tensor([7.6838, 9.2206])
epoch:  9 

## Implementation 4
<ul>
    <li style="font-family: consolas;">feed-forward &nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: cyan">PyTorch</span></li>
    <li style="font-family: consolas;">compute gradient : <span style="color: cyan">PyTorch</span></li>
    <li style="font-family: consolas;">compute loss &nbsp;&nbsp;&nbsp;&nbsp;: <span style="color: cyan">PyTorch</span></li>
    <li style="font-family: consolas;">update weights &nbsp;&nbsp;: <span style="color: cyan">PyTorch</span></li>
</ul>

In [9]:
# reshape dataset [row: num of samples - column: num of features]
x_train, x_test, y_train, y_test = map(lambda x: x.reshape(-1, 1), [x_train, x_test, y_train, y_test])

# linear regression model
model = torch.nn.Linear(in_features=1, out_features=1, bias=False)

# initial weight [educational purpose]
with torch.no_grad():
    model.weight.fill_(0.0)

# hyper parameters
lr = 0.01
epoch = 20
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr)

for i in range(epoch):

    # forward
    y_pred = model(x_train)

    # backward
    l = criterion(y_pred, y_train)
    l.backward()

    # update parameters
    optimizer.step()

    # test
    with torch.no_grad():
        y_pred = forward(x_test)

    # log
    print(f"epoch: {i:>2} -> loss={l:>9.5f} | w_old= {(model.weight + lr * model.weight.grad).item():.3f} | step= {(lr * model.weight.grad).item():.5f} | w_new= {model.weight.item():.3f} | y_test= {y_pred.squeeze().round(decimals=4).detach()}")

    # remove previous gradients
    optimizer.zero_grad()

epoch:  0 -> loss= 30.00000 | w_old= 0.000 | step= -0.30000 | w_new= 0.300 | y_test= tensor([ 9.6124, 11.5349])
epoch:  1 -> loss= 21.67500 | w_old= 0.300 | step= -0.25500 | w_new= 0.555 | y_test= tensor([ 9.6124, 11.5349])
epoch:  2 -> loss= 15.66019 | w_old= 0.555 | step= -0.21675 | w_new= 0.772 | y_test= tensor([ 9.6124, 11.5349])
epoch:  3 -> loss= 11.31449 | w_old= 0.772 | step= -0.18424 | w_new= 0.956 | y_test= tensor([ 9.6124, 11.5349])
epoch:  4 -> loss=  8.17472 | w_old= 0.956 | step= -0.15660 | w_new= 1.113 | y_test= tensor([ 9.6124, 11.5349])
epoch:  5 -> loss=  5.90623 | w_old= 1.113 | step= -0.13311 | w_new= 1.246 | y_test= tensor([ 9.6124, 11.5349])
epoch:  6 -> loss=  4.26725 | w_old= 1.246 | step= -0.11314 | w_new= 1.359 | y_test= tensor([ 9.6124, 11.5349])
epoch:  7 -> loss=  3.08309 | w_old= 1.359 | step= -0.09617 | w_new= 1.455 | y_test= tensor([ 9.6124, 11.5349])
epoch:  8 -> loss=  2.22753 | w_old= 1.455 | step= -0.08175 | w_new= 1.537 | y_test= tensor([ 9.6124, 11

## Example: All In One

In [None]:
# generate artificial data
n_samples, n_features = [100, 1]

x, y = datasets.make_regression(n_samples, n_features, noise=5, random_state=42)

# convert numpy.ndarray to torch.Tensor
x_train = torch.from_numpy(x.astype(np.float32))
y_train = torch.from_numpy(y.astype(np.float32)).view(-1, 1)

# plot
plt.scatter(x, y)
plt.xlabel('x')
plt.ylabel('f(x)')
plt.show()

In [11]:
# linear regression model
model = torch.nn.Linear(n_features, 1)

model

Linear(in_features=1, out_features=1, bias=True)

In [12]:
# plot stuff
W = torch.linspace(-100, 100, 500)
L = torch.zeros(size=(500, ))

for i, val in enumerate(W):
    with torch.no_grad():
        model.weight.fill_(val)
        L[i] = loss(model(x_train), y_train)

state = []

In [13]:
# initial weight [educational purpose]
with torch.no_grad():
    model.weight.fill_(-25)

# hyper parameters
epoch = 21
lr = 0.05
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
loss = torch.nn.MSELoss()

# training loop
model.train()
for i in range(epoch):

    # forward
    y_pred = model(x_train)

    # backward
    l = loss(y_pred, y_train)
    l.backward()

    # save new y_pred every 5 epochs [plot stuff]
    if i % 5 == 0:
        state.append([i, model.weight.item(), l.item(), y_pred.detach().numpy()])

    # update parameters
    optimizer.step()
    optimizer.zero_grad()

    # log
    print(f"epoch: {i:>2} -> loss: {l.item():>10.5f}")

epoch:  0 -> loss: 3847.01245
epoch:  1 -> loss: 3232.73926
epoch:  2 -> loss: 2718.63623
epoch:  3 -> loss: 2288.07520
epoch:  4 -> loss: 1927.24048
epoch:  5 -> loss: 1624.64624
epoch:  6 -> loss: 1370.73364
epoch:  7 -> loss: 1157.54248
epoch:  8 -> loss:  978.43713
epoch:  9 -> loss:  827.88342
epoch: 10 -> loss:  701.26111
epoch: 11 -> loss:  594.71045
epoch: 12 -> loss:  505.00458
epoch: 13 -> loss:  429.44403
epoch: 14 -> loss:  365.76859
epoch: 15 -> loss:  312.08508
epoch: 16 -> loss:  266.80606
epoch: 17 -> loss:  228.60028
epoch: 18 -> loss:  196.35005
epoch: 19 -> loss:  169.11688
epoch: 20 -> loss:  146.11205


In [None]:
# plot
rows = epoch // 5 + 1
fig, axs = plt.subplots(nrows=rows, ncols=2, figsize=(10, 20), layout='compressed')

for row in range(rows):
    axs[row, 0].plot(x_train, y_train, 'ro')
    axs[row, 0].plot(x_train, state[row][3], 'b')
    axs[row, 0].set(title=f"epoch: {state[row][0]}", xlabel="x", ylabel="f(x)")
    axs[row, 1].plot(state[row][1], state[row][2], 'ro')
    axs[row, 1].plot(W, L, 'b')
    axs[row, 1].set(title="loss function", xlabel="w", ylabel="L(w)")

plt.show()