<a href="https://colab.research.google.com/github/mraihanramadhan/Machine-Learning/blob/main/Week9_Chapter3_Linear%20Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Libraries

In [None]:
%matplotlib inline
import math
import time
import numpy as np
import torch
import random
from d2l import torch as d2l

from IPython import display
import torchvision
from torch.utils import data
from torchvision import transforms
from torch import nn

# NOTE(review): presumably switches inline figures to SVG rendering (going by
# the helper's name) — confirm against the d2l package.
d2l.use_svg_display()

Linear Regression

Vectorization For Speed

In [12]:
# Length of the two vectors used in the addition timing comparison.
n = 10000
# Two all-ones vectors of length `n`.
a = torch.ones(n)
b = torch.ones(n)

In [13]:
class Timer:  #@save
    """Record multiple running times.

    Each `start()`/`stop()` pair appends one elapsed interval (in seconds)
    to `self.times`. The constructor starts the clock immediately, so the
    first `stop()` measures the time since construction.

    Intervals are measured with `time.perf_counter()`, a monotonic
    high-resolution clock, so they cannot be negative or distorted by
    system clock adjustments the way wall-clock `time.time()` can.
    """
    def __init__(self):
        # Elapsed intervals, in seconds, in the order they were recorded.
        self.times = []
        self.start()

    def start(self):
        """Start the timer."""
        # `tik` is a perf_counter reading, only meaningful as a difference.
        self.tik = time.perf_counter()

    def stop(self):
        """Stop the timer and record the time in a list.

        Returns the interval just recorded, in seconds.
        """
        elapsed = time.perf_counter() - self.tik
        self.times.append(elapsed)
        return elapsed

    def avg(self):
        """Return the average time, or 0.0 if nothing has been recorded."""
        if not self.times:
            # Avoid ZeroDivisionError before the first `stop()`.
            return 0.0
        return sum(self.times) / len(self.times)

    def sum(self):
        """Return the sum of time."""
        # Inside a method body `sum` resolves to the builtin, not this method.
        return sum(self.times)

    def cumsum(self):
        """Return the accumulated time."""
        return np.array(self.times).cumsum().tolist()

In [14]:
# Baseline: add the two vectors one element at a time in a Python loop.
c = torch.zeros(n)
timer = Timer()  # the constructor starts the clock
for i in range(n):
    c[i] = a[i] + b[i]
# Last expression of the cell: elapsed seconds, formatted to 5 decimals.
f'{timer.stop():.5f} sec'

'0.08807 sec'

In [15]:
# Same addition as a single whole-tensor operation, timed for comparison
# with the element-wise loop.
timer.start()
d = a + b
f'{timer.stop():.5f} sec'

'0.00014 sec'

The Normal Distribution and Squared Loss

In [16]:
def normal(x, mu, sigma):
    """Evaluate the normal (Gaussian) probability density at `x`.

    Computes 1 / sqrt(2 * pi * sigma^2) * exp(-(x - mu)^2 / (2 * sigma^2)).

    Args:
        x: Point(s) at which to evaluate the density; passed through
            `np.exp`, so a NumPy array is evaluated element-wise.
        mu: Mean of the distribution.
        sigma: Standard deviation; a scalar, because it goes through
            `math.sqrt`.

    Returns:
        The density value(s) at `x`.
    """
    variance = sigma**2
    norm_const = 1 / math.sqrt(2 * math.pi * variance)
    exponent = -0.5 / variance * (x - mu)**2
    return norm_const * np.exp(exponent)

In [None]:
# Use numpy again for visualization: evaluation grid from -7 up to (but not
# including) 7, in steps of 0.01
x = np.arange(-7, 7, 0.01)

# Mean and standard deviation pairs, one per plotted curve
params = [(0, 1), (0, 2), (3, 1)]
# One density curve per (mu, sigma) pair, each with a matching legend entry.
d2l.plot(x, [normal(x, mu, sigma) for mu, sigma in params], xlabel='x',
         ylabel='p(x)', figsize=(4.5, 2.5),
         legend=[f'mean {mu}, std {sigma}' for mu, sigma in params])

Linear Regression from Scratch

Generating The Dataset

In [18]:
def synthetic_data(w, b, num_examples, noise_std=0.01):  #@save
    """Generate y = Xw + b + noise.

    Args:
        w: 1-D weight tensor; its length sets the number of features.
        b: Bias added to every label.
        num_examples: Number of rows to generate.
        noise_std: Standard deviation of the zero-mean Gaussian noise added
            to the labels. Defaults to 0.01, the value that was previously
            hard-coded.

    Returns:
        A tuple `(X, y)`: `X` has shape `(num_examples, len(w))` with entries
        drawn from a standard normal, and `y` has shape `(num_examples, 1)`.
    """
    X = torch.normal(0, 1, (num_examples, len(w)))
    y = torch.matmul(X, w) + b
    # Perturb the noiseless labels with Gaussian observation noise.
    y += torch.normal(0, noise_std, y.shape)
    # Return the labels as a column vector.
    return X, y.reshape((-1, 1))

In [19]:
# Ground-truth weights and bias used to generate the synthetic dataset.
true_w = torch.tensor([2, -3.4])
true_b = 4.2
# Draw 1000 examples; `features` is X and `labels` is y from synthetic_data.
features, labels = synthetic_data(true_w, true_b, 1000)

In [20]:
# Show the first example: its feature row and the corresponding label.
print('features:', features[0], '\nlabel:', labels[0])


features: tensor([-2.3164, -0.7222]) 
label: tensor([2.0259])


In [None]:
d2l.set_figsize()
# Scatter the second feature (column index 1) against the labels.
# NOTE(review): the trailing positional `1` is presumably the marker size,
# assuming `d2l.plt` is matplotlib.pyplot — confirm.
# The semicolon is for displaying the plot only
d2l.plt.scatter(features[:, (1)].detach().numpy(),
                labels.detach().numpy(), 1);

Reading The Dataset

In [22]:
def data_iter(batch_size, features, labels):
    """Yield minibatches of `(features, labels)` in a freshly shuffled order.

    Args:
        batch_size: Number of examples per minibatch; the final batch is
            smaller when the dataset size is not a multiple of it.
        features: Indexable collection of examples; its length sets the
            dataset size.
        labels: Labels, indexed with the same indices as `features`.

    Yields:
        Tuples `(features[idx], labels[idx])` where `idx` is a tensor of
        example indices for one minibatch.
    """
    total = len(features)
    order = list(range(total))
    # Visit the examples in random order; reshuffled on every fresh iteration.
    random.shuffle(order)
    for start in range(0, total, batch_size):
        # Slicing past the end is clamped, so the last batch is just shorter.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]

In [23]:
# Minibatch size; also read by the training loop later in the notebook.
batch_size = 10

# Peek at a single minibatch: print the first one, then stop iterating.
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break

tensor([[-0.1737, -0.2889],
        [-0.2117, -1.1520],
        [ 0.9959,  0.5439],
        [ 1.3548,  0.2116],
        [-0.2688, -0.7844],
        [-0.2812,  1.7249],
        [ 0.7900,  0.4125],
        [ 1.1198, -0.1361],
        [ 0.7047, -0.1821],
        [-0.1776,  1.6861]]) 
 tensor([[ 4.8539],
        [ 7.6917],
        [ 4.3561],
        [ 6.1903],
        [ 6.3237],
        [-2.2397],
        [ 4.3704],
        [ 6.9028],
        [ 6.2365],
        [-1.8910]])


Initializing Model Parameters

In [24]:
# Weights: shape (2, 1), drawn from a normal with mean 0 and std 0.01,
# with gradient tracking enabled.
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
# Bias: a single zero with gradient tracking enabled.
# NOTE(review): this rebinds `b`, which the vectorization cells earlier set to
# torch.ones(n); re-running those cells after this one would use this bias.
b = torch.zeros(1, requires_grad=True)

Defining The Model

In [25]:
def linreg(X, w, b):  #@save
    """The linear regression model.

    Args:
        X: Input features.
        w: Weights multiplied with `X` via matrix multiplication.
        b: Bias added to the product (broadcast as needed).

    Returns:
        The predictions `Xw + b`.
    """
    return X @ w + b

Defining The Loss Function

In [26]:
def squared_loss(y_hat, y):  #@save
    """Squared loss.

    Args:
        y_hat: Predictions.
        y: Targets; reshaped to the shape of `y_hat` before subtracting so
            the difference is taken element-wise rather than broadcast.

    Returns:
        The per-element loss `(y_hat - y)^2 / 2`, with the shape of `y_hat`
        (not reduced).
    """
    residual = y_hat - y.reshape_as(y_hat)
    return residual**2 / 2

Defining The Optimization Algorithm

In [27]:
def sgd(params, lr, batch_size):  #@save
    """Minibatch stochastic gradient descent.

    Updates every parameter in place by `lr * grad / batch_size`, then
    resets its gradient to zero for the next backward pass.

    Args:
        params: Iterable of tensors whose `.grad` has been populated.
        lr: Learning rate.
        batch_size: Divisor applied to the gradient.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for p in params:
            step = lr * p.grad / batch_size
            p.sub_(step)
            p.grad.zero_()

Training

In [28]:
# Hyperparameters: learning rate passed to sgd, and number of passes over the data.
lr = 0.03
num_epochs = 3
# Aliases so the training loop does not name a specific model or loss.
net = linreg
loss = squared_loss

In [29]:
# Train for `num_epochs` passes; each pass visits every minibatch once.
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)  # Minibatch loss in `X` and `y`
        # Compute gradient on `l` with respect to [`w`, `b`]
        # `l` holds one loss per example, so it is summed to a scalar first;
        # sgd then divides the gradient by `batch_size`.
        l.sum().backward()
        sgd([w, b], lr, batch_size)  # Update parameters using their gradient
    # End-of-epoch report: mean loss over the full dataset, without autograd.
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')

epoch 1, loss 0.035038
epoch 2, loss 0.000125
epoch 3, loss 0.000048


In [44]:
# Compare the learned parameters with the ground truth used to generate the data.
# `w` is reshaped to the shape of `true_w` so the subtraction is element-wise.
print(f'error in estimating w: {true_w - w.reshape(true_w.shape)}')
print(f'error in estimating b: {true_b - b}')

error in estimating w: tensor([ 0.0004, -0.0015], grad_fn=<SubBackward0>)
error in estimating b: tensor([0.0006], grad_fn=<RsubBackward1>)
