# 1. Linear Regression Implementation from Scratch

In [1]:
import torch
import random

## 1.1 Generating the dataset

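The true parameters generating our dataset will be $\mathbf{w}=[2,-3.4]^{T}$ and $b=4.2$, and our synthetic labels will be assigned according to the following linear model with the noise term $\epsilon$:

$$\mathbf{y} = \mathbf{X}\mathbf{w} + b + \epsilon$$

Here $\epsilon$ is drawn from a normal distribution with mean 0 and standard deviation 0.01.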

In [2]:
def synthetic_data(w, b, num_examples):
    """Generate a synthetic dataset following ``y = Xw + b + noise``.

    Args:
        w: Weight tensor; ``len(w)`` sets the number of feature columns.
        b: Bias added to every label.
        num_examples: Number of rows to sample.

    Returns:
        A pair ``(X, y)``: ``X`` has shape ``(num_examples, len(w))`` and
        ``y`` is reshaped to a single column.
    """
    num_features = len(w)
    # Features are sampled from a standard normal distribution.
    X = torch.normal(0, 1, (num_examples, num_features))
    # Noise-free labels produced by the linear model ...
    clean = torch.matmul(X, w) + b
    # ... perturbed by Gaussian noise with standard deviation 0.01.
    noisy = clean + torch.normal(0, 0.01, clean.shape)
    return X, noisy.reshape((-1, 1))

# Ground-truth parameters that the regression is expected to recover.
true_w = torch.tensor([2, -3.4])
true_b = 4.2
# Sample 1000 (feature, label) pairs from the noisy linear model.
features, labels = synthetic_data(w=true_w, b=true_b, num_examples=1000)

print(f'Example\nFeatures: {features[0]}\nLabel: {labels[0]}')

Example
Features: tensor([-0.3170,  1.4806])
Label: tensor([-1.4679])


## 1.2 Reading the dataset

In [3]:
def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of ``(features, labels)``.

    The example indices are shuffled once per call with ``random.shuffle``;
    consecutive slices of ``batch_size`` indices are then used to index both
    tensors. The final batch is smaller when ``len(features)`` is not a
    multiple of ``batch_size``.
    """
    num_examples = len(features)
    order = list(range(num_examples))
    random.shuffle(order)
    for start in range(0, num_examples, batch_size):
        # Clamp the slice end so the last batch never runs past the data.
        stop = min(start + batch_size, num_examples)
        picked = torch.tensor(order[start:stop])
        yield features[picked], labels[picked]

In [4]:
batch_size = 10

# Peek at a single shuffled minibatch to sanity-check the iterator.
X, y = next(data_iter(batch_size, features, labels))
print('Features:\n', X, '\nLabels\n', y)

Features:
 tensor([[ 0.5343, -0.5600],
        [-0.0330,  1.4853],
        [-0.5817, -0.3254],
        [-0.1958, -2.4194],
        [ 0.5866, -0.4351],
        [ 0.9328,  0.9186],
        [ 0.7513, -0.1594],
        [-0.5685,  0.6291],
        [-0.6125, -0.6574],
        [ 0.1852, -0.2952]]) 
Labels
 tensor([[ 7.1756],
        [-0.9245],
        [ 4.1347],
        [12.0276],
        [ 6.8513],
        [ 2.9368],
        [ 6.2365],
        [ 0.9198],
        [ 5.2258],
        [ 5.5598]])


## 1.3 Initializing model parameters

In [5]:
# Small random weights (std 0.01) and a zero bias; both are tracked by autograd.
w = torch.normal(mean=0.0, std=0.01, size=(2, 1), requires_grad=True)
b = torch.zeros((1,), requires_grad=True)

## 1.4 Defining the model

In [6]:
def linreg(X, w, b):
    """Linear regression model: the affine map ``Xw + b``."""
    return X @ w + b

## 1.5 Defining the Loss Function

In [7]:
def squared_loss(y_hat, y):
    """Element-wise halved squared error, ``(y_hat - y)^2 / 2``.

    ``y`` is first reshaped to ``y_hat``'s shape so the subtraction is
    element-wise; no reduction is applied to the result.
    """
    target = y.reshape(y_hat.shape)
    residual = y_hat - target
    return residual ** 2 / 2

## 1.6 Defining the Optimization Algorithm 

Because our loss is calculated as a sum over the minibatch of examples, we normalize our step size by the batch size (batch_size), so that the magnitude of a typical step size does not depend heavily on our choice of the batch size.

    torch.no_grad(): context manager that disables gradient calculation (inside the block, the result of every computation has requires_grad=False, even when its inputs have requires_grad=True)
    
    torch.Tensor.grad: This attribute is None by default and becomes a Tensor the first time a call to backward() computes gradients for self. The attribute will then contain the gradients computed and future calls to backward() will accumulate (add) gradients into it.

In [8]:
def sgd(params, lr, batch_size):
    """Apply one minibatch stochastic gradient descent step in place.

    Each tensor in ``params`` is decreased by ``lr * grad / batch_size`` and
    its ``.grad`` is then zeroed so the next ``backward()`` starts fresh.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for p in params:
            update = lr * p.grad / batch_size
            p.sub_(update)
            p.grad.zero_()

## 1.7 Training

In [9]:
lr = 0.03
epochs = 3

for epoch in range(epochs):
    # One pass over the shuffled dataset, taking one SGD step per minibatch.
    for X, y in data_iter(batch_size, features, labels):
        batch_loss = squared_loss(linreg(X, w, b), y)
        # Sum (not mean) here: sgd() divides the step by batch_size itself.
        batch_loss.sum().backward()
        sgd([w, b], lr, batch_size)
    # Report the mean loss over the whole dataset without tracking gradients.
    with torch.no_grad():
        train_l = squared_loss(linreg(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')

epoch 1, loss 0.032362
epoch 2, loss 0.000112
epoch 3, loss 0.000052


# 2. Concise implementation of Linear regression

In [17]:
import numpy as np
from torch.utils import data
from torch import nn

## 2.1 Reading the dataset

* **torch.utils.data.DataLoader:** Data loader. Combines a dataset and a sampler, and provides an iterable over the given dataset.

* **torch.utils.data.Dataset:** An abstract class representing a Dataset.

In [16]:
def load_array(data_arrays, batch_size, is_train=True):
    """Wrap tensors in a PyTorch ``DataLoader``.

    Args:
        data_arrays: Iterable of tensors, unpacked into a ``TensorDataset``.
        batch_size: Number of examples per minibatch.
        is_train: Forwarded as the loader's ``shuffle`` flag.

    Returns:
        A ``DataLoader`` over the resulting dataset.
    """
    tensor_dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(
        tensor_dataset, batch_size=batch_size, shuffle=is_train)
    return loader

batch_size = 10
# Shuffling minibatch loader over the full synthetic dataset (is_train defaults to True).
# NOTE: this rebinds `data_iter`, which earlier named the generator function.
data_iter = load_array(data_arrays=(features, labels), batch_size=batch_size)

In [15]:
# Pull one minibatch from the loader to inspect its [features, labels] structure.
next(iter(data_iter))

[tensor([[-0.0825, -0.1205],
         [-0.5382, -0.5049],
         [-0.5245,  0.2022],
         [ 1.7646, -0.6491],
         [ 0.4116,  0.1704],
         [-1.9438,  0.2198],
         [ 0.7152,  0.3333],
         [-0.9721, -0.1958],
         [ 1.1412, -0.2552],
         [-0.1022,  0.6544]]), tensor([[ 4.4286],
         [ 4.8262],
         [ 2.4580],
         [ 9.9233],
         [ 4.4351],
         [-0.4274],
         [ 4.5028],
         [ 2.9197],
         [ 7.3523],
         [ 1.7669]])]

## 2.2 Defining the Model

* **torch.nn.Sequential(\*args):** A sequential container. Modules will be added to it in the order they are passed in the constructor. Alternatively, an ordered dict of modules can also be passed in.

* **torch.nn.Linear(in_features, out_features, bias=True):** Applies a linear transformation to the incoming data: $y = xW^{T} + b$, where the weight $W$ has shape (out_features, in_features).

In [36]:
# A single fully connected layer: 2 input features -> 1 output.
# Bound to `net`, the name the following cells use (net[0], net.parameters(), net(X)).
net = nn.Sequential(nn.Linear(2, 1))

## 2.3 Initializing Model Parameters

In [37]:
# Re-draw the linear layer's weights in place from a normal distribution (mean 0, std 0.01).
net[0].weight.data.normal_(0, 0.01)

tensor([[0.0057, 0.0164]])

In [38]:
# Reset the linear layer's bias to zero in place.
net[0].bias.data.fill_(0)

tensor([0.])

## 2.4 Defining the Loss Function

In [39]:
# Mean squared error averaged over all elements (no 1/2 factor, unlike squared_loss).
loss = nn.MSELoss(reduction='mean')

## 2.5 Defining the Optimization Algorithm

In [40]:
# Minibatch SGD over all of the model's parameters with learning rate 0.03.
trainer = torch.optim.SGD(params=net.parameters(), lr=0.03)

## 2.6 Training

* **step(closure):** Performs a single optimization step (parameter update).

* **zero_grad():** Sets the gradients of all optimized `torch.Tensor`s to zero.

In [41]:
epochs = 3
for epoch in range(epochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        # Clear gradients left over from the previous step; backward() accumulates.
        trainer.zero_grad()
        l.backward()
        trainer.step()
    # Evaluate on the full dataset without recording an autograd graph,
    # matching the evaluation in the from-scratch training loop.
    with torch.no_grad():
        l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')

epoch 1, loss 0.000208
epoch 2, loss 0.000104
epoch 3, loss 0.000105
