In [16]:
%matplotlib inline
import torch
from pytorch.d2l import torch as d2l

### Implementing Linear Regression from Scratch

In [17]:
# the class Module stores the learnable parameters, so we initialize the weights and bias there.
class LinearRegressionScratch(d2l.Module):
    """The linear regression model implemented from scratch"""
    # the weights are intiialized by drawing random numbers from a normal distribution with mean 0 and sigma 0.01
    # the number of inputs, the learning rate, and the standard deviation for the normal distribution
    # this has only one feature
    def __init__(self, num_inputs, lr, sigma=0.01):
        super().__init__()
        self.save_hyperparameters()
        # mean, std, shape, requires gradient or not
        self.w = torch.normal(0, sigma, (num_inputs, 1), requires_grad=True)
        self.b = torch.zeros(1, requires_grad=True)

In [18]:
# forward returns the value of the prediction y given the current parameters. in this case it is just a matrix multiplication added to a broadcasted scalar
@d2l.add_to_class(LinearRegressionScratch)
def forward(self, X):
    return torch.matmul(X, self.w) + self.b

In [19]:
# calculating the loss based on the prediction and the actual label
@d2l.add_to_class(LinearRegressionScratch)
def loss(self, y_hat, y):
    l = (y_hat - y) ** 2 / 2
    return l.mean

In [20]:
# defining the optimization algorithm, the stochastic gradient descent
# at each step, we use a minibatch randomly drawn from the dataset, estimate the gradient of the loss with respect to the parameters, and then udpate the parameters in the direction that reduces the loss
# this class is based on the template of the class DataModule
class SGD(d2l.HyperParameters):
    """Minibatch stochastic gradient descent"""

    def __init__(self, params, lr):
        self.save_hyperparameters()

    # params is [weights, bias], and param.grad calculates their gradient

    def step(self):
        for param in self.params:
            param -= self.lr * param.grad
    
    # sets all the gradients to 0, which must be run before a backpropagation step
    def zero_grad(self):
        for param in self.params:
            if param.grad is not None:
                param.grad.zero_()

In [21]:
# defining the configure_optimizers method in Module, which just returns an instance of the SGD class
@d2l.add_to_class(LinearRegressionScratch)
def configure_optimizers(self):
    return SGD([self.w, self.b], self.lr)

#### Implementing the main training loop
1. in each epoch, we iterate through the entire training dataset, passing once through every example
2. in each iteration, we grab a minibatch of training examples and compute the loss through the model's training step method
3. we then compute the gradients with respect to each aprameter
4. finally we call the optimization algorithm to update the model parameters
```
initialize parameters (w, b)
repeat until done
    compute gradient with parameters (w, b)
    update parameters (w, b)
```
We also pass the validation dataloader once in each epoch to measure the model performance.

In [22]:
@d2l.add_to_class(d2l.Trainer)
def prepare_batch(self, batch):
    return batch

In [23]:
@d2l.add_to_class(d2l.Trainer)
# for each epoch
def fit_epoch(self):
    self.model.train()
    # iterating over each minibatch
    for batch in self.train_dataloader:
        loss = self.model.training_step(self.prepare_batch(batch))
        self.optim.zero_grad()
        with torch.no_grad():
            loss.backward()
            if self.gradient_clip_val > 0:
                self.clip_gradients(self.gradient_clip_val, self.model)
            self.optim.step()
        if self.val_dataloader is None:
            return
        self.model.eval()
        for batch in self.val_dataloader:
            with torch.no_grad():
                self.model.validation_step(self.prepare_batch(batch))
            self.val_batch_idx += 1

In [24]:
# now to generate some artificial data using SyntheticRegressionData
# think there is a type somewhere in my code
model = LinearRegressionScratch(2, lr=0.03)
data = d2l.SyntheticRegressionData(w=torch.tensor([2, -3.4]), b=4.2)
trainer = d2l.Trainer(max_epochs=3)
trainer.fit(model, data)

AttributeError: 'builtin_function_or_method' object has no attribute 'to'