In [None]:
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

###  actual model parameters

In [None]:
# Ground-truth parameters of the linear model; used below to generate the data
# and later to measure how close the learned parameters are.
true_w = torch.tensor([2, -3.4])  # true weight vector (two features)
true_b = 4.2  # true bias

### Generating dataset

In [None]:
# Ask d2l's synthetic-data helper for 1000 (features, labels) examples built from true_w / true_b.
features, labels = d2l.synthetic_data(true_w, true_b, 1000)   # refer to Note #3 for synthetic_data method

In [None]:
# Display both shapes as a quick sanity check on the generated data.
features.shape, labels.shape

### Selecting the dataset in batches for minibatch stochastic gradient descent using pytorch data util

In [None]:
def load_array(data_arrays, batch_size, is_train=True): #@save
    """Wrap tensors in a PyTorch ``DataLoader`` that yields mini-batches.

    Args:
        data_arrays: sequence of tensors sharing the same first dimension
            (e.g. ``(features, labels)``); unpacked into a ``TensorDataset``.
        batch_size: number of examples per mini-batch.
        is_train: when True the examples are reshuffled on every pass over
            the loader; when False they are served in their original order
            (useful for evaluation).

    Returns:
        A ``torch.utils.data.DataLoader`` over the given tensors.
    """
    dataset = data.TensorDataset(*data_arrays)
    # Honour is_train: previously shuffling was always on, so the flag had no effect.
    return data.DataLoader(dataset, batch_size, shuffle=is_train)

#### testing the batch_size loading of data

In [None]:
batch_size = 5  # small batch so a single mini-batch is easy to inspect below
batch_data_loader = load_array([features, labels], batch_size)  # DataLoader over the (features, labels) pairs

In [None]:
type(batch_data_loader)  # load_array returns a data.DataLoader

In [None]:
# iter() gives a single-pass iterator over the loader: once it has yielded
# every mini-batch it stays empty, unlike the loader itself which can be looped again.
data_iter = iter(batch_data_loader)

In [None]:
type(data_iter)    # iterator object; each step yields one mini-batch of up to batch_size examples

In [None]:
next(data_iter)  # fetch one mini-batch for inspection; this advances (consumes from) data_iter

### Defining the linear model using PyTorch's available APIs

#### using "Sequential" class from NeuralNet ("nn") module.
#### Sequential class defines container for several layers to be chained together.
#### A Sequential instance passes the given input through the first layer, then feeds each layer's output to the next layer as its input, and so on through all the chained layers.
#### we need only one layer in this model. 
#### We also need the layer to be fully connected. ( every input connected to the output)

#### The "Linear" class from the nn module can be used to create a fully connected layer; its arguments indicate the dimensions of the input and of the output.

In [None]:
from torch import nn # Neural Networks

# A single fully connected layer wrapped in a Sequential container.
net = nn.Sequential(nn.Linear(2, 1))  # 2 inputs (x1, x2) and one output

### selecting initial model parameters 

#### from the neuralnet object, we can directly set the weight and bias params, using the attributes

In [None]:
# net[0] is the first (and only) layer in the network.
net[0].weight.data.normal_(0, 0.1)   # in place: sample the weights from a normal with mean 0, std 0.1
net[0].bias.data.fill_(0)   # in place: set the bias to zero

### defining the Loss function

#### using Mean Squared Error loss from the nn library, 
#### note: by default, it uses the reduction method 'mean', i.e. the summed loss is divided by the number of elements in the input.
#### to change this, pass the keyword argument 'reduction', e.g. nn.MSELoss(reduction='sum')

In [None]:
loss = nn.MSELoss()  # mean squared error with the default reduction; read as a global by train()

### defining the model optimizing algorithm

#### we can use the 'optim' module of pytorch to get the Stochastic Gradient Descent algorithm
#### it can take key value pairs of hyper parameters.

In [None]:
# SGD optimizer over all of net's parameters; lr is the learning rate (step size).
trainer = torch.optim.SGD(net.parameters(), lr=0.03) # learning rate

In [None]:
type(trainer)  # show the optimizer class created by torch.optim.SGD

#### the step method from SGD can be used for updating model params for each batch

In [None]:
help(trainer.step)  # print the documentation of the optimizer's step method

In [None]:
num_epochs = 3  # epoch count for training (same value as the default of train(num_epochs=3))

In [None]:
def train(num_epochs=3):
    """Train the notebook-level ``net`` with minibatch SGD, printing the loss per epoch.

    Relies on objects defined in other cells: ``batch_data_loader`` (mini-batches),
    ``net`` (model), ``loss`` (criterion), ``trainer`` (optimizer) and the full
    ``features`` / ``labels`` tensors used for the per-epoch loss report.

    Args:
        num_epochs: number of full passes over ``batch_data_loader``.
    """
    for epoch in range(num_epochs):
        # Loop over the DataLoader itself so every epoch gets a fresh pass over
        # all mini-batches. A pre-built iterator (iter(loader)) is single-pass:
        # it is empty after the first epoch and would silently skip training.
        for X, y in batch_data_loader:
            batch_loss = loss(net(X), y)
            trainer.zero_grad()      # clear gradients left over from the previous step
            batch_loss.backward()    # back-propagate through the current mini-batch
            trainer.step()           # apply the SGD update to net's parameters
        # Reporting only: no gradient graph is needed for the full-dataset loss.
        with torch.no_grad():
            training_loss = loss(net(features), labels)

        print(f'epoch {epoch + 1}, loss {training_loss:f}')

In [None]:
# Pass the configured epoch count explicitly instead of relying on train()'s default.
train(num_epochs)

#### the final parameters are stored in the neural network object

In [None]:
def estimate_error():
    """Print the gap between the true parameters and those learned by ``net``.

    Reads the notebook-level ``net``, ``true_w`` and ``true_b``; the learned
    values are taken from the first layer of ``net``.
    """
    layer = net[0]
    # Reshape the learned weight to true_w's shape so the subtraction is elementwise.
    w_gap = true_w - layer.weight.data.reshape(true_w.shape)
    b_gap = true_b - layer.bias.data
    print('error in estimating w:', w_gap)
    print('error in estimating b:', b_gap)

In [None]:
estimate_error()  # compare the parameters currently held by net with true_w / true_b

### Using the HuberLoss method

#### HuberLoss - uses a squared term if the absolute element-wise error falls below delta and a delta-scaled L1 term otherwise.

In [None]:
# Rebind the global `loss` so the next train() call uses the Huber loss instead of MSE.
# NOTE(review): `net` and `trainer` are not re-created in this section, so any further
# training resumes from the weights left by the earlier run rather than from a fresh
# initialisation — confirm this is the intended comparison.
loss = nn.HuberLoss()

In [None]:
# Pass the configured epoch count explicitly instead of relying on train()'s default.
train(num_epochs)

In [None]:
estimate_error()  # compare the parameters currently held by net with true_w / true_b