# Linear Regression ([Book](https://d2l.ai/chapter_linear-regression/linear-regression.html#linear-regression))

## Assumptions

- The target value $y$ (more precisely, its conditional mean $E[Y \mid X = \mathbf{x}]$) is a linear combination of the features $\mathbf{x}$ of a sample (plus a bias term)
- The observation noise, which causes $y$ to deviate from its expected value, follows a Gaussian distribution
- Notation: a superscript indexes the sample ($\mathbf{x}^{(i)}$ is the $i$-th sample), a subscript indexes the feature ($x_j$ is the $j$-th feature of a sample)

## Loss Function (or how to measure the performance of our model)

- It quantifies the distance between real and predicted values
- The loss over the entire training set, which we call $L$, is the average of the losses of the individual examples
- Our goal is to find the minimum of L
- Detail: for a linear model, $L$ is a function of the weights $w$ and the bias $b$

## Gradient descent (or how to iteratively reduce the error)

The goal is to find the optimal $\hat w$ and $\hat b$. Each iteration of the algorithm performs the following steps:
1. We select a minibatch of $B$ training examples
2. We evaluate the gradient (with respect to $w$ and $b$) of the loss of each example in the minibatch
3. We take the mean of all gradient evaluations
4. We update the parameters $w$ and $b$ in the direction of the negative gradient with a step size $\eta$

## Probabilistic Interpretation

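- The same objective can also be obtained by taking the likelihood, rather than the loss, as the objective function: under the Gaussian noise assumption, minimizing the mean squared error is equivalent to maximizing the likelihood of the observed data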


## Code example : Linear Regression On Synthetic Data

## Dataset

In [1]:
import sys
sys.path.append('/home/flavio/code/main')
from core.datamodule import Dataset
import torch
import warnings
warnings.filterwarnings("ignore")

# Options handed to the Dataset base class through its `params` argument.
# NOTE(review): the meaning of 'use_weighted_sampler' is defined by core.datamodule.Dataset — confirm there.
DATA_PARAMS = {
    'use_weighted_sampler': False,
}

class SyntheticRegressionData(Dataset):
    """Synthetic linear-regression dataset with train, test and validation splits.

    Every split draws standard-normal features ``X`` and computes the labels
    as ``y = X @ w + b + noise * eps``, where ``eps`` is standard-normal.
    """

    def __init__(self, w, b, noise=0.01, num_train=900, num_test=1000, num_val=100):
        num_features = len(w)
        w_column = w.reshape((-1, 1))  # column vector so that X @ w is (num_samples, 1)

        def make_split(num_samples):
            # Features are drawn before the noise so the random numbers are
            # consumed in the same order for every split.
            features = torch.randn(num_samples, num_features)  # design matrix
            perturbation = torch.randn(num_samples, 1) * noise
            labels = torch.matmul(features, w_column) + b + perturbation
            return torch.utils.data.TensorDataset(features, labels)

        # The splits are generated in the order train, test, val.
        train_data = make_split(num_train)
        test_data = make_split(num_test)
        val_data = make_split(num_val)

        # Keep the ground-truth parameters so estimates can be compared later.
        self.w = w
        self.b = b
        super().__init__(
            load=False,
            params=DATA_PARAMS,
            train_data=train_data,
            test_data=test_data,
            val_data=val_data,
        )
       
w = torch.rand(3)  # ground-truth weights, drawn uniformly from [0, 1)
b = torch.rand(1)  # ground-truth bias, drawn uniformly from [0, 1)
batch_size = 5
dataset = SyntheticRegressionData(w=w,b=b)
# Sanity check: take the first minibatch of the validation dataloader and print its shapes
X, y = next(iter(dataset.val_dataloader(batch_size)))
print('X shape:', X.shape, '\ny shape:', y.shape)

X shape: torch.Size([5, 3]) 
y shape: torch.Size([5, 1])


In [2]:
import numpy as np
# class TXTDataLoader(Dataset):
#     def __init__(self,txtfile):
#         super().__init__()
#         data = np.loadtxt(txtfile, delimiter=',')
#         X = data[:,:-1]
#         y = data[:,-1]
#         self.X = torch.tensor(X).type(torch.float32)
#         self.y = torch.tensor(y).type(torch.float32)
#         self.num_train = 70
#         self.num_test = 30
#         self.batch_size = 30
# data = TXTDataLoader("data/lin_reg.txt")

## Model

In [3]:
import torch
from torch import nn
from core.trainer import Trainer
from core.utils import *
from core.model import Model

class LinearRegressionScratch(Model):
    """Linear regression written by hand on top of raw tensors.

    The model holds a weight column ``w`` of shape ``(input_dim, 1)`` and a
    bias ``b`` of shape ``(1, 1)``; both start at zero and are tracked by
    autograd.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.save_parameters()
        self.w = torch.zeros(input_dim, 1, requires_grad=True)
        self.b = torch.zeros(1, 1, requires_grad=True)

    def forward(self, X):
        """Return the predictions ``X @ w + b`` for the feature matrix ``X``."""
        weighted_sum = X @ self.w
        return weighted_sum + self.b

    def parameters(self):
        """Return the trainable tensors as the tuple ``(w, b)``."""
        return self.w, self.b

# Half mean squared error, averaged over all samples of the considered minibatch
def loss(y_hat, y) -> torch.Tensor:
    """Return the mean of ``(y_hat - y) ** 2 / 2`` as a scalar tensor.

    Args:
        y_hat: predicted values.
        y: target values; should have the same shape as ``y_hat`` (a
            mismatched shape would be broadcast silently by PyTorch).

    Returns:
        A 0-dimensional tensor holding the averaged loss.
    """
    return torch.mean(torch.pow(y_hat - y, 2) / 2)

class LinearRegressionTrainer(Trainer):
    """Trainer whose training step computes the linear-regression gradients by hand."""

    def train_step(self, model, batch) -> torch.Tensor:
        """Run one forward pass on ``batch`` and one manual gradient-descent update.

        Args:
            model: callable model exposing ``w`` (weight column) and ``b`` (bias).
            batch: sequence whose first element is the feature matrix and
                whose last element holds the labels.

        Returns:
            The value returned by ``self.loss_function`` for this batch
            (expected to be a scalar tensor).
        """
        # Forward propagation
        # as_tensor reuses an existing tensor instead of copying it
        X = torch.as_tensor(batch[0])  # features, one row per example
        y = batch[-1]  # labels
        y_hat = model(X)
        loss = self.loss_function(y_hat, y)

        # Backward propagation: analytic gradients of the half mean squared
        # error. No autograd graph is needed for this manual computation.
        with torch.no_grad():
            # assumes y_hat and y are both (m, 1) columns — TODO confirm
            error = y_hat - y
            # Use the real number of examples: the last batch of an epoch can
            # be smaller than self.batch_size.
            m = X.shape[0]

            dj_db = error.sum() / m
            # X^T @ error sums error_i * x_ik over the examples for every
            # feature k. Multiplying the (m, 1) error by the (m,) column
            # X[:, k] instead would broadcast to an (m, m) matrix and give a
            # wrong gradient.
            dj_dw = torch.matmul(X.T, error) / m

            # NOTE(review): the updated values are stored on the trainer
            # (self.w / self.b), not written back to the model. Whether the
            # model parameters are updated elsewhere (e.g. by an optimizer in
            # the base Trainer) cannot be determined from this file — confirm
            # before changing the assignment target.
            self.w = model.w - self.lr * dj_dw
            self.b = model.b - self.lr * dj_db
        return loss

# Hyperparameters passed to the LinearRegressionTrainer constructor.
TRAIN_PARAMS = {
    'max_epochs': 15,
    'learning_rate': 0.005,
    'batch_size': 128,
    'patience': 5,
    # NOTE(review): 'accuracy' is a classification metric; the logged SCORE stays at 0.000 for this regression task — confirm which metrics the Trainer supports
    'metrics': 'accuracy',
    'optim_function': torch.optim.Adam,
    'weight_decay': 0.001,
    'loss_function': loss
}
        
trainer = LinearRegressionTrainer(TRAIN_PARAMS)
model = LinearRegressionScratch(3)  # 3 input features, matching the 3 ground-truth weights
trainer.fit(model,dataset)
# NOTE: this rebinds the names w and b (previously the ground-truth values) to the learned parameters; the ground truth stays available as dataset.w / dataset.b
w,b = model.parameters()
print(f"w = {w}")
print(f"b = {b}")
# model.w is a (3, 1) column, so it is reshaped to dataset.w's shape before subtracting
print(f'error in estimating w: {dataset.w - model.w.reshape(dataset.w.shape)}')

EPOCH 1 SCORE: 0.000 LOSS: 0.351
EPOCH 2 SCORE: 0.000 LOSS: 0.304
EPOCH 3 SCORE: 0.000 LOSS: 0.263
EPOCH 4 SCORE: 0.000 LOSS: 0.227
EPOCH 5 SCORE: 0.000 LOSS: 0.198
EPOCH 6 SCORE: 0.000 LOSS: 0.173
EPOCH 7 SCORE: 0.000 LOSS: 0.151
EPOCH 8 SCORE: 0.000 LOSS: 0.134
EPOCH 9 SCORE: 0.000 LOSS: 0.119
EPOCH 10 SCORE: 0.000 LOSS: 0.107
EPOCH 11 SCORE: 0.000 LOSS: 0.098
EPOCH 12 SCORE: 0.000 LOSS: 0.091
EPOCH 13 SCORE: 0.000 LOSS: 0.086
EPOCH 14 SCORE: 0.000 LOSS: 0.081
EPOCH 15 SCORE: 0.000 LOSS: 0.079
w = tensor([[0.2979],
        [0.3044],
        [0.5113]], requires_grad=True)
b = tensor([[0.4514]], requires_grad=True)
error in estimating w: tensor([0.0424, 0.0069, 0.4195], grad_fn=<SubBackward0>)


In [4]:
class LinearRegression(Model):
    """Linear regression assembled from PyTorch's high-level building blocks."""

    def __init__(self, input_dim, lr):
        super().__init__()
        self.save_hyperparameters()
        linear = nn.Linear(in_features=input_dim, out_features=1, bias=True)
        # Weights start as N(0, 0.01^2) samples, the bias starts at zero.
        linear.weight.data.normal_(mean=0, std=0.01)
        linear.bias.data.zero_()
        self.net = linear

    def forward(self, X):
        """Return the output of the single linear layer for ``X``."""
        return self.net(X)

    def loss(self, y_hat, y):
        """Return the mean squared error between ``y_hat`` and ``y``."""
        return nn.MSELoss()(y_hat, y)

    def configure_optimizers(self):
        """Return an SGD optimizer over the model parameters using ``self.lr``."""
        return torch.optim.SGD(self.parameters(), lr=self.lr)

    def get_w_b(self):
        """Return the raw weight and bias data of the layer as ``(weight, bias)``."""
        layer = self.net
        return layer.weight.data, layer.bias.data

In [5]:
#Generate design matrix and labels with a priori defined weights and bias
# data = SyntheticRegressionData(w, b = 1)