# Linear Regression ([Book](https://d2l.ai/chapter_linear-regression/linear-regression.html#linear-regression))

## Assumptions

- Target value y (better said its conditional mean $ E[Y | X = x]$ ) is a linear combination of features **x** of sample x
- Observation noise, which causes deviation of y from its expected value, follows a gaussian
- Notation: superscript for ith sample, subscript for ith feature of a sample

## Loss Function (or how to measure the performance of our model)

- It quantifies the distance between real and predicted values
- The loss function over the entire model, we call it L, is the average of losses over every single example
- Our goal is to find the minimum of L
- Detail: in case of linearity, L is a function of the weights $w$ and the bias $b$

## Gradient descent (or how to iteratively reducing the error)

The goal is to find the optimal $\hat w$ and $\hat b$ and the steps of the algorithm are
1. We select a batch of training examples of dimension $B$
2. We evaluate the gradient (over $w$ and $b$) of the loss of each example in the batch
3. We take the mean of all gradient evaluations
4. We update the parameters $w$ and $b$ in direction of the negative gradient with a step size $ \eta $

## Probabilistic Interpretation

- SGD also obtained from considering as objective function not the loss, but the likelihood


## Code example : Linear Regression On Synthetic Data

## Dataset

In [1]:
import sys
sys.path.append('/home/flavio/code/machine-deep-learning')
from core.datamodule import Dataset
import torch
import warnings
warnings.filterwarnings("ignore")

DATA_PARAMS = {
    'use_weighted_sampler': False,
}

class SyntheticRegressionData(Dataset): 
    """Synthetic data generator for linear regression."""
    def __init__(self, w, b, noise=0.01, num_train=900, num_test=1000, num_val=100):
        X_train = torch.randn(num_train, len(w)) #design matrix X (of features)
        train_noise = torch.randn(num_train, 1) * noise
        y_train = torch.matmul(X_train, w.reshape((-1, 1))) + b + train_noise #vector of labels
        train_data = torch.utils.data.TensorDataset(*[X_train, y_train])
        
        X_test = torch.randn(num_test, len(w)) #design matrix X (of features)
        test_noise = torch.randn(num_test, 1) * noise
        y_test = torch.matmul(X_test, w.reshape((-1, 1))) + b + test_noise #vector of labels
        test_data = torch.utils.data.TensorDataset(*[X_test, y_test])
        
        X_val = torch.randn(num_val, len(w)) #design matrix X (of features)
        val_noise = torch.randn(num_val, 1) * noise
        y_val = torch.matmul(X_val, w.reshape((-1, 1))) + b + val_noise #vector of labels
        val_data = torch.utils.data.TensorDataset(*[X_val, y_val])
        self.w = w
        self.b = b
        super().__init__(load=False,params=DATA_PARAMS,train_data=train_data,test_data=test_data,val_data=val_data)
       
w = torch.rand(3) 
b = torch.rand(1) 
batch_size = 5
dataset = SyntheticRegressionData(w=w,b=b)
#Extract next minibatch
X, y = next(iter(dataset.val_dataloader(batch_size)))
print('X shape:', X.shape, '\ny shape:', y.shape)

X shape: torch.Size([5, 3]) 
y shape: torch.Size([5, 1])


## Model

In [2]:
import torch
from torch import nn
from core.trainer import Trainer
from core.utils import *
from core.model import Model

lr = 0.005 # since the model is implemented from scratch, the learning rate is needed here

class LinearRegressionScratch(Model): 
    """The linear regression model implemented from scratch."""
    def __init__(self, input_dim):
        super().__init__()
        self.save_parameters()
        self.w = torch.zeros((input_dim, 1), requires_grad= True) 
        self.b = torch.zeros((1,1), requires_grad= True)

    #That's basically all our model amounts to when computing a label
    def forward(self, X):
        return torch.matmul(X, self.w) + self.b
    
    # The loss function is computed over all the samples in considered minibatch
    def loss_fn(self, y_hat, y) -> nn.Module:
        return torch.mean(torch.pow(y_hat - y, 2) / 2)

    def parameters(self):
        return (self.w, self.b)
    
    def train_step(self, batch) -> None:
        #Forward Propagation
        X = torch.tensor(*batch[:-1]) #features
        y_hat = self(X) #extraction of X and forward propagation
        y = batch[-1] #labels
        loss = self.loss_fn(y_hat, y)
        
        #Backward Propagation
        error = (y_hat - y)
        n = len(self.w) #number of features
        m = len(batch[0]) #number of examples
        
        dj_db = (1 / m) * error.sum()
        
        dj_dw = torch.zeros((n,1))
        for k in range(n):
            dj_dw[k] = (1 / m) * ((error * X[:,k]).sum()).item()
        
        self.w = self.w - lr * dj_dw
        self.b = self.b - lr * dj_db
        return loss
    
    def eval_step(self,batch):
        inputs = torch.tensor(*batch[:-1]) #one sample on each row -> X.shape = (m, d_in)
        labels = batch[-1].type(torch.long) # labels -> shape = (m)
        logits = self(inputs)
        loss = self.loss_fn(logits, labels)
        return loss
    
class LinearRegressionScratchTrainer(Trainer):
    def fit(self, model, data):
        #stuff for dataset
        self.batch_size = self.params['batch_size']
        train_dataloader = data.train_dataloader(self.batch_size)
        val_dataloader = data.val_dataloader(self.batch_size)
        max_epochs = self.params['max_epochs']   
        for epoch in range(1, max_epochs + 1):
            self.fit_epoch(epoch, model,train_dataloader, val_dataloader)  
            
    def fit_epoch(self, epoch, model, train_dataloader, val_dataloader):
        for batch in train_dataloader:
            # Forward propagation
            loss = model.train_step(batch)
            
        epoch_loss = 0           
        for batch in val_dataloader:
            #Forward propagation
            loss = model.eval_step(batch)
            #Logging
            epoch_loss += loss.item()
            
        epoch_loss /= len(val_dataloader) 
        print(f"EPOCH {epoch} LOSS: {epoch_loss:.3f}")
        

TRAIN_PARAMS = {
    'max_epochs': 15,
    'batch_size': 32
}
        
trainer = LinearRegressionScratchTrainer(TRAIN_PARAMS)
model = LinearRegressionScratch(3)
trainer.fit(model,dataset)
w,b = model.parameters()
print(f"w = {w}")
print(f"b = {b}")
print(f'error in estimating w: {dataset.w - model.w.reshape(dataset.w.shape)}')

EPOCH 1 LOSS: 0.226
EPOCH 2 LOSS: 0.166
EPOCH 3 LOSS: 0.104
EPOCH 4 LOSS: 0.096
EPOCH 5 LOSS: 0.086
EPOCH 6 LOSS: 0.072
EPOCH 7 LOSS: 0.087
EPOCH 8 LOSS: 0.084
EPOCH 9 LOSS: 0.081
EPOCH 10 LOSS: 0.106
EPOCH 11 LOSS: 0.129
EPOCH 12 LOSS: 0.108
EPOCH 13 LOSS: 0.131
EPOCH 14 LOSS: 0.130
EPOCH 15 LOSS: 0.128
w = tensor([[0.7507],
        [0.2550],
        [0.8899]], grad_fn=<SubBackward0>)
b = tensor([[0.5027]], grad_fn=<SubBackward0>)
error in estimating w: tensor([-0.0252,  0.1276,  0.0032], grad_fn=<SubBackward0>)


In [3]:
class LinearRegression(Model):
    """The linear regression model implemented with high-level APIs."""
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Linear(input_dim, 1, bias = True)
        self.net.weight.data.normal_(0, 0.01)
        self.net.bias.data.fill_(0)

    def forward(self, X):
        return self.net(X)

    def loss_fn(self, y_hat, y):
        fn = nn.MSELoss()
        return fn(y_hat, y)
    
    def train_step(self, batch) -> None:
        inputs = torch.tensor(*batch[:-1]) # one sample on each row -> X.shape = (m, d_in)
        logits = self(inputs) # inference
        labels = batch[-1] # labels -> shape = (m)
        loss = self.loss_fn(logits, labels)
        return loss
    
    def eval_step(self,batch):
        with torch.no_grad():
            loss = self.train_step(batch)
        return loss
    
class LinearRegressionTrainer(Trainer):
    def fit(self, model, data):
        #stuff for dataset
        self.batch_size = self.params['batch_size']
        train_dataloader = data.train_dataloader(self.batch_size)
        val_dataloader = data.val_dataloader(self.batch_size)
        self.lr = self.params['learning_rate']
        max_epochs = self.params['max_epochs']  
        optim = self.params['optim_function'](model.parameters(), lr=self.lr, weight_decay=self.params['weight_decay']) 
        for epoch in range(1, max_epochs + 1):
            self.fit_epoch(epoch, model,optim,train_dataloader, val_dataloader)  
            
    def fit_epoch(self, epoch, model, optim, train_dataloader, val_dataloader):
        
        # Training Mode
        model.train() 
        for batch in train_dataloader:
            # Forward propagation
            loss = model.train_step(batch)
            # Backward Propagation
            optim.zero_grad()
            with torch.no_grad():
                loss.backward() #here we calculate the chained derivatives (every parameters will have .grad changed)
                optim.step() 
                
        # Evaluation Mode        
        model.eval()    
        epoch_loss = 0           
        for batch in val_dataloader:
            #Forward propagation
            loss = model.eval_step(batch)
            #Logging
            epoch_loss += loss.item()
            
        epoch_loss /= len(val_dataloader) 
        print(f"EPOCH {epoch} LOSS: {epoch_loss:.3f}")
    
TRAIN_PARAMS = {
    'max_epochs': 10,
    'learning_rate': 0.005,
    'batch_size': 32,
    'optim_function': torch.optim.SGD,
    'weight_decay': 0.001
}
        
trainer = LinearRegressionTrainer(TRAIN_PARAMS)
model = LinearRegression(3)
trainer.fit(model,dataset)
w,b = model.parameters()
print(f"w = {w}")
print(f"b = {b}")
print(f'error in estimating w: {dataset.w - model.net.weight.reshape(dataset.w.shape)}')

EPOCH 1 LOSS: 0.956
EPOCH 2 LOSS: 0.468
EPOCH 3 LOSS: 0.282
EPOCH 4 LOSS: 0.168
EPOCH 5 LOSS: 0.092
EPOCH 6 LOSS: 0.041
EPOCH 7 LOSS: 0.023
EPOCH 8 LOSS: 0.016
EPOCH 9 LOSS: 0.008
EPOCH 10 LOSS: 0.004
w = Parameter containing:
tensor([[0.6987, 0.3571, 0.8458]], requires_grad=True)
b = Parameter containing:
tensor([0.5377], requires_grad=True)
error in estimating w: tensor([0.0269, 0.0256, 0.0473], grad_fn=<SubBackward0>)
