In [70]:
import pandas as pd 
import numpy  as np
import matplotlib.pyplot as plt

import torch 
import statsmodels.api

from sklearn.metrics import *

In [71]:
data:pd.DataFrame = statsmodels.api.datasets.get_rdataset('mtcars').data

In [72]:
X = data.drop( columns=['mpg']).values
y = data['mpg'].values.reshape(-1, 1)

In [73]:
X_ = torch.tensor(X, dtype=torch.float32)
y_ = torch.tensor(y, dtype=torch.float32)

In [74]:
X_.shape[0]//2 

16

In [75]:
X_[:,:6] = (X_[:,:6] - X_[:, :6].mean(dim=0))/X_[:,:6].std(dim=0)

In [76]:
tensors_ = torch.utils.data.TensorDataset(X_, y_)

In [77]:
train = torch.utils.data.DataLoader(
    dataset=tensors_, batch_size=10, shuffle=True
)

## Regularização L2 (ridge)

Uma forma de evitar o overfiting é regularizar o modelo. Ridge reduz o valor de toodos os pesos do modelo, quanto maior o alpha mais se aproxima de 0

$$
loss =  \frac{1}{N}\sum ( y_i - \hat y_i)^2 + \lambda \sum w_i^2
$$

In [78]:
class ModelRidge:
    weight:torch.Tensor = None
    bias  :torch.Tensor = None
    lambda_:float       = None

    def __init__(self, n_features, seed):
        torch.manual_seed(seed = seed)
        self.weight = torch.randn(n_features, dtype=torch.float32)
        self.bias   = torch.zeros(1)

    def modelo(self, x):    
        return x @ self.weight.reshape(-1, 1) + self.bias
    
    def MSE_loss(self, input_:torch.Tensor, output_:torch.Tensor):
        return (( input_ - output_ ).pow(2).mean()) + (self.lambda_ * self.weight.pow(2).sum())
    

    def fit(self, train, learn_rate=0.001,n_epoch=10, lambda_=0.001, verbose=True, lambda_weight=False):
        self.weight.requires_grad_()
        self.bias.requires_grad_()
        self.lambda_ = lambda_

        for epoch in range(n_epoch):
            for enum_, (xbatch, ybatch) in enumerate(train, 1):
                pred = self.modelo(xbatch) 
                
                loss = self.MSE_loss(ybatch, pred)
                loss.backward()

            with torch.no_grad():
                self.weight -= self.weight.grad * learn_rate
                self.bias   -= self.bias.grad * learn_rate

                self.weight.grad.zero_()
                self.bias.grad.zero_()

            if (verbose and lambda_weight):
                if epoch < 10:
                    print(f'loss : {loss.item():.2f}', 'L2 : ', lambda_ * self.weight.pow(2).sum())
            if (verbose and not(lambda_weight)):
                if epoch < 10:
                    print('loss : ', loss.item())
    def predict(self, x):
        with torch.no_grad():
            pred_ = self.modelo(x)
            
        return pred_    

In [79]:
Ridge = ModelRidge(n_features=10, seed=10)

In [80]:
Ridge.fit(train, n_epoch=3, lambda_weight=True, lambda_=0.03)

loss : 763.01 L2 :  tensor(0.3233, grad_fn=<MulBackward0>)
loss : 406.24 L2 :  tensor(0.3452, grad_fn=<MulBackward0>)
loss : 593.59 L2 :  tensor(0.3835, grad_fn=<MulBackward0>)


In [81]:
pred_ = Ridge.predict(X_)

In [82]:
(y_ - pred_).pow(2).mean()

tensor(191.9641)

In [83]:
pred_[[0,1],:], y_[[0, 1],:]

(tensor([[12.7712],
         [12.8664]]),
 tensor([[21.],
         [21.]]))

In [84]:
r2_score(y_, pred_)

-4.4552333725066715

In [85]:
Ridge.weight

tensor([-0.8179, -1.2197, -0.5074, -1.0723,  0.6665, -0.2504, -0.5954, -0.7020,
         0.4350,  2.7706], requires_grad=True)

## Lasso

$$
loss =  \frac{1}{N}\sum ( y_i - \hat y_i)^2 + \lambda \sum |w_i|
$$

In [86]:
class ModelLasso:
    weight:torch.Tensor = None
    bias  :torch.Tensor = None
    lambda_:float       = None

    def __init__(self, n_features, seed):
        torch.manual_seed(seed = seed)
        self.weight = torch.randn(n_features, dtype=torch.float32)
        self.bias   = torch.zeros(1)

    def modelo(self, x):    
        return x @ self.weight.reshape(-1, 1) + self.bias
    
    def MSE_loss(self, input_:torch.Tensor, output_:torch.Tensor):
        return (( input_ - output_ ).pow(2).mean()) + (self.lambda_ * self.weight.abs().sum())
    

    def fit(self, train, learn_rate=0.001,n_epoch=10, lambda_=0.001, verbose=True, lambda_weight=False):
        self.weight.requires_grad_()
        self.bias.requires_grad_()
        self.lambda_ = lambda_

        for epoch in range(n_epoch):
            for enum_, (xbatch, ybatch) in enumerate(train, 1):
                pred = self.modelo(xbatch) 
                
                loss = self.MSE_loss(ybatch, pred)
                loss.backward()

            with torch.no_grad():
                self.weight -= self.weight.grad * learn_rate
                self.bias   -= self.bias.grad * learn_rate

                self.weight.grad.zero_()
                self.bias.grad.zero_()

            if (verbose and lambda_weight):
                if epoch < 10:
                    print(f'loss : {loss.item():.2f}', 'L2 : ', lambda_ * self.weight.pow(2).sum())
            
            if (verbose and not(lambda_weight)):
                if epoch < 10:
                    print('loss : ', loss.item())
    def predict(self, x):
        with torch.no_grad():
            pred_ = self.modelo(x)
            
        return pred_    

In [87]:
lasso = ModelLasso(n_features=10, seed=10)

In [88]:
lasso.fit(train, learn_rate=0.01)

loss :  762.6995239257812
loss :  370.15570068359375
loss :  480.9087829589844
loss :  370.49810791015625
loss :  547.6058349609375
loss :  807.0325317382812
loss :  555.5657958984375
loss :  240.39576721191406
loss :  489.7578125
loss :  188.908935546875


In [89]:
lasso.weight

tensor([-1.8797, -1.7856, -2.2671, -1.6928, -0.7114,  0.9555,  0.3122, -0.7511,
         1.6775, -0.7309], requires_grad=True)

In [90]:
pred_ = Ridge.predict(X_)

In [91]:
(y_ - pred_).mean()

tensor(10.8217)

In [92]:
r2_score(y_, pred_)

-4.4552333725066715