In [333]:
import pandas as pd 
import numpy  as np
import matplotlib.pyplot as plt

import torch 
import statsmodels.api

from sklearn.metrics import *

In [334]:
data:pd.DataFrame = statsmodels.api.datasets.get_rdataset('mtcars').data

In [335]:
X = data.drop( columns=['mpg']).values
y = data['mpg'].values.reshape(-1, 1)

In [336]:
X_ = torch.tensor(X, dtype=torch.float32)
y_ = torch.tensor(y, dtype=torch.float32)

In [337]:
X_.shape[0]//2 

16

In [338]:
X_[:,:] = (X_[:,:] - X_[:, :].mean(dim=0))/X_[:,:].std(dim=0)

In [339]:
tensors_ = torch.utils.data.TensorDataset(X_, y_)

In [340]:
train = torch.utils.data.DataLoader(
    dataset=tensors_, batch_size=32, shuffle=True
)

## Regularização L2 (ridge)

Uma forma de evitar o overfiting é regularizar o modelo. Ridge reduz o valor de toodos os pesos do modelo, quanto maior o alpha mais se aproxima de 0

$$
loss =  \frac{1}{N}\sum ( y_i - \hat y_i)^2 + \lambda \sum w_i^2
$$

In [341]:
class ModelRidge:
    weight:torch.Tensor = None
    bias  :torch.Tensor = None
    lambda_:float       = None

    def __init__(self, n_features, seed):
        torch.manual_seed(seed = seed)
        self.weight = torch.randn(n_features, dtype=torch.float32)
        self.bias   = torch.zeros(1)

    def modelo(self, x):    
        return x @ self.weight.reshape(-1, 1) + self.bias
    
    def MSE_loss(self, input_:torch.Tensor, output_:torch.Tensor):
        return (( input_ - output_ ).pow(2).mean())
    

    def fit(self, train, learn_rate=0.001,n_epoch=10, lambda_=0.001, verbose=True, lambda_weight=False):
        self.weight.requires_grad_()
        self.bias.requires_grad_()
        self.lambda_ = lambda_

        for epoch in range(n_epoch):
            for enum_, (xbatch, ybatch) in enumerate(train, 1):
                pred = self.modelo(xbatch) 
                
                loss = self.MSE_loss(ybatch, pred)
                loss.backward()

            with torch.no_grad():
                self.weight -= self.weight.grad * learn_rate
                self.bias   -= self.bias.grad * learn_rate

                self.weight.grad.zero_()
                self.bias.grad.zero_()

            if (verbose and lambda_weight):
                if epoch < 10:
                    print(f'loss : {loss.item():.2f}', 'L2 : ', lambda_ * self.weight.pow(2).sum())
            if (verbose and not(lambda_weight)):
                if epoch < 10:
                    print('loss : ', loss.item())
    def predict(self, x):
        with torch.no_grad():
            pred_ = self.modelo(x)
            
        return pred_    

In [342]:
Ridge = ModelRidge(n_features=10, seed=10)

In [343]:
Ridge.fit(train, n_epoch=80, lambda_weight=True, lambda_=0.03)

loss : 484.36 L2 :  tensor(0.3169, grad_fn=<MulBackward0>)
loss : 481.17 L2 :  tensor(0.3137, grad_fn=<MulBackward0>)
loss : 478.02 L2 :  tensor(0.3106, grad_fn=<MulBackward0>)
loss : 474.91 L2 :  tensor(0.3076, grad_fn=<MulBackward0>)
loss : 471.84 L2 :  tensor(0.3048, grad_fn=<MulBackward0>)
loss : 468.81 L2 :  tensor(0.3020, grad_fn=<MulBackward0>)
loss : 465.82 L2 :  tensor(0.2994, grad_fn=<MulBackward0>)
loss : 462.86 L2 :  tensor(0.2968, grad_fn=<MulBackward0>)
loss : 459.94 L2 :  tensor(0.2944, grad_fn=<MulBackward0>)
loss : 457.06 L2 :  tensor(0.2921, grad_fn=<MulBackward0>)


In [344]:
pred_ = Ridge.predict(X_)

In [345]:
(y_ - pred_).pow(2).mean()

tensor(314.0753)

In [346]:
pred_[[0,1],:], y_[[0, 1],:]

(tensor([[4.9106],
         [4.9204]]),
 tensor([[21.],
         [21.]]))

In [347]:
r2_score(y_, pred_)

-7.925389579092299

In [348]:
Ridge.weight

tensor([-1.4062, -1.7904, -1.0037, -0.5168,  0.0527, -0.0127, -0.2460, -0.3089,
        -0.6781,  1.3436], requires_grad=True)

## Lasso

$$
loss =  \frac{1}{N}\sum ( y_i - \hat y_i)^2 + \lambda \sum |w_i|
$$

In [349]:
class ModelLasso:
    weight:torch.Tensor = None
    bias  :torch.Tensor = None
    lambda_:float       = None

    def __init__(self, n_features, seed):
        torch.manual_seed(seed = seed)
        self.weight = torch.randn(n_features, dtype=torch.float32)
        self.bias   = torch.zeros(1)

    def modelo(self, x):    
        return x @ self.weight.reshape(-1, 1) + self.bias
    
    def MSE_loss(self, input_:torch.Tensor, output_:torch.Tensor):
        return (( input_ - output_ ).pow(2).mean()) + (self.lambda_ * self.weight.abs().sum())
    

    def fit(self, train, learn_rate=0.001,n_epoch=10, lambda_=0.001, verbose=True, lambda_weight=False):
        self.weight.requires_grad_()
        self.bias.requires_grad_()
        self.lambda_ = lambda_

        for epoch in range(n_epoch):
            for enum_, (xbatch, ybatch) in enumerate(train, 1):
                pred = self.modelo(xbatch) 
                
                loss = self.MSE_loss(ybatch, pred)
                loss.backward()

            with torch.no_grad():
                self.weight -= self.weight.grad * learn_rate
                self.bias   -= self.bias.grad * learn_rate

            self.weight.grad.zero_()
            self.bias.grad.zero_()

            if (verbose and lambda_weight):
                if epoch < 10:
                    print(f'loss : {loss.item():.2f}', 'L2 : ', lambda_ * self.weight.pow(2).sum())
            
            if (verbose and not(lambda_weight)):
                if epoch < 10:
                    print('loss : ', loss.item())
                else: 
                    if epoch % 10 == 0: print(loss.item())
    def predict(self, x):
        with torch.no_grad():
            pred_ = self.modelo(x)
            
        return pred_    

In [350]:
lasso = ModelLasso(n_features=10, seed=10)

In [351]:
lasso.fit(train, learn_rate=0.01, n_epoch=10)

loss :  484.37127685546875
loss :  453.3959655761719
loss :  426.1936340332031
loss :  402.0782470703125
loss :  380.5040283203125
loss :  361.0361022949219
loss :  343.32720947265625
loss :  327.0994567871094
loss :  312.1297607421875
loss :  298.23870849609375


In [352]:
lasso.weight

tensor([-1.5404, -1.9187, -1.1211, -0.3931, -0.1016,  0.0378, -0.1440, -0.1907,
        -0.5887,  1.2396], requires_grad=True)

In [353]:
pred_ = Ridge.predict(X_)

In [354]:
pred_

tensor([[ 4.9106],
        [ 4.9204],
        [ 4.5187],
        [ 2.8529],
        [ 0.1346],
        [ 3.7198],
        [ 0.7309],
        [ 6.0560],
        [ 5.4131],
        [ 4.7533],
        [ 4.7490],
        [ 2.2182],
        [ 2.1985],
        [ 2.1984],
        [ 0.0445],
        [ 0.0144],
        [-0.1400],
        [ 5.1023],
        [ 5.3361],
        [ 5.0687],
        [ 5.9663],
        [ 1.4897],
        [ 1.3074],
        [ 0.3904],
        [-0.3539],
        [ 5.0878],
        [ 4.2144],
        [ 4.3710],
        [-2.8627],
        [ 5.2059],
        [ 0.8257],
        [ 4.7020]])

In [355]:
(y_ - pred_).mean()

tensor(17.1174)

In [356]:
pred_[0], y_[0]

(tensor([4.9106]), tensor([21.]))

In [357]:
r2_score(y_, pred_)

-7.925389579092299

No keras

In [358]:
import tensorflow as tf
import keras 

In [359]:
def modelo(x, weights, bias):
    return tf.matmul(x, weights) + bias

In [360]:
X = X.astype(np.float32)
y = y.astype(np.float32)

In [361]:
X =( X - X.mean(0) ) / X.std(0)

In [362]:
X.shape

(32, 10)

In [363]:
data = tf.data.Dataset.from_tensor_slices((X, y))
train = data\
            .batch( 32 )\
            .shuffle(10,)

In [364]:
optim = keras.optimizers.SGD(learning_rate = 0.01, nesterov=True)
loss_ = keras.losses.mean_squared_error

tf.random.set_seed(1)

weights = tf.random.normal((10, 1), 0, 1, dtype=tf.float32)
weights = tf.Variable(weights)

bias    = tf.zeros(1, dtype=tf.float32)
bias    = tf.Variable(bias)

for epoch in range(100):
    for xbatch, ybatch in train:
        with tf.GradientTape() as tape:
            pred = modelo(xbatch, weights, bias)
            
            loss = tf.reduce_mean(loss_(ybatch, pred))
        gradient = tape.gradient(loss, [weights, bias])
        optim.apply_gradients(
            zip(gradient, [weights, bias])
            )
       
    print(loss.numpy())

447.789
423.4589
401.5676
381.69855
363.52106
346.77194
331.24106
316.75983
303.1925
290.42914
278.38013
266.97192
256.14374
245.84485
236.03255
226.67055
217.72766
209.17685
200.99442
193.15929
185.65274
178.4578
171.55904
164.94229
158.59448
152.50348
146.65793
141.04721
135.66129
130.49069
125.52644
120.76003
116.18333
111.78862
107.56851
103.51596
99.62422
95.88685
92.29765
88.85073
85.54038
82.361176
79.30789
76.37552
73.55926
70.85449
68.256805
65.76193
63.365807
61.06449
58.85427
56.73149
54.692703
52.73458
50.85392
49.04767
47.31285
45.646667
44.04637
42.50937
41.033157
39.61532
38.253548
36.94562
35.6894
34.48285
33.324
32.210953
31.141914
30.11512
29.128902
28.181664
27.271862
26.397999
25.558664
24.752491
23.978157
23.23442
22.520054
21.833897
21.174837
20.541805
19.933762
19.349718
18.788727
18.249868
17.732273
17.235107
16.757545
16.298828
15.858204
15.434954
15.028397
14.637861
14.262725
13.902363
13.556202
13.223678
12.904252
12.597412


In [365]:
pred = modelo(X, weights, bias)

In [366]:
tf.reduce_mean(tf.abs(pred - y)).numpy()

np.float32(2.918015)

In [367]:
r2_score(y, pred.numpy())

0.6503836512565613