## Regresión ridge

In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import Ridge, LinearRegression, Lasso
from numpy.linalg import norm
from sklearn.metrics import mean_squared_error

In [120]:
# Load the "Boston house prices" dataset as a feature matrix and a target vector.
# NOTE(review): datasets.load_boston was deprecated in scikit-learn 1.0 and
# removed in 1.2, so this cell only runs on older scikit-learn releases.
boston_X, boston_y = datasets.load_boston(return_X_y=True)

In [3]:
datasets.load_boston().feature_names

array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
       'TAX', 'PTRATIO', 'B', 'LSTAT'], dtype='<U7')

In [4]:
boston_X.shape

(506, 13)

In [5]:
pd.DataFrame(boston_X)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48


https://scikit-learn.org/stable/datasets/toy_dataset.html#boston-dataset

In [121]:
# Hold out the last 30 observations for testing; everything before them is
# used for training. Features and targets are sliced with the same boundary
# so the rows stay aligned.
boston_X_train, boston_X_test = boston_X[:-30], boston_X[-30:]
boston_y_train, boston_y_test = boston_y[:-30], boston_y[-30:]

#### Regresión ridge

In [7]:
class RidgeReg():
    """Ridge (L2-penalised) linear regression solved in closed form.

    After ``fit``, ``coef_`` holds the unpenalised intercept (B0) at index 0
    followed by one coefficient per column of the training matrix.
    """

    def __init__(self, alpha=1.0):
        """Store the penalty strength.

        Parameters
        ----------
        alpha : float, default 1.0
            Penalty strength; the lambda hyperparameter is also known as alpha.
        """
        self.alpha = alpha
        self.coef_ = []

    @staticmethod
    def _with_bias_column(X):
        """Return ``X`` with a leading column of ones (the intercept term)."""
        X = np.asarray(X)
        return np.c_[np.ones((X.shape[0], 1)), X]

    def fit(self, X, y):
        """Estimate the coefficients from the penalised normal equations.

        Solves ``(X'X + A) b = X'y`` where ``X`` has been augmented with a
        column of ones and ``A`` is ``alpha * I`` with its first diagonal
        entry zeroed.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like with n_samples rows

        Raises
        ------
        numpy.linalg.LinAlgError
            If the penalised Gram matrix is singular.
        """
        design = self._with_bias_column(X)

        penalty = self.alpha * np.identity(design.shape[1])
        # The first entry of the penalty matrix is set to zero because the
        # intercept B0 is not penalised.
        penalty[0, 0] = 0

        gram = design.T.dot(design) + penalty
        # Solve the linear system directly instead of forming an explicit
        # inverse: same solution, cheaper and numerically more stable.
        self.coef_ = np.linalg.solve(gram, design.T.dot(y))

    def predict(self, X):
        """Return predictions for ``X`` and cache them in ``self.predictions``."""
        design = self._with_bias_column(X)
        self.predictions = design.dot(self.coef_)
        return self.predictions


In [8]:
# Create an instance of the hand-written ridge model
ridge_model_0 = RidgeReg(alpha=1.0)

# Train the model on the training split
ridge_model_0.fit(boston_X_train, boston_y_train)

In [9]:
# Resulting coefficients: row 0 is the intercept B0 (RidgeReg prepends a
# column of ones), the remaining rows are the per-feature coefficients.
pd.DataFrame(ridge_model_0.coef_)

Unnamed: 0,0
0,31.259196
1,-0.100746
2,0.050296
3,-0.002394
4,2.412094
5,-10.336344
6,3.840713
7,0.002012
8,-1.357286
9,0.349599


In [10]:
ridge_model_0.predict(boston_X_test)

array([20.1801648 , 10.95348286, 18.88827307, 21.72954594, 22.90321109,
       26.66457592, 28.18615492, 20.54039576, 19.13002724, 22.07609688,
       19.33485985, 21.04403486, 10.06290087,  6.18795339,  1.39941164,
       11.99393032, 14.35748055, 20.68964917, 20.60740812, 16.74795614,
       13.79142314, 19.17714625, 21.35444142, 18.42285213, 20.50071118,
       24.12046686, 22.95962431, 28.37445806, 26.86045606, 23.00251574])

#### Comparación con los métodos de la librería sklearn

In [11]:
# Ordinary least-squares linear regression (multiple features, with intercept)
linear_model = LinearRegression(fit_intercept=True)
linear_model.fit(boston_X_train, boston_y_train)

LinearRegression()

In [12]:
# scikit-learn ridge regression with the same alpha as the hand-written model
ridge_model = Ridge(alpha=1.0, fit_intercept=True)
ridge_model.fit(boston_X_train, boston_y_train)

Ridge()

In [13]:
# Coefficients obtained by the linear-regression model
pd.DataFrame(linear_model.coef_)

Unnamed: 0,0
0,-0.105267
1,0.048756
2,0.032235
3,2.535939
4,-17.598329
5,3.80981
6,0.008266
7,-1.448808
8,0.35945
9,-0.015468


In [14]:
# Coefficients obtained by the scikit-learn ridge model
pd.DataFrame(ridge_model.coef_)

Unnamed: 0,0
0,-0.100746
1,0.050296
2,-0.002394
3,2.412094
4,-10.336344
5,3.840713
6,0.002012
7,-1.357286
8,0.349599
9,-0.016676


In [15]:
# Intercept (B0) obtained by the scikit-learn ridge model
ridge_model.intercept_

31.259196061411323

In [16]:
# Sum of squared coefficients (squared L2 norm) for the linear regression
l2_linear = norm(linear_model.coef_)
print(l2_linear**2)

334.0289527626677


In [17]:
# Sum of squared coefficients (squared L2 norm) for the ridge regression
l2_ridge = norm(ridge_model.coef_)
print(l2_ridge**2)

130.38999955633244


In [18]:
# Test-set predictions from the linear-regression model
boston_y_pred_linear = linear_model.predict(boston_X_test)
boston_y_pred_linear

array([20.64585359, 11.51602122, 19.36811921, 22.11074903, 23.61374782,
       27.40628725, 28.91715005, 21.06299314, 19.35539178, 22.29202739,
       19.80136804, 21.36004923, 10.85100641,  7.08057082,  2.34950905,
       12.8007766 , 15.00324079, 20.38014882, 20.23340043, 16.294177  ,
       13.65233939, 18.94014048, 21.11124687, 18.26554948, 20.35176283,
       23.67204364, 22.58472206, 28.04288838, 26.51152693, 22.62546042])

In [19]:
# Test-set predictions from the scikit-learn ridge model
boston_y_pred_ridge = ridge_model.predict(boston_X_test)
boston_y_pred_ridge

array([20.1801648 , 10.95348286, 18.88827307, 21.72954594, 22.90321109,
       26.66457592, 28.18615492, 20.54039576, 19.13002724, 22.07609688,
       19.33485985, 21.04403486, 10.06290087,  6.18795339,  1.39941164,
       11.99393032, 14.35748055, 20.68964917, 20.60740812, 16.74795614,
       13.79142314, 19.17714625, 21.35444142, 18.42285213, 20.50071118,
       24.12046686, 22.95962431, 28.37445806, 26.86045606, 23.00251574])

In [20]:
# Mean squared error of the linear-regression predictions on the test set
print('Mean squared error linear regression: %.2f'
      % mean_squared_error(boston_y_test, boston_y_pred_linear))

Mean squared error linear regression: 14.21


In [21]:
# Mean squared error of the ridge predictions on the test set
print('Mean squared error ridge regression: %.2f'
      % mean_squared_error(boston_y_test, boston_y_pred_ridge))

Mean squared error ridge regression: 14.84


## Regresión Lasso

In [296]:
# scikit-learn lasso regression (with intercept)
lasso_model = Lasso(alpha=1.0, fit_intercept=True)
lasso_model.fit(boston_X_train, boston_y_train)

Lasso()

In [297]:
# Coefficients obtained by the scikit-learn lasso model
pd.DataFrame(lasso_model.coef_)

Unnamed: 0,0
0,-0.060882
1,0.052662
2,-0.0
3,0.0
4,-0.0
5,1.06781
6,0.026713
7,-0.718704
8,0.317049
9,-0.018803


In [298]:
# Test-set predictions from the scikit-learn lasso model
boston_y_pred_lasso= lasso_model.predict(boston_X_test)
boston_y_pred_lasso

array([18.00922014, 11.13723358, 17.91654458, 21.50727339, 22.31746067,
       25.51562569, 26.43320684, 21.06904433, 19.06574474, 21.83105431,
       19.16224775, 21.28165525, 10.87536634,  6.00229886,  0.93882341,
       11.5238388 , 14.67884784, 21.69590085, 20.38595404, 16.29237152,
       14.43185536, 20.22845845, 21.59718337, 19.63594521, 20.82709483,
       24.19723839, 24.53373515, 28.59914891, 27.51392191, 25.3270111 ])

In [293]:
# Mean squared error of the scikit-learn lasso predictions on the test set
print('Mean squared error lasso regression: %.2f'
      % mean_squared_error(boston_y_test, boston_y_pred_lasso))

Mean squared error lasso regression: 19.25


In [308]:
class LassoReg:
    """Lasso regression (no intercept) fitted by cyclic coordinate descent.

    Minimises ``||Y - X b||^2 + l1 * ||b||_1``. For this objective the
    coordinate update is a soft-threshold at ``l1 / 2``: the same amount is
    used both to decide whether a coefficient is zeroed and to shrink it, so
    the update is continuous in ``rho``.

    Note that scikit-learn's ``Lasso`` scales its objective differently
    (``1 / (2 * n_samples) * RSS + alpha * ||w||_1``), so ``l1`` here
    corresponds to ``2 * n_samples * alpha`` there.
    """

    def __init__(self, iteration, l1):
        """Store the hyperparameters.

        Parameters
        ----------
        iteration : int
            Number of full sweeps over all coefficients.
        l1 : float
            Value of lambda, the L1 penalty strength.
        """
        self.iteration = iteration  # number of iterations
        self.l1 = l1                # value of lambda
        self.coef_ = None           # set by fit(); column vector of shape (n, 1)

    def fit(self, X, Y):
        """Fit the coefficients; the result is stored in ``self.coef_``.

        Parameters
        ----------
        X : array-like of shape (m, n)
            m observations, n dimensions.
        Y : array-like with m elements
            Target values. The caller's array is left untouched.
        """
        X = np.asarray(X, dtype=float)
        m, n = X.shape  # m - number of observations, n - number of dimensions

        # Work on a flat view/copy of the targets instead of assigning to
        # Y.shape, which would reshape the caller's array in place.
        targets = np.asarray(Y, dtype=float).reshape(m)

        beta = np.ones(n)        # initial coefficients
        threshold = self.l1 / 2  # soft-threshold level for RSS + l1 * |b|

        # z_j = x_j' x_j does not change between sweeps, so compute it once.
        col_sq_norms = (X * X).sum(axis=0)

        # Keep the residuals up to date incrementally rather than recomputing
        # the full prediction for every coordinate.
        residuals = targets - X.dot(beta)

        for _ in range(self.iteration):
            for j in range(n):  # for every variable / coefficient
                x_j = X[:, j]
                z = col_sq_norms[j]
                previous = beta[j]

                if z == 0.0:
                    # An all-zero column carries no information.
                    updated = 0.0
                else:
                    # Correlation of x_j with the residual that excludes the
                    # contribution of coefficient j itself.
                    rho = x_j.dot(residuals) + previous * z
                    if rho < -threshold:
                        updated = (rho + threshold) / z
                    elif rho > threshold:
                        updated = (rho - threshold) / z
                    else:
                        updated = 0.0

                if updated != previous:
                    residuals -= (updated - previous) * x_j
                    beta[j] = updated

        self.coef_ = beta.reshape(n, 1)

    def predict(self, X):
        """Return ``X . coef_`` as a column vector of shape (m, 1).

        The result is also cached in ``self.predictions``.

        Raises
        ------
        AttributeError
            If the model has not been fitted yet.
        """
        if self.coef_ is None:
            raise AttributeError("LassoReg has no coefficients yet; call fit() first")
        self.predictions = np.asarray(X).dot(self.coef_)
        return self.predictions


In [309]:
# Hand-written lasso: 400 coordinate-descent iterations, lambda (l1) = 1.0.
# LassoReg predicts with X.dot(coef_) only, i.e. it fits no intercept.
lasso_model_0 = LassoReg(400,1.0)
lasso_model_0.fit(boston_X_train, boston_y_train)

In [310]:
pd.DataFrame(lasso_model_0.coef_)

Unnamed: 0,0
0,-0.099934
1,0.03586
2,0.009238
3,2.587623
4,-6.34044
5,6.23392
6,0.017095
7,-0.705257
8,0.206777
9,-0.008458


In [311]:
# Test-set predictions from the hand-written lasso model
boston_y_pred_lasso_0= lasso_model_0.predict(boston_X_test)
boston_y_pred_lasso_0

array([[21.9687076 ],
       [10.37168838],
       [19.782007  ],
       [21.90788173],
       [23.17088205],
       [27.88136528],
       [30.15566611],
       [19.67530999],
       [18.7532593 ],
       [22.8379077 ],
       [20.27629378],
       [20.40325984],
       [12.26543928],
       [ 8.805101  ],
       [ 3.83772536],
       [15.5659004 ],
       [17.3425114 ],
       [19.09848563],
       [19.54984507],
       [15.58675308],
       [13.04651029],
       [18.61771361],
       [20.80276036],
       [17.15742626],
       [20.40528713],
       [24.92053971],
       [22.57018467],
       [29.77622835],
       [28.02765328],
       [22.46361392]])

In [312]:
# Mean squared error of the hand-written lasso predictions on the test set
print('Mean squared error lasso regression: %.2f'
      % mean_squared_error(boston_y_test, boston_y_pred_lasso_0))

Mean squared error lasso regression: 16.46
