<a href="https://colab.research.google.com/github/rmonterof/Scikit-learn/blob/main/1_7_Regresi%C3%B3n_m%C3%BAltiple_regularizada_inmuebles.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Regresión regularizada

Ejemplo con los distintos algoritmos de regularización de regresión: Lasso, Ridge, ElasticNet

In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet, LinearRegression
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from sklearn.model_selection import GridSearchCV
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.dpi'] = 110

In [14]:
df = pd.read_csv("/content/inmuebles.csv")

In [15]:
df.head()

Unnamed: 0,Lot Frontage,Lot Area,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,...,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Sale Condition_AdjLand,Sale Condition_Alloca,Sale Condition_Family,Sale Condition_Normal,Sale Condition_Partial
0,141.0,31770,6,5,1960,1960,112.0,639.0,0.0,441.0,...,0,0,0,0,1,0,0,0,1,0
1,80.0,11622,5,6,1961,1961,0.0,468.0,144.0,270.0,...,0,0,0,0,1,0,0,0,1,0
2,81.0,14267,6,6,1958,1958,108.0,923.0,0.0,406.0,...,0,0,0,0,1,0,0,0,1,0
3,93.0,11160,7,5,1968,1968,0.0,1065.0,0.0,1045.0,...,0,0,0,0,1,0,0,0,1,0
4,74.0,13830,5,5,1997,1998,0.0,791.0,0.0,137.0,...,0,0,0,0,1,0,0,0,1,0


In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2925 entries, 0 to 2924
Columns: 274 entries, Lot Frontage to Sale Condition_Partial
dtypes: float64(11), int64(263)
memory usage: 6.1 MB


In [17]:
df.head()

Unnamed: 0,Lot Frontage,Lot Area,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,...,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Sale Condition_AdjLand,Sale Condition_Alloca,Sale Condition_Family,Sale Condition_Normal,Sale Condition_Partial
0,141.0,31770,6,5,1960,1960,112.0,639.0,0.0,441.0,...,0,0,0,0,1,0,0,0,1,0
1,80.0,11622,5,6,1961,1961,0.0,468.0,144.0,270.0,...,0,0,0,0,1,0,0,0,1,0
2,81.0,14267,6,6,1958,1958,108.0,923.0,0.0,406.0,...,0,0,0,0,1,0,0,0,1,0
3,93.0,11160,7,5,1968,1968,0.0,1065.0,0.0,1045.0,...,0,0,0,0,1,0,0,0,1,0
4,74.0,13830,5,5,1997,1998,0.0,791.0,0.0,137.0,...,0,0,0,0,1,0,0,0,1,0


In [18]:
df['SalePrice'][:10]

Unnamed: 0,SalePrice
0,215000
1,105000
2,172000
3,244000
4,189900
5,195500
6,213500
7,191500
8,236500
9,189000


In [19]:
X = df.drop('SalePrice',axis=1)
y = df['SalePrice']

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [21]:
# scaler = StandardScaler()
# scaled_X_train = scaler.fit_transform(X_train)
# scaled_X_test = scaler.transform(X_test)

In [22]:
df_results = pd.DataFrame(columns=['model_name', 'train_r2', 'test_r2', 'MAE', 'RMSE'])

## 1. Regresión lineal

In [23]:
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
r2_score(y_test, y_pred)

0.9318542823751907

In [24]:
model1_train_r2 = round(model.score(X_train, y_train), 4)
model1_test_r2 = round(model.score(X_test, y_test), 4)
model1_mae = round(mean_absolute_error(y_test, y_pred), 4)
model1_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

In [25]:
df_results.loc[len(df_results)] = ['regresion_lineal', model1_train_r2, model1_test_r2, model1_mae, model1_rmse]
df_results.head()

Unnamed: 0,model_name,train_r2,test_r2,MAE,RMSE
0,regresion_lineal,0.9401,0.9319,14680.9039,21492.012803


## 2. Regresión Lasso (L1)

In [26]:
from sklearn.linear_model import Lasso

# model = Lasso(alpha=1, max_iter=20000)
model = Lasso(alpha=20)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

model2_train_r2 = round(model.score(X_train, y_train), 4)
model2_test_r2 = round(model.score(X_test, y_test), 4)
model2_mae = round(mean_absolute_error(y_test, y_pred), 4)
model2_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

In [27]:
df_results.loc[len(df_results)] = ['regresion_lasso', model2_train_r2, model2_test_r2, model2_mae, model2_rmse]
df_results.head()

Unnamed: 0,model_name,train_r2,test_r2,MAE,RMSE
0,regresion_lineal,0.9401,0.9319,14680.9039,21492.012803
1,regresion_lasso,0.9366,0.935,14273.1921,20992.361746


## 3. Regresión Ridge (L2)

In [28]:
from sklearn.linear_model import Ridge

model = Ridge(alpha=15)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
r2_score(y_test, y_pred)

model3_train_r2 = round(model.score(X_train, y_train), 4)
model3_test_r2 = round(model.score(X_test, y_test), 4)
model3_mae = round(mean_absolute_error(y_test, y_pred), 4)
model3_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

In [29]:
df_results.loc[len(df_results)] = ['regresion_ridge', model3_train_r2, model3_test_r2, model3_mae, model3_rmse]
df_results.head()

Unnamed: 0,model_name,train_r2,test_r2,MAE,RMSE
0,regresion_lineal,0.9401,0.9319,14680.9039,21492.012803
1,regresion_lasso,0.9366,0.935,14273.1921,20992.361746
2,regresion_ridge,0.932,0.9346,14256.8798,21052.437435


## 3. Regresión ElasticNet (L1 + L2)

In [30]:
model = ElasticNet(alpha=2, max_iter=20000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
r2_score(y_test,y_pred)

model4_train_r2 = round(model.score(X_train, y_train), 4)
model4_test_r2 = round(model.score(X_test, y_test), 4)
model4_mae = round(mean_absolute_error(y_test, y_pred), 4)
model4_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

  model = cd_fast.enet_coordinate_descent(


In [31]:
df_results.loc[len(df_results)] = ['regresion_elasticnet', model4_train_r2, model4_test_r2, model4_mae, model4_rmse]
df_results.head()

Unnamed: 0,model_name,train_r2,test_r2,MAE,RMSE
0,regresion_lineal,0.9401,0.9319,14680.9039,21492.012803
1,regresion_lasso,0.9366,0.935,14273.1921,20992.361746
2,regresion_ridge,0.932,0.9346,14256.8798,21052.437435
3,regresion_elasticnet,0.8713,0.8809,18525.2646,28415.054853


## Hiperparámetros

In [32]:
# param_grid = {'alpha':[0.01, 0.1,1,5,10,20, 50,100],
#               'l1_ratio':[.1, .5, .7, .9, .95, .99, 1]}
param_grid = {'alpha':[0.01, 0.1,1,5,10,20, 50,100]}

In [33]:
base_elastic_model = ElasticNet()
grid_model = GridSearchCV(estimator=base_elastic_model,
                          param_grid=param_grid,
                          cv=5)
grid_model.fit(X_train,y_train)

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c

In [34]:
grid_model.best_params_

{'alpha': 0.01}

In [35]:
y_pred = grid_model.predict(X_test)

In [36]:
model5_train_r2 = round(grid_model.score(X_train, y_train), 4)
model5_test_r2 = round(grid_model.score(X_test, y_test), 4)
model5_mae = round(mean_absolute_error(y_test, y_pred), 4)
model5_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

In [37]:
df_results.loc[len(df_results)] = ['regresion_elasticnet_hp', model5_train_r2, model5_test_r2, model5_mae, model5_rmse]
df_results.head()

Unnamed: 0,model_name,train_r2,test_r2,MAE,RMSE
0,regresion_lineal,0.9401,0.9319,14680.9039,21492.012803
1,regresion_lasso,0.9366,0.935,14273.1921,20992.361746
2,regresion_ridge,0.932,0.9346,14256.8798,21052.437435
3,regresion_elasticnet,0.8713,0.8809,18525.2646,28415.054853
4,regresion_elasticnet_hp,0.9334,0.9348,14239.7521,21016.346898
