In [7]:
import pandas as pd
from lasso_mlr import perform_lasso_mlr
import warnings
from sklearn.exceptions import ConvergenceWarning
import plotly.graph_objects as go
import plotly.express as px

# Silence scikit-learn's ConvergenceWarning for the rest of the session so the
# cell outputs are not flooded with repeated solver messages.
warnings.simplefilter("ignore", category=ConvergenceWarning)

In [8]:
# Read the pre-processed table from disk (path is relative to this notebook).
# NOTE(review): judging by the file name this is the merged table without lag
# features — confirm against the preprocessing step.
df = pd.read_csv(
    filepath_or_buffer="../processed_tables/merged_without_lags_represas_criterio.csv"
)

# Show (rows, columns) as the cell output.
df.shape

(644, 34)

## Regresión lineal con Lasso

Esta vez utilizaremos `sklearn` para hacer la regresión Lasso y poder ver qué coeficientes son cero.


In [9]:
# Run the project's Lasso helper; the DataFrame is passed wrapped in a dict
# under the "df" key and the helper returns an object indexed by string keys.
results = perform_lasso_mlr({"df": df})

# Pull each result out into its own notebook-level name in a single unpacking.
(
    y_pred,
    y_test,
    X_test_dates,
    X_train_dates,
    y_train,
    y_pred_train,
    lasso_features,
) = (
    results[field]
    for field in (
        "y_pred",
        "y_test",
        "X_test_dates",
        "X_train_dates",
        "y_train",
        "y_pred_train",
        "lasso_features",
    )
)

# Test predictions with the first element dropped (one item shorter than y_pred).
y_pred_1 = y_pred[1:]

Best parameters found: {'alpha': 4.281332398719396}
Best cross-validation score: 0.27598717336032197
Mean Squared Error on Test Data: 220124.79349171164
Mean Absolute Percentage Error on Test Data: 0.489248210544515
precipitacion_amazonas: 9.36547740603043
precipitacion_arauca: -5.359373008797834
precipitacion_atlantico: -7.075255779036906
precipitacion_bolivar: -2.6737198658459165
precipitacion_caqueta: 0.3157078280848675
precipitacion_casanare: 10.02496585557462
precipitacion_cesar: 1.3114811517546177
precipitacion_guainia: -4.174453503835209
precipitacion_guaviare: 4.7566194496733205
precipitacion_la guajira: -9.113666057270432
precipitacion_norte de santander: -10.089764236364344
precipitacion_san andres providencia: -2.5308557419791704
precipitacion_sucre: -14.095275394450463
precipitacion_vaupes: 8.685909923259633
temp_ARAUCA: -16.26370911345746
temp_ARCHIPIELAGO DE SAN ANDRES PROVIDENCIA Y SANTA CATALINA: -0.7740034096108981
temp_CAQUETA: 2.5712599529294273
temp_CHOCO: 6.7670855

In [10]:
# Number of features returned under "lasso_features" (materialised to a list first).
len([*lasso_features])

22

### Matriz de Correlación de variables seleccionadas por el modelo

In [11]:
# Pairwise correlation between the features listed in `lasso_features`,
# rendered as a colour-coded table.
# vmin/vmax pin the colour scale to the full correlation range [-1, 1]; without
# them each column is scaled by its own min/max, so the diverging "coolwarm"
# map would not be centred on 0 and colours would not be comparable across columns.
(
    df[list(lasso_features)]
    .corr()
    .style.background_gradient(cmap="coolwarm", vmin=-1, vmax=1)
)

Unnamed: 0,precipitacion_amazonas,precipitacion_arauca,precipitacion_atlantico,precipitacion_bolivar,precipitacion_caqueta,precipitacion_casanare,precipitacion_cesar,precipitacion_guainia,precipitacion_guaviare,precipitacion_la guajira,precipitacion_norte de santander,precipitacion_san andres providencia,precipitacion_sucre,precipitacion_vaupes,temp_ARAUCA,temp_ARCHIPIELAGO DE SAN ANDRES PROVIDENCIA Y SANTA CATALINA,temp_CAQUETA,temp_CHOCO,temp_NARINO,temp_VICHADA,temp_avg_represas,brent_value
precipitacion_amazonas,1.0,-0.066088,-0.075014,-0.033369,-0.022714,-0.013511,-0.015286,-0.059546,0.043675,-0.105463,0.046852,-0.062482,0.008736,0.0032,0.073518,-0.060093,0.055474,0.127459,0.101894,-0.016458,0.036399,0.01518
precipitacion_arauca,-0.066088,1.0,0.01771,0.247388,0.209292,0.208583,0.061164,0.107836,0.117684,0.039089,-0.047415,0.142209,0.190068,0.13098,0.380051,0.185561,-0.2081,-0.109112,0.084866,0.022685,-0.065212,0.034924
precipitacion_atlantico,-0.075014,0.01771,1.0,0.119168,0.019529,0.052842,0.036497,0.116575,0.119205,0.230301,0.053496,0.137871,0.114213,-0.050822,-0.045853,0.117832,0.070169,-0.181007,0.041105,0.095447,-0.082236,0.115467
precipitacion_bolivar,-0.033369,0.247388,0.119168,1.0,0.063806,0.091918,0.14274,0.073121,0.16794,0.13825,0.041052,0.120488,0.340527,0.145462,0.158988,0.003807,-0.07708,-0.152417,0.043375,0.036608,-0.199072,0.191135
precipitacion_caqueta,-0.022714,0.209292,0.019529,0.063806,1.0,0.229139,0.085191,0.146477,0.22314,-0.008876,-0.000406,0.127199,0.066551,0.07925,0.012223,0.01824,-0.298821,-0.103962,-0.056841,-0.034095,-0.112341,0.051083
precipitacion_casanare,-0.013511,0.208583,0.052842,0.091918,0.229139,1.0,0.018447,0.141937,0.223296,0.023034,-0.126998,0.042577,0.094089,0.109686,-0.103097,0.146754,-0.163715,0.025559,-0.037587,-0.077683,0.050068,0.006283
precipitacion_cesar,-0.015286,0.061164,0.036497,0.14274,0.085191,0.018447,1.0,0.095596,0.168497,0.043014,0.23075,0.002816,0.04924,-0.01747,0.042398,-0.03367,-0.068896,-0.077214,-0.155227,0.03743,-0.149193,0.204421
precipitacion_guainia,-0.059546,0.107836,0.116575,0.073121,0.146477,0.141937,0.095596,1.0,0.114822,0.00067,-0.02539,0.113545,0.065659,0.040606,0.050947,0.015437,-0.15165,-0.020749,-0.018777,-0.04557,-0.038997,0.134976
precipitacion_guaviare,0.043675,0.117684,0.119205,0.16794,0.22314,0.223296,0.168497,0.114822,1.0,0.033942,0.070243,0.133772,0.192362,0.126658,-0.017161,-0.01876,-0.165468,-0.020139,0.021267,-0.112902,-0.15869,0.183881
precipitacion_la guajira,-0.105463,0.039089,0.230301,0.13825,-0.008876,0.023034,0.043014,0.00067,0.033942,1.0,0.113244,0.046858,0.034341,-0.026851,0.005444,0.0845,0.035378,-0.128843,0.032113,0.081599,-0.12044,0.154511


In [12]:
# Line chart comparing actual and predicted values on the train and test dates.
fig = px.line(x=X_train_dates, y=y_train, title="Forecasting of Energy Price with Lasso MLR")
# The trace created by px.line has no name and is hidden from the legend; label
# it now (before other traces exist) so the actual train series is identifiable.
fig.update_traces(name='Valores reales de train', showlegend=True)
fig.update_layout(xaxis_title='Date', yaxis_title='Average Energy Price')
fig.add_trace(go.Scatter(x=X_train_dates, y=y_pred_train, mode='lines', name='Valores predichos de train'))
fig.add_trace(go.Scatter(x=X_test_dates, y=y_test, mode='lines', name='Valores reales de test'))
fig.add_trace(go.Scatter(x=X_test_dates, y=y_pred, mode='lines', name='Valores predichos de test'))
# y_pred_1 is y_pred[1:], i.e. one element shorter than X_test_dates: each
# prediction is drawn one step earlier. Trim the last date so x and y have the
# same length instead of relying on the renderer to truncate.
fig.add_trace(go.Scatter(x=X_test_dates[:-1], y=y_pred_1, mode='lines', name='Valores predichos de test -1'))
fig.show()