In [32]:
from lasso_mlr import perform_lasso_mlr
import pandas as pd
import plotly.graph_objects as go

In [33]:
df = pd.read_csv("../processed_tables/merged_standardized.csv")

## Sacamos las variables exógenas

In [34]:
lag_columns = df.filter(like="lag_")
price = df[["Date", "energy_price"]]
df = pd.concat([price, lag_columns], axis=1)
df.head()

Unnamed: 0,Date,energy_price
0,2021-10-01,216.929726
1,2021-10-02,256.940174
2,2021-10-03,282.065065
3,2021-10-04,286.526116
4,2021-10-05,278.157325


## Regresión lineal con Lasso

Esta vez utilizaremos `sklearn` para hacer la regresión Lasso y poder ver qué coeficientes son cero.


In [35]:
for i in range(1, 10):
    print(f"Lags: {i}")
    perform_lasso_mlr({ "df": df, "lags": i })
    print("-----------------------")

Lags: 1


Best parameters found: {'alpha': 206.913808111479}
Best cross-validation score: 0.9355698036817355
Mean Squared Error on Test Data: 4803.445749029219
energy_price_lag_1: 0.9656740502995752
-----------------------
Lags: 2
Best parameters found: {'alpha': 4.281332398719396}
Best cross-validation score: 0.9386516725010919
Mean Squared Error on Test Data: 4543.376056829038
energy_price_lag_1: 1.100372648462993
energy_price_lag_2: -0.1321051419324675
-----------------------
Lags: 3
Best parameters found: {'alpha': 4.281332398719396}
Best cross-validation score: 0.940924245016036
Mean Squared Error on Test Data: 4532.002384450109
energy_price_lag_1: 1.0792803619451377
energy_price_lag_2: 0.06410894229918909
energy_price_lag_3: -0.18187404105984578
-----------------------
Lags: 4
Best parameters found: {'alpha': 4.281332398719396}
Best cross-validation score: 0.9427036570621518
Mean Squared Error on Test Data: 4531.334315041626
energy_price_lag_1: 1.0794672188396242
energy_price_lag_2: 0.0640

Vemos que el mejor valor para el lag es de 4.

In [36]:
results = perform_lasso_mlr({ "df": df, "lags": 4 })
y_pred = results["y_pred"]
y_test = results["y_test"]
X_test_dates = results["X_test_dates"]

Best parameters found: {'alpha': 4.281332398719396}
Best cross-validation score: 0.9440709542708168
Mean Squared Error on Test Data: 4620.442159078567
energy_price_lag_1: 1.0849724956667026
energy_price_lag_2: 0.04446935117746335
energy_price_lag_3: -0.19116400435567396
energy_price_lag_4: 0.11528134068306972
energy_price_lag_5: -0.09639979220942355
energy_price_lag_7: 0.10615445784993605
energy_price_lag_8: -0.18884423117115137
energy_price_lag_9: 0.0816177991102726


In [37]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=X_test_dates, y=y_test, mode='lines', name='Valores reales'))
fig.add_trace(go.Scatter(x=X_test_dates, y=y_pred, mode='lines', name='Valores predichos'))
fig.show()