In [16]:
import pandas as pd
import numpy as np

import statsmodels.api as sm
from sklearn.preprocessing import PolynomialFeatures
from scipy.stats import norm

from energy_consumption.help_functions import get_energy_data, dummy_mapping, handle_outstanding_dp, get_forecast_timestamps, create_submission_frame

In [2]:
energydata = get_energy_data.get_data()


100%|██████████| 102/102 [01:36<00:00,  1.05it/s]


In [3]:
# get dummies
energydata = dummy_mapping.get_mappings(energydata)

# quantile regression data
y_ec = energydata['energy_consumption']    
X_ec = energydata.drop(
    columns=['energy_consumption'])
X_ec = sm.add_constant(X_ec, has_constant="add")

# include interaction terms
poly_input = PolynomialFeatures(interaction_only=True, include_bias=False)    
X_interaction = poly_input.fit_transform(X_ec)

Lasso regression to see which interaction terms to include

In [7]:
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X_interaction, y_ec, test_size=0.2, random_state=42)

# Lasso Regression
lasso = Lasso()
parameters = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}
lasso_cv = GridSearchCV(
    lasso, parameters, scoring='neg_mean_squared_error', cv=5)
lasso_cv.fit(X_train, y_train)

# Ridge Regression
ridge = Ridge()
parameters = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}
ridge_cv = GridSearchCV(
    ridge, parameters, scoring='neg_mean_squared_error', cv=5)
ridge_cv.fit(X_train, y_train)

print("Lasso best alpha:", lasso_cv.best_params_['alpha'])
print("Ridge best alpha:", ridge_cv.best_params_['alpha'])

Lasso best alpha: 0.001
Ridge best alpha: 1


In [10]:
# best alpha = 0.001
# Train the final Ridge model
final_ridge_model = Ridge(alpha=1)
final_ridge_model.fit(X_train, y_train)
final_lasso_model = Lasso(alpha=0.001)
final_lasso_model.fit(X_train, y_train)

ridge_predictions = final_ridge_model.predict(X_test)
lasso_predictions = final_lasso_model.predict(X_test)

print("Ridge MSE:", mean_squared_error(y_test, ridge_predictions))
print("Lasso MSE:", mean_squared_error(y_test, lasso_predictions))

print("Ridge R-squared:", r2_score(y_test, ridge_predictions))
print("Lasso R-squared:", r2_score(y_test, lasso_predictions))

Ridge MSE: 40.25381462393384
Lasso MSE: 40.256890471954584
Ridge R-squared: 0.5509094963411375
Lasso R-squared: 0.5508751807327978


Both perform well --> Use Lasso since its more "State-of-the-art"

In [15]:
lasso_predictions

array([41.79213777, 54.38432277, 65.26871332, ..., 50.72383265,
       57.64757856, 52.66868956])

In [11]:
coefficient_summary = { 
    final_lasso_model.coef_

array([ 0.        ,  6.94399521,  3.14395605,  1.41762974,  0.        ,
        8.01187088,  1.18842084, -6.39048859,  5.53509663,  0.36281287,
        2.92382257,  9.94147023, -5.38539594,  1.61784196,  1.17902223,
        0.78093919,  0.        ,  2.29415089,  2.34844043, -0.        ,
        2.44837177,  0.63161457,  2.41456359,  1.33889014, -0.37297866,
        0.        ,  0.        ,  0.        , -0.        , -0.        ,
       -0.27363192,  1.14785783,  0.12797521,  0.72275381, -0.12787228,
       -6.85469507,  0.        ,  0.        , -0.65090629,  0.46318139,
       -0.01805078, -0.65240003,  0.46659967,  0.35895686, -0.3022292 ,
       -3.91474295,  0.        ,  0.34400786,  0.53240499, -0.41679695,
        1.06468686,  0.64254106,  1.40080312,  0.92696576, -0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        , -3.78

In [6]:
final_lasso_model.summary()

AttributeError: 'Lasso' object has no attribute 'summary'

In [None]:
indexes = [47, 51, 55, 71, 75, 79]
# create dataframe to store forecast quantiles
energyforecast = get_forecast_timestamps.forecast_timestamps(
    energydata.index[-1])
energyforecast = dummy_mapping.get_mappings(energyforecast)
X_fc = sm.add_constant(energyforecast, has_constant='add')

poly_forecast = PolynomialFeatures(
    interaction_only=True, include_bias=False)
X_fc_interaction = poly_forecast.fit_transform(X_fc)

# model
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]
model_qr = sm.QuantReg(y_ec, X_interaction)

for q in quantiles:
    model_temp = model_qr.fit(q=q)
    forecast_temp = model_temp.predict(X_fc_interaction)
    energyforecast[f'forecast{q}'] = forecast_temp

selected_forecasts = energyforecast.loc[energyforecast.index[indexes],
                                        'forecast0.025':'forecast0.975']
selected_forecasts_frame = create_submission_frame.get_frame(
    selected_forecasts)
selected_forecasts
model_qr.fit(q=0.025).summary()