In [36]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import itertools
import matplotlib.pyplot as plt
from sklearn.preprocessing import *
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV

### Import the dataset from the prepared CSV file and preprocess it

In [37]:
# Project-local helper module (not part of any installed package shown in this
# notebook) — presumably located next to the notebook; TODO confirm.
import data_preprocessing

# The result of preprocess_data is unpacked into two objects that the cells
# below treat as the training and the test set.
# NOTE(review): the cleaning and splitting rules live inside data_preprocessing
# and are not visible from this notebook — confirm the split is chronological,
# since TimeSeriesSplit is used for cross-validation later on.
train, test = data_preprocessing.preprocess_data("Residual Load Dataset.csv")

### Prepare data for applying and testing regression models

In [38]:
# Function to separate the influencers (features) and the target variable within the dataset
def split_x_y(df, target="Residual Load"):
    """Split a DataFrame into a feature matrix and a target column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the target column together with all feature columns.
        It is not modified.
    target : str, default "Residual Load"
        Name of the column to predict.

    Returns
    -------
    X : pandas.DataFrame
        New frame containing every column of ``df`` except ``target``.
    y : pandas.Series
        The ``target`` column of ``df``.

    Raises
    ------
    KeyError
        If ``target`` is not a column of ``df``.
    """
    y = df[target]
    # drop() returns a new frame, so the caller's DataFrame keeps its target column.
    X = df.drop(columns=target)
    return X, y

In [39]:
# Apply the same feature/target split to the training and the test frame.
(X_train, y_train), (X_test, y_test) = map(split_x_y, (train, test))

### Definition of testing function based on test dataset

In [40]:
def test_regressor(pipe):
    """Fit ``pipe`` on the training split and return its RMSE on the test split.

    The data is taken from the notebook-level variables ``X_train``,
    ``y_train``, ``X_test`` and ``y_test``, so those cells must have been run
    first. ``pipe`` is fitted in place.

    Parameters
    ----------
    pipe : estimator
        Any object exposing ``fit(X, y)`` (returning the fitted estimator) and
        ``predict(X)``, e.g. a scikit-learn ``Pipeline``.

    Returns
    -------
    float
        Root mean squared error of the predictions on the test split.
    """
    y_pred = pipe.fit(X_train, y_train).predict(X_test)
    # mean_squared_error's ``squared=False`` flag is deprecated since
    # scikit-learn 1.4 and scheduled for removal in 1.6; taking the square
    # root explicitly yields the same RMSE on every version.
    return float(np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred)))

### Application of Linear regression

In [41]:
# Baseline: ordinary least squares without regularisation, scored by
# test_regressor (RMSE on the test split).
pipe = Pipeline(steps=[("Regressor", LinearRegression())])
test_regressor(pipe)

18152.20759372999

### Preparation of hyperparameter tuning for Lasso/Ridge regression

In [42]:
# Only the time-ordered splitter is needed here; importing it by name keeps it
# obvious where TimeSeriesSplit comes from and avoids flooding the namespace.
from sklearn.model_selection import TimeSeriesSplit

# Five folds in which each validation fold lies after the data it is trained
# on, so the hyperparameter search never validates on observations that
# precede the training window.
tscv = TimeSeriesSplit(n_splits=5)

### Application of Lasso Regression

In [51]:
pipe = Pipeline([("Regressor", Lasso())])
# Candidate alphas 0.1, 0.2, ..., 19.9 (199 values). The grid is built from
# integers because np.arange with a float step produces inexact values such as
# 1.2000000000000002 and a length that depends on rounding.
params = {"Regressor__alpha": np.arange(1, 200) / 10}
# Time-ordered cross-validation on the training data only; the alpha with the
# lowest mean squared error across the folds wins.
reg = GridSearchCV(pipe, params, cv=tscv, scoring="neg_mean_squared_error", n_jobs=-1)
reg.fit(X_train, y_train)
reg.best_params_

{'Regressor__alpha': 2.7}

In [52]:
# Build a fresh Lasso pipeline, apply the hyperparameters selected by the grid
# search in `reg`, and score it with test_regressor.
pipe = Pipeline([("Regressor", Lasso())]).set_params(**reg.best_params_)
test_regressor(pipe)

10135.211154709661

### Application of Ridge regression

In [57]:
pipe = Pipeline([("Regressor", Ridge())])
# Candidate alphas 0.1, 0.2, ..., 19.9 (199 values). The grid is built from
# integers because np.arange with a float step produces inexact values such as
# 1.2000000000000002 and a length that depends on rounding.
params = {"Regressor__alpha": np.arange(1, 200) / 10}
# Time-ordered cross-validation on the training data only; the alpha with the
# lowest mean squared error across the folds wins.
reg = GridSearchCV(pipe, params, cv=tscv, scoring="neg_mean_squared_error", n_jobs=-1)
reg.fit(X_train, y_train)
reg.best_params_

{'Regressor__alpha': 1.2000000000000002}

In [58]:
# Build a fresh Ridge pipeline, apply the hyperparameters selected by the grid
# search in `reg`, and score it with test_regressor.
pipe = Pipeline([("Regressor", Ridge())]).set_params(**reg.best_params_)
test_regressor(pipe)

16882.450715523573

Best alpha of earlier grid searches: alpha = 10, RMSE = 12854.15