In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import itertools
import matplotlib.pyplot as plt
from sklearn.preprocessing import *
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV

# Data DL

In [2]:
import data_preprocessing

train, test = data_preprocessing.preprocess_data("Energy Consumption Dataset.csv")

In [3]:
def split_x_y(df):
    y = df["Energy Consumption"]
    X = df.drop("Energy Consumption", axis=1)
    return X, y

In [4]:
X_train, y_train = split_x_y(train)
X_test, y_test = split_x_y(test)

In [5]:
def test_regressor(pipe):
    y_pred = pipe.fit(X_train, y_train).predict(X_test)
    return mean_squared_error(y_true=y_test, y_pred=y_pred, squared=False)

In [6]:
pipe = Pipeline([("Regressor", LinearRegression())])
test_regressor(pipe)

18152.20759372999

In [7]:
pipe = Pipeline([("Regressor", Lasso())])
params = {"Regressor__alpha": np.arange(1,20, 0.1)}
reg = GridSearchCV(pipe, params, cv=2, verbose=3, scoring="neg_mean_squared_error")
reg.fit(X_train, y_train)
reg.best_params_

Fitting 2 folds for each of 190 candidates, totalling 380 fits
[CV 1/2] END .....Regressor__alpha=1.0;, score=-132049773.424 total time=   1.7s
[CV 2/2] END .....Regressor__alpha=1.0;, score=-140127389.104 total time=   1.7s
[CV 1/2] END .....Regressor__alpha=1.1;, score=-132030252.057 total time=   2.1s
[CV 2/2] END .....Regressor__alpha=1.1;, score=-140130633.053 total time=   1.9s
[CV 1/2] END Regressor__alpha=1.2000000000000002;, score=-132011307.982 total time=   1.9s
[CV 2/2] END Regressor__alpha=1.2000000000000002;, score=-140134097.675 total time=   1.8s
[CV 1/2] END Regressor__alpha=1.3000000000000003;, score=-131992947.606 total time=   1.0s
[CV 2/2] END Regressor__alpha=1.3000000000000003;, score=-140137783.134 total time=   0.7s
[CV 1/2] END Regressor__alpha=1.4000000000000004;, score=-131975164.722 total time=   0.7s
[CV 2/2] END Regressor__alpha=1.4000000000000004;, score=-140141689.616 total time=   0.7s
[CV 1/2] END Regressor__alpha=1.5000000000000004;, score=-131958593

{'Regressor__alpha': 6.300000000000004}

In [10]:
pipe = Pipeline([("Regressor", Lasso(alpha=6.3))])
test_regressor(pipe)

10121.04361884465

In [20]:
pipe = Pipeline([("Regressor", Ridge(alpha=100))])
test_regressor(pipe)

10429.648022335967