In [56]:
import itertools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import *
from sklearn.preprocessing import PolynomialFeatures

import data_preprocessing

# Data loading

In [57]:
# Location of the raw CSV handed to the project's preprocessing helper.
# Kept as a named constant so the input file can be swapped in one place.
DATA_PATH = "Energy Consumption Dataset.csv"

# `preprocess_data` hands back two frames, unpacked here as train and test.
train, test = data_preprocessing.preprocess_data(DATA_PATH)

In [58]:
def split_x_y(df, target="Energy Consumption"):
    """Separate a frame into its feature columns and its target column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains the ``target`` column. It is not modified.
    target : str, default "Energy Consumption"
        Name of the column to use as the regression target.

    Returns
    -------
    (X, y)
        ``X`` is ``df`` without the ``target`` column and ``y`` is the
        ``target`` column itself.

    Raises
    ------
    KeyError
        If ``target`` is not a column of ``df``.
    """
    y = df[target]
    # `drop` returns a new frame, so the caller's frame keeps its target column.
    X = df.drop(columns=[target])
    return X, y

In [59]:
# Apply the same feature/target split to the train frame and then the test frame.
(X_train, y_train), (X_test, y_test) = (split_x_y(frame) for frame in (train, test))

In [60]:
def test_regressor(pipe):
    """Fit ``pipe`` on the training split and return its RMSE on the test split.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``. ``pipe.fit`` is called on the object that is passed in.

    Parameters
    ----------
    pipe : estimator
        Any object exposing ``fit(X, y)`` (returning the fitted estimator)
        and ``predict(X)``.

    Returns
    -------
    float
        Root mean squared error of the predictions on the test split.
    """
    y_pred = pipe.fit(X_train, y_train).predict(X_test)
    # The `squared=False` keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6; taking the square root explicitly works on every version.
    mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
    return float(np.sqrt(mse))

In [61]:
# Baseline: a single-step pipeline holding an ordinary least-squares regressor.
pipe = Pipeline(steps=[("Regressor", LinearRegression())])
test_regressor(pipe)

18152.20759372999

In [77]:
# Tune the Lasso penalty over the integer alphas 1..19 using 2-fold
# cross-validation scored by negated mean squared error.
pipe = Pipeline(steps=[("Regressor", Lasso())])
params = {"Regressor__alpha": np.arange(1, 20)}
reg = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring="neg_mean_squared_error",
    cv=2,
    verbose=3,
)
reg.fit(X_train, y_train)
reg.best_params_

Fitting 2 folds for each of 19 candidates, totalling 38 fits
[CV 1/2] END .......Regressor__alpha=1;, score=-132049773.424 total time=   0.3s
[CV 2/2] END .......Regressor__alpha=1;, score=-140127389.104 total time=   0.7s
[CV 1/2] END .......Regressor__alpha=2;, score=-131890866.037 total time=   0.4s
[CV 2/2] END .......Regressor__alpha=2;, score=-140169748.394 total time=   0.4s
[CV 1/2] END .......Regressor__alpha=3;, score=-131753522.800 total time=   0.4s
[CV 2/2] END .......Regressor__alpha=3;, score=-140216893.973 total time=   0.9s
[CV 1/2] END .......Regressor__alpha=4;, score=-131612420.359 total time=   0.4s
[CV 2/2] END .......Regressor__alpha=4;, score=-140257923.473 total time=   0.4s
[CV 1/2] END .......Regressor__alpha=5;, score=-131469857.795 total time=   0.3s
[CV 2/2] END .......Regressor__alpha=5;, score=-140290376.705 total time=   0.5s
[CV 1/2] END .......Regressor__alpha=6;, score=-131329582.599 total time=   0.3s
[CV 2/2] END .......Regressor__alpha=6;, score=-

{'Regressor__alpha': 7}

In [78]:
# Rebuild the Lasso pipeline with the hyper-parameters chosen by the grid
# search (`reg`) instead of a hand-copied alpha, so this cell stays in sync
# if the search grid or the data changes, then score it on the test split.
pipe = Pipeline([("Regressor", Lasso())]).set_params(**reg.best_params_)
test_regressor(pipe)

10119.109240215745

In [75]:
# Ridge regression with its default settings, scored the same way as the other models.
pipe = Pipeline(steps=[("Regressor", Ridge())])
test_regressor(pipe)

17070.692535566843