In [17]:
import pandas as pd
import numpy as np
import datetime
import os

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.base import BaseEstimator, TransformerMixin

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score

from sklearn.svm import SVR

from sklearn.model_selection import GridSearchCV

from sklearn.preprocessing import PolynomialFeatures

import joblib

%config Completer.use_jedi = False
%matplotlib inline  

# Relative locations of the raw inputs (resolved against the kernel's working directory).
daily_data_path = "Data/London/daily_dataset/daily_dataset/"
daily_weather_path = 'Data/London/weather_daily_darksky.csv'
# File names for derived energy tables. f_energy_clean_name and f_energy_avg_name
# carry no extension; the loading cell appends '.csv' to f_energy_avg_name.
# The other two names already include the '.csv' suffix.
f_energy_all_name = "energy_all.csv"
f_energy_clean_name = "energy_clean"
f_energy_avg_name = "energy_avg"
f_energy_avg_all_file = "energy_avg_all.csv"

# NOTE(review): not referenced in the cells shown here; presumably the directory
# for persisted models (joblib is imported) -- confirm, and check whether
# "Modes/" was meant to be "Models/".
mode_path = "Modes/"

In [3]:
# Explicit compact dtypes for the listed columns; columns not listed here are
# left to pandas' own dtype inference.
dtypes_in = {
    'avg_energy': 'float32',
    'day': 'int8',
    'month': 'int8',
    'temperatureMax': 'float16',
}
energy_data = pd.read_csv(f_energy_avg_name + '.csv', dtype=dtypes_in)

# Split into the regression target (an independent copy) and the feature frame.
energy_labels = energy_data.loc[:, 'avg_energy'].copy()
energy = energy_data.drop(columns='avg_energy')

In [4]:
class AttributesAdder(BaseEstimator, TransformerMixin):
    """Pass-through pipeline step: stores one flag and returns its input unchanged.

    ``fit`` learns nothing and ``transform`` hands back ``X`` as-is, so this step
    currently has no effect on the data flowing through a pipeline.
    """

    def __init__(self, add_house_income_cat=True): # explicit keyword only: no *args or **kwargs
        """Store the ``add_house_income_cat`` flag on the instance.

        NOTE(review): the flag is not read by ``fit`` or ``transform`` in this
        class -- confirm whether it is still needed.
        """
        self.add_house_income_cat = add_house_income_cat
    def fit(self, X, y=None):
        """Return ``self`` without inspecting ``X`` or ``y``."""
        return self  # stateless: nothing to learn
    def transform(self, X):
        """Return ``X`` unchanged (identity transform)."""
        return X
    
def display_scores(scores):
    """Print a three-line summary of ``scores``: the values, their mean and their std.

    ``scores`` must expose ``.mean()`` and ``.std()`` (e.g. a NumPy array).
    Nothing is returned; the summary goes to stdout.
    """
    print("Scores:", scores)
    # Each statistic is computed right before it is printed, one line per statistic.
    average = scores.mean()
    print("Mean:", average)
    spread = scores.std()
    print("Standard deviation:", spread)

In [5]:
# Preprocessing chain: median imputation -> AttributesAdder step -> standardisation.
pipeline = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy="median")),
        ('attribs_adder', AttributesAdder()),
        ('std_scaler', StandardScaler()),
    ]
)

# NOTE(review): the pipeline is fit on every row here, and the cross-validation
# cells below split energy_tr afterwards -- the imputer/scaler statistics therefore
# include the validation folds. Consider fitting preprocessing inside each fold.
energy_tr = pipeline.fit_transform(X=energy)

# Testing SVM models

In [26]:
# Expand the preprocessed features with polynomial terms up to degree 3,
# leaving out the constant (bias) column.
poly_features = PolynomialFeatures(
    degree=3,
    include_bias=False,
)
energy_poly = poly_features.fit_transform(X=energy_tr)

In [11]:
# RBF-kernel SVR scored with 10-fold cross-validation on the preprocessed features.
# `degree` is only read by the 'poly' kernel, so the former `degree=2` had no
# effect with kernel="rbf"; it is dropped to avoid implying a polynomial model.
svm_reg = SVR(kernel="rbf", C=100, epsilon=0.1)
scores = cross_val_score(
    svm_reg,
    energy_tr,
    energy_labels,
    scoring="neg_mean_squared_error",
    cv=10,
)
# The scorer reports negated MSE (higher is better); negate before the square root to get RMSE.
svm_rmse_scores = np.sqrt(-scores)
display_scores(svm_rmse_scores)

Scores: [1.56575636 1.09481928 0.56085929 0.42142872 0.83019566 1.55774541
 0.65075733 0.38113112 0.48950624 1.679208  ]
Mean: 0.9231407418824442
Standard deviation: 0.4862506315391586


In [13]:
# Same RBF-kernel SVR, this time scored with 10-fold cross-validation on the
# polynomial-expanded features (energy_poly).
# `degree` is only read by the 'poly' kernel, so the former `degree=2` had no
# effect with kernel="rbf"; it is dropped to avoid implying a polynomial model.
svm_reg = SVR(kernel="rbf", C=100, epsilon=0.1)
scores = cross_val_score(
    svm_reg,
    energy_poly,
    energy_labels,
    scoring="neg_mean_squared_error",
    cv=10,
)
# The scorer reports negated MSE (higher is better); negate before the square root to get RMSE.
svm_rmse_scores = np.sqrt(-scores)
display_scores(svm_rmse_scores)

Scores: [1.62841232 1.18906117 0.58618282 0.43451391 0.88959102 2.67431348
 0.6684     0.65713298 0.54903435 1.71524481]
Mean: 1.0991886879787425
Standard deviation: 0.6768499587008127


In [20]:
# Search space: 3 kernels x 3 C values x 4 epsilon values = 36 candidates,
# each evaluated with 5-fold cross-validation.
param_grid = {
    'kernel': ['linear', 'rbf', 'sigmoid'],
    'C': [1, 10, 100],
    'epsilon': [10, 1, 0.1, 0.01],
}

# Base estimator; the grid search overrides its kernel / C / epsilon per candidate.
svm_reg = SVR()

grid_search_svm = GridSearchCV(
    estimator=svm_reg,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,  # run candidates in parallel on all available cores
    return_train_score=True,
)
# Last expression of the cell, so the fitted search object is also displayed.
grid_search_svm.fit(X=energy_tr, y=energy_labels)

GridSearchCV(cv=5, estimator=SVR(), n_jobs=-1,
             param_grid={'C': [1, 10, 100], 'epsilon': [10, 1, 0.1, 0.01],
                         'kernel': ['linear', 'rbf', 'sigmoid']},
             return_train_score=True, scoring='neg_mean_squared_error')

In [23]:
# best_score_ is the mean negated MSE of the winning candidate; convert it to RMSE.
svm_best_rmse = np.sqrt(-grid_search_svm.best_score_)
svm_best_params = grid_search_svm.best_params_
print('Best Score: %s' % svm_best_rmse)
print('Best Hyperparameters: %s' % svm_best_params)

Best Score: 0.9277103132094748
Best Hyperparameters: {'C': 1, 'epsilon': 0.01, 'kernel': 'rbf'}


In [27]:
# Same search as for the scaled features (reuses svm_reg and param_grid from the
# earlier grid-search cell), but fitted on the polynomial-expanded features.
grid_search_svm_poly = GridSearchCV(
    estimator=svm_reg,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,  # run candidates in parallel on all available cores
    return_train_score=True,
)
# Last expression of the cell, so the fitted search object is also displayed.
grid_search_svm_poly.fit(X=energy_poly, y=energy_labels)

GridSearchCV(cv=5, estimator=SVR(), n_jobs=-1,
             param_grid={'C': [1, 10, 100], 'epsilon': [10, 1, 0.1, 0.01],
                         'kernel': ['linear', 'rbf', 'sigmoid']},
             return_train_score=True, scoring='neg_mean_squared_error')

In [25]:
# NOTE(review): this repeats the earlier report for grid_search_svm, yet the output
# recorded below differs from the one recorded there and the execution counts are
# out of order -- grid_search_svm was presumably re-fit in between. Re-check after
# Restart Kernel -> Run All before relying on either number.
svm_best_rmse = np.sqrt(-grid_search_svm.best_score_)
svm_best_params = grid_search_svm.best_params_
print('Best Score: %s' % svm_best_rmse)
print('Best Hyperparameters: %s' % svm_best_params)

Best Score: 0.8910818601140648
Best Hyperparameters: {'C': 100, 'epsilon': 0.01, 'kernel': 'linear'}


In [28]:
# best_score_ is the mean negated MSE of the winning candidate; convert it to RMSE.
poly_best_rmse = np.sqrt(-grid_search_svm_poly.best_score_)
poly_best_params = grid_search_svm_poly.best_params_
print('Best Score: %s' % poly_best_rmse)
print('Best Hyperparameters: %s' % poly_best_params)

Best Score: 0.8978551761623954
Best Hyperparameters: {'C': 1, 'epsilon': 0.01, 'kernel': 'linear'}
