In [7]:
import csv
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import learning_curve
from sklearn.kernel_ridge import KernelRidge
import os
import json
import import_ipynb

from data_processor import DataLoader
import time
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing
from plotly.offline import iplot
import plotly as py
import plotly.graph_objs as go

py.offline.init_notebook_mode(connected=True)
import math



In [103]:
# configs = json.load(open('config.json', 'r'))

data = DataLoader(
        os.path.join('data', "Sodermalmsallen4-6.csv"),
        colsX=["hour","one_hour_before","weekday","two_hour_before","three_hour_before","7days_before"],
        colsY=["Consumption"],
        split=0.9,
        Root=1/10
    )

train_X, test_X = data.get_norX_data()
train_Y, test_Y = data.get_norY_data()
train_Y = train_Y.ravel()
test_Y = test_Y.ravel()

In [98]:
def plot_results(predicted_data, true_data,pre_time,type_,mse):
    pre_time = pd.to_datetime(pre_time)

    trace1 = go.Scatter(x=pre_time,
                        y=true_data,
                        mode='lines',
                        name='True',
                        hoverinfo='name',
                        line=dict(
                                    shape='spline'
                                 )
                        )
    
    trace2 = go.Scatter(x=pre_time,
                        y=predicted_data,
                        mode='lines',
                        name='Prediction',
                        hoverinfo='name',
                        line=dict(
                                    shape='spline'
                                 )
                        )
    data = [trace1, trace2]
    layout = go.Layout(title = type_ +':Prediction & True',
                     annotations=[
                                  dict(
                                      x=pre_time[int(len(pre_time)*0.2)],
                                      y=max(true_data)+10,
                                      xref='x',
                                      yref='y',
                                      text='MSE:'+str(mse),
                                      font=dict(
                                          size=18
                                      ),
                                      showarrow=False
                                  )
                                ],
                   xaxis = dict(title = 'timestamp')
                  )
    fig = go.Figure(data=data, layout=layout)
    py.offline.iplot(fig)
    

**SVM**

In [55]:
svr = GridSearchCV(SVR(kernel='rbf', gamma=0.1),cv=5, param_grid={'C':[1e0, 1e1,1e2,1e3],
                                                                  'gamma':np.logspace(-2,2,5)})
t0 = time.time()
svr.fit(train_X, train_Y)
svr_fit = time.time() - t0
print("SVR complexity and bandwidth selected and model fitted in %.3f s"
      % svr_fit)

sv_ratio = svr.best_estimator_.support_.shape[0] / train_X.shape[0]
print("Support vector ratio: %.3f" % sv_ratio)

SVR complexity and bandwidth selected and model fitted in 801.777 s
Support vector ratio: 0.331


In [56]:
t0 = time.time()
y_svr = svr.predict(test_X)
svr_predict = time.time() - t0
print("SVR prediction for %d inputs in %.3f s"
      % (test_X.shape[0], svr_predict))

SVR prediction for 632 inputs in 0.026 s


In [57]:
min_max_scaler = preprocessing.MinMaxScaler().fit(data.data_trainY.reshape(-1,1))
y_predcit_kw = min_max_scaler.inverse_transform(y_svr.reshape(-1,1))
y_true_kw = data.data_testY.values   

y_true_kw = np.power(y_true_kw,2)
y_predcit_kw = np.power(y_predcit_kw,2)

In [58]:
mse = mean_squared_error(y_predcit_kw.flatten(),y_true_kw.flatten())
mse

13.451571006149923

In [59]:
pre_time_SVM = data.pre_time
plot_results(predicted_data=y_predcit_kw.flatten(), 
             true_data=y_true_kw.flatten(),
             pre_time=pre_time_SVM,
             type_='SVM',
             mse = mse)

**KRR**

In [99]:
kr = GridSearchCV(KernelRidge(kernel='rbf',gamma=0.1),cv=5,param_grid={'alpha':[1e0,0.1,1e2,1e3],
                                                                      'gamma': np.logspace(-2,2,5)})
t0 = time.time()
kr.fit(train_X, train_Y)
kr_fit = time.time() - t0
print("KRR complexity and bandwidth selected and model fitted in %.3f s"
      % kr_fit)


The default of the `iid` parameter will change from True to False in version 0.22 and will be removed in 0.24. This will change numeric results when test-set sizes are unequal.



KRR complexity and bandwidth selected and model fitted in 217.313 s


In [100]:
t0 = time.time()
y_kr = kr.predict(test_X)
kr_predict = time.time() - t0
print("KRR prediction for %d inputs in %.3f s"
      % (test_X.shape[0], kr_predict))

KRR prediction for 632 inputs in 0.080 s


In [101]:
min_max_scaler = preprocessing.MinMaxScaler().fit(data.data_trainY.reshape(-1,1))
y_predcit_kw_kr = min_max_scaler.inverse_transform(y_kr.reshape(-1,1))
y_true_kw_kr = data.data_testY.values
y_true_kw_kr = np.power(y_true_kw_kr,6) 
y_predcit_kw_kr = np.power(y_predcit_kw_kr,6) 
mse_kr = mean_squared_error(y_predcit_kw_kr.flatten(),y_true_kw_kr.flatten())
mse_kr

13.133665179264161

In [102]:
pre_time_KRR = data.pre_time
plot_results(predicted_data=y_predcit_kw_kr.flatten(), 
             true_data=y_true_kw_kr.flatten(),
             pre_time=pre_time_KRR,
             type_='KRR',
             mse = mse_kr)