In [1]:
from src.data import read_data,prepare_data
from src.models import train_model,predict_model
import pandas as pd
pd.options.mode.chained_assignment = None

In [2]:
PATH_TO_TRAIN = '../data/raw/train15.csv'
PATH_TO_PREDICTORS = '../data/raw/predictors15.csv'

ONE_DAY = 24
ONE_WEEK = 7 * ONE_DAY
ONE_MONTH = 30* ONE_DAY

TRAIN_SIZE = 12 * ONE_MONTH
number_of_tests = 7
size_of_prediction = ONE_DAY
TEST_SIZE = number_of_tests * size_of_prediction

ROLLING_COLUMNS = ['POWER','VAR78', 'VAR79', 'VAR134', 'VAR157', 'VAR164', 'VAR165', 'VAR166', 'VAR167','VAR169', 'VAR175', 'VAR178', 'VAR228']
ACCUMLATED_FEATURE_COLUMNS = ['VAR169', 'VAR175', 'VAR178', 'VAR228']

df_original = read_data.read_data(PATH_TO_TRAIN,PATH_TO_PREDICTORS)

df_cut = df_original[:TRAIN_SIZE + TEST_SIZE]

intervals = []
for i in range(1,31):
    intervals.append(i*size_of_prediction)

In [3]:
df = df_cut.copy()
df = prepare_data.prepare(df, ONE_DAY,intervals)
df = prepare_data.dissipate_features(df, ACCUMLATED_FEATURE_COLUMNS)
#df = prepare_data.add_rolling(df,["POWER"], intervals, ONE_DAY)
#ROLLING_COLUMNS.remove("POWER")
df = prepare_data.add_rolling(df,ROLLING_COLUMNS, intervals, ONE_DAY)

In [4]:
df_train = df[:TRAIN_SIZE].dropna(axis=0)
X_train = df_train.drop(ROLLING_COLUMNS,axis=1)
y_train = df_train.POWER
boosted_model = train_model.train(X_train, y_train)

In [5]:
import numpy as np
from bokeh.plotting import figure,show
from bokeh.io import output_notebook
output_notebook()
from sklearn.metrics import explained_variance_score

results = []
y_tests = []
y_preds = []

for i in range(number_of_tests):
    start = TRAIN_SIZE+size_of_prediction * i
    end = TRAIN_SIZE+size_of_prediction * (i+1)
    df_test = df[start:end] 
    X_test = df_test.drop(ROLLING_COLUMNS,axis=1)
    y_test = df_test.POWER
    y_pred = predict_model.predict(X_test, boosted_model)
    p = figure()
    p.line(np.arange(len(y_test)), y_test, legend="real")
    p.line(np.arange(len(y_pred)), y_pred, legend="predicted_xgb", color="orange")
    show(p)
    y_tests.extend(y_test)
    y_preds.extend(y_pred)
    print(explained_variance_score(y_pred, y_test))
    results.append(explained_variance_score(y_pred, y_test))

0.9193161794316866


0.6831098527765695


0.8715811158046103


0.6706764094999611


0.889562630778462


0.9379633625637315


0.7161265850786465


In [13]:
p = figure()
p.line(np.arange(len(y_tests)), y_tests, legend="real", line_width=2)
p.line(np.arange(len(y_preds)), y_preds, legend="predicted", color="orange", line_width=2)
show(p)

In [7]:
p = figure()
p.line(np.arange(len(results)), results, legend="accuracy", line_width=2,color = "green")
show(p)