In [1]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')

import xgboost as xgb
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_squared_error, mean_absolute_error

import utility_functions as fn

In [None]:
# Load the cleaned weather table; the first CSV column becomes the index.
weather = pd.read_csv(
    'weather_clean.csv',
    index_col=0,
)
# Parse the index as datetimes. The format hard-codes ':00:00', so the index
# strings are expected to be hourly stamps.
weather.index = pd.to_datetime(
    weather.index,
    format='%Y-%m-%d %H:00:00',
)

In [None]:
# Load the cleaned per-building energy table; the first CSV column becomes the index.
energy = pd.read_csv(
    'energy_clean.csv',
    index_col=0,
)
energy.index = pd.to_datetime(
    energy.index,
    format='%Y-%m-%d %H:00:00',
)

# Sum the four building columns into a single series (a missing value in any
# building makes the total missing for that row), then keep only that total.
energy['Yale_kWh'] = (
    energy['YUAG']
    .add(energy['Berkeley'])
    .add(energy['Hopper'])
    .add(energy['17HH'])
)
energy = energy['Yale_kWh'].to_frame()

In [None]:
# Rows stamped at or before the split date are used for training,
# strictly later rows for testing.
split_date = '2018-07-20 00:00:00'
train = energy[energy.index <= split_date].copy()
test = energy[energy.index > split_date].copy()

In [None]:
def add_date_features(df, label=None):
    """
    Creates time series features from datetime index.

    The features are written onto ``df`` itself (a ``date`` column holding a
    copy of the index, plus one column per calendar feature), and the feature
    columns are then returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by datetimes. The ``.dt`` accessor is applied to the
        index values, so a non-datetime index raises ``AttributeError``.
    label : hashable, optional
        Name of the target column in ``df``. When it is not ``None`` the
        target column is returned together with the features.

    Returns
    -------
    X : pandas.DataFrame
        Columns ``hour``, ``dayofweek``, ``quarter``, ``month``, ``year``,
        ``dayofyear``, ``dayofmonth`` and ``weekofyear``.
    (X, y) : tuple of (pandas.DataFrame, pandas.Series)
        Returned instead of ``X`` alone when ``label`` is given.
    """
    df['date'] = df.index
    stamps = df['date'].dt

    df['hour'] = stamps.hour
    df['dayofweek'] = stamps.dayofweek
    df['quarter'] = stamps.quarter
    df['month'] = stamps.month
    df['year'] = stamps.year
    df['dayofyear'] = stamps.dayofyear
    df['dayofmonth'] = stamps.day

    # `.dt.weekofyear` was deprecated in pandas 1.1 and removed in 2.0; the
    # replacement is the ISO calendar week. It comes back as a nullable
    # unsigned dtype, so cast to plain int64 to match the other features.
    # NOTE(review): the cast assumes the index has no NaT entries.
    if hasattr(stamps, 'isocalendar'):
        df['weekofyear'] = stamps.isocalendar().week.astype('int64')
    else:
        # Older pandas without `isocalendar`.
        df['weekofyear'] = stamps.weekofyear

    X = df[['hour','dayofweek','quarter','month','year',
           'dayofyear','dayofmonth','weekofyear']]
    # Compare against None so that falsy-but-valid column labels (e.g. 0) work.
    if label is not None:
        y = df[label]
        return X, y
    return X

In [None]:
# Build the calendar features and pull out the target for each split.
# Note: the call also writes the feature columns onto `train` / `test`.
X_train, y_train = add_date_features(df=train, label='Yale_kWh')
X_test, y_test = add_date_features(df=test, label='Yale_kWh')

In [None]:
# Preview the first few feature rows rather than rendering the whole frame.
X_train.head()

In [None]:
# Early stopping must not look at the test set, otherwise the number of trees
# is tuned on the data later used to report the error. Hold out the tail of
# the training data as a validation slice instead.
# NOTE(review): assumes the rows of X_train / y_train are in chronological
# order so the tail is the most recent period - confirm against the CSV.
valid_fraction = 0.1
n_valid = max(1, int(len(X_train) * valid_fraction))
X_fit, y_fit = X_train.iloc[:-n_valid], y_train.iloc[:-n_valid]
X_valid, y_valid = X_train.iloc[-n_valid:], y_train.iloc[-n_valid:]

reg = xgb.XGBRegressor(n_estimators=1000)
# The last entry of eval_set is the one monitored for early stopping.
# NOTE(review): newer xgboost releases expect `early_stopping_rounds` on the
# estimator constructor rather than in fit() - check the installed version.
reg.fit(X_fit, y_fit,
        eval_set=[(X_fit, y_fit), (X_valid, y_valid)],
        early_stopping_rounds=50,
        verbose=False)

In [None]:
# Bar chart of per-feature importance for the fitted model; the returned
# axes object is bound to `_` so the cell shows only the figure.
_ = plot_importance(booster=reg, height=0.9)

In [None]:
# Store the model's predictions for the test period next to the actuals.
test['Prediction_kWh'] = reg.predict(X_test)

# Stack test and train rows into one frame and plot actual vs. predicted.
yale_all = pd.concat(objs=[test, train], sort=False)
_ = yale_all.loc[:, ['Yale_kWh', 'Prediction_kWh']].plot(figsize=(15, 5))

In [None]:
def mean_absolute_percentage_error(y_true, y_pred): 
    """Calculates MAPE given y_true and y_pred.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, broadcastable against ``y_true``.

    Returns
    -------
    float
        Mean of ``|y_true - y_pred| / |y_true|`` expressed in percent.
        Observations where ``y_true`` is exactly zero are left out, because
        the percentage error is undefined there. ``nan`` is returned when no
        usable observation remains (empty input or all-zero ``y_true``).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Align shapes up front so the zero mask can index both arrays.
    y_true, y_pred = np.broadcast_arrays(y_true, y_pred)

    usable = y_true != 0
    if not usable.any():
        return float('nan')

    actual = y_true[usable]
    ratio = np.abs((actual - y_pred[usable]) / actual)
    return np.mean(ratio) * 100

In [None]:
# MAPE (in percent) of the predictions over the test period.
mean_absolute_percentage_error(
    y_true=test['Yale_kWh'],
    y_pred=test['Prediction_kWh'],
)