In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential
import pickle
from datetime import datetime as dt
from scipy.stats import zscore
import tensorflow as tf
from tensorflow.keras import layers

Using TensorFlow backend.


In [2]:
f = "data/NCENT.csv"
df = pd.read_csv(f)
df

Unnamed: 0,load,tempc,year,month,day,hour
0,10336.304899,0.810573,2002,1,1,0
1,10178.052738,4.310353,2002,1,1,1
2,10100.887710,-4.096146,2002,1,1,2
3,10081.565109,-0.575617,2002,1,1,3
4,10192.218670,1.840488,2002,1,1,4
...,...,...,...,...,...,...
148915,12647.634898,16.669271,2018,12,31,19
148916,12440.117160,16.960569,2018,12,31,20
148917,12238.695990,18.138311,2018,12,31,21
148918,11964.726843,17.100493,2018,12,31,22


In [3]:
def makeUsefulDf(df, noise=2.5, hours_prior=24):
    if 'dates' not in df.columns:
        df['dates'] = df.apply(lambda x: dt(int(x['year']), int(x['month']), int(x['day']), int(x['hour'])), axis=1)
    
    #PREV LOAD & LOAD
    r_df = pd.DataFrame()
    r_df["loads_n"] = zscore(df["load"])
    r_df["loads_prev_n"] = r_df["loads_n"].shift(hours_prior)
    r_df["loads_prev_n"].bfill(inplace=True)
    #LOAD PREV
    
    def _chunks(l, n):
        return [l[i:i+n] for i in range(0, len(l), n)]
    
    n = np.array([val for val in _chunks(list(r_df["loads_n"]), 24) for _ in range(24)])
    
    l = ["l" + str(i) for i in range(24)]
    for i, s in enumerate(l):
        r_df[s] = n[:, i]
        r_df[s] = r_df[s].shift(hours_prior)
        r_df[s] = r_df[s].bfill()
    r_df.drop(['loads_n'], axis=1, inplace=True)
    #Date
    r_df["years_n"] = zscore(df["dates"].dt.year)
    r_df = pd.concat([r_df, pd.get_dummies(df.dates.dt.hour, prefix='hour')], axis=1)
    r_df = pd.concat([r_df, pd.get_dummies(df.dates.dt.dayofweek, prefix='day')], axis=1)
    r_df = pd.concat([r_df, pd.get_dummies(df.dates.dt.month, prefix='month')], axis=1)
    #I am not excluding holidays
    #for holiday in ["New Year's Day", "Memorial Day", "Independence Day", "Labor Day", "Thanksgiving", "Christmas Day"]:
    #r_df[holiday] = _isHoliday(holiday, df)
    
    #including noise in the data
    temp_noise = df['tempc'] + np.random.normal(0, noise, df.shape[0])
    r_df["temp_n"] = zscore(temp_noise)
    r_df['temp_n^2'] = zscore([x*x for x in temp_noise])

    return r_df

In [None]:
r_df = pd.DataFrame()
r_df["load_n"] = zscore(df["load"])
print("mean is {} and std is {}".format(df["load"].mean(), df["load"].std()))


In [None]:
r_df.head()

In [None]:
df["load"].head()

In [None]:
r_df["load_prev_n"] = r_df["load_n"].shift(24)

In [None]:
r_df.head()

In [None]:
r_df["load_prev_n"].bfill(inplace=True)

In [None]:
r_df.head()

In [None]:
def _chunks(l, n):
    return [l[i:i+n] for i in range(0, len(l), n)]

In [None]:
n = np.array([val for val in _chunks(list(r_df["load_n"]), 24) for _ in range(24)])

In [None]:
print(n)

In [4]:
x = makeUsefulDf(df)
y = df["load"]
shape = x.shape[1]
epochs=10

In [None]:
x.head()

In [None]:
#building the model


In [6]:
model = tf.keras.Sequential()
model.add(layers.Dense(shape, activation=tf.nn.relu, input_shape=[len(x.keys())]))
model.add(layers.Dense(shape, activation=tf.nn.relu))
model.add(layers.Dense(shape, activation=tf.nn.relu))
model.add(layers.Dense(shape, activation=tf.nn.relu))
model.add(layers.Dense(shape, activation=tf.nn.relu))
model.add(layers.Dense(1))
                                                                


In [7]:
def MAPE(predictions, answers):
    assert len(predictions) == len(answers)
    return sum([abs(x - y)/(y+1e-5) for x, y in zip(predictions, answers)])/len(answers)*100

In [15]:
optimizer = tf.keras.optimizers.RMSprop(0.0001)
model.compile(loss="mean_squared_error",optimizer=optimizer,metrics=["mean_absolute_error", "mean_squared_error"])
model_checkpoint = tf.keras.callbacks.ModelCheckpoint("modelFP.h5", save_best_only=True)
early_stop = tf.keras.callbacks.EarlyStopping(monitor="mean_absolute_error", patience=3)
x_train, y_train = x[:-17520], y[:-17520]
x_val, y_val = x[-17520:-8760], y[-17520:-8760] 
model.fit(x_train, y_train, epochs=50, verbose=0, callbacks=[early_stop, model_checkpoint], validation_data = x_val)

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [10]:
predictions = [float(f) for f in model.predict(x[-8760:])]
train = [float(f) for f in model.predict(x[:-8760])]

accuracy = {
    'test': MAPE(predictions, y[-8760:]),
    'train': MAPE(train, y[:-8760])
}

In [11]:
output = pd.DataFrame({'TimeStamp': df.dates[-8760:], 'Load_Prediction': predictions})

In [12]:
output.head()

Unnamed: 0,TimeStamp,Load_Prediction
140160,2018-01-01 00:00:00,18918.365234
140161,2018-01-01 01:00:00,18598.800781
140162,2018-01-01 02:00:00,18635.449219
140163,2018-01-01 03:00:00,18839.5
140164,2018-01-01 04:00:00,18946.654297


In [None]:
import pickle

In [None]:
model_name = "model.pkl"
with open(model_name, 'wb') as file:  
    pickle.dump(model, file)