In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# --- Run configuration -------------------------------------------------------
# Temporal categories processed by this notebook, and the number of future
# steps forecast for each of them.
time_categories = ["Hourly", "Daily", "Weekly", "Monthly", "Quarterly", "Yearly"]
prediction_horizons = {
    "Hourly": 48,
    "Daily": 14,
    "Weekly": 13,
    "Monthly": 18,
    "Quarterly": 8,
    "Yearly": 6,
}

# Base directory of the input files and the dataset-size tag embedded in the
# file names (kept as a string because it is only ever concatenated).
data_path_rel = "./data/opt-arima"
dataset_size = "10000"

# File-name prefixes; the temporal category and ".csv" are appended by the
# code that reads the files.
arima_path = data_path_rel + "/arima-" + dataset_size + "-"
residuals_path = data_path_rel + "/residuals-" + dataset_size + "-"

In [3]:
from sklearn.preprocessing import MinMaxScaler

def scale_timeseries(residuals_data):
    """Scale every row of a residuals frame independently.

    Each row has its NaN entries dropped, is fitted with its own
    ``MinMaxScaler`` (default settings) and the scaled values are written
    right-aligned into a zero-filled array with the same shape as the input.
    Positions to the left of a shorter series therefore stay 0.

    Parameters
    ----------
    residuals_data : pandas.DataFrame
        One time series per row; NaN marks missing observations.

    Returns
    -------
    scaled_data : numpy.ndarray
        Array of shape ``residuals_data.shape`` holding the scaled series.
    scalers : list
        The fitted scaler of each row, in row order, so the scaling can be
        undone later.
    """
    # TODO: Make sure the dtype is actually 32 bit in Keras
    scaled_data = np.zeros(residuals_data.shape)
    scalers = []

    for position, (_, series) in enumerate(residuals_data.iterrows()):
        # The scaler expects a 2-D column vector.
        observed = series.dropna().values
        column = observed.reshape((observed.shape[0], 1))

        row_scaler = MinMaxScaler().fit(column)
        scaled = row_scaler.transform(column).flatten()

        scalers.append(row_scaler)
        # Right-align: the most recent observation ends up in the last column.
        scaled_data[position, -scaled.shape[0]:] = scaled

    return scaled_data, scalers

In [4]:
def create_sliding_window_from_data(data, horizon):
    """Turn rows of a 2-D array into (window, next value) training pairs.

    For every row, a window of ``horizon`` consecutive values is slid from
    the left; the value immediately after the window is its target. Windows
    that contain only zeros are skipped.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array, one series per row.
    horizon : int
        Length of each input window.

    Returns
    -------
    X : numpy.ndarray
        Stacked windows, one per row.
    y : numpy.ndarray
        Target value following each window.
    """
    windows_per_row = data.shape[1] - horizon

    inputs = []
    targets = []

    for series in data:
        for start in range(windows_per_row):
            stop = start + horizon
            candidate = series[start:stop]

            # Nothing to learn from a window that is entirely zero.
            if np.count_nonzero(candidate) == 0:
                continue

            inputs.append(candidate)
            targets.append(series[stop])

    return np.array(inputs), np.array(targets)

In [5]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

def create_model(input_len, neurons=6):
    """Build and compile the feed-forward network used on the residuals.

    The network has two hidden ``Dense`` layers of width
    ``2 * input_len + 1`` (the first with ReLU, the second with no
    activation argument), each followed by ``Dropout(0.4)``, and a single
    output unit. It is compiled with mean squared error loss and the Adam
    optimizer.

    Parameters
    ----------
    input_len : int
        Number of input features (the window length).
    neurons : int, optional
        Accepted for interface compatibility; the body does not use it.

    Returns
    -------
    The compiled Keras ``Sequential`` model.
    """
    hidden_width = 2 * input_len + 1

    network = Sequential([
        Dense(hidden_width, activation='relu', input_shape=(input_len,)),
        Dropout(0.4),
        Dense(hidden_width),
        Dropout(0.4),
        Dense(1),
    ])
    network.compile(loss='mean_squared_error', optimizer='adam')

    return network

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [6]:
def make_predictions(residuals, model, horizon):
    """Forecast ``horizon`` steps ahead by feeding predictions back in.

    At each step the last ``horizon`` columns are passed to
    ``model.predict`` and the returned column is appended on the right, so
    later steps see earlier predictions as input.

    Parameters
    ----------
    residuals : numpy.ndarray
        2-D array of history, one series per row. It is not modified.
    model : object
        Anything with a ``predict(window)`` method returning one column per
        call (shape ``(rows, 1)``).
    horizon : int
        Both the input window length and the number of steps to forecast.

    Returns
    -------
    numpy.ndarray
        The last ``horizon`` columns of the extended array.
    """
    extended = residuals.copy()

    for _ in range(horizon):
        latest_window = extended[:, -horizon:]
        next_step = model.predict(latest_window)
        extended = np.hstack([extended, next_step])

    return extended[:, -horizon:]

In [7]:
def inverse_scale(predictions, scalers):
    """Undo the per-row scaling of a block of predictions.

    Row ``i`` of ``predictions`` is reshaped to a column, passed through
    ``scalers[i].inverse_transform`` and flattened back to one dimension.

    Parameters
    ----------
    predictions : numpy.ndarray
        2-D array of scaled predictions, one series per row.
    scalers : sequence
        One fitted scaler per row, indexed in row order.

    Returns
    -------
    numpy.ndarray
        The predictions mapped back through each row's scaler.
    """
    n_rows, n_steps = predictions.shape[0], predictions.shape[1]

    restored = [
        scalers[idx].inverse_transform(predictions[idx, :].reshape(n_steps, 1)).flatten()
        for idx in range(n_rows)
    ]

    return np.array(restored)

In [8]:
def get_results(temporal_category):
    """Train on one category's residuals and write the combined forecast.

    Steps, in order:
      1. read the residuals CSV and the ARIMA CSV for ``temporal_category``
         (first column used as index);
      2. scale each residual series and cut it into sliding windows of the
         category's prediction horizon;
      3. fit a fresh model on those windows;
      4. forecast the residuals, undo the scaling and add them to the ARIMA
         values;
      5. save the sum as CSV, indexed like the residuals file.

    Parameters
    ----------
    temporal_category : str
        A key of ``prediction_horizons``; also part of the file names.

    Returns
    -------
    None. The result is written to disk.
    """
    residuals_frame = pd.read_csv(residuals_path + temporal_category + ".csv", index_col=0)
    arima_forecasts = pd.read_csv(arima_path + temporal_category + ".csv", index_col=0).values

    scaled_residuals, scalers = scale_timeseries(residuals_frame)

    # The horizon is both the window length and the number of forecast steps.
    horizon = prediction_horizons[temporal_category]
    train_X, train_y = create_sliding_window_from_data(scaled_residuals, horizon)

    model = create_model(train_X.shape[1])
    model.fit(
        train_X,
        train_y,
        epochs=20,
        batch_size=32,
        verbose=0,
        validation_split=0.1,
        shuffle=True,
    )

    scaled_forecasts = make_predictions(scaled_residuals, model, horizon)
    residual_forecasts = inverse_scale(scaled_forecasts, scalers)

    # Final forecast = ARIMA values + predicted residuals.
    output_file = data_path_rel + "-results/a_" + temporal_category + "_" + dataset_size + ".csv"
    combined = pd.DataFrame(arima_forecasts + residual_forecasts, index=residuals_frame.index)
    combined.to_csv(output_file)

In [9]:
# Run the whole pipeline once per temporal category, in list order.
for category in time_categories:
    print("Working on " + category)
    get_results(category)

print("DONE")

Working on Hourly
Working on Daily
Working on Weekly
Working on Monthly
Working on Quarterly
Working on Yearly
DONE
