In [1]:
# univariate multi-step dlm
from numpy import array
from pandas import read_csv
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np

from dlmModel import DLM

In [2]:
%matplotlib
def plot_results(res, values, filled, last_n_data=None):
    """Plot selected series of a result dictionary on a shared index axis.

    A series whose length equals ``len(res['data'])`` is drawn over the
    training indices ``0 .. len(data) - 1``; any other series is drawn over
    the indices that immediately follow, one per element of
    ``res['pred_values']``.

    Parameters
    ----------
    res : dict
        Result mapping. Must contain ``'data'`` and ``'pred_values'`` as well
        as every key listed in ``values`` and ``filled``. It is not modified.
    values : iterable of str
        Keys of ``res`` drawn as lines.
    filled : iterable of str
        Keys of ``res`` drawn as filled areas (``alpha=0.5``).
    last_n_data : int, optional
        When truthy, every training-length series is cut down to its last
        ``last_n_data`` points before plotting.
    """
    # Length of the training series, taken from the results themselves so the
    # function does not depend on a notebook-level ``data`` variable.
    n_train = len(res['data'])

    res_copy = res.copy()
    if last_n_data:
        for key, series in res.items():
            try:
                is_training_length = len(series) == n_train
            except TypeError:
                # Unsized entries (e.g. scalars) are left untouched.
                is_training_length = False
            if is_training_length:
                res_copy[key] = series[-last_n_data:]

    data_ = res_copy['data']
    predict_n_ = len(res_copy["pred_values"])

    # x_data is the list of indices for the training
    x_data = list(range(len(data_)))
    # x_pred is the list of indices for the test
    x_pred = list(range(len(data_), len(data_) + predict_n_))

    plt.figure()
    for f in filled:
        # The untrimmed length decides which index axis the series belongs to.
        x_axis = x_data if len(res[f]) == n_train else x_pred
        plt.fill_between(x_axis, res_copy[f], alpha=0.5)
    for v in values:
        x_axis = x_data if len(res[v]) == n_train else x_pred
        plt.plot(x_axis, res_copy[v])
    plt.show()

Using matplotlib backend: MacOSX


In [3]:
# Load files
def load_file(path):
    """Read a ``;``-separated CSV file indexed by its ``timestamp`` column.

    The first row is used as the header, and the ``timestamp`` column is
    parsed as dates and becomes the index of the returned DataFrame.

    ``infer_datetime_format`` is intentionally not passed: pandas 2.0
    deprecated it and ignores it, so it only produces a warning there.
    """
    return read_csv(
        path,
        sep=";",
        header=0,
        parse_dates=['timestamp'],
        index_col=['timestamp'],
    )

In [4]:
# Define dataset to import
def get_ids(selection, directory="../data/processed/building_forecast/", ignore_ids=None):
    """Return the metadata rows matching ``selection``, minus excluded ids.

    Parameters
    ----------
    selection : dict
        Maps a metadata column name to the value it must equal. All
        conditions are combined; an empty dict keeps every row.
    directory : str, optional
        Folder containing the ``;``-separated ``metadata.csv`` file.
    ignore_ids : iterable of int, optional
        ``bat_id`` values to drop from the result. Defaults to the manually
        curated exclusion list used by this notebook.

    Returns
    -------
    pandas.DataFrame
        The filtered metadata rows.
    """
    import os

    if ignore_ids is None:
        ignore_ids = [223, 45, 19, 105, 75, 63, 58, 59]

    # Get ids
    meta = pd.read_csv(os.path.join(directory, "metadata.csv"), sep=';')
    # Use selection
    for col, wanted in selection.items():
        meta = meta[meta[col] == wanted]
    # Remove manual ids
    return meta[~meta["bat_id"].isin(list(ignore_ids))]

In [5]:
def normalize(data_):
    """Scale ``data_`` with a ``MinMaxScaler`` using ``feature_range=(0, 1)``.

    One-dimensional input is temporarily reshaped into a single column for
    the scaler and flattened again before being returned; input of any other
    dimensionality is passed to the scaler as is.

    Returns
    -------
    tuple
        ``(scaled_values, fitted_scaler)``.
    """
    values = array(data_)
    is_flat = values.ndim == 1
    # A 1-D series is presented to the scaler as one column.
    matrix = values.reshape((len(values), 1)) if is_flat else values

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(matrix)
    scaled = scaler.transform(matrix)

    if is_flat:
        scaled = scaled.flatten()
    return scaled, scaler

In [6]:
def run_dlm(data_, predict_n_, time_step_):
    """Call ``DLM`` on ``data_`` with this notebook's fixed configuration.

    Parameters
    ----------
    data_ :
        Series handed to ``DLM`` as its first positional argument.
    predict_n_ :
        Forwarded to ``DLM`` as ``predict_n``.
    time_step_ : int
        Divisor used to express one week (``7*24*60``) and one day
        (``24*60``) as a number of samples.

    Returns
    -------
    Whatever ``DLM`` returns.
    """
    minutes_per_day = 24 * 60
    # Multiply first, floor-divide last, exactly as 7*24*60//time_step_ does.
    samples_per_week = 7 * minutes_per_day // time_step_
    samples_per_day = minutes_per_day // time_step_

    settings = {
        'predict_n': predict_n_,
        # Trend component
        'is_trend': True,
        'trend_degree': 1,
        'trend_discount': 0.99,
        # Weekly seasonality component
        'is_seasonality': True,
        'seasonality_period': samples_per_week,
        'seasonality_discount': 0.99,
        # Long-season component (switched off)
        'is_long_season': False,
        'long_season_n_period': 7,
        'long_season_period_duration': samples_per_day,
        'long_season_discount': 0.99,
        # Auto-regressive component (switched off)
        'is_auto_reg': False,
        'auto_reg_degree': 2,
        'auto_reg_discount': 0.99,
    }
    return DLM(data_, **settings)

In [7]:
# Fit the DLM on each selected building and plot the forecast for the
# held-out last week against the real values.
directory = "../data/processed/building_forecast/"
selection = {'time_step': 60}
meta = get_ids(selection)

# Placeholder only: it is overwritten with the run_dlm output for each id.
results = []

for id_ in [53]:# meta['bat_id'].values:
    time_step = int(meta[meta['bat_id'] == id_]['time_step'].values[0])
    dataset = load_file("{}{}.csv".format(directory, id_))
    data, scaler = normalize(dataset['active_power'])

    # Keep at most 6*4 weeks of samples. The limit is computed once as a
    # positive number: negating before the floor division would round away
    # from zero and keep one sample too many whenever time_step does not
    # divide the window evenly.
    max_history = 6*4*7*24*60//time_step
    if len(data) > max_history:
        data = data[-max_history:]

    # Forecast horizon: one week of samples.
    predict_n = 7*24*60//time_step

    # Hold the last week out for validation.
    data, validation = data[:-predict_n], data[-predict_n:]
    print(data.shape, predict_n, time_step)

    results = run_dlm(data, predict_n, time_step)

    results['real'] = validation
    values = ['data', 'pred_values']
    filled = ['real']
    plot_results(results, values, filled, predict_n)

(3864,) 168 60
Initializing models...
Initialization finished.
Starting forward filtering...
Forward filtering completed.
Starting backward smoothing...
Backward smoothing completed.


In [14]:
# Show the last `predict_n` points of the fit and its trend / seasonality
# components as lines, with the data drawn as a filled area.
filled = ['data']
values = ['fit', 'trend', 'seasonality']
plot_results(results, values=values, filled=filled, last_n_data=predict_n)