In [1]:
from dataloader import load_raw, create_datasets
from measures import compute_intermittent_indicators

import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import pickle as pkl

In [None]:
# One summary line per dataset: name, number of training series, length of the
# first test series, frequency, h, and the fraction (0..1) of series with ADI >= 1.32.
# The (adi, cv2) pair of every dataset is kept in `d`, keyed by dataset name.

d = {}
dataset_names = ["M5", "OnlineRetail", "carparts", "RAF", "Auto"]
data_folder = os.path.join("..", "data")

for dname in dataset_names:
    data_raw, data_info = load_raw(dataset_name=dname, datasets_folder_path=data_folder)
    adi, cv2 = compute_intermittent_indicators(data_raw, data_info['h'])
    d[dname] = (adi, cv2)

    datasets = create_datasets(data_raw, data_info)
    n_train_series = len(datasets['train'])
    test_length = len(datasets['test'][0]['target'])
    # Count the series at or above the ADI threshold and divide by the total.
    high_adi_fraction = np.count_nonzero(adi >= 1.32) / adi.size

    print(dname, n_train_series, test_length, data_info['freq'], data_info['h'], high_adi_fraction)

In [None]:
# Scatterplots of ADI vs CV2 for each dataset, each ts is a point.
# Both axes are log-scaled; the dashed red lines mark ADI = 1.32 and CV2 = 0.49.

fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(8,5), sharex=True)
axs = axs.flatten()

# The tick formatters do not depend on the dataset, so they are built once.
# Major ticks: integers without decimals, everything else with two decimals.
formatter = FuncFormatter(lambda x, _: '{:.0f}'.format(x) if float(x).is_integer() else '{:.2f}'.format(x))
# Minor ticks: keep the tick marks but print no label.
formatter2 = FuncFormatter(lambda x, _: '')

for i, dname in enumerate(["M5", "OnlineRetail", "carparts", "RAF", "Auto"]):
    adi, cv2 = d[dname]
    # Only series with a non-zero CV2 are drawn.
    nonzero_cv2 = cv2 != 0
    axs[i].scatter(adi[nonzero_cv2], cv2[nonzero_cv2], s=np.log(adi[nonzero_cv2].size)*0.4, color='black', alpha=0.05)
    axs[i].axvline(x=1.32, color='red', linestyle='--', linewidth=1)
    axs[i].axhline(y=0.49, color='red', linestyle='--', linewidth=1)
    axs[i].set_xscale('log')
    axs[i].set_yscale('log')

    axs[i].xaxis.set_major_formatter(formatter)
    axs[i].yaxis.set_major_formatter(formatter)
    axs[i].xaxis.set_minor_formatter(formatter2)
    axs[i].yaxis.set_minor_formatter(formatter2)
    axs[i].tick_params(which='both', width=0.5)
    axs[i].tick_params(which='major', length=7)
    axs[i].tick_params(which='minor', length=4, color='black')
    axs[i].set_xlabel(r'$log(ADI)$')
    axs[i].set_ylabel(r'$log(CV^2)$')
    axs[i].set_title(dname)

# Only five datasets: the sixth panel stays empty and is hidden.
axs[5].set_visible(False)
# With sharex=True matplotlib only creates x tick labels on the bottom row.
# The panel below axs[2] is the hidden one, so without this call the third
# column would show no x tick labels at all.
axs[2].tick_params(axis='x', which='both', labelbottom=True)

# Drop the axis labels that are redundant inside the grid.
axs[0].set_xlabel('')
axs[1].set_xlabel('')
axs[1].set_ylabel('')
axs[2].set_ylabel('')
axs[4].set_ylabel('')

plt.tight_layout()
# plt.savefig("dset_adiXcv2.pdf", format="pdf", transparent=True)

In [None]:
# Boxplots of stats for each dataset: demand intervals, demand levels, demand per period

import pandas as pd
import seaborn as sns

def dataset_stats(data, to_pandas = True):
    """Summarise the demand pattern of every time series in ``data``.

    For each row of ``data`` (one time series) three quantities are derived
    from the periods with strictly positive demand:

    * demand intervals  -- number of periods between consecutive positive
      demands; the first interval is counted from the start of the series,
    * demand sizes      -- the positive demand values themselves,
    * demand per period -- each demand size divided by its demand interval.

    The mean and the standard deviation of each quantity are computed per
    series. A series without any positive demand gets NaN for all six values.

    Parameters
    ----------
    data : np.ndarray
        2-D array with one time series per row.
    to_pandas : bool, default True
        If True, return a five-number summary across series as a DataFrame;
        otherwise return the raw per-series arrays.

    Returns
    -------
    pandas.DataFrame or dict
        ``to_pandas=True``: DataFrame indexed by
        ``['min', '25%ile', 'median', '75%ile', 'max']`` with MultiIndex
        columns (quantity, 'mean' | 'std'); NaN entries are ignored, and a
        column is all-NaN if no series has a value for it.
        ``to_pandas=False``: ``{quantity: {'mean': array, 'std': array}}``
        with one array entry per row of ``data``.

    Raises
    ------
    AssertionError
        If ``data`` is not a 2-D ``np.ndarray``.
    """
    assert isinstance(data, np.ndarray) and data.ndim == 2
    n_series = len(data)
    idx = np.arange(data.shape[1])
    quantities = ('demand intervals', 'demand sizes', 'demand per period')
    # Pre-fill with NaN so that series without demand are explicitly "missing".
    stats = {q: {'mean': np.full(n_series, np.nan),
                 'std': np.full(n_series, np.nan)}
             for q in quantities}

    for i, ts in enumerate(data):
        has_demand = ts > 0
        if not has_demand.any():
            # No positive demand: leave NaN instead of averaging empty arrays.
            continue
        # 1-based positions of the demands, preceded by 0 so that the first
        # interval is measured from the beginning of the series.
        demand_idx = np.concatenate(([0], idx[has_demand] + 1))
        demand_intervals = np.diff(demand_idx)
        demand_sizes = ts[has_demand]
        assert len(demand_intervals) == len(demand_sizes)
        per_series = {'demand intervals': demand_intervals,
                      'demand sizes': demand_sizes,
                      'demand per period': demand_sizes / demand_intervals}
        for q, values in per_series.items():
            stats[q]['mean'][i] = np.mean(values)
            # The spread of every quantity is its standard deviation.
            stats[q]['std'][i] = np.std(values)

    if not to_pandas:
        return stats

    summary_index = ['min', '25%ile', 'median', '75%ile', 'max']
    df_dict = {}
    for q in quantities:
        for s in ['mean', 'std']:
            v = stats[q][s]
            v = v[~np.isnan(v)]
            if v.size == 0:
                # Nothing to summarise (e.g. no series has any demand).
                df_dict[q + ' (' + s + ')'] = [np.nan] * len(summary_index)
            else:
                df_dict[q + ' (' + s + ')'] = [np.min(v), np.quantile(v, .25), np.median(v), np.quantile(v, .75), np.max(v)]
    df = pd.DataFrame(df_dict, index=summary_index)
    df.columns = pd.MultiIndex.from_product([quantities, ('mean', 'std')])
    return df
    
# grouped_stats["<quantity> (<mean|std>)"][dataset name] -> one value per series,
# computed on the test targets of the series with ADI >= 1.32.
grouped_stats = {}

for dname in ["M5", "OnlineRetail", "carparts", "RAF", "Auto"]:
    print(dname)
    data_raw, data_info = load_raw(dataset_name=dname, datasets_folder_path=os.path.join("..","data"))
    adi, cv2 = compute_intermittent_indicators(data_raw, data_info['h'])
    datasets = create_datasets(data_raw, data_info)
    # Stack the test targets into a 2-D array (one row per series).
    test_targets = np.array([x['target'] for x in datasets['test']])
    data_stats = dataset_stats(test_targets[adi >= 1.32,:], to_pandas=False)
    for key, per_stat in data_stats.items():
        for stat, values in per_stat.items():
            grouped_stats.setdefault(key + ' (' + stat + ')', {})[dname] = values


def _boxplot_mean_std(grouped, quantity, label):
    """Draw the per-dataset boxplots of one quantity's mean and std side by side.

    ``grouped`` is the ``grouped_stats`` mapping, ``quantity`` the key prefix
    (e.g. 'demand sizes') and ``label`` the text written to the left of the
    figure. Returns the created figure.
    """
    fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8, 2), sharey='row', facecolor='#f9f9f9')
    for ax, stat in zip(axs, ('mean', 'std')):
        sns.boxplot(data=grouped[quantity + ' (' + stat + ')'], ax=ax, orient='h', fliersize=4, showfliers=False, linewidth=1, medianprops={'color': '#f9f9f9'}, color="black")
        ax.set_title("$" + stat + "$")
        ax.set_facecolor('#f9f9f9')
    # Row label placed left of the axes area (negative x in figure coordinates).
    fig.text(-0.1, 0.5, label, ha='center', fontsize=12)
    fig.tight_layout(rect=[0, 0, 1, 1.05])
    return fig


# One figure per quantity, selected by explicit key rather than by dict order.
fig = _boxplot_mean_std(grouped_stats, 'demand intervals', "Demand intervals  ")
fig2 = _boxplot_mean_std(grouped_stats, 'demand sizes', "Demand levels       ")
fig3 = _boxplot_mean_std(grouped_stats, 'demand per period', "Demand per period")

plt.show()

In [None]:
# Time features used by models

from gluonts.time_feature import time_features_from_frequency_str

# Display, as a tuple, what gluonts returns for the "D" and "ME" frequency strings.
tuple(time_features_from_frequency_str(freq) for freq in ("D", "ME"))

In [None]:
# Forecast plot

from visual import forecast_plot

# Load the dataset whose forecasts are plotted.
dname = "M5"
data_folder = os.path.join("..", "data")
data_raw, data_info = load_raw(dataset_name=dname, datasets_folder_path=data_folder)
datasets = create_datasets(data_raw, data_info)

# Read the stored forecasts of one trained model from below the user's home directory.
model_folder_name = "deepAR__M5__negbin__mean-demand__2024-06-21-13-47-45-072320"
trained_models_root = os.path.join(os.path.expanduser("~/switchdrive"), "iTS", "trained_models")
model_folder_path = os.path.join(trained_models_root, model_folder_name)
forecasts = np.load(os.path.join(model_folder_path, "forecasts.npy"))

# Index of the series to plot.
ts_index = 7

# NOTE(review): `o` is not used anywhere else in this cell; kept as-is.
o = {
    'index': ts_index,
    'forecasts': forecasts[ts_index, :, :],
    'datasets': datasets['test'][ts_index],
    'alphas': [0.01, 0.02, 0.05, 0.90],
    'targetName': 'target',
}

forecast_plot(
    ts_index=ts_index,
    forecasts=forecasts,
    datasets=datasets,
    data_info=data_info,
    alphas=[0.01, 0.02, 0.05, 0.90],
    targetName="target",
)