In [187]:
import ipywidgets as widgets
from ipywidgets import HBox, VBox, Layout, Tab, Label, Checkbox, Button
from ipywidgets import FloatSlider, IntSlider, Play, Dropdown, HTMLMath
from IPython.display import display

import matplotlib.pyplot as plt
import seaborn as sns; sns.set_style('whitegrid')
import numpy as np

import pandas as pd
from datetime import datetime

In [188]:
# Read the time-series table and convert its date column with pd.to_datetime.
df = pd.read_parquet("cmm_erdos_bootcamp_2020_timeseries.pq", engine='pyarrow')
df["date_val"] = pd.to_datetime(df["date_val"])

In [189]:
def normalized(vals):
    """Center ``vals`` on its mean and scale by its range (max - min).

    Parameters
    ----------
    vals : numpy.ndarray or pandas.Series
        Numeric values; must provide ``mean()``, ``max()`` and ``min()``.

    Returns
    -------
    Same kind of object as ``vals - vals.mean()``
        ``(vals - mean) / (max - min)``.  If every value is identical the
        range is zero, and zeros are returned instead of the NaNs that a
        0 / 0 division would produce.
    """
    centered = vals - vals.mean()
    span = vals.max() - vals.min()
    if span == 0:
        # Constant input: skip the 0 / 0 division.  Multiplying by 0.0 keeps
        # the container type/shape and wipes any float rounding residue.
        return centered * 0.0
    return centered / span

In [190]:
# Helper functions for creating slider widgets; they exist only to keep subsequent code shorter.

def float_widget(value, min, max, step, description, format):
    """Return an enabled, horizontal ``FloatSlider`` with a visible readout.

    ``value``, ``min``, ``max``, ``step`` and ``description`` are forwarded
    unchanged; ``format`` becomes the slider's ``readout_format``.
    ``continuous_update`` is switched off.
    """
    slider_options = dict(
        value=value, min=min, max=max, step=step,
        disabled=False,
        description=description,
        continuous_update=False,
        orientation='horizontal',
        readout=True,
        readout_format=format,
    )
    return FloatSlider(**slider_options)


def int_widget(value, min, max, step, description, format):
    """Return an enabled, horizontal ``IntSlider`` with a visible readout.

    ``value``, ``min``, ``max``, ``step`` and ``description`` are forwarded
    unchanged; ``format`` becomes the slider's ``readout_format``.
    ``continuous_update`` is switched off.
    """
    slider_options = dict(
        value=value, min=min, max=max, step=step,
        disabled=False,
        description=description,
        continuous_update=False,
        orientation='horizontal',
        readout=True,
        readout_format=format,
    )
    return IntSlider(**slider_options)

# Explore Data

In [191]:
from statsmodels.tsa.seasonal import STL

## Trend

In [192]:
# Line colour for each volume label: 'Volume A' -> 'C1', 'Volume B' -> 'C2', 'Volume C' -> 'C3'.
colors = {f'Volume {letter}': f'C{slot}' for slot, letter in enumerate('ABC', start=1)}

def plot_trends(volumes, normalize=True, reg_holidays=True, holiday_dates=True):
    """Plot the STL trend component of the selected volume series.

    Parameters
    ----------
    volumes : sequence of str
        Labels to plot ('Volume A', 'Volume B', 'Volume C').  Nothing is
        drawn when the sequence is empty.
    normalize : bool
        If True, pass each trend through ``normalized`` before plotting.
    reg_holidays : bool
        If True, overwrite every holiday observation with the value found
        7 rows earlier (7 rows later for the first 7 rows) before the
        decomposition is fitted.
    holiday_dates : bool
        If True, draw a dashed vertical line at every holiday date.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.

    Raises
    ------
    KeyError
        If ``volumes`` contains a label other than the three listed above.
    """
    if len(volumes) == 0:
        return

    columns = {'Volume A': 'volume_A',
               'Volume B': 'volume_B',
               'Volume C': 'volume_C'}

    is_holiday = (df.is_holiday == 1).values
    # Row positions rather than index labels: the series handled below are
    # plain NumPy arrays, so this stays correct for any DataFrame index.
    holiday_rows = np.flatnonzero(is_holiday)

    fig, ax = plt.subplots(figsize=(20, 5))
    # Only the selected series are decomposed; STL fits are the costly step.
    for volume in volumes:
        series = df[columns[volume]].values.copy()

        if reg_holidays:
            # Deliberately sequential: a holiday that falls 7 rows after
            # another holiday picks up the already-replaced value.
            for row in holiday_rows:
                source_row = row - 7 if row >= 7 else row + 7
                series[row] = series[source_row]

        trend = STL(series, period=7).fit().trend
        if normalize:
            trend = normalized(trend)
        ax.plot(df.date_val, trend, label=str(volume), c=colors[volume])
    ax.legend()

    if holiday_dates:
        for day in df.date_val[is_holiday]:
            ax.axvline(day, lw=1, ls='--', c='k')

    ax.set_title('STL trend component (period = 7)')
    ax.set_xlabel('Date')
    ax.set_ylabel('Normalized trend' if normalize else 'Trend')

    fig.tight_layout()
    plt.show()

# Controls for the trend plot: three toggles and a multi-select of volumes.
normalize_w = Checkbox(value=True, description="Normalize")
reg_holidays_w = Checkbox(value=True, description="Regularize Holidays")
holiday_dates_w = Checkbox(value=True, description="Show Holiday Dates")
volumes_w = widgets.SelectMultiple(
    options=['Volume A', 'Volume B', 'Volume C'],
    value=['Volume A'],
)

# Bind each control to the plot_trends argument of the same name.
plot_trends_w = widgets.interactive_output(
    plot_trends,
    {
        'normalize': normalize_w,
        'volumes': volumes_w,
        'reg_holidays': reg_holidays_w,
        'holiday_dates': holiday_dates_w,
    },
)

# Layout: a row of controls on top, the plot output underneath.
explore_trends_subtab = VBox([
    HBox([
        VBox([normalize_w, reg_holidays_w, holiday_dates_w]),
        Label(value='Select Volumes: '),
        volumes_w,
    ]),
    plot_trends_w,
])

## Seasonality

In [193]:
# Line colour per volume label (the same mapping the Trend section defines).
colors = dict(zip(('Volume A', 'Volume B', 'Volume C'), ('C1', 'C2', 'C3')))

def plot_seasonality(volumes, normalize=True, reg_holidays=True, holiday_dates=True):
    """Plot the STL seasonal component of the selected volume series.

    Parameters
    ----------
    volumes : sequence of str
        Labels to plot ('Volume A', 'Volume B', 'Volume C').  Nothing is
        drawn when the sequence is empty.
    normalize : bool
        If True, pass each seasonal component through ``normalized`` before
        plotting.
    reg_holidays : bool
        If True, overwrite every holiday observation with the value found
        7 rows earlier (7 rows later for the first 7 rows) before the
        decomposition is fitted.
    holiday_dates : bool
        If True, draw a dashed vertical line at every holiday date.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.

    Raises
    ------
    KeyError
        If ``volumes`` contains a label other than the three listed above.
    """
    if len(volumes) == 0:
        return

    columns = {'Volume A': 'volume_A',
               'Volume B': 'volume_B',
               'Volume C': 'volume_C'}

    is_holiday = (df.is_holiday == 1).values
    # Row positions rather than index labels: the series handled below are
    # plain NumPy arrays, so this stays correct for any DataFrame index.
    holiday_rows = np.flatnonzero(is_holiday)

    fig, ax = plt.subplots(figsize=(20, 5))
    # Only the selected series are decomposed; STL fits are the costly step.
    for volume in volumes:
        series = df[columns[volume]].values.copy()

        if reg_holidays:
            # Deliberately sequential: a holiday that falls 7 rows after
            # another holiday picks up the already-replaced value.
            for row in holiday_rows:
                source_row = row - 7 if row >= 7 else row + 7
                series[row] = series[source_row]

        # extract the seasonal component using STL
        season = STL(series, period=7).fit().seasonal
        if normalize:
            season = normalized(season)
        ax.plot(df.date_val, season, label=str(volume), c=colors[volume])
    ax.legend()

    if holiday_dates:
        for day in df.date_val[is_holiday]:
            ax.axvline(day, lw=1, ls='--', c='k')

    ax.set_title('STL seasonal component (period = 7)')
    ax.set_xlabel('Date')
    ax.set_ylabel('Normalized seasonal component' if normalize else 'Seasonal component')

    fig.tight_layout()
    plt.show()

# Controls for the seasonality plot: three toggles and a multi-select of volumes.
normalize_w = Checkbox(value=True, description="Normalize")
reg_holidays_w = Checkbox(value=True, description="Regularize Holidays")
holiday_dates_w = Checkbox(value=True, description="Show Holiday Dates")
volumes_w = widgets.SelectMultiple(
    options=['Volume A', 'Volume B', 'Volume C'],
    value=['Volume A'],
)

# Bind each control to the plot_seasonality argument of the same name.
plot_seasonality_w = widgets.interactive_output(
    plot_seasonality,
    {
        'normalize': normalize_w,
        'volumes': volumes_w,
        'reg_holidays': reg_holidays_w,
        'holiday_dates': holiday_dates_w,
    },
)

# Layout: a row of controls on top, the plot output underneath.
explore_seasonality_subtab = VBox([
    HBox([
        VBox([normalize_w, reg_holidays_w, holiday_dates_w]),
        Label(value="Select Volumes: "),
        volumes_w,
    ]),
    plot_seasonality_w,
])

## Causality

In [194]:
# Empty container: the Causality sub-tab has no content yet.
explore_causality_subtab = VBox(children=[])

In [195]:
# Group the three exploration sub-tabs and give each page its title.
explore_tab = Tab(children=[
    explore_trends_subtab,
    explore_seasonality_subtab,
    explore_causality_subtab,
])
for position, title in enumerate(("Trend", "Seasonality", "Causality")):
    explore_tab.set_title(position, title)

# Summary

In [196]:
# Top-level layout.  Only the first page exists so far; the remaining titles
# are kept for pages that have not been added to `children` yet.
tab_titles = ["Explore Data", "Proposed Models", "Selected Model"]
tabs = Tab(children=[explore_tab])

# Title only the pages that exist.  ipywidgets 8 keeps one title per child,
# so set_title() with an index past the last child raises IndexError there
# (ipywidgets 7 accepted it without showing anything).  Titles are picked up
# automatically as more pages are appended to the children list above.
for index, title in zip(range(len(tabs.children)), tab_titles):
    tabs.set_title(index, title)
display(tabs)

Tab(children=(Tab(children=(VBox(children=(HBox(children=(VBox(children=(Checkbox(value=True, description='Nor…