In [None]:
!pip install orbit-ml==1.0.13
!pip install fbprophet==0.7.1



In [None]:
import inspect
import random

import numpy as np
import pandas as pd
import statsmodels.api as sm
import tqdm
from fbprophet import Prophet
from statsmodels.tsa.statespace.sarimax import SARIMAX

import orbit
from orbit.diagnostics.backtest import BackTester
from orbit.diagnostics.metrics import smape
from orbit.models.dlt import DLTMAP, DLTAggregated, DLTFull
from orbit.utils.dataset import load_m3monthly
from orbit.utils.dataset import load_electricity_demand, load_m4weekly

In [None]:
# Reproducibility and sampling settings shared by the cells below.
seed = 2021     # seeds Python's `random` module here; also passed to a wrapper later
n_sample = 10   # read by the sampling cell; non-positive values keep every series
random.seed(seed)

In [None]:
# Load the M3 monthly data and, when `n_sample` is positive, keep only a random
# subset of the series (reproducible through the `random.seed` call above).
data = load_m3monthly()
unique_keys = data['key'].unique().tolist()
if n_sample > 0:
    # Draw `n_sample` distinct series keys; the cap keeps `random.sample` from
    # raising when more series are requested than the dataset contains.
    sample_keys = random.sample(unique_keys, min(n_sample, len(unique_keys)))
    data = data[data['key'].isin(sample_keys)].reset_index(drop=True)
else:
    # Non-positive `n_sample` means "use every series".
    sample_keys = unique_keys
print(sample_keys)

['N2229', 'N2691', 'N2516', 'N1968', 'N1908', 'N2702', 'N1472', 'N2310', 'N2372', 'N2578']


In [None]:
# Show the column names of the loaded frame.
data.columns

Index(['key', 'value', 'date'], dtype='object')

In [None]:
# Names of the columns in `data`, plus the seasonal period handed to the
# model wrappers further down.
key_col = 'key'
response_col = 'value'
date_col = 'date'
seasonality = 12

In [None]:

# Backtest settings: one expanding-window split with an 18-step forecast.
# NOTE(review): presumably unpacked into BackTester; the consuming call is not
# visible in this part of the notebook.
backtest_args = dict(
    min_train_len=1,       # not useful; a placeholder
    incremental_len=18,    # not useful; a placeholder
    forecast_len=18,
    n_splits=1,
    window_type="expanding",
)

In [None]:
class DLTMAPWrapper(object):
    """Fit/predict wrapper around orbit's ``DLTMAP`` working on a log1p scale.

    The response is transformed with ``np.log1p`` before fitting; predictions
    are mapped back with ``np.expm1`` and clipped at zero.

    Parameters
    ----------
    response_col : str
        Name of the column holding the series values.
    date_col : str
        Name of the column holding the timestamps.
    seasonality : int, optional
        Seasonal period forwarded to ``DLTMAP``. Defaults to 52, the value that
        was previously hard-coded; pass it explicitly for data with another
        period (the notebook's own config cell uses 12).
    seed : int, optional
        Seed forwarded to ``DLTMAP``. Defaults to 8888.
    **kwargs
        Further keyword arguments forwarded to ``DLTMAP`` unchanged.
    """

    def __init__(self, response_col, date_col, seasonality=52, seed=8888, **kwargs):
        self.response_col = response_col
        self.date_col = date_col
        # `estimator_type` is deliberately not passed: the earlier string value
        # 'stan-map' is not an estimator class, so construction failed.
        # NOTE(review): this relies on DLTMAP defaulting to its MAP estimator in
        # orbit-ml 1.0.13 -- confirm against the installed version.
        self.model = DLTMAP(
            response_col=response_col,
            date_col=date_col,
            seasonality=seasonality,
            seed=seed,
            **kwargs
        )

    def fit(self, df):
        """Fit the model on a copy of ``df`` whose response is log1p-transformed.

        The caller's frame is left unmodified.
        """
        train_df = df.copy()
        train_df[[self.response_col]] = train_df[[self.response_col]].apply(np.log1p)
        self.model.fit(train_df)

    def predict(self, df):
        """Return the model's prediction frame with ``prediction`` on the original scale.

        The ``prediction`` column is passed through ``np.expm1`` and negative
        values are clipped to 0; other columns returned by the model are kept.
        """
        pred_df = self.model.predict(df.copy())
        pred_df['prediction'] = np.clip(np.expm1(pred_df['prediction']).values, 0, None)
        return pred_df

In [None]:
class SARIMAXWrapper(object):
    """Fit/predict wrapper around statsmodels' ``SARIMAX``.

    Keyword arguments given to the constructor are stored as instance
    attributes. At fit time, every attribute whose name matches a parameter of
    the ``SARIMAX`` constructor is forwarded to it; other keywords are kept on
    the instance but not passed to the model.

    Parameters
    ----------
    response_col : str
        Name of the column holding the series values.
    date_col : str
        Name of the column holding the timestamps.
    **kwargs
        Candidate ``SARIMAX`` constructor arguments (see above).
    """

    def __init__(self, response_col, date_col, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)
        self.response_col = response_col
        self.date_col = date_col
        # The fitted results object is created in `fit`. Nothing is built here:
        # the earlier `DLT(...)` call referenced a name that is never imported
        # and had nothing to do with SARIMAX.
        self.model = None

    def _indexed_by_date(self, df):
        """Return a copy of ``df`` indexed by ``date_col`` at its inferred frequency."""
        freq = pd.infer_freq(df[self.date_col])
        return df.set_index(self.date_col).asfreq(freq)

    def fit(self, df):
        """Fit ``SARIMAX`` on the response column of ``df``.

        Stores the re-indexed training frame on ``self.df`` and the fitted
        results object on ``self.model``.
        """
        train_df = self._indexed_by_date(df)
        # Forward only those stored attributes that SARIMAX actually accepts.
        sarimax_params = {
            name: getattr(self, name)
            for name in inspect.signature(SARIMAX).parameters
            if hasattr(self, name)
        }
        self.df = train_df
        endog = train_df[self.response_col]
        self.model = SARIMAX(endog=endog, **sarimax_params).fit(disp=False)

    def predict(self, df, **kwargs):
        """Predict over the date span covered by ``df``.

        Parameters
        ----------
        df : pandas.DataFrame
            Frame containing ``date_col``; its first and last dates bound the
            prediction range.
        **kwargs
            Forwarded to the fitted results object's ``predict``.

        Returns
        -------
        pandas.DataFrame
            Columns ``date_col`` (taken from ``df``) and ``'prediction'``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.model is None:
            raise RuntimeError("SARIMAXWrapper.predict() called before fit()")
        span_df = self._indexed_by_date(df)
        pred_array = np.array(self.model.predict(start=span_df.index[0],
                                                 end=span_df.index[-1],
                                                 **kwargs))
        return pd.DataFrame({
            self.date_col: df[self.date_col],
            'prediction': pred_array
        })

In [None]:

class ProphetWrapper(object):
    """Fit/predict wrapper around ``Prophet`` using the notebook's column names.

    Constructor keyword arguments are stored as instance attributes and passed
    to ``Prophet``. ``fit`` and ``predict`` translate between the caller's
    ``date_col`` / ``response_col`` and the ``ds`` / ``y`` / ``yhat`` columns
    used with the wrapped model.
    """

    def __init__(self, response_col, date_col, **kwargs):
        for name in kwargs:
            setattr(self, name, kwargs[name])
        self.response_col = response_col
        self.date_col = date_col
        self.model = Prophet(**kwargs)

    def fit(self, df):
        """Create a fresh model instance and fit it on ``df``.

        The new instance is built from the class of the current ``self.model``
        using every stored attribute whose name is a ``Prophet`` constructor
        parameter.
        """
        accepted = inspect.signature(Prophet).parameters
        init_kwargs = {
            name: getattr(self, name)
            for name in accepted
            if hasattr(self, name)
        }
        self.model = type(self.model)(**init_kwargs)

        column_map = {self.date_col: "ds", self.response_col: "y"}
        self.model.fit(df.copy().rename(columns=column_map))

    def predict(self, df):
        """Return a two-column frame: ``date_col`` and ``'prediction'``."""
        future = df.copy().rename(columns={self.date_col: "ds"})
        forecast = self.model.predict(future)
        forecast = forecast.rename(columns={'yhat': 'prediction', 'ds': self.date_col})
        return forecast[[self.date_col, 'prediction']]

In [None]:
# Instantiate one wrapper per model, all pointed at the same response/date columns.
dlt = DLTMAPWrapper(response_col, date_col)

# NOTE(review): SARIMAXWrapper.fit only forwards attributes whose names appear
# in SARIMAX's signature; confirm `seasonality`/`seed` are the intended keywords
# here (e.g. versus `order` / `seasonal_order`).
sarima = SARIMAXWrapper(response_col, date_col, seasonality=seasonality, seed=seed)

prophet = ProphetWrapper(response_col, date_col)

TypeError: ignored

In [None]:
# Load the dataset returned by `load_m4weekly` (imported in the top import cell).
# Note: this rebinds `data`, replacing the sampled monthly frame used above.
data = load_m4weekly()
data

Unnamed: 0,key,week_num,value,date
0,W1,2,1089.20,1970-01-05
1,W1,3,1078.91,1970-01-12
2,W1,4,1079.88,1970-01-19
3,W1,5,1063.58,1970-01-26
4,W1,6,1060.61,1970-02-02
...,...,...,...,...
6101,W3,10,9760.70,2011-11-28
6102,W3,11,9968.20,2011-12-05
6103,W3,12,10089.10,2011-12-12
6104,W3,13,10005.90,2011-12-19


In [None]:
# Load the dataset returned by `load_electricity_demand` (imported in the top
# import cell). Note: this rebinds `data` once more.
data = load_electricity_demand()
data

Unnamed: 0,date,electricity
0,2000-01-01,12551.500000
1,2000-01-02,13377.041667
2,2000-01-03,15300.625000
3,2000-01-04,15582.750000
4,2000-01-05,15691.750000
...,...,...
3283,2008-12-27,22546.000000
3284,2008-12-28,20477.791667
3285,2008-12-29,23093.000000
3286,2008-12-30,23493.625000
