In [1]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm as log_progress
from ipywidgets import widgets # for buttons and dropdown menus
from IPython.display import display, clear_output # for buttons and dropdown menus
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from scalecast import GridGenerator
from scalecast.Forecaster import Forecaster
from scalecast.Forecaster import _determine_best_by_

In [2]:
models = ('knn','svr','elasticnet','mlp','lightgbm','xgboost','prophet','silverkite')
sns.set(rc={'figure.figsize':(12,8)})

In [3]:
GridGenerator.get_example_grids()

In [4]:
def find_integration(f):
    """ uses augmented dickey fuller to guess that series is stationary at 0, 1, or 2 differences
    """
    if f.adf_test():
        return 0
    f.diff()
    if f.adf_test():
        f.undiff()
        return 1
    f.undiff()
    return 2

def results_vis(f_dict,plot_type='forecast'):
    """ visualize the forecast results
        leverages Jupyter widgets
    """
    def display_user_selections(ts_selection,mo_selection,lv_selection,me_selection):
        matplotlib.use('nbAgg')
        %matplotlib inline
        sns.set(rc={'figure.figsize':(16,8)})
        selected_data = f_dict[ts_selection]
        if plot_type == 'forecast':
            print(ts_selection)
            selected_data.plot(models=f'top_{mo_selection}',order_by=me_selection,level=lv_selection,
                               print_attr=['TestSetRMSE','TestSetR2','LevelTestSetRMSE','TestSetMAPE','LevelTestSetR2','LevelTestSetMAPE','Scaler','HyperParams','Xvars','models','Integration'])
        elif plot_type == 'test':
            print(ts_selection)
            selected_data.plot_test_set(models=f'top_{mo_selection}',order_by=me_selection,include_train=52,level=lv_selection)

    def on_button_clicked(b):
        mo_selection = mo_dd.value
        ts_selection = ts_dd.value
        lv_selection = lv_dd.value
        me_selection = me_dd.value
        with output:
            clear_output()
            display_user_selections(ts_selection,mo_selection,lv_selection,me_selection)
    
    all_models = models + ('weighted','avg')
    ts_dd = widgets.Dropdown(options=f_dict.keys(), description = 'Time Series:')
    mo_dd = widgets.Dropdown(options=range(1,len(all_models)+1), description = 'No. Models')
    lv_dd = widgets.Dropdown(options=[True,False],description='View Level')
    me_dd = widgets.Dropdown(options=sorted([e for e in _determine_best_by_ if e is not None]),description='Order By')

    # never changes
    button = widgets.Button(description="Select Time Series")
    output = widgets.Output()

    display(ts_dd,mo_dd,lv_dd,me_dd)
    display(button, output)
    
    button.on_click(on_button_clicked)

SyntaxError: invalid syntax (<ipython-input-4-04b5e66fdeaf>, line 11)

In [None]:
df = pd.read_csv('Zip_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv',dtype={'RegionName':str}).set_index('RegionName')

In [None]:
df.head()

In [None]:
zips = open('./zips.txt','r').read().split('\n')
zips

In [None]:
preds = {}
for z in zips:
    data_load = df.loc[z].transpose()
    f = Forecaster(y=data_load.values[8:],current_dates=data_load.index[8:],name=z)
    preds[str(z)] = f

In [None]:
f.plot_acf(diffy=2)
plt.show()

In [None]:
f.plot_pacf(diffy=2)
plt.show()

In [None]:
f.seasonal_decompose(diffy=2).plot()
plt.show()

In [None]:
for k, f in log_progress(preds.items()):
    print(k)
    i = find_integration(f)
    f.generate_future_dates(12)
    f.set_test_length(9)
    f.set_validation_length(6)
    f.add_ar_terms(3)
    f.add_AR_terms((4,3))
    f.add_AR_terms((2,12))
    f.diff(i)
    f.adf_test(quiet=False)
    f.add_seasonal_regressors('quarter',raw=False,dummy=True)
    f.add_seasonal_regressors('month',raw=False,dummy=True)
    f.add_seasonal_regressors('year')
    f.add_covid19_regressor(called='COVID19',end='2021-12-01')
    f.add_other_regressor(called='2008_recession',start='2007-10-01',end='2009-06-30')
    f.add_time_trend(called='t')
    f.add_combo_regressors('t','COVID19',sep='_')
    f.add_poly_terms('t_COVID19')
    
    for m in log_progress(models):
        f.set_estimator(m)
        f.tune()
        f.auto_forecast()
        
    f.set_estimator('combo')
    f.manual_forecast(how='simple',models='top_3',call_me='avg')
    f.manual_forecast(how='weighted',models='top_3',call_me='weighted')

In [None]:
model_summaries = pd.DataFrame()
for k, f in preds.items():
    df = f.export(dfs='model_summaries',determine_best_by='LevelTestSetMAPE')
    df['Name'] = k
    model_summaries = pd.concat([model_summaries,df],ignore_index=True)
    
model_summaries.to_csv('model_summaries.csv',index=False)

In [None]:
results_vis(preds,'test')

In [None]:
results_vis(preds,'forecast')