In [1]:
import pandas as pd
import numpy as np
import os
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import widgets
from IPython.display import display, clear_output
import pickle
import pandas_datareader as pdr
from tqdm.notebook import tqdm as log_progress

In [2]:
from Forecaster import Forecaster

In [3]:
# List of future dates to forecast for: the first day of every month from 2020-07 through 2022-10,
# as 'YYYY-mm-01' strings (generated rather than typed out so no month can be skipped by accident)
futures = pd.date_range('2020-07-01','2022-10-01',freq='MS').strftime('%Y-%m-%d').tolist()
# a list of state abbreviations, one per line of state_abb.txt with any double quotes removed
# the file is opened in a with-block so the handle is closed; blank lines (e.g. a trailing newline)
# are dropped so that an empty abbreviation is never turned into a series name
with open('state_abb.txt') as state_file:
    states = [e.replace('"','') for e in state_file.read().split('\n')]
states = [e for e in states if e]
# whether or not to rerun the forecast - if false, will read in a pickled forecast
rerun = True

In [4]:
def get_regressors(futures,recession_length=15,month_lags=24,year_lags=0):
    """ gets a table of external regressors from FRED
        extracts the recession indicator (JHDUSRGDPBR) and extends it over the future dates,
        assuming the recession lasts recession_length periods in total
        adds month and year lags
        adds an indicator of when the last recession started and a running time index
        Parameters: futures : list
                        a list of dates in format YYYY-mm-01
                        if there is overlap between the earlier dates and what is extracted from FRED, that will be handled in the function
                    recession_length : int, default 15
                        the total assumed length of the recession
                        periods already flagged as recession from 2019-10-01 onwards are subtracted from it;
                        the remainder is clamped to between 0 and the number of future dates
                    month_lags : int, default 24
                        the amount of month lags you want added of the recession indicator from FRED
                        values of 0 or less add no month lags
                    year_lags : int, default 0
                        the amount of year lags you want added of the recession indicator from FRED
                        values of 0 or less add no year lags
        Returns: pandas.DataFrame
                        columns, in order: DATE, JHDUSRGDPBR, JHDUSRGDPBR_lagMonth_1..month_lags,
                        JHDUSRGDPBR_lagYear_1..year_lags, LastRecession, t
    """
    # CPIAUCSL is requested together with the indicator and dropped again right below
    # NOTE(review): presumably it is only there to provide a row for every month, which the forward-fill then completes -- confirm
    raw = pdr.get_data_fred(['JHDUSRGDPBR','CPIAUCSL'],start='1900-01-01').reset_index()
    first_observed = raw.loc[raw['JHDUSRGDPBR'].notnull(),'DATE'].min()
    externals = raw.loc[raw['DATE'] >= first_observed,['DATE','JHDUSRGDPBR']].copy()
    # assign the filled column back instead of filling a column selection in place, which pandas no longer guarantees to propagate
    externals['JHDUSRGDPBR'] = externals['JHDUSRGDPBR'].ffill()

    # keep only the future dates FRED has not published yet (a set makes each membership test O(1))
    known_dates = set(externals['DATE'].dt.strftime('%Y-%m-%d'))
    futures = pd.to_datetime([d for d in futures if d not in known_dates])

    past_recession_periods = externals.loc[externals['DATE'] >= '2019-10-01','JHDUSRGDPBR'].sum()
    recession_left = int(recession_length - past_recession_periods)
    # clamp so that the array always has exactly one value per future date, even when the assumed
    # recession is already over (negative remainder) or outlasts the forecast horizon
    recession_periods_ahead = min(max(0,recession_left),len(futures))
    recession_array = [1] * recession_periods_ahead + [0] * (len(futures) - recession_periods_ahead)

    if len(futures) > 0:
        externals_future = pd.DataFrame({'DATE':futures,'JHDUSRGDPBR':recession_array})
        # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent
        externals = pd.concat([externals,externals_future],ignore_index=True,sort=False)
    else:
        externals = externals.reset_index(drop=True)

    for i in range(1,month_lags+1):
        externals[f'JHDUSRGDPBR_lagMonth_{i}'] = externals['JHDUSRGDPBR'].shift(i)
    for i in range(1,year_lags+1):
        externals[f'JHDUSRGDPBR_lagYear_{i}'] = externals['JHDUSRGDPBR'].shift(i*12)

    # a recession starts where the indicator is 1 and was 0 one period earlier
    # the one-period lag is computed here so this also works when month_lags is 0
    previous_period = externals['JHDUSRGDPBR'].shift(1)
    recession_starts = (externals['JHDUSRGDPBR'] == 1) & (previous_period == 0)
    start_labels = [str(date) if is_start else None for date,is_start in zip(externals['DATE'],recession_starts)]
    # carry the most recent start date forward; rows before the first start stay missing
    externals['LastRecession'] = pd.Series(start_labels,index=externals.index,dtype=object).ffill()
    externals['t'] = range(externals.shape[0])
    return externals.copy()

def pickle_out(obj):
    """ writes out forecast results as a pickle file to the pickled folder
        the file is named AllStatesForecasts-<YYYY-mm-dd HHMMSS>.pickle using the current local time
        the pickled folder is created if it does not exist yet
        Parameters: obj : any data type
                        the object to be written out -- this will be a Forecaster object
    """
    # create the folder up front so a long forecasting run is not lost to a missing directory
    os.makedirs('pickled',exist_ok=True)
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H%M%S')
    with open(f'pickled/AllStatesForecasts-{timestamp}.pickle','wb') as f:
        pickle.dump(obj,f)

def pickle_in(which='latest'):
    """ reads a pickle object saved in the pickled folder
        Parameters: which : str, default "latest"
                        if "latest", will read in the AllStatesForecasts-*.pickle file with the latest timestamp in its name
                        other files in the pickled folder are ignored
                        any other argument should be a datetime-like str and correspond with a file name in the pickled folder
                        ex. to extract the AllStatesForecasts-2020-09-11 121129.pickle file, this would be "2020-09-11 121129"
        Returns: the unpickled object
        Raises: FileNotFoundError
                        if the requested file, or any forecast file when which is "latest", does not exist
    """
    if which == 'latest':
        # the timestamp format in the file name sorts chronologically, so the largest name is the newest file
        candidates = [name for name in os.listdir('pickled')
                      if name.startswith('AllStatesForecasts-') and name.endswith('.pickle')]
        if not candidates:
            raise FileNotFoundError('no AllStatesForecasts-*.pickle file found in the pickled folder')
        path = f'pickled/{max(candidates)}'
    else:
        path = f'pickled/AllStatesForecasts-{which}.pickle'
    # NOTE: pickle.load can execute arbitrary code -- only read files this notebook wrote itself
    with open(path,'rb') as f:
        return pickle.load(f)

def visualize():
    """ interactive viewer for the forecast results
        shows one dropdown for the state, one for the indicator and one True/False dropdown per model,
        plus a button that redraws the plot for the current selections
        reads the notebook-level states list and forecasts dictionary
        add new forecast names to the model_options list
    """
    bool_op = ['True','False']
    # the models that you will have the option to display in the graph
    model_options = ['arima','arima_no_reg','tbats','ets','rf','adaboost','gbt','svr','mlr','ridge','lasso','mlp','average']
    states_dropdown = widgets.Dropdown(options=states, description = 'State')
    ei_dropdown = widgets.Dropdown(options=['SLIND','PHCI','UR'], description = 'Indicator')
    model_dropdown = {name: widgets.Dropdown(options=list(bool_op), description = name) for name in model_options}

    # never changes
    button = widgets.Button(description="Show Forecast")
    output = widgets.Output()

    def display_user_selections(states_selection,ei_selection,model_selections):
        """ plots the actual series and every selected forecast with seaborn, and prints the MAPEs
        """
        series_name = f'{states_selection}{ei_selection}'
        selected_data = forecasts[series_name]
        sns.lineplot(x = pd.to_datetime(selected_data.current_dates), y = selected_data.y, ci = None)
        forecast_periods = selected_data.future_dates
        labels = ['Actual']
        max_display = [max(selected_data.y)]
        for model_name, chosen in model_selections.items():
            # skip models the user switched off and models this series has no forecast for
            if chosen != 'True' or model_name not in selected_data.forecasts.keys():
                continue
            model_forecast = selected_data.forecasts[model_name]
            max_display.append(max(model_forecast))
            sns.lineplot(x = pd.to_datetime(forecast_periods), y = model_forecast)
            model_form = selected_data.info[model_name]['model_form']
            if model_name == 'average':
                # only the part before the first colon is used as the legend label
                model_form = model_form.split(':')[0]
            labels.append('{0}'.format(model_form))

        print('MAPEs:')
        for ranked_model in selected_data.order_all_forecasts_best_to_worst():
            if ranked_model == 'naive':
                continue
            print('  {}: {:.3f}'.format(ranked_model,selected_data.mape[ranked_model]))

        plt.legend(labels=labels,loc='best')
        plt.xlabel('')
        plt.ylabel(series_name)
        plt.title(f'{series_name} Forecast')
        lower_bound = min(0,min(selected_data.y))
        upper_bound = 1.1*max(max_display)
        plt.ylim(lower_bound,upper_bound)
        plt.show()

    def on_button_clicked(b):
        """ collects the current dropdown values and redraws the plot inside the output widget
        """
        chosen_state = states_dropdown.value
        chosen_indicator = ei_dropdown.value
        chosen_models = {name: dropdown.value for name, dropdown in model_dropdown.items()}
        with output:
            clear_output()
            display_user_selections(chosen_state,chosen_indicator,chosen_models)

    display(states_dropdown,ei_dropdown)
    for dropdown in model_dropdown.values():
        display(dropdown)
    display(button, output)

    button.on_click(on_button_clicked)

In [5]:
# Either rebuild every forecast from scratch and pickle the result, or reload a previous run.
# The rebuild creates one Forecaster per state/indicator pair and calls every forecast_* method
# below on it, so it is the expensive branch.
if rerun:
    # one regressor table, built once and passed to every series
    externals = get_regressors(futures)
    # keyed by the same id that is passed to get_data_fred: state abbreviation + indicator suffix
    forecasts = {}
    for s in log_progress(states):
        for ei in ('SLIND','PHCI','UR'):
            d = Forecaster()
            d.get_data_fred(f'{s}{ei}')
            d.process_xreg_df(externals,date_col='DATE')
            d.check_xreg_future_current_consistency()
            # every model below is called with test_length=3
            # NOTE(review): presumably the number of held-out periods used for evaluation -- confirm in Forecaster
            d.forecast_mlp(test_length=3,hyper_params={'hidden_layer_sizes':(10,50,),'solver':'lbfgs','activation':'relu'})
            d.forecast_rf(test_length=3,hyper_params={'n_estimators':500,'max_depth':3})
            d.forecast_gbt(test_length=3,hyper_params={'max_depth':3,'n_estimators':500})
            d.forecast_adaboost(test_length=3,hyper_params={'learning_rate':.5,'n_estimators':250})
            d.forecast_mlr(test_length=3)
            d.forecast_ridge(test_length=3,alpha=10)
            d.forecast_lasso(test_length=3,alpha=.05)
            d.forecast_svr(test_length=3,hyper_params={'kernel':'linear','C':0.05, 'epsilon':1.25})
            # arima is run twice: once with Xvars='top_3' and once without regressors, stored as 'arima_no_reg'
            d.forecast_arima(test_length=3,Xvars='top_3')
            d.forecast_arima(test_length=3,Xvars=None,call_me='arima_no_reg')
            d.forecast_tbats(test_length=3)
            d.forecast_ets(test_length=3)
            # called last, after all individual models
            # NOTE(review): presumably combines the four best of the models fitted above -- confirm in Forecaster
            d.forecast_average(models='top_4')
            forecasts[f'{s}{ei}'] = d
    # written only after every state has finished
    pickle_out(forecasts)
else:
    forecasts = pickle_in('latest')

In [6]:
# default figure size (18 x 10) for every plot drawn from here on
figure_size = (18,10)
sns.set(rc={'figure.figsize':figure_size})
# show the dropdowns and the button; the plot is drawn when the button is pressed
visualize()

Dropdown(description='State', options=('AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'ID',…

Dropdown(description='Indicator', options=('SLIND', 'PHCI', 'UR'), value='SLIND')

Dropdown(description='arima', options=('True', 'False'), value='True')

Dropdown(description='arima_no_reg', options=('True', 'False'), value='True')

Dropdown(description='tbats', options=('True', 'False'), value='True')

Dropdown(description='ets', options=('True', 'False'), value='True')

Dropdown(description='rf', options=('True', 'False'), value='True')

Dropdown(description='adaboost', options=('True', 'False'), value='True')

Dropdown(description='gbt', options=('True', 'False'), value='True')

Dropdown(description='svr', options=('True', 'False'), value='True')

Dropdown(description='mlr', options=('True', 'False'), value='True')

Dropdown(description='ridge', options=('True', 'False'), value='True')

Dropdown(description='lasso', options=('True', 'False'), value='True')

Dropdown(description='mlp', options=('True', 'False'), value='True')

Dropdown(description='average', options=('True', 'False'), value='True')

Button(description='Show Forecast', style=ButtonStyle())

Output()