In [1]:
import pandas as pd
import numpy as np
import os
import pandas_datareader as pdr

In [2]:
from Forecaster import Forecaster

In [3]:
# Month-start dates (YYYY-mm-01 strings) to forecast over: August 2020 through October 2022.
futures = pd.date_range(start='2020-08-01',end='2022-10-01',freq='MS').strftime('%Y-%m-%d').tolist()

In [4]:
def get_regressors(futures,recession_length=15,month_lags=24,year_lags=0,recession_start='2019-10-01'):
    """ gets a DataFrame of external regressors from FRED
        extracts the recession indicator (JHDUSRGDPBR), forward-fills it and extends it over the
        dates in futures so that the current recession lasts recession_length periods in total
        adds month and year lags
        adds an indicator of when the last recession started
        adds a running time index
        Parameters: futures : list
                        a list of dates in format YYYY-mm-01
                        if there is overlap between the earlier dates and what is extracted from FRED, that will be handled in the function
                    recession_length : int, default 15
                        the total assumed length of the recession, counted from recession_start
                        periods already flagged as recession since recession_start are subtracted,
                        the remainder is clipped to the range [0, number of future dates]
                    month_lags : int, default 24
                        the amount of month lags you want added of the recession indicator from FRED
                        0 (or less) adds none
                    year_lags : int, default 0
                        the amount of year lags you want added of the recession indicator from FRED
                        0 (or less) adds none
                    recession_start : str, default '2019-10-01'
                        recession periods on or after this date count as part of the current recession
        Returns: pandas.DataFrame
                    one row per date (FRED history followed by the unseen future dates) with columns
                    DATE, JHDUSRGDPBR, JHDUSRGDPBR_lagMonth_i, JHDUSRGDPBR_lagYear_i,
                    LastRecession (str of the date the most recent recession began, None before the first one)
                    and t (0, 1, 2, ...)
    """
    # NOTE(review): CPIAUCSL is requested but dropped right below; presumably it is only there so the
    # frame gets a row for every month (the recession indicator looks sparser, hence the ffill) -- verify
    # before removing it from the request.
    raw = pdr.get_data_fred(['JHDUSRGDPBR','CPIAUCSL'],start='1900-01-01').reset_index()

    # start at the first observed indicator value so the forward-fill leaves no leading gaps
    first_observed = raw.loc[raw['JHDUSRGDPBR'].notnull(),'DATE'].min()
    externals = raw.loc[raw['DATE'] >= first_observed,['DATE','JHDUSRGDPBR']].reset_index(drop=True)
    externals['JHDUSRGDPBR'] = externals['JHDUSRGDPBR'].ffill()

    # only keep future dates that FRED has not already supplied
    known_dates = set(externals['DATE'].dt.strftime('%Y-%m-%d'))
    future_dates = pd.to_datetime([d for d in futures if str(d)[:10] not in known_dates])

    # remaining recession periods, clipped so the flag array always matches the number of future dates
    # (the unclipped version raised whenever the remainder was negative or larger than len(futures))
    in_current_recession = externals['DATE'] >= pd.Timestamp(recession_start)
    recession_left = int(recession_length - externals.loc[in_current_recession,'JHDUSRGDPBR'].sum())
    recession_left = min(max(0,recession_left),len(future_dates))

    if len(future_dates) > 0:
        recession_array = [1] * recession_left + [0] * (len(future_dates) - recession_left)
        externals_future = pd.DataFrame({'DATE':future_dates,'JHDUSRGDPBR':recession_array})
        # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent
        externals = pd.concat([externals,externals_future],ignore_index=True,sort=False)

    for i in range(1,month_lags+1):
        externals[f'JHDUSRGDPBR_lagMonth_{i}'] = externals['JHDUSRGDPBR'].shift(i)
    for i in range(1,year_lags+1):
        externals[f'JHDUSRGDPBR_lagYear_{i}'] = externals['JHDUSRGDPBR'].shift(i*12)

    # a recession starts where the indicator is 1 and the previous period was 0
    # computed from a fresh one-period shift so it still works when month_lags is 0
    recession_onset = (externals['JHDUSRGDPBR'] == 1) & (externals['JHDUSRGDPBR'].shift(1) == 0)
    last_onset = externals['DATE'].where(recession_onset).ffill()
    externals['LastRecession'] = [str(d) if pd.notnull(d) else None for d in last_onset]

    externals['t'] = range(externals.shape[0])
    return externals.copy()

In [5]:
# Build the regressor frame for the dates in futures, relying on the function defaults
# (recession_length=15, month_lags=24, year_lags=0).
externals = get_regressors(futures)

In [6]:
# Single Forecaster instance; the cells below load data into it, call its forecasting
# methods and read results back from its info / forecasts attributes.
f = Forecaster()

In [11]:
# Load the 'UTUR' series into the forecaster (presumably fetched from FRED, going by the method name -- confirm).
f.get_data_fred('UTUR')
# Hand the recession regressors to the forecaster, with 'DATE' as the date column.
f.process_xreg_df(externals,date_col='DATE')
# Two random-forest runs, each holding out 3 periods for testing: the first with no explicit
# hyperparameters (it shows up as 'rf' in f.info), the second with 1000 trees of max depth 3,
# stored under 'rf2'.
# NOTE(review): no random seed is passed here; results may differ between runs -- confirm whether
# Forecaster fixes one internally.
f.forecast_rf(test_length=3)
f.forecast_rf(test_length=3,call_me='rf2',hyper_params={'n_estimators':1000,'max_depth':3})

In [15]:
# Per-model holdout diagnostics: holdout length, model description, test-set actuals,
# predictions and absolute percentage errors.
f.info

{'rf': {'holdout_periods': 3,
  'model_form': 'Random Forest {}',
  'test_set_actuals': [5.3, 4.5, 4.1],
  'test_set_predictions': [8.214000000000004,
   8.200000000000003,
   8.229000000000005],
  'test_set_ape': [0.5498113207547178,
   0.8222222222222229,
   1.0070731707317087]},
 'rf2': {'holdout_periods': 3,
  'model_form': "Random Forest {'n_estimators': 1000, 'max_depth': 3}",
  'test_set_actuals': [5.3, 4.5, 4.1],
  'test_set_predictions': [5.740128902164917,
   5.74292496708493,
   5.7501010691564405],
  'test_set_ape': [0.08304318908772029, 0.2762055482410955, 0.40246367540401]}}

In [13]:
# Forecasted values for each model, keyed by the same model names as f.info.
f.forecasts

{'rf': [4.530000000000005,
  4.590000000000004,
  4.700000000000005,
  5.287000000000005,
  5.778000000000005,
  6.328000000000003,
  6.732000000000004,
  6.995000000000004,
  7.22,
  7.293999999999998,
  7.328999999999999,
  7.4049999999999985,
  7.453999999999999,
  7.534999999999997,
  7.521999999999996,
  7.511999999999995,
  7.5049999999999955,
  7.427999999999996,
  7.327999999999999,
  7.206000000000002,
  7.075000000000002,
  7.057,
  6.924999999999999,
  6.761,
  6.633000000000003,
  6.542000000000005],
 'rf2': [4.652241289180027,
  4.7161641266477465,
  4.85559697025184,
  5.175915159517984,
  5.5154436477800814,
  5.950768086779065,
  6.349471644817935,
  6.657336162829097,
  6.915201092801552,
  7.063870036803678,
  7.118906199674119,
  7.187489549894485,
  7.236832527789884,
  7.270430484380893,
  7.2648672203870595,
  7.25887239798654,
  7.253653354069585,
  7.241672643949897,
  7.159189139372893,
  7.023547924151543,
  6.840376317001933,
  6.642535487500261,
  6.42770366