# Nowcasting

## Packages and Settings

In [None]:
import warnings
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from statsmodels.tsa.ar_model import AutoReg
from dateutil.relativedelta import relativedelta
from sklearn.linear_model import ElasticNet, Ridge, Lasso
from ExtendedDynamicFactor import ExtendedDynamicFactor, OptimizeExtendedDynamicFactor

from NowcastingPipeline import NowcastingPH

%matplotlib inline
# Suppress every warning for the rest of the session so cell output stays readable.
warnings.simplefilter("ignore")

# Turn off pandas' chained-assignment check.
pd.options.mode.chained_assignment = None

# Display frames without a column limit, without a fixed width, and without
# wrapping wide frames across multiple blocks.
pd.options.display.max_columns = None
pd.options.display.width = None
pd.options.display.expand_frame_repr = False

## Dynamic Factor Model

In [None]:
class NowcastingDFM(NowcastingPH):
    """Nowcasting pipeline whose per-vintage model is an extended dynamic factor model."""

    def fit_model(self, vintage, window, DFM_order, optimize_order, **kwargs):
        """Fit a dynamic factor model for one vintage and nowcast that year's GDP.

        Parameters
        ----------
        vintage
            Vintage date; only its ``year`` attribute is used here, the value
            itself is forwarded to ``load_data``.
        window
            Forwarded unchanged to ``load_data``.
        DFM_order
            4-tuple ``(factor_order, error_order, k_factors, factor_lag)``.
            When ``optimize_order`` is true, ``factor_order``, ``k_factors`` and
            ``factor_lag`` are passed as the ``*_max`` arguments of the order
            search; ``error_order`` is passed as-is.
        optimize_order
            If true, build the model through ``OptimizeExtendedDynamicFactor``
            and its ``optimize`` method; otherwise build an
            ``ExtendedDynamicFactor`` with exactly the given orders.
        **kwargs
            Forwarded to ``load_data``, to the model constructor, to
            ``optimize`` (when used) and to ``fit``.

        Returns
        -------
        tuple
            ``(nowcasts, model_desc)``: the list of inverse-scaled GDP
            predictions for Q1..Q4 of the vintage year, and the label
            ``'DFM(<orders>)'`` built from the orders of the fitted model.

        Side effects
        ------------
        Sets ``self.prefix`` to ``'DFM_Opt'`` when optimising, else to the
        same label that is returned.
        """
        data, gdp_scaler, _econ_scaler = self.load_data(vintage, window, **kwargs)
        factor_order, error_order, k_factors, factor_lag = DFM_order

        if not optimize_order:
            spec = ExtendedDynamicFactor(
                endog=data,
                k_factors=k_factors,
                factor_lag=factor_lag,
                factor_order=factor_order,
                error_order=error_order,
                **kwargs,
            )
        else:
            search = OptimizeExtendedDynamicFactor(
                endog=data,
                k_factors_max=k_factors,
                factor_lag_max=factor_lag,
                factor_order_max=factor_order,
                error_order=error_order,
                **kwargs,
            )
            spec = search.optimize(**kwargs)

        fitted = spec.fit(disp=False, maxiter=1000, method='powell', ftol=1e-5, **kwargs)

        # Read the orders back from the model: after an order search they may
        # differ from the upper bounds that were passed in.
        fitted_order = (spec.factor_order, spec.error_order, spec.k_factors, spec.factor_lag)
        model_desc = f'DFM{fitted_order}'
        self.prefix = 'DFM_Opt' if optimize_order else model_desc    # Override class name

        year = vintage.year
        scaled_gdp = fitted.predict(start=f'{year}Q1', end=f'{year}Q4')[['GDP']]
        # Undo the GDP scaling applied by load_data before reporting.
        nowcasts = list(gdp_scaler.inverse_transform(scaled_gdp).flatten())

        return nowcasts, model_desc

In [None]:
# DFM_order = (factor_order, error_order, k_factors, factor_lag)
model = NowcastingDFM(DFM_order=(1, 0, 1, 0), optimize_order=False)
summary = model.run(window=25, save_aggregate=False)
# Alternative to re-running the pipeline: load a previously saved summary.
# summary = pd.read_csv('Results/DFM(1, 0, 1, 0)_W25_TE_summary.csv', parse_dates=['date'])

# Tweet metrics from 2017 onwards, with the index converted to timestamps
# so it can share the x-axis with the summary dates.
tweets = model.load_tweets('2023-01-01')
tweets = tweets.loc[dt.datetime(2017, 1, 1):, :]
tweets.index = tweets.index.to_timestamp()

fig, axs = plt.subplots(3, 1, figsize=(10, 10), sharex=True)

# Top two panels: nowcast markers against the actual growth line.
growth_panels = (
    (axs[0], 'Nowcast_A', 'Actual_A', 'Annual GDP Growth'),
    (axs[1], 'Nowcast_Q', 'Actual_Q', 'Quarter GDP Growth'),
)
for panel, nowcast_col, actual_col, panel_title in growth_panels:
    panel.plot(summary['date'], summary[nowcast_col], linewidth=0, marker='*', label='Nowcast')
    panel.plot(summary['date'], summary[actual_col], label='Actual')
    panel.legend()
    panel.set_title(panel_title)

# Bottom panel: one semi-transparent line per tweet metric.
tweet_panel = axs[2]
for metric in tweets.columns:
    tweet_panel.plot(tweets.index, tweets[metric], label=metric, alpha=0.5)
tweet_panel.legend()
tweet_panel.set_title('Tweet Metrics')

fig.show()
summary

## Machine Learning

In [None]:
class NowcastingML(NowcastingPH):
    """Nowcasting pipeline whose per-vintage model is an ElasticNet on lagged features."""

    def fit_model(self, vintage, window, lag_order, **kwargs):
        """Fit an ElasticNet on lagged GDP/tweet/economic columns and nowcast GDP.

        Parameters
        ----------
        vintage
            Vintage date; forwarded to ``load_data`` and used to split the
            data into training and test rows.
        window
            Forwarded unchanged to ``load_data``.
        lag_order
            3-tuple ``(gdp_lag, tweet_lag, econ_lag)``: the number of lags to
            add for the ``'GDP'`` column, for columns whose name contains
            ``'TWT'``, and for columns whose name contains ``'ECN'``.
        **kwargs
            Forwarded to ``load_data``.

        Returns
        -------
        tuple
            ``(nowcasts, model_desc)``: the list of inverse-scaled predictions
            for the test rows, and the label ``'ENet<lag_order>'``.

        Side effects
        ------------
        Sets ``self.prefix`` to the returned label.
        """
        data, gdp_scaler, _econ_scaler = self.load_data(vintage, window, **kwargs)
        gdp_lag, tweet_lag, econ_lag = lag_order

        tweet_cols = [name for name in data.columns if 'TWT' in name]
        econ_cols = [name for name in data.columns if 'ECN' in name]

        # Original columns first, then lags 1..n for GDP, tweets and economic
        # series (in that order); each lagged copy is suffixed '.Q<lag>'.
        frames = [data]
        lag_plan = ((['GDP'], gdp_lag), (tweet_cols, tweet_lag), (econ_cols, econ_lag))
        for columns, max_lag in lag_plan:
            for lag in range(1, max_lag + 1):
                frames.append(data[columns].shift(lag).add_suffix(f'.Q{lag}'))
        features = pd.concat(frames, axis=1)

        # Drop columns whose values duplicate an earlier column.
        is_repeat = features.T.duplicated(keep='first')
        features = features.loc[:, ~is_repeat]

        predictors = features.columns.drop('GDP')

        # NOTE(review): `month=3` (singular) replaces the month with March,
        # whereas `months=3` below shifts by three months. Presumably the test
        # slice is meant to start in Q1 of the vintage year - confirm.
        test_start = vintage + relativedelta(month=3)
        train_end = vintage - relativedelta(months=3)

        X_test = features.loc[test_start:, predictors].dropna()
        train = features.loc[:train_end, :].dropna()
        X_train = train.loc[:, predictors]
        y_train = train.loc[:, 'GDP']

        estimator = ElasticNet()
        estimator.fit(X_train, y_train)

        model_desc = f'ENet{lag_order}'
        self.prefix = model_desc    # Override class name

        # Predictions are in scaled units; map them back through the GDP scaler.
        scaled = estimator.predict(X_test).reshape(-1, 1)
        nowcasts = list(gdp_scaler.inverse_transform(scaled).flatten())

        return nowcasts, model_desc

In [None]:
# lag_order = (gdp_lag, tweet_lag, econ_lag)
model = NowcastingML(lag_order=(2, 1, 1))
summary = model.run(window=25, save_aggregate=False)
# Alternative to re-running the pipeline: load a previously saved summary.
# summary = pd.read_csv('Results/ENet(2, 1, 1)_W25_TE_summary.csv', parse_dates=['date'])

# Tweet metrics from 2017 onwards, with the index converted to timestamps
# so it can share the x-axis with the summary dates.
tweets = model.load_tweets('2023-01-01')
tweets = tweets.loc[dt.datetime(2017, 1, 1):, :]
tweets.index = tweets.index.to_timestamp()

fig, axs = plt.subplots(3, 1, figsize=(10, 10), sharex=True)

# Top two panels: nowcast markers against the actual growth line.
growth_panels = (
    (axs[0], 'Nowcast_A', 'Actual_A', 'Annual GDP Growth'),
    (axs[1], 'Nowcast_Q', 'Actual_Q', 'Quarter GDP Growth'),
)
for panel, nowcast_col, actual_col, panel_title in growth_panels:
    panel.plot(summary['date'], summary[nowcast_col], linewidth=0, marker='*', label='Nowcast')
    panel.plot(summary['date'], summary[actual_col], label='Actual')
    panel.legend()
    panel.set_title(panel_title)

# Bottom panel: one semi-transparent line per tweet metric.
tweet_panel = axs[2]
for metric in tweets.columns:
    tweet_panel.plot(tweets.index, tweets[metric], label=metric, alpha=0.5)
tweet_panel.legend()
tweet_panel.set_title('Tweet Metrics')

fig.show()
summary

## Autoregression

In [None]:
class NowcastingAR(NowcastingPH):
    """Nowcasting pipeline whose per-vintage model is a univariate autoregression on GDP."""

    def fit_model(self, vintage, window, AR_order, **kwargs):
        """Fit an autoregression on the GDP series and nowcast the vintage year.

        Parameters
        ----------
        vintage
            Vintage date; only its ``year`` attribute is used here, the value
            itself is forwarded to ``load_data``.
        window
            Forwarded unchanged to ``load_data``.
        AR_order
            Passed to ``AutoReg`` as ``lags``; also used to build the model
            label, so results for different orders are labelled distinctly.
        **kwargs
            Forwarded to ``load_data``.

        Returns
        -------
        tuple
            ``(nowcasts, model_desc)``: the list of inverse-scaled GDP
            predictions for Q1..Q4 of the vintage year, and the label
            ``'AR(<AR_order>)'``.

        Side effects
        ------------
        Sets ``self.prefix`` to the returned label.
        """
        df, gdp_scaler, _econ_scaler = self.load_data(vintage, window, **kwargs)

        # Missing GDP observations are removed before fitting.
        model = AutoReg(df['GDP'].dropna(), lags=AR_order).fit()

        # Derive the label from the requested order instead of hard-coding
        # 'AR(1)'; for AR_order=1 this yields the same string as before.
        model_desc = f'AR({AR_order})'
        self.prefix = model_desc    # Override class name

        nowcasts = model.predict(start=f'{vintage.year}Q1', end=f'{vintage.year}Q4').to_numpy().reshape(-1, 1)
        # Undo the GDP scaling applied by load_data before reporting.
        nowcasts = list(gdp_scaler.inverse_transform(nowcasts).flatten())

        return nowcasts, model_desc

In [None]:
model = NowcastingAR(AR_order=1)
summary = model.run(window=25, save_aggregate=False)
# Alternative to re-running the pipeline: load a previously saved summary.
# summary = pd.read_csv('Results/AR(1)_W25_TE_summary.csv', parse_dates=['date'])

# Tweet metrics from 2017 onwards, with the index converted to timestamps
# so it can share the x-axis with the summary dates.
tweets = model.load_tweets('2023-01-01')
tweets = tweets.loc[dt.datetime(2017, 1, 1):, :]
tweets.index = tweets.index.to_timestamp()

fig, axs = plt.subplots(3, 1, figsize=(10, 10), sharex=True)

# Top two panels: nowcast markers against the actual growth line.
growth_panels = (
    (axs[0], 'Nowcast_A', 'Actual_A', 'Annual GDP Growth'),
    (axs[1], 'Nowcast_Q', 'Actual_Q', 'Quarter GDP Growth'),
)
for panel, nowcast_col, actual_col, panel_title in growth_panels:
    panel.plot(summary['date'], summary[nowcast_col], linewidth=0, marker='*', label='Nowcast')
    panel.plot(summary['date'], summary[actual_col], label='Actual')
    panel.legend()
    panel.set_title(panel_title)

# Bottom panel: one semi-transparent line per tweet metric.
tweet_panel = axs[2]
for metric in tweets.columns:
    tweet_panel.plot(tweets.index, tweets[metric], label=metric, alpha=0.5)
tweet_panel.legend()
tweet_panel.set_title('Tweet Metrics')

fig.show()
summary