# Nowcasting - Monthly

## Packages and Settings

In [13]:
import warnings
import numpy as np
import pandas as pd
import datetime as dt
import itertools
import matplotlib.pyplot as plt
from statsmodels.tsa.ar_model import AutoReg
from dateutil.relativedelta import relativedelta
from sklearn.linear_model import ElasticNet, Ridge, Lasso
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.ensemble import ExtraTreesRegressor
# from xgboost import XGBRegressor

from functools import reduce
from NowcastingPipelineM import NowcastingPH_M
import dynamicfactoranalysis as dfa

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MaxAbsScaler

# Silence every warning category for the rest of the session so that library
# warnings do not clutter the notebook output.
# NOTE(review): this also hides deprecation/convergence warnings - confirm that is intended.
warnings.filterwarnings("ignore")

## Elastic Net

In [2]:
class NowcastingML(NowcastingPH_M):
    """Elastic-net nowcasting model layered on ``NowcastingPH_M``.

    Defines ``lag_data`` (feature construction) and ``fit_model`` (fit and
    predict for one vintage). The base class is not visible here; these two
    methods are presumably the hooks its ``run()`` invokes - TODO confirm.
    """

    def lag_data(self, df, lag_order):
        """Append lagged copies of the target, tweet and economic columns.

        Args:
            df: DataFrame containing a ``'target'`` column plus any columns
                whose names contain ``'TWT'`` and/or ``'ECN'``.
            lag_order: ``(target_lag, tweet_lag, econ_lag)``; the number of
                lags to create for each group.

        Returns:
            ``df`` with the lag columns concatenated to the right. Target
            lags are shifted by ``3 * l`` rows and suffixed ``.Q{l}``; tweet
            and econ lags are shifted by ``l`` rows and suffixed ``.L{l}``.
            Columns whose values duplicate an earlier column are dropped.
        """
        target_lag, tweet_lag, econ_lag = lag_order
        tweet_cols = [col for col in df.columns if 'TWT' in col]
        econ_cols = [col for col in df.columns if 'ECN' in col]

        frames = [df]
        # Target lags step three rows at a time (one quarter, assuming a
        # monthly index - TODO confirm against load_data).
        frames += [
            df[['target']].shift(lag * 3).add_suffix(f'.Q{lag}')
            for lag in range(1, target_lag + 1)
        ]
        frames += [
            df[tweet_cols].shift(lag).add_suffix(f'.L{lag}')
            for lag in range(1, tweet_lag + 1)
        ]
        frames += [
            df[econ_cols].shift(lag).add_suffix(f'.L{lag}')
            for lag in range(1, econ_lag + 1)
        ]
        lagged = pd.concat(frames, axis=1)

        # Keep only the first of any set of columns holding identical values.
        return lagged.loc[:, ~lagged.T.duplicated(keep='first')]

    def fit_model(self, vintage, window, lag_order, **kwargs):
        """Fit an ``ElasticNet`` for one vintage and return its nowcasts.

        Args:
            vintage: Reference date of the nowcast; anything accepted by
                ``pd.to_datetime``.
            window: Number of months of history kept before ``vintage``.
            lag_order: ``(target_lag, tweet_lag, econ_lag)`` passed to
                ``lag_data``.
            **kwargs: Forwarded unchanged to ``self.load_data``.

        Returns:
            ``(nowcasts, model_desc)``: ``nowcasts`` is a list of predictions
            mapped back through ``target_scaler.inverse_transform``, one per
            complete test row; ``model_desc`` is ``f'ENet{lag_order}'``.

        Side effects:
            Sets ``self.prefix`` to ``model_desc``.
        """
        # Normalise once so every date computation below accepts the same
        # inputs (previously only the window slice tolerated a string).
        vintage_ts = pd.to_datetime(vintage)

        # NOTE: load_data is called with a fixed window=1000, so it does not
        # comply with the strict information-window requirement yet.
        df, target_scaler, _ = self.load_data(vintage, window=1000, **kwargs)
        df = self.lag_data(df, lag_order)

        # Restrict to the estimation window, then keep only rows whose month
        # shares the vintage month's position within the quarter.
        df = df.loc[vintage_ts - relativedelta(months=window):, :]
        df = df.loc[df.index.month % 3 == vintage_ts.month % 3, :]

        features = df.columns.drop('target')
        # relativedelta(month=1) is the *absolute* form: it sets the month to
        # January, so the test slice starts in January of the vintage year.
        # NOTE(review): confirm this is intended and not a typo for months=1.
        X_test = df.loc[vintage_ts + relativedelta(month=1):, features].dropna()
        df_train = df.loc[: vintage_ts - relativedelta(months=1), :].dropna()
        X_train = df_train.loc[:, features]
        y_train = df_train.loc[:, 'target']

        model = ElasticNet()
        model.fit(X_train, y_train)
        model_desc = f'ENet{lag_order}'
        self.prefix = model_desc    # Override class name

        # X_test has no NaN rows after dropna(), so predict in one batch.
        # An empty test set skips predict and flows through as an empty array.
        raw = model.predict(X_test) if len(X_test) else np.empty(0)
        nowcasts = list(
            target_scaler.inverse_transform(np.asarray(raw).reshape(-1, 1)).flatten()
        )

        return nowcasts, model_desc

In [None]:
# Run the elastic-net nowcast over all vintages and plot nowcast vs. actual
# growth alongside the tweet metrics.
target = 'GDP'
kmpair = {}
window = 37

# lag_order = (target_lag, tweet_lag, econ_lag)
model = NowcastingML(lag_order=(0, 2, 0), kmpair=kmpair, target=target)
summary = model.run(
    window=window,
    save_aggregate=True,
    with_econ=False,
    with_tweets=True,
)

# Tweet metrics from 2017 onwards, re-indexed to timestamps for plotting.
tweets = model.load_tweets('2023-01-01', kmpair=kmpair, window=window + 72)
tweets = tweets.loc[dt.datetime(2017, 1, 1):, :]
tweets.index = tweets.index.to_timestamp()

fig, axs = plt.subplots(3, 1, figsize=(10, 10), sharex=True)

# The top two panels share one layout: nowcast markers over the actual line.
growth_panels = (
    (axs[0], 'Nowcast_A', 'Actual_A', 'Annual GDP Growth'),
    (axs[1], 'Nowcast_Q', 'Actual_Q', 'Quarter GDP Growth'),
)
for ax, nowcast_col, actual_col, title in growth_panels:
    ax.plot(summary['date'], summary[nowcast_col], linewidth=0, marker='*', label='Nowcast')
    ax.plot(summary['date'], summary[actual_col], label='Actual')
    ax.legend()
    ax.set_title(title)

# Bottom panel: one semi-transparent line per tweet metric (legend left off).
for metric in tweets.columns:
    axs[2].plot(tweets.index, tweets[metric], label=metric, alpha=0.5)
axs[2].set_title('Tweet Metrics')

fig.show()
summary

In [62]:
# Rebuild by hand the train/test split for a single vintage so the training
# frame can be inspected.
target = 'GDP'
kmpair = {'PE': ['C_00']}
window = 1000
vintage = pd.to_datetime('2023-07-31')

# lag_order = (target_lag, tweet_lag, econ_lag)
model = NowcastingML(lag_order=(0, 0, 0), kmpair=kmpair, target=target)
df = model.load_data(
    vintage,
    window=1000,
    kmpair=kmpair,
    save_aggregate=True,
    with_econ=False,
    with_tweets=True,
)[0]
df = model.lag_data(df, lag_order=(1, 4, 0))

# Keep the history window, then only months in the vintage's quarter position.
window_start = vintage - relativedelta(months=window)
df = df.loc[window_start:, :]
same_phase = df.index.month % 3 == vintage.month % 3
df = df.loc[same_phase, :]

feature_cols = df.columns.drop('target')
# relativedelta(month=1) sets the month to January (absolute form).
test_start = vintage + relativedelta(month=1)
X_test = df.loc[test_start:, feature_cols].dropna()

train_end = vintage - relativedelta(months=1)
df_train = df.loc[:train_end, :].dropna()
df_train.index = df_train.index.to_timestamp()
df_train

Unnamed: 0_level_0,target,TWT.C_00_PE,target.Q1,TWT.C_00_PE.L1,TWT.C_00_PE.L2,TWT.C_00_PE.L3,TWT.C_00_PE.L4
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-07-01,0.372106,-1.746025,0.556565,-1.755499,-1.65602,-1.905902,-1.826555
2010-10-01,0.215249,-1.670232,0.372106,-1.812344,-1.612202,-1.746025,-1.755499
2011-01-01,-0.084508,-1.266395,0.215249,-1.597991,-1.38956,-1.670232,-1.812344
2011-04-01,-0.34247,-1.640625,-0.084508,-1.632335,-1.599175,-1.266395,-1.597991
2011-07-01,-0.449294,-1.282975,-0.34247,-1.473643,-1.010593,-1.640625,-1.632335
2011-10-01,-0.245819,-1.252184,-0.449294,-1.10415,-0.845979,-1.282975,-1.473643
2012-01-01,0.162521,1.436111,-0.245819,-0.181603,-0.636363,-1.252184,-1.10415
2012-04-01,0.199038,-0.162655,0.162521,0.358424,-0.038307,1.436111,-0.181603
2012-07-01,0.439382,0.87595,0.199038,0.463824,0.524222,-0.162655,0.358424
2012-10-01,0.529875,0.049329,0.439382,0.020907,2.884474,0.87595,0.463824
