In [1]:
import pandas as pd
import numpy as np
import os
import pathlib
import joblib
from fredapi import Fred
from dotenv import load_dotenv
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_curve, auc
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm
import matplotlib.pyplot as plt
import shap
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import json


# Load FRED API key from .env file
env_path = pathlib.Path('..') / '.env'
load_dotenv(dotenv_path=env_path)
FRED_API_KEY = os.getenv("FRED_API_KEY")
fred = Fred(api_key=FRED_API_KEY)

In [2]:
# Define series to fetch from FRED
series_dict = {
    'gdp': ('GDP', 'Q'),
    'cpi': ('CPIAUCSL', 'M'),
    'unemp': ('UNRATE', 'M'),
    'fedrate': ('FEDFUNDS', 'M'),
}

# Fetch data from FRED
data = pd.DataFrame()
for var_name, (series_id, freq) in series_dict.items():
    series_data = fred.get_series(series_id)
    data[var_name] = series_data

In [33]:
# Resample monthly data to quarterly and calculate additional variables
data_q = pd.DataFrame()
data_q['fedrate'] = data['fedrate'].resample('Q').last()
data_q['unemp'] = data['unemp'].resample('Q').last()
data_q['cpi'] = data['cpi'].resample('Q').last()
data_q['cpi_pct'] = data_q['cpi'].pct_change() * 100
data_q['unemp_pct'] = data_q['unemp'].pct_change() * 100
data_q['unemp_squared'] = data_q['unemp'] ** 3
data_q['cpi_pct_squared'] = data_q['cpi_pct'] ** 2

healthy_unemp = 5
cpi_quarter_target = 0.4967

# Define start and end dates
start_date = '1955-01-01'
end_date = '2022-12-31'
extended_start_date = pd.to_datetime(start_date) - pd.DateOffset(months=6)  # extend by two quarters

data_q = data_q.loc[extended_start_date:end_date]
data_q['high_unemp'] = (data_q['unemp'] > 5).astype(int)
data_q['high_cpi_growth'] = (data_q['cpi_pct'] > r).astype(int)
data_q['fedrate_change'] = data_q['fedrate'].diff()
data_q['target'] = (data_q['fedrate_change'] > 0).astype(int)
df = data_q.loc[start_date:end_date]
df

Unnamed: 0,fedrate,unemp,cpi,cpi_pct,unemp_pct,unemp_squared,cpi_pct_squared,high_unemp,high_cpi_growth,fedrate_change,target
1955-03-31,1.39,4.9,26.770,0.187126,-14.035088,117.649,0.035016,0,1,0.54,1
1955-06-30,1.43,4.7,26.790,0.074710,-4.081633,103.823,0.005582,0,1,0.04,1
1955-09-30,1.68,4.0,26.760,-0.111982,-14.893617,64.000,0.012540,0,0,0.25,1
1955-12-31,2.24,4.3,26.820,0.224215,7.500000,79.507,0.050272,0,1,0.56,1
1956-03-31,2.45,4.0,26.830,0.037286,-6.976744,64.000,0.001390,0,1,0.21,1
...,...,...,...,...,...,...,...,...,...,...,...
2021-12-31,0.08,4.5,276.522,1.750784,-16.666667,91.125,3.065244,0,1,-0.02,0
2022-03-31,0.08,4.0,282.599,2.197655,-11.111111,64.000,4.829688,0,1,0.00,0
2022-06-30,0.33,3.6,288.611,2.127396,-10.000000,46.656,4.525814,0,1,0.25,1
2022-09-30,1.68,3.5,294.628,2.084813,-2.777778,42.875,4.346446,0,1,1.35,1


In [34]:
# Preprocessed data
X = df[['unemp_squared', 'cpi_pct']]
y = df['target']

# Split the dataset while preserving time order
train_size = int(len(df) * 0.8)
X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Add a constant to the features
X_train_scaled_sm = sm.add_constant(X_train_scaled)
X_test_scaled_sm = sm.add_constant(X_test_scaled)

# Fit the logistic regression model
logit_model = sm.Logit(y_train, X_train_scaled_sm).fit()
print(logit_model.summary())

# Get the predicted probabilities
log_reg_probs = logit_model.predict(X_test_scaled_sm)

# Compute ROC Curve and AUC Score
fpr, tpr, thresholds = roc_curve(y_test, log_reg_probs)
roc_auc = auc(fpr, tpr)

# Plotting ROC Curve
roc_fig = px.line(
    x=fpr, y=tpr,
    labels={'x': 'False Positive Rate', 'y': 'True Positive Rate'},
    title='Receiver Operating Characteristic',
    width=600, height=600
)
roc_fig.add_shape(
    type='line', line=dict(dash='dash', color='navy'),
    x0=0, x1=1, y0=0, y1=1
)
roc_fig.add_annotation(
    x=0, y=1,
    text=f'AUC: {roc_auc:.2f}',
    showarrow=False
)
roc_fig.write_image('roc_curve.png', width=600, height=600)  # Save as PNG

# Get the predicted classes
log_reg_preds = np.round(log_reg_probs)

# Print Classification Report
print("Logistic Regression Classification Report:")
print(classification_report(y_test, log_reg_preds))

Optimization terminated successfully.
         Current function value: 0.670952
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:                 target   No. Observations:                  217
Model:                          Logit   Df Residuals:                      214
Method:                           MLE   Df Model:                            2
Date:                Wed, 01 Nov 2023   Pseudo R-squ.:                 0.02951
Time:                        13:12:47   Log-Likelihood:                -145.60
converged:                       True   LL-Null:                       -150.02
Covariance Type:            nonrobust   LLR p-value:                   0.01195
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.1167      0.139      0.840      0.401      -0.156       0.389
x1            -0.3767      0.