#### Robustness test (d+1) - Regression Analysis on Interest Rate Levels

This notebook performs robustness test on the regression analysis made on notebook 9. It does the same thing, but using the data from the interest rates one day after the publication of the minute instead.

In [1]:
import pandas as pd
import statsmodels.api as sm
import numpy as np

from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler

In [2]:
INITIAL_DATE = "2003-06-26"

In [3]:
minutes_info = pd.read_excel("./data/raw/minutes_info.xlsx")
minutes_info = minutes_info[minutes_info["DataReferencia"] >= INITIAL_DATE]
minutes_info = minutes_info.sort_values("DataReferencia", ascending=True)

dates_ref = minutes_info["DataReferencia"]
detes_pub = minutes_info["DataPublicacao"]

In [4]:
futures_ir = pd.read_excel("./data/future_interest_rates/future_interest_rates.xlsx", index_col="Date")
futures_ir.index = pd.to_datetime(futures_ir.index)
futures_ir.sort_index(inplace=True)

#futures_ir_returns = futures_ir.pct_change().dropna()
futures_ir_returns = futures_ir.diff().dropna()

In [5]:
futures_ir_returns_d_plus_1 = futures_ir_returns.shift(-1)
returns_pub_date_d_plus_1 = futures_ir_returns_d_plus_1[futures_ir_returns_d_plus_1.index.isin(detes_pub)]

#### Regression on optimism index

In [6]:
df_optimism = pd.read_excel("./data/processed/index_optimism.xlsx", index_col=0)
df_optimism = df_optimism.drop(columns=['minute'])
df_optimism.columns = [f"optimism_{i}" for i in range(len(df_optimism.columns))]

df_optimism = df_optimism.diff()

In [7]:
regression_data = pd.concat([returns_pub_date_d_plus_1, df_optimism], axis=1)
regression_data.dropna(inplace=True)

possible_y = returns_pub_date_d_plus_1.columns.tolist()
all_x = df_optimism.columns.tolist()

In [8]:
reg_level_optimism = []
X = regression_data[all_x]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

for maturity in possible_y:
    Y = regression_data[maturity]

    lasso_cv_model = LassoCV(cv=10, random_state=100, max_iter=10000)
    lasso_cv_model.fit(X_scaled, Y)

    selected_coeffs_mask = lasso_cv_model.coef_ != 0
    selected_variables_names = X.columns[selected_coeffs_mask].tolist()

    if selected_variables_names:
        X_selected_scaled = X_scaled[:, selected_coeffs_mask]
        X_selected_sm = sm.add_constant(X_selected_scaled)

        ols_final_model = sm.OLS(Y, X_selected_sm).fit()

        ols_params = pd.Series(ols_final_model.params.values[1:], index=selected_variables_names)
        ols_std = pd.Series(ols_final_model.bse.values[1:], index=selected_variables_names)
        ols_pvalues = pd.Series(ols_final_model.pvalues.values[1:], index=selected_variables_names)

        reg_level_optimism.append({
            'Vertix': maturity,
            'Selected_Alpha': lasso_cv_model.alpha_,
            'Coefs_OLS_Final': ols_params,
            'Std_OLS_Final': ols_std,
            'Pvalues_OLS_Final': ols_pvalues,
            'Adj_R_Sqrd_OLS_Final': ols_final_model.rsquared_adj
        })
    else:
        reg_level_optimism.append({
            'Vertix': maturity,
            'Selected_Alpha': lasso_cv_model.alpha_,
            'Coefs_OLS_Final': pd.Series(),
            'Std_OLS_Final': pd.Series(),
            'Pvalues_OLS_Final': pd.Series(),
            'Adj_R_Sqrd_OLS_Final': 0
        })

In [9]:
for result in reg_level_optimism:
    print(f"#### Results for Maturity (Vertex): {result['Vertix']}")
    print(f"Best alpha (λ) found by cross-validation: {result['Selected_Alpha']:.6f}")
    print(f"Adjusted R squared of the final OLS model: {result['Adj_R_Sqrd_OLS_Final']:.4f}")
            
    # --- Final OLS Model Results ---
    if not result['Coefs_OLS_Final'].empty:
        print("\n  Final OLS Results:")
        for var in result['Coefs_OLS_Final'].index:
            coef = result['Coefs_OLS_Final'][var]
            std_err = result['Std_OLS_Final'][var]
            pval = result['Pvalues_OLS_Final'][var]
            print(f"    - {var}: Coef = {coef:.6f}, Std. Err = {std_err:.6f}, P-value = {pval:.4f}")
    else:
        print("No variables selected.")
    print("-" * 60)

#### Results for Maturity (Vertex): v_21
Best alpha (λ) found by cross-validation: 0.002729
Adjusted R squared of the final OLS model: 0.0000
No variables selected.
------------------------------------------------------------
#### Results for Maturity (Vertex): v_63
Best alpha (λ) found by cross-validation: 0.004179
Adjusted R squared of the final OLS model: 0.0154

  Final OLS Results:
    - optimism_3: Coef = -0.004804, Std. Err = 0.002441, P-value = 0.0505
------------------------------------------------------------
#### Results for Maturity (Vertex): v_126
Best alpha (λ) found by cross-validation: 0.004787
Adjusted R squared of the final OLS model: 0.0036

  Final OLS Results:
    - optimism_1: Coef = -0.004787, Std. Err = 0.003717, P-value = 0.1994
------------------------------------------------------------
#### Results for Maturity (Vertex): v_252
Best alpha (λ) found by cross-validation: 0.005804
Adjusted R squared of the final OLS model: 0.0000
No variables selected.
---------

#### Regression on uncertainty index

In [10]:
df_uncertainty = pd.read_excel("./data/processed/index_uncertainty.xlsx", index_col=0)
df_uncertainty = df_uncertainty.drop(columns=['minute'])
df_uncertainty.columns = [f"uncertainty_{i}" for i in range(len(df_uncertainty.columns))]

df_uncertainty = df_uncertainty.diff()

In [11]:
regression_data = pd.concat([returns_pub_date_d_plus_1, df_uncertainty], axis=1)
regression_data.dropna(inplace=True)

possible_y = returns_pub_date_d_plus_1.columns.tolist()
all_x = df_uncertainty.columns.tolist()

In [12]:
reg_level_uncertainty = []
X = regression_data[all_x]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

for maturity in possible_y:
    Y = regression_data[maturity]

    lasso_cv_model = LassoCV(cv=10, random_state=100, max_iter=10000)
    lasso_cv_model.fit(X_scaled, Y)

    selected_coeffs_mask = lasso_cv_model.coef_ != 0
    selected_variables_names = X.columns[selected_coeffs_mask].tolist()

    if selected_variables_names:
        X_selected_scaled = X_scaled[:, selected_coeffs_mask]
        X_selected_sm = sm.add_constant(X_selected_scaled)

        ols_final_model = sm.OLS(Y, X_selected_sm).fit()
        
        ols_params = pd.Series(ols_final_model.params.values[1:], index=selected_variables_names)
        ols_std = pd.Series(ols_final_model.bse.values[1:], index=selected_variables_names)
        ols_pvalues = pd.Series(ols_final_model.pvalues.values[1:], index=selected_variables_names)

        reg_level_uncertainty.append({
            'Vertix': maturity,
            'Selected_Alpha': lasso_cv_model.alpha_,
            'Coefs_OLS_Final': ols_params,
            'Std_OLS_Final': ols_std,
            'Pvalues_OLS_Final': ols_pvalues,
            'Adj_R_Sqrd_OLS_Final': ols_final_model.rsquared_adj
        })
    else:
        reg_level_uncertainty.append({
            'Vertix': maturity,
            'Selected_Alpha': lasso_cv_model.alpha_,
            'Coefs_OLS_Final': pd.Series(),
            'Std_OLS_Final': pd.Series(),
            'Pvalues_OLS_Final': pd.Series(),
            'Adj_R_Sqrd_OLS_Final': 0
        })

In [13]:
for result in reg_level_uncertainty:
    print(f"#### Results for Maturity (Vertex): {result['Vertix']}")
    print(f"Best alpha (λ) found by cross-validation: {result['Selected_Alpha']:.6f}")
    print(f"Adjusted R squared of the final OLS model: {result['Adj_R_Sqrd_OLS_Final']:.4f}")
            
    # --- Final OLS Model Results ---
    if not result['Coefs_OLS_Final'].empty:
        print("\n  Final OLS Results:")
        for var in result['Coefs_OLS_Final'].index:
            coef = result['Coefs_OLS_Final'][var]
            std_err = result['Std_OLS_Final'][var]
            pval = result['Pvalues_OLS_Final'][var]
            print(f"    - {var}: Coef = {coef:.6f}, Std. Err = {std_err:.6f}, P-value = {pval:.4f}")
    else:
        print("No variables selected.")
    print("-" * 60)

#### Results for Maturity (Vertex): v_21
Best alpha (λ) found by cross-validation: 0.002156
Adjusted R squared of the final OLS model: 0.0000
No variables selected.
------------------------------------------------------------
#### Results for Maturity (Vertex): v_63
Best alpha (λ) found by cross-validation: 0.001678
Adjusted R squared of the final OLS model: -0.0029

  Final OLS Results:
    - uncertainty_5: Coef = 0.001678, Std. Err = 0.002463, P-value = 0.4966
------------------------------------------------------------
#### Results for Maturity (Vertex): v_126
Best alpha (λ) found by cross-validation: 0.004283
Adjusted R squared of the final OLS model: 0.0018

  Final OLS Results:
    - uncertainty_4: Coef = 0.004283, Std. Err = 0.003720, P-value = 0.2512
------------------------------------------------------------
#### Results for Maturity (Vertex): v_252
Best alpha (λ) found by cross-validation: 0.009077
Adjusted R squared of the final OLS model: 0.0064

  Final OLS Results:
    -