#### Regression Analysis on Interest Rate Volatility

This notebook performs a regression analysis to model how central bank communication impacts the volatility of the term structure of futures interest rates.
 * It uses several linear regression specifications to examine the relationship between different aspects of the communication—such as the event of the release, the level of the sentiment indices (optimism and uncertainty), and dummies for pessimistic communication—and the volatility of interest rates across various maturities.

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler

In [2]:
# Earliest "DataReferencia" value kept when the minutes metadata is filtered
# (the comparison used below is >=, so this date itself is included).
INITIAL_DATE = "2003-06-26"

In [3]:
# Load the minutes metadata, keep rows whose reference date is on/after INITIAL_DATE,
# and order them chronologically by reference date.
minutes_info = (
    pd.read_excel("./data/raw/minutes_info.xlsx")
    .loc[lambda frame: frame["DataReferencia"] >= INITIAL_DATE]
    .sort_values("DataReferencia", ascending=True)
)

# Reference and publication dates of the minutes.
# NOTE(review): `detes_pub` (sic) is misspelt, but other cells read this exact name,
# so it is kept as is.
dates_ref = minutes_info["DataReferencia"]
detes_pub = minutes_info["DataPublicacao"]

In [4]:
# Futures interest rates: one row per date, one column per maturity vertex.
futures_ir = pd.read_excel(
    "./data/future_interest_rates/future_interest_rates.xlsx",
    index_col="Date",
)
futures_ir.index = pd.to_datetime(futures_ir.index)
futures_ir = futures_ir.sort_index()

# Period-over-period percentage change of each rate; rows containing any NaN are dropped.
futures_ir_returns = futures_ir.pct_change().dropna()
# TODO(review): the original author left an open question here about whether the returns
# should be shifted one row back, i.e. futures_ir_returns.shift(-1). Currently not applied.

# Column labels of the return frame; used as the list of dependent variables.
vertix_names = list(futures_ir_returns.columns)

#### Futures moving-window volatility (5 business days)

In [5]:
def calc_vol(returns: pd.Series, periods_per_year: int = 252) -> float:
    """Annualised volatility of a window of periodic returns.

    Parameters
    ----------
    returns : pd.Series
        Returns observed at a fixed frequency (one value per period).
    periods_per_year : int, default 252
        Number of periods in a year, used for the square-root-of-time scaling.
        The default keeps the original hard-coded behaviour.

    Returns
    -------
    float
        Sample standard deviation of ``returns`` (pandas default, ``ddof=1``)
        multiplied by ``sqrt(periods_per_year)``. NaN when fewer than two
        observations are available.
    """
    return returns.std() * np.sqrt(periods_per_year)


# Annualised volatility over a rolling window of 5 observations, per maturity.
# The leading rows, where the window is not yet full, come out as NaN and are dropped.
mw_volatility_5 = futures_ir_returns.rolling(window=5).apply(calc_vol).dropna()

# Natural logarithm of the same rolling volatility.
mw_log_volatility_5 = np.log(mw_volatility_5)


In [6]:
# Add a 0/1 column to both volatility frames: 1 on index dates that appear among the
# minutes' publication dates, 0 on every other date.
for vol_frame in (mw_volatility_5, mw_log_volatility_5):
    is_publication_day = vol_frame.index.isin(detes_pub)
    vol_frame["dummy_minute_pub_date"] = is_publication_day.astype("int64")

#### General regression on volatility - Using only the dummy

In [7]:
# Independent copy of the rolling-volatility frame (including the publication-day dummy)
# used as the data set for regression 1.
df_reg_vol_1 = mw_volatility_5.copy()

In [8]:
# REGRESSION 1
# For every maturity: OLS of the rolling volatility on a constant and the
# publication-day dummy. One result dict per maturity is collected in `reg1_results`.
reg1_results = []
possible_y = vertix_names

# The design matrix (constant + dummy) is identical for every maturity, so it is
# built once instead of on every loop iteration.
X = sm.add_constant(df_reg_vol_1["dummy_minute_pub_date"])

for maturity in possible_y:
    Y = df_reg_vol_1[maturity]

    model = sm.OLS(Y, X).fit()

    # The former `if len(X.columns) > 0` guard was removed: the design matrix always
    # has at least one column, so the check could never be false.
    reg1_results.append({
        'Vertix': maturity,
        'Betas': model.params[X.columns],
        'Std': model.bse[X.columns],          # standard errors of the coefficients
        'P-Values': model.pvalues[X.columns],
        'Adj_R_Sqrd': model.rsquared_adj
    })


In [9]:
# Plain-text report of regression 1, one section per maturity.
# The 'Std' entry stores `model.bse`, i.e. coefficient standard errors, so the section
# is labelled "Standard Errors" (it was previously mislabelled "Standard Deviation").
for result in reg1_results:
    print(f"#### Results for Maturity (Vertex): {result['Vertix']}")
    for heading, key in (
        ("Betas:", "Betas"),
        ("Standard Errors:", "Std"),
        ("P-Values:", "P-Values"),
    ):
        print(heading)
        print(result[key])
        print("\n")
    print("Adjusted R Squared:")
    print(result["Adj_R_Sqrd"])
    print("-" * 30 + "\n")

#### Results for Maturity (Vertex): v_21
Betas:
const                    0.027774
dummy_minute_pub_date    0.010506
dtype: float64


Standard Deviation:
const                    0.000610
dummy_minute_pub_date    0.003335
dtype: float64


P-Values:
const                    0.00000
dummy_minute_pub_date    0.00164
dtype: float64


Adjusted R Squared:
0.0016054167257748553
------------------------------

#### Results for Maturity (Vertex): v_63
Betas:
const                    0.047431
dummy_minute_pub_date    0.010996
dtype: float64


Standard Deviation:
const                    0.000989
dummy_minute_pub_date    0.005403
dtype: float64


P-Values:
const                    0.000000
dummy_minute_pub_date    0.041899
dtype: float64


Adjusted R Squared:
0.0005656600766092978
------------------------------

#### Results for Maturity (Vertex): v_126
Betas:
const                    0.079304
dummy_minute_pub_date    0.018086
dtype: float64


Standard Deviation:
const                    0.001444


#### Regressions on optimism index (2 and 3)

In [10]:
# Optimism index: the first spreadsheet column becomes the index and the 'minute'
# column is discarded.
df_optimism = pd.read_excel(
    "./data/processed/index_optimism.xlsx", index_col=0
).drop(columns=['minute'])

# Rename the remaining columns positionally: optimism_0, optimism_1, ...
df_optimism.columns = [
    f"optimism_{position}" for position, _ in enumerate(df_optimism.columns)
]

# Row-over-row change of every optimism column (first row is NaN).
df_optimism_dif = df_optimism.diff()

In [11]:
# Dummy version of the index: 0 where the index rose or stayed flat (diff >= 0),
# 1 where it fell (diff < 0). The first row has no diff and is dropped.
# NOTE(review): a cell whose diff is NaN in any later row keeps its original index value.
df_optimism_bool = (
    df_optimism
    .mask(df_optimism_dif >= 0, 0)
    .mask(df_optimism_dif < 0, 1)
    .iloc[1:]
)

# Regression 2 data: volatility in levels, left-joined (on the index) with the dummies.
# Dates without an index row get 0 in every optimism column.
df_base_reg2 = mw_volatility_5.copy()
df_optimism_reg_2 = df_base_reg2.join(df_optimism_bool)
cols_optimism = [name for name in df_optimism_reg_2.columns if 'optimism_' in name]
df_optimism_reg_2[cols_optimism] = df_optimism_reg_2[cols_optimism].fillna(0)

# Regression 3 data: log volatility, left-joined with the index in levels,
# with the same zero fill for dates that have no index row.
df_base_reg3 = mw_log_volatility_5.copy()
df_optimism_reg_3 = df_base_reg3.join(df_optimism)
cols_optimism = [name for name in df_optimism_reg_3.columns if 'optimism_' in name]
df_optimism_reg_3[cols_optimism] = df_optimism_reg_3[cols_optimism].fillna(0)

In [12]:
# REGRESSION 2
# Dependent variable: rolling volatility in levels, one model per maturity.
# Candidate regressors: the columns of `df_optimism_bool` plus the publication-day dummy.
# Stage 1 lets LassoCV decide which regressors survive; stage 2 refits plain OLS on the
# survivors so that coefficients, standard errors and p-values can be reported.
# NOTE(review): the OLS p-values do not account for the selection step.
results_reg2 = []
possible_y = vertix_names
all_x_reg2 = [*df_optimism_bool.columns, "dummy_minute_pub_date"]
X_reg2 = df_optimism_reg_2[all_x_reg2]

# Standardise the regressors and put them back into a labelled frame.
scaler_reg2 = StandardScaler()
X_reg2_scaled = pd.DataFrame(
    scaler_reg2.fit_transform(X_reg2),
    index=X_reg2.index,
    columns=X_reg2.columns,
)

for maturity in possible_y:
    Y = df_optimism_reg_2[maturity]

    # Stage 1: 10-fold cross-validated Lasso; a non-zero coefficient means "selected".
    lasso_cv_model = LassoCV(cv=10, random_state=100, max_iter=10000)
    lasso_cv_model.fit(X_reg2_scaled, Y)
    selected_variables_names = [
        name
        for name, weight in zip(X_reg2.columns, lasso_cv_model.coef_)
        if weight != 0
    ]

    if not selected_variables_names:
        # Nothing survived the Lasso: record empty statistics and a zero adjusted R².
        results_reg2.append({
            'Vertix': maturity,
            'Selected_Alpha': lasso_cv_model.alpha_,
            'Coefs_OLS_Final': pd.Series(dtype=float),
            'Std_OLS_Final': pd.Series(dtype=float),
            'Pvalues_OLS_Final': pd.Series(dtype=float),
            'Adj_R_Sqrd_OLS_Final': 0
        })
        continue

    # Stage 2: OLS with intercept on the selected (still standardised) regressors.
    X_selected_scaled = X_reg2_scaled[selected_variables_names]
    X_selected_sm = sm.add_constant(X_selected_scaled)
    ols_final_model = sm.OLS(Y, X_selected_sm).fit()

    # Skip the first entry (the intercept) of each statistic.
    ols_params, ols_std, ols_pvalues = (
        pd.Series(stat[1:], index=selected_variables_names)
        for stat in (ols_final_model.params, ols_final_model.bse, ols_final_model.pvalues)
    )

    results_reg2.append({
        'Vertix': maturity,
        'Selected_Alpha': lasso_cv_model.alpha_,
        'Coefs_OLS_Final': ols_params,
        'Std_OLS_Final': ols_std,
        'Pvalues_OLS_Final': ols_pvalues,
        'Adj_R_Sqrd_OLS_Final': ols_final_model.rsquared_adj
    })

In [13]:
# Plain-text report of regression 2: chosen alpha, adjusted R² and, when any regressor
# was selected, one line per regressor with coefficient, standard error and p-value.
for result in results_reg2:
    final_coefs = result['Coefs_OLS_Final']

    print(f"#### Results for Maturity (Vertex): {result['Vertix']}")
    print(f"Best alpha (λ) found by cross-validation: {result['Selected_Alpha']:.6f}")
    print(f"Adjusted R squared of the final OLS model: {result['Adj_R_Sqrd_OLS_Final']:.4f}")

    # --- Final OLS model ---
    if final_coefs.empty:
        print("No variables selected.")
    else:
        print("\n  Final OLS Results:")
        for var, coef in final_coefs.items():
            std_err = result['Std_OLS_Final'][var]
            pval = result['Pvalues_OLS_Final'][var]
            print(f"    - {var}: Coef = {coef:.6f}, Std. Err = {std_err:.6f}, P-value = {pval:.4f}")
    print("-" * 60)

#### Results for Maturity (Vertex): v_21
Best alpha (λ) found by cross-validation: 0.000399
Adjusted R squared of the final OLS model: 0.0041

  Final OLS Results:
    - optimism_0: Coef = -0.000962, Std. Err = 0.000751, P-value = 0.2002
    - optimism_3: Coef = 0.001975, Std. Err = 0.000785, P-value = 0.0119
    - optimism_4: Coef = -0.001704, Std. Err = 0.000750, P-value = 0.0231
    - optimism_5: Coef = 0.000920, Std. Err = 0.000820, P-value = 0.2618
    - dummy_minute_pub_date: Coef = 0.001460, Std. Err = 0.001066, P-value = 0.1710
------------------------------------------------------------
#### Results for Maturity (Vertex): v_63
Best alpha (λ) found by cross-validation: 0.001514
Adjusted R squared of the final OLS model: 0.0019

  Final OLS Results:
    - optimism_3: Coef = 0.002675, Std. Err = 0.001106, P-value = 0.0156
    - optimism_5: Coef = 0.001226, Std. Err = 0.001106, P-value = 0.2678
------------------------------------------------------------
#### Results for Maturity 

In [14]:
# REGRESSION 3
# Dependent variable: log of the rolling volatility, one model per maturity.
# Candidate regressors: the optimism index columns (levels) plus the publication-day dummy.
# Same two-stage procedure as regression 2: LassoCV selects, OLS is refit on the survivors.
# NOTE(review): `possible_y` is not set in this cell; it is whatever an earlier cell left.
results_reg3 = []

all_x_reg3 = [*df_optimism.columns, "dummy_minute_pub_date"]
X_reg3 = df_optimism_reg_3[all_x_reg3]

# Standardise the regressors and put them back into a labelled frame.
scaler_reg3 = StandardScaler()
X_reg3_scaled = pd.DataFrame(
    scaler_reg3.fit_transform(X_reg3),
    index=X_reg3.index,
    columns=X_reg3.columns,
)

for maturity in possible_y:
    Y = df_optimism_reg_3[maturity]

    # Stage 1: 10-fold cross-validated Lasso; a non-zero coefficient means "selected".
    lasso_cv_model = LassoCV(cv=10, random_state=100, max_iter=10000)
    lasso_cv_model.fit(X_reg3_scaled, Y)
    selected_variables_names = [
        name
        for name, weight in zip(X_reg3.columns, lasso_cv_model.coef_)
        if weight != 0
    ]

    if not selected_variables_names:
        # Nothing survived the Lasso: record empty statistics and a zero adjusted R².
        results_reg3.append({
            'Vertix': maturity,
            'Selected_Alpha': lasso_cv_model.alpha_,
            'Coefs_OLS_Final': pd.Series(dtype=float),
            'Std_OLS_Final': pd.Series(dtype=float),
            'Pvalues_OLS_Final': pd.Series(dtype=float),
            'Adj_R_Sqrd_OLS_Final': 0
        })
        continue

    # Stage 2: OLS with intercept on the selected (still standardised) regressors.
    X_selected_scaled = X_reg3_scaled[selected_variables_names]
    X_selected_sm = sm.add_constant(X_selected_scaled)
    ols_final_model = sm.OLS(Y, X_selected_sm).fit()

    # Skip the first entry (the intercept) of each statistic.
    ols_params, ols_std, ols_pvalues = (
        pd.Series(stat[1:], index=selected_variables_names)
        for stat in (ols_final_model.params, ols_final_model.bse, ols_final_model.pvalues)
    )

    results_reg3.append({
        'Vertix': maturity,
        'Selected_Alpha': lasso_cv_model.alpha_,
        'Coefs_OLS_Final': ols_params,
        'Std_OLS_Final': ols_std,
        'Pvalues_OLS_Final': ols_pvalues,
        'Adj_R_Sqrd_OLS_Final': ols_final_model.rsquared_adj
    })

In [15]:
# Plain-text report of regression 3: chosen alpha, adjusted R² and, when any regressor
# was selected, one line per regressor with coefficient, standard error and p-value.
for result in results_reg3:
    final_coefs = result['Coefs_OLS_Final']

    print(f"#### Results for Maturity (Vertex): {result['Vertix']}")
    print(f"Best alpha (λ) found by cross-validation: {result['Selected_Alpha']:.6f}")
    print(f"Adjusted R squared of the final OLS model: {result['Adj_R_Sqrd_OLS_Final']:.4f}")

    # --- Final OLS model ---
    if final_coefs.empty:
        print("No variables selected.")
    else:
        print("\n  Final OLS Results:")
        for var, coef in final_coefs.items():
            std_err = result['Std_OLS_Final'][var]
            pval = result['Pvalues_OLS_Final'][var]
            print(f"    - {var}: Coef = {coef:.6f}, Std. Err = {std_err:.6f}, P-value = {pval:.4f}")
    print("-" * 60)

#### Results for Maturity (Vertex): v_21
Best alpha (λ) found by cross-validation: 0.002383
Adjusted R squared of the final OLS model: 0.0027

  Final OLS Results:
    - optimism_0: Coef = 0.031716, Std. Err = 0.017683, P-value = 0.0729
    - optimism_1: Coef = 0.006579, Std. Err = 0.014310, P-value = 0.6457
    - optimism_2: Coef = 0.029334, Std. Err = 0.016435, P-value = 0.0743
    - optimism_3: Coef = -0.011991, Std. Err = 0.013801, P-value = 0.3850
    - optimism_4: Coef = -0.009511, Std. Err = 0.013335, P-value = 0.4757
    - optimism_5: Coef = -0.011752, Std. Err = 0.014434, P-value = 0.4156
    - dummy_minute_pub_date: Coef = 0.075715, Std. Err = 0.017353, P-value = 0.0000
------------------------------------------------------------
#### Results for Maturity (Vertex): v_63
Best alpha (λ) found by cross-validation: 0.019887
Adjusted R squared of the final OLS model: 0.0013

  Final OLS Results:
    - dummy_minute_pub_date: Coef = 0.034753, Std. Err = 0.012250, P-value = 0.0046
--

#### Regressions on uncertainty index (4 and 5)

In [16]:
# Uncertainty index: the first spreadsheet column becomes the index and the 'minute'
# column is discarded.
df_uncertainty = pd.read_excel(
    "./data/processed/index_uncertainty.xlsx", index_col=0
).drop(columns=['minute'])

# Rename the remaining columns positionally: uncertainty_0, uncertainty_1, ...
df_uncertainty.columns = [
    f"uncertainty_{position}" for position, _ in enumerate(df_uncertainty.columns)
]

# Row-over-row change of every uncertainty column (first row is NaN).
df_uncertainty_dif = df_uncertainty.diff()

In [17]:
# Dummy version of the index: 0 where the index rose or stayed flat (diff >= 0),
# 1 where it fell (diff < 0). The first row has no diff and is dropped.
# NOTE(review): a cell whose diff is NaN in any later row keeps its original index value.
df_uncertainty_bool = df_uncertainty.mask(df_uncertainty_dif >= 0, 0)
df_uncertainty_bool = df_uncertainty_bool.mask(df_uncertainty_dif < 0, 1)
df_uncertainty_bool = df_uncertainty_bool[1:]

# Regression 4 data: volatility in levels, left-joined (on the index) with the dummies.
# Dates without an index row get 0 in every uncertainty column.
df_base_reg4 = mw_volatility_5.copy()
df_uncertainty_reg_4 = df_base_reg4.join(df_uncertainty_bool)
cols_uncertainty = [col for col in df_uncertainty_reg_4.columns if 'uncertainty_' in col]
df_uncertainty_reg_4[cols_uncertainty] = df_uncertainty_reg_4[cols_uncertainty].fillna(0)

# Regression 5 data: log volatility, left-joined with the index in levels.
# Fix: the join now starts from this section's own `df_base_reg5`. It previously used
# `df_base_reg3`, a copy-paste leftover from the optimism section that left
# `df_base_reg5` unused and tied this cell to the state of another section.
df_base_reg5 = mw_log_volatility_5.copy()
df_uncertainty_reg_5 = df_base_reg5.join(df_uncertainty)
cols_uncertainty = [col for col in df_uncertainty_reg_5.columns if 'uncertainty_' in col]
df_uncertainty_reg_5[cols_uncertainty] = df_uncertainty_reg_5[cols_uncertainty].fillna(0)

In [18]:
# REGRESSION 4
# Dependent variable: rolling volatility in levels, one model per maturity.
# Candidate regressors: the columns of `df_uncertainty_bool` plus the publication-day dummy.
# Stage 1 lets LassoCV decide which regressors survive; stage 2 refits plain OLS on the
# survivors so that coefficients, standard errors and p-values can be reported.
# NOTE(review): the OLS p-values do not account for the selection step.
results_reg4 = []
possible_y = vertix_names
all_x_reg4 = [*df_uncertainty_bool.columns, "dummy_minute_pub_date"]
X_reg4 = df_uncertainty_reg_4[all_x_reg4]

# Standardise the regressors and put them back into a labelled frame.
scaler_reg4 = StandardScaler()
X_reg4_scaled = pd.DataFrame(
    scaler_reg4.fit_transform(X_reg4),
    index=X_reg4.index,
    columns=X_reg4.columns,
)

for maturity in possible_y:
    Y = df_uncertainty_reg_4[maturity]

    # Stage 1: 10-fold cross-validated Lasso; a non-zero coefficient means "selected".
    lasso_cv_model = LassoCV(cv=10, random_state=100, max_iter=10000)
    lasso_cv_model.fit(X_reg4_scaled, Y)
    selected_variables_names = [
        name
        for name, weight in zip(X_reg4.columns, lasso_cv_model.coef_)
        if weight != 0
    ]

    if not selected_variables_names:
        # Nothing survived the Lasso: record empty statistics and a zero adjusted R².
        results_reg4.append({
            'Vertix': maturity,
            'Selected_Alpha': lasso_cv_model.alpha_,
            'Coefs_OLS_Final': pd.Series(dtype=float),
            'Std_OLS_Final': pd.Series(dtype=float),
            'Pvalues_OLS_Final': pd.Series(dtype=float),
            'Adj_R_Sqrd_OLS_Final': 0
        })
        continue

    # Stage 2: OLS with intercept on the selected (still standardised) regressors.
    X_selected_scaled = X_reg4_scaled[selected_variables_names]
    X_selected_sm = sm.add_constant(X_selected_scaled)
    ols_final_model = sm.OLS(Y, X_selected_sm).fit()

    # Skip the first entry (the intercept) of each statistic.
    ols_params, ols_std, ols_pvalues = (
        pd.Series(stat[1:], index=selected_variables_names)
        for stat in (ols_final_model.params, ols_final_model.bse, ols_final_model.pvalues)
    )

    results_reg4.append({
        'Vertix': maturity,
        'Selected_Alpha': lasso_cv_model.alpha_,
        'Coefs_OLS_Final': ols_params,
        'Std_OLS_Final': ols_std,
        'Pvalues_OLS_Final': ols_pvalues,
        'Adj_R_Sqrd_OLS_Final': ols_final_model.rsquared_adj
    })


In [19]:
# Plain-text report of regression 4: chosen alpha, adjusted R² and, when any regressor
# was selected, one line per regressor with coefficient, standard error and p-value.
for result in results_reg4:
    final_coefs = result['Coefs_OLS_Final']

    print(f"#### Results for Maturity (Vertex): {result['Vertix']}")
    print(f"Best alpha (λ) found by cross-validation: {result['Selected_Alpha']:.6f}")
    print(f"Adjusted R squared of the final OLS model: {result['Adj_R_Sqrd_OLS_Final']:.4f}")

    # --- Final OLS model ---
    if final_coefs.empty:
        print("No variables selected.")
    else:
        print("\n  Final OLS Results:")
        for var, coef in final_coefs.items():
            std_err = result['Std_OLS_Final'][var]
            pval = result['Pvalues_OLS_Final'][var]
            print(f"    - {var}: Coef = {coef:.6f}, Std. Err = {std_err:.6f}, P-value = {pval:.4f}")
    print("-" * 60)

#### Results for Maturity (Vertex): v_21
Best alpha (λ) found by cross-validation: 0.000354
Adjusted R squared of the final OLS model: 0.0030

  Final OLS Results:
    - uncertainty_0: Coef = -0.000435, Std. Err = 0.000758, P-value = 0.5663
    - uncertainty_1: Coef = 0.001187, Std. Err = 0.000838, P-value = 0.1568
    - uncertainty_2: Coef = -0.000582, Std. Err = 0.000744, P-value = 0.4338
    - uncertainty_4: Coef = -0.002007, Std. Err = 0.000840, P-value = 0.0169
    - uncertainty_5: Coef = 0.000216, Std. Err = 0.000854, P-value = 0.8003
    - dummy_minute_pub_date: Coef = 0.002522, Std. Err = 0.001108, P-value = 0.0228
------------------------------------------------------------
#### Results for Maturity (Vertex): v_63
Best alpha (λ) found by cross-validation: 0.002358
Adjusted R squared of the final OLS model: 0.0000
No variables selected.
------------------------------------------------------------
#### Results for Maturity (Vertex): v_126
Best alpha (λ) found by cross-validation

In [20]:
# REGRESSION 5
# Dependent variable: the maturity columns of `df_uncertainty_reg_5`, one model per maturity.
# Candidate regressors: the uncertainty index columns (levels) plus the publication-day dummy.
# Same two-stage procedure as regression 4: LassoCV selects, OLS is refit on the survivors.
# NOTE(review): `possible_y` is not set in this cell; it is whatever an earlier cell left.
results_reg5 = []

all_x_reg5 = [*df_uncertainty.columns, "dummy_minute_pub_date"]
X_reg5 = df_uncertainty_reg_5[all_x_reg5]

# Standardise the regressors and put them back into a labelled frame.
scaler_reg5 = StandardScaler()
X_reg5_scaled = pd.DataFrame(
    scaler_reg5.fit_transform(X_reg5),
    index=X_reg5.index,
    columns=X_reg5.columns,
)

for maturity in possible_y:
    Y = df_uncertainty_reg_5[maturity]

    # Stage 1: 10-fold cross-validated Lasso; a non-zero coefficient means "selected".
    lasso_cv_model = LassoCV(cv=10, random_state=100, max_iter=10000)
    lasso_cv_model.fit(X_reg5_scaled, Y)
    selected_variables_names = [
        name
        for name, weight in zip(X_reg5.columns, lasso_cv_model.coef_)
        if weight != 0
    ]

    if not selected_variables_names:
        # Nothing survived the Lasso: record empty statistics and a zero adjusted R².
        results_reg5.append({
            'Vertix': maturity,
            'Selected_Alpha': lasso_cv_model.alpha_,
            'Coefs_OLS_Final': pd.Series(dtype=float),
            'Std_OLS_Final': pd.Series(dtype=float),
            'Pvalues_OLS_Final': pd.Series(dtype=float),
            'Adj_R_Sqrd_OLS_Final': 0
        })
        continue

    # Stage 2: OLS with intercept on the selected (still standardised) regressors.
    X_selected_scaled = X_reg5_scaled[selected_variables_names]
    X_selected_sm = sm.add_constant(X_selected_scaled)
    ols_final_model = sm.OLS(Y, X_selected_sm).fit()

    # Skip the first entry (the intercept) of each statistic.
    ols_params, ols_std, ols_pvalues = (
        pd.Series(stat[1:], index=selected_variables_names)
        for stat in (ols_final_model.params, ols_final_model.bse, ols_final_model.pvalues)
    )

    results_reg5.append({
        'Vertix': maturity,
        'Selected_Alpha': lasso_cv_model.alpha_,
        'Coefs_OLS_Final': ols_params,
        'Std_OLS_Final': ols_std,
        'Pvalues_OLS_Final': ols_pvalues,
        'Adj_R_Sqrd_OLS_Final': ols_final_model.rsquared_adj
    })

In [21]:
# Plain-text report of regression 5: chosen alpha, adjusted R² and, when any regressor
# was selected, one line per regressor with coefficient, standard error and p-value.
for result in results_reg5:
    final_coefs = result['Coefs_OLS_Final']

    print(f"#### Results for Maturity (Vertex): {result['Vertix']}")
    print(f"Best alpha (λ) found by cross-validation: {result['Selected_Alpha']:.6f}")
    print(f"Adjusted R squared of the final OLS model: {result['Adj_R_Sqrd_OLS_Final']:.4f}")

    # --- Final OLS model ---
    if final_coefs.empty:
        print("No variables selected.")
    else:
        print("\n  Final OLS Results:")
        for var, coef in final_coefs.items():
            std_err = result['Std_OLS_Final'][var]
            pval = result['Pvalues_OLS_Final'][var]
            print(f"    - {var}: Coef = {coef:.6f}, Std. Err = {std_err:.6f}, P-value = {pval:.4f}")
    print("-" * 60)

#### Results for Maturity (Vertex): v_21
Best alpha (λ) found by cross-validation: 0.001270
Adjusted R squared of the final OLS model: 0.0089

  Final OLS Results:
    - uncertainty_1: Coef = -0.029833, Std. Err = 0.019913, P-value = 0.1342
    - uncertainty_2: Coef = -0.020764, Std. Err = 0.018663, P-value = 0.2659
    - uncertainty_3: Coef = -0.020126, Std. Err = 0.034213, P-value = 0.5564
    - uncertainty_4: Coef = 0.009525, Std. Err = 0.019884, P-value = 0.6319
    - uncertainty_5: Coef = 0.181023, Std. Err = 0.031064, P-value = 0.0000
    - dummy_minute_pub_date: Coef = -0.072410, Std. Err = 0.032948, P-value = 0.0280
------------------------------------------------------------
#### Results for Maturity (Vertex): v_63
Best alpha (λ) found by cross-validation: 0.002536
Adjusted R squared of the final OLS model: 0.0073

  Final OLS Results:
    - uncertainty_1: Coef = -0.056553, Std. Err = 0.018411, P-value = 0.0021
    - uncertainty_2: Coef = -0.010803, Std. Err = 0.017348, P-valu