In [1]:
import pandas as pd
import statsmodels.api as sm
import numpy as np

In [2]:
# Read the futures interest-rate table (indexed by its "Date" column), coerce
# the index to datetimes and order the rows by that index.
futures_ir = pd.read_excel(
    "./data/future_interest_rates/future_interest_rates.xlsx",
    index_col="Date",
)
futures_ir.index = pd.to_datetime(futures_ir.index)
futures_ir = futures_ir.sort_index()

# Row-over-row percentage change of every column; any row that still holds a
# missing value afterwards (the first row always does) is discarded.
futures_ir_returns = futures_ir.pct_change().dropna()

In [3]:
# Load the minutes metadata, keep the rows whose `DataReferencia` is on or
# after 2003-06-17, and order them by that same column (ascending).
minutes_info = (
    pd.read_excel("./data/raw/minutes_info.xlsx")
    .loc[lambda frame: frame["DataReferencia"] >= "2003-06-17"]
    .sort_values(by="DataReferencia")
)

# Convenience handles on the two date columns.
# NOTE: `detes_pub` (sic) is spelled this way because a later cell reads it
# under exactly this name.
dates_ref = minutes_info["DataReferencia"]
detes_pub = minutes_info["DataPublicacao"]

In [4]:
# Keep only the return rows whose index value appears in `detes_pub`.
is_publication_day = futures_ir_returns.index.isin(detes_pub)
returns_pub_date = futures_ir_returns.loc[is_publication_day]

#### Regression on optimism index

In [5]:
# Load the optimism indices (first spreadsheet column becomes the index),
# discard the `filename` column and give the remaining columns positional
# names: optimism_0, optimism_1, ...
df_optimism = pd.read_excel(
    "./data/processed/index_optimism.xlsx", index_col=0
).drop(columns=["filename"])
df_optimism.columns = [
    f"optimism_{position}" for position, _ in enumerate(df_optimism.columns)
]

In [6]:
# Put returns and optimism indices side by side (aligned on their indexes)
# and keep only the rows that are complete in every column.
regression_data = pd.concat(
    [returns_pub_date, df_optimism], axis="columns"
).dropna()

# Candidate dependent variables (one per return column) and the full set of
# explanatory variables.
possible_y = returns_pub_date.columns.to_list()
all_x = df_optimism.columns.to_list()

In [7]:
# Backward elimination, one regression per return column:
#   1. regress the return on an intercept plus every optimism index;
#   2. while the largest non-intercept p-value exceeds `p_value_threshold`,
#      drop that regressor and refit;
#   3. record the final model only if at least one regressor survived.
results_list = []

# Loop-invariant, so it is set once instead of once per maturity.
p_value_threshold = 0.10

initial_X = regression_data[all_x]

for maturity in possible_y:
    Y = regression_data[maturity]

    # Add the intercept once per maturity (add_constant returns a new frame, so
    # `initial_X` is left untouched). has_constant="add" guarantees that a
    # 'const' column exists even if one of the regressors is itself constant;
    # with the default ("skip") it would be omitted and `.drop('const')` below
    # would raise KeyError.
    X = sm.add_constant(initial_X, has_constant="add")

    while True:
        model = sm.OLS(Y, X).fit()

        # Only the explanatory variables compete for removal, never the intercept.
        pvalues = model.pvalues.drop('const')
        if pvalues.empty:
            break

        max_pvalue = pvalues.max()
        if max_pvalue > p_value_threshold:
            variable_to_remove = pvalues.idxmax()
            X = X.drop(variable_to_remove, axis=1)
        else:
            break

    # `X` always contains 'const', so testing `len(X.columns) > 0` can never be
    # false. Count the surviving regressors instead, so intercept-only models
    # are not reported as significant results.
    surviving_regressors = X.columns.drop('const')
    if len(surviving_regressors) > 0:
        results_list.append({
            'Vencimento (d.u.)': maturity,
            'Coeficiente (β)': model.params[X.columns],
            'P-valor': model.pvalues[X.columns],
            'R² Ajustado': model.rsquared_adj
        })

# `results_list` is rebound by the regression cell further down; keep a second
# handle on this list so these results stay reachable afterwards.
results_optimism = results_list

#### Regression on uncertainty index

In [8]:
# Load the uncertainty indices (first spreadsheet column becomes the index),
# discard the `filename` column and give the remaining columns positional
# names: uncertainty_0, uncertainty_1, ...
df_uncertainty = pd.read_excel(
    "./data/processed/index_uncertainty.xlsx", index_col=0
).drop(columns=["filename"])
df_uncertainty.columns = [
    f"uncertainty_{position}" for position, _ in enumerate(df_uncertainty.columns)
]

In [9]:
# Put returns and uncertainty indices side by side (aligned on their indexes)
# and keep only the rows that are complete in every column.
regression_data = pd.concat(
    [returns_pub_date, df_uncertainty], axis="columns"
).dropna()

# Candidate dependent variables (one per return column) and the full set of
# explanatory variables.
possible_y = returns_pub_date.columns.to_list()
all_x = df_uncertainty.columns.to_list()

In [10]:
# Backward elimination, one regression per return column:
#   1. regress the return on an intercept plus every uncertainty index;
#   2. while the largest non-intercept p-value exceeds `p_value_threshold`,
#      drop that regressor and refit;
#   3. record the final model only if at least one regressor survived.
results_list = []

# Loop-invariant, so it is set once instead of once per maturity.
p_value_threshold = 0.10

initial_X = regression_data[all_x]

for maturity in possible_y:
    Y = regression_data[maturity]

    # Add the intercept once per maturity (add_constant returns a new frame, so
    # `initial_X` is left untouched). has_constant="add" guarantees that a
    # 'const' column exists even if one of the regressors is itself constant;
    # with the default ("skip") it would be omitted and `.drop('const')` below
    # would raise KeyError.
    X = sm.add_constant(initial_X, has_constant="add")

    while True:
        model = sm.OLS(Y, X).fit()

        # Only the explanatory variables compete for removal, never the intercept.
        pvalues = model.pvalues.drop('const')
        if pvalues.empty:
            break

        max_pvalue = pvalues.max()
        if max_pvalue > p_value_threshold:
            variable_to_remove = pvalues.idxmax()
            X = X.drop(variable_to_remove, axis=1)
        else:
            break

    # `X` always contains 'const', so testing `len(X.columns) > 0` can never be
    # false. Count the surviving regressors instead, so intercept-only models
    # are not reported as significant results.
    surviving_regressors = X.columns.drop('const')
    if len(surviving_regressors) > 0:
        results_list.append({
            'Vencimento (d.u.)': maturity,
            'Coeficiente (β)': model.params[X.columns],
            'P-valor': model.pvalues[X.columns],
            'R² Ajustado': model.rsquared_adj
        })

# Expose the outcome under a dedicated name as well, so that reusing the
# generic `results_list` elsewhere cannot silently discard it.
results_uncertainty = results_list