In [136]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [137]:
futures_ir = pd.read_excel("./data/future_interest_rates/future_interest_rates.xlsx", index_col="Date")
futures_ir.index = pd.to_datetime(futures_ir.index)
futures_ir.sort_index(inplace=True)

futures_ir_returns = futures_ir.pct_change().dropna()

In [138]:
minutes_info = pd.read_excel("./data/raw/minutes_info.xlsx")
minutes_info = minutes_info[minutes_info["DataReferencia"] >= "2003-06-17"]
minutes_info = minutes_info.sort_values("DataReferencia", ascending=True)

dates_ref = minutes_info["DataReferencia"]
detes_pub = minutes_info["DataPublicacao"]

In [139]:
returns_pub_date = futures_ir_returns[futures_ir_returns.index.isin(detes_pub)]

#### MA Volatility (5 bd)

In [140]:
def calc_vol(retornos: pd.Series) -> float:
    vol = retornos.std() * np.sqrt(252)
    return vol


mw_volatility_5 = futures_ir_returns.rolling(5).apply(calc_vol)
mw_volatility_5 = mw_volatility_5.dropna()

mw_log_volatility_5 = np.log(mw_volatility_5)


In [141]:
mw_volatility_5["dummy_minute_pub_date"] = 0
mw_volatility_5.loc[mw_volatility_5.index.isin(detes_pub), "dummy_minute_pub_date"] = 1

mw_log_volatility_5["dummy_minute_pub_date"] = 0
mw_log_volatility_5.loc[mw_log_volatility_5.index.isin(detes_pub), "dummy_minute_pub_date"] = 1

#### Regression on optimism index

In [142]:
df_optimism = pd.read_excel("./data/processed/index_optimism.xlsx", index_col=0)
df_optimism = df_optimism.drop(columns=['filename'])
df_optimism.columns = [f"optimism_{i}" for i in range(len(df_optimism.columns))]

df_optimism_dif = df_optimism.diff()

In [143]:
df_optimism_reg_1 = mw_volatility_5.copy()

df_optimism_bool = df_optimism.mask(df_optimism_dif >= 0, 0)
df_optimism_bool = df_optimism_bool.mask(df_optimism_dif < 0, 1)
df_optimism_bool = df_optimism_bool[1:]
df_optimism_reg_2 = pd.concat([mw_volatility_5, df_optimism_bool], axis=1)
df_optimism_reg_2 = df_optimism_reg_2.dropna()

df_optimism_reg_3 = pd.concat([mw_log_volatility_5, df_optimism], axis=1)
df_optimism_reg_3 = df_optimism_reg_3.dropna()

df_opt_x_bool = df_optimism_bool.multiply(df_optimism)
df_optimism_reg_4 = pd.concat([mw_log_volatility_5, df_opt_x_bool], axis=1) #DIFERENTE
df_optimism_reg_4 = df_optimism_reg_4.dropna()


In [144]:
# REG 1

results_list = []
possible_y = returns_pub_date.columns.tolist()

for maturity in possible_y:
    Y = df_optimism_reg_1[maturity]

    X = df_optimism_reg_1["dummy_minute_pub_date"]
    X = sm.add_constant(X)
    
    model = sm.OLS(Y, X).fit()

    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })

"""
for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")
"""


'\nfor i in results_list:\n    print(i["Betas"])\n    print("\n")\n    print(i["P-values"])\n    print("\n")\n    print(i["R_sqrd_adj"])\n    print("\n\n")\n    print("#####################################")\n'

In [145]:
# REG 2

results_list = []

initial_X = df_optimism_reg_2[df_optimism.columns]
initial_X["dummy_minute_pub_date"] = df_optimism_reg_2["dummy_minute_pub_date"]

possible_y = returns_pub_date.columns.tolist()

for maturity in possible_y:
    Y = df_optimism_reg_2[maturity]
    X = initial_X.copy()

    p_value_threshold = 0.10

    while True:
        X = sm.add_constant(X)
        
        model = sm.OLS(Y, X).fit()

        pvalues = model.pvalues.drop('const')
        if pvalues.empty:
            break

        max_pvalue = pvalues.max()
        if max_pvalue > p_value_threshold:
            variable_to_remove = pvalues.idxmax()
            X = X.drop(variable_to_remove, axis=1)
        else:
            break
        
    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })


"""
for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")
"""

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  initial_X["dummy_minute_pub_date"] = df_optimism_reg_2["dummy_minute_pub_date"]


'\nfor i in results_list:\n    print(i["Betas"])\n    print("\n")\n    print(i["P-values"])\n    print("\n")\n    print(i["R_sqrd_adj"])\n    print("\n\n")\n    print("#####################################")\n'

In [146]:
# REG 3

results_list = []

initial_X = df_optimism_reg_3[df_optimism.columns]
initial_X["dummy_minute_pub_date"] = df_optimism_reg_3["dummy_minute_pub_date"]

possible_y = returns_pub_date.columns.tolist()

for maturity in possible_y:
    Y = df_optimism_reg_3[maturity]
    X = initial_X.copy()

    p_value_threshold = 0.10

    while True:
        X = sm.add_constant(X)
        
        model = sm.OLS(Y, X).fit()

        pvalues = model.pvalues.drop('const')
        if pvalues.empty:
            break

        max_pvalue = pvalues.max()
        if max_pvalue > p_value_threshold:
            variable_to_remove = pvalues.idxmax()
            X = X.drop(variable_to_remove, axis=1)
        else:
            break
        
    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })


"""
for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")
"""


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  initial_X["dummy_minute_pub_date"] = df_optimism_reg_3["dummy_minute_pub_date"]


'\nfor i in results_list:\n    print(i["Betas"])\n    print("\n")\n    print(i["P-values"])\n    print("\n")\n    print(i["R_sqrd_adj"])\n    print("\n\n")\n    print("#####################################")\n'

In [147]:
# REG 4

results_list = []

initial_X = df_optimism_reg_4[df_optimism.columns]
initial_X["dummy_minute_pub_date"] = df_optimism_reg_4["dummy_minute_pub_date"]

possible_y = returns_pub_date.columns.tolist()
for maturity in possible_y:
    Y = df_optimism_reg_4[maturity]
    X = initial_X.copy()

    p_value_threshold = 0.10

    while True:
        X = sm.add_constant(X)
        
        model = sm.OLS(Y, X).fit()

        pvalues = model.pvalues.drop('const')
        if pvalues.empty:
            break

        max_pvalue = pvalues.max()
        if max_pvalue > p_value_threshold:
            variable_to_remove = pvalues.idxmax()
            X = X.drop(variable_to_remove, axis=1)
        else:
            break
        
    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })


"""
for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")
"""


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  initial_X["dummy_minute_pub_date"] = df_optimism_reg_4["dummy_minute_pub_date"]


'\nfor i in results_list:\n    print(i["Betas"])\n    print("\n")\n    print(i["P-values"])\n    print("\n")\n    print(i["R_sqrd_adj"])\n    print("\n\n")\n    print("#####################################")\n'

#### Regression on uncertainty index

In [148]:
df_uncertainty = pd.read_excel("./data/processed/index_uncertainty.xlsx", index_col=0)
df_uncertainty = df_uncertainty.drop(columns=['filename'])
df_uncertainty.columns = [f"optimism_{i}" for i in range(len(df_uncertainty.columns))]

df_uncertainty_dif = df_uncertainty.diff()

In [149]:
df_uncertainty_reg_1 = mw_volatility_5.copy()

df_uncertainty_bool = df_uncertainty.mask(df_uncertainty_dif >= 0, 0)
df_uncertainty_bool = df_uncertainty_bool.mask(df_uncertainty_dif < 0, 1)
df_uncertainty_bool = df_uncertainty_bool[1:]
df_uncertainty_reg_2 = pd.concat([mw_volatility_5, df_uncertainty_bool], axis=1)
df_uncertainty_reg_2 = df_uncertainty_reg_2.dropna()

df_uncertainty_reg_3 = pd.concat([mw_log_volatility_5, df_uncertainty], axis=1)
df_uncertainty_reg_3 = df_uncertainty_reg_3.dropna()

df_opt_x_bool = df_uncertainty_bool.multiply(df_uncertainty)
df_uncertainty_reg_4 = pd.concat([mw_log_volatility_5, df_opt_x_bool], axis=1) #DIFERENTE
df_uncertainty_reg_4 = df_uncertainty_reg_4.dropna()


In [150]:
# REG 1

results_list = []
possible_y = returns_pub_date.columns.tolist()

for maturity in possible_y:
    Y = df_uncertainty_reg_1[maturity]

    X = df_uncertainty_reg_1["dummy_minute_pub_date"]
    X = sm.add_constant(X)
    
    model = sm.OLS(Y, X).fit()

    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })

"""
for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")
"""


'\nfor i in results_list:\n    print(i["Betas"])\n    print("\n")\n    print(i["P-values"])\n    print("\n")\n    print(i["R_sqrd_adj"])\n    print("\n\n")\n    print("#####################################")\n'

In [151]:
# REG 2

results_list = []

initial_X = df_uncertainty_reg_2[df_uncertainty.columns]
initial_X["dummy_minute_pub_date"] = df_uncertainty_reg_2["dummy_minute_pub_date"]

possible_y = returns_pub_date.columns.tolist()

for maturity in possible_y:
    Y = df_uncertainty_reg_2[maturity]
    X = initial_X.copy()

    p_value_threshold = 0.10

    while True:
        X = sm.add_constant(X)
        
        model = sm.OLS(Y, X).fit()

        pvalues = model.pvalues.drop('const')
        if pvalues.empty:
            break

        max_pvalue = pvalues.max()
        if max_pvalue > p_value_threshold:
            variable_to_remove = pvalues.idxmax()
            X = X.drop(variable_to_remove, axis=1)
        else:
            break
        
    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })

"""
for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")
"""


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  initial_X["dummy_minute_pub_date"] = df_uncertainty_reg_2["dummy_minute_pub_date"]


'\nfor i in results_list:\n    print(i["Betas"])\n    print("\n")\n    print(i["P-values"])\n    print("\n")\n    print(i["R_sqrd_adj"])\n    print("\n\n")\n    print("#####################################")\n'

In [152]:
# REG 3
# GREAT RESULTS HERE

results_list = []

initial_X = df_uncertainty_reg_3[df_uncertainty.columns]
initial_X["dummy_minute_pub_date"] = df_uncertainty_reg_3["dummy_minute_pub_date"]

possible_y = returns_pub_date.columns.tolist()

for maturity in possible_y:
    Y = df_uncertainty_reg_3[maturity]
    X = initial_X.copy()

    p_value_threshold = 0.10

    while True:
        X = sm.add_constant(X)
        
        model = sm.OLS(Y, X).fit()

        pvalues = model.pvalues.drop('const')
        if pvalues.empty:
            break

        max_pvalue = pvalues.max()
        if max_pvalue > p_value_threshold:
            variable_to_remove = pvalues.idxmax()
            X = X.drop(variable_to_remove, axis=1)
        else:
            break
        
    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })


"""
for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")
"""

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  initial_X["dummy_minute_pub_date"] = df_uncertainty_reg_3["dummy_minute_pub_date"]


'\nfor i in results_list:\n    print(i["Betas"])\n    print("\n")\n    print(i["P-values"])\n    print("\n")\n    print(i["R_sqrd_adj"])\n    print("\n\n")\n    print("#####################################")\n'

In [None]:
# REG 4

results_list = []

initial_X = df_uncertainty_reg_4[df_uncertainty.columns]
initial_X["dummy_minute_pub_date"] = df_uncertainty_reg_4["dummy_minute_pub_date"]

possible_y = returns_pub_date.columns.tolist()
for maturity in possible_y:
    Y = df_uncertainty_reg_4[maturity]
    X = initial_X.copy()

    p_value_threshold = 0.10

    while True:
        X = sm.add_constant(X)
        
        model = sm.OLS(Y, X).fit()

        pvalues = model.pvalues.drop('const')
        if pvalues.empty:
            break

        max_pvalue = pvalues.max()
        if max_pvalue > p_value_threshold:
            variable_to_remove = pvalues.idxmax()
            X = X.drop(variable_to_remove, axis=1)
        else:
            break
        
    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })


"""
for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")
"""

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  initial_X["dummy_minute_pub_date"] = df_uncertainty_reg_4["dummy_minute_pub_date"]


'\nfor i in results_list:\n    print(i["Betas"])\n    print("\n")\n    print(i["P-values"])\n    print("\n")\n    print(i["R_sqrd_adj"])\n    print("\n\n")\n    print("#####################################")\n'