In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [2]:
futures_ir = pd.read_excel("./data/future_interest_rates/future_interest_rates.xlsx", index_col="Date")
futures_ir.index = pd.to_datetime(futures_ir.index)
futures_ir.sort_index(inplace=True)

futures_ir_returns = futures_ir.pct_change().dropna()

In [3]:
minutes_info = pd.read_excel("./data/raw/minutes_info.xlsx")
minutes_info = minutes_info[minutes_info["DataReferencia"] >= "2003-06-17"]
minutes_info = minutes_info.sort_values("DataReferencia", ascending=True)

dates_ref = minutes_info["DataReferencia"]
detes_pub = minutes_info["DataPublicacao"]

In [4]:
returns_pub_date = futures_ir_returns[futures_ir_returns.index.isin(detes_pub)]

#### MA Volatility (5 bd)

In [5]:
def calc_vol(retornos: pd.Series) -> float:
    vol = retornos.std() * np.sqrt(252)
    return vol


mw_volatility_5 = futures_ir_returns.rolling(5).apply(calc_vol)
mw_volatility_5 = mw_volatility_5.dropna()

mw_log_volatility_5 = np.log(mw_volatility_5)


In [6]:
mw_volatility_5["dummy_minute_pub_date"] = 0
mw_volatility_5.loc[mw_volatility_5.index.isin(detes_pub), "dummy_minute_pub_date"] = 1

mw_log_volatility_5["dummy_minute_pub_date"] = 0
mw_log_volatility_5.loc[mw_log_volatility_5.index.isin(detes_pub), "dummy_minute_pub_date"] = 1

#### Regression on optimism index

In [7]:
df_optimism = pd.read_excel("./data/processed/index_optimism.xlsx", index_col=0)
df_optimism = df_optimism.drop(columns=['filename'])
df_optimism.columns = [f"optimism_{i}" for i in range(len(df_optimism.columns))]

df_optimism_dif = df_optimism.diff()

In [8]:
df_optimism_reg_1 = mw_volatility_5.copy()

df_optimism_bool = df_optimism.mask(df_optimism_dif >= 0, 0)
df_optimism_bool = df_optimism_bool.mask(df_optimism_dif < 0, 1)
df_optimism_bool = df_optimism_bool[1:]
df_optimism_reg_2 = pd.concat([mw_volatility_5, df_optimism_bool], axis=1)
df_optimism_reg_2 = df_optimism_reg_2.dropna()

df_optimism_reg_3 = pd.concat([mw_log_volatility_5, df_optimism], axis=1)
df_optimism_reg_3 = df_optimism_reg_3.dropna()

df_opt_x_bool = df_optimism_bool.multiply(df_optimism)
df_optimism_reg_4 = pd.concat([mw_log_volatility_5, df_opt_x_bool], axis=1) #DIFERENTE
df_optimism_reg_4 = df_optimism_reg_4.dropna()


In [9]:
# REG 1

results_list = []
possible_y = returns_pub_date.columns.tolist()

for maturity in possible_y:
    Y = df_optimism_reg_1[maturity]

    X = df_optimism_reg_1["dummy_minute_pub_date"]
    X = sm.add_constant(X)
    
    model = sm.OLS(Y, X).fit()

    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })


for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")


const                    0.027774
dummy_minute_pub_date    0.010449
dtype: float64


const                    0.000000
dummy_minute_pub_date    0.001691
dtype: float64


0.0015951776161778897



#####################################
const                    0.047433
dummy_minute_pub_date    0.010899
dtype: float64


const                    0.000000
dummy_minute_pub_date    0.043195
dtype: float64


0.0005563776356660721



#####################################
const                    0.079308
dummy_minute_pub_date    0.017865
dtype: float64


const                    0.000000
dummy_minute_pub_date    0.023203
dtype: float64


0.0007482883762447434



#####################################
const                    0.120733
dummy_minute_pub_date    0.018545
dtype: float64


const                    0.000000
dummy_minute_pub_date    0.075552
dtype: float64


0.0003889041450746422



#####################################
const                    0.154950
dummy_minute_pub_date    0.019478


In [10]:
# REG 2

results_list = []

initial_X = df_optimism_reg_2[df_optimism.columns]
initial_X["dummy_minute_pub_date"] = df_optimism_reg_2["dummy_minute_pub_date"]

possible_y = returns_pub_date.columns.tolist()

for maturity in possible_y:
    Y = df_optimism_reg_2[maturity]
    X = initial_X.copy()

    p_value_threshold = 0.10

    while True:
        X = sm.add_constant(X)
        
        model = sm.OLS(Y, X).fit()

        pvalues = model.pvalues.drop('const')
        if pvalues.empty:
            break

        max_pvalue = pvalues.max()
        if max_pvalue > p_value_threshold:
            variable_to_remove = pvalues.idxmax()
            X = X.drop(variable_to_remove, axis=1)
        else:
            break
        
    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })



for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")


const         0.051613
optimism_1   -0.018656
optimism_6   -0.021731
dtype: float64


const         6.903284e-13
optimism_1    5.151961e-02
optimism_6    4.041549e-02
dtype: float64


0.032720775110091194



#####################################
const         0.066281
optimism_6   -0.029051
dtype: float64


const         8.379519e-15
optimism_6    5.531887e-02
dtype: float64


0.014331770397926435



#####################################
const         0.120686
optimism_5   -0.035469
optimism_6   -0.037586
dtype: float64


const         4.080790e-20
optimism_5    5.378506e-02
optimism_6    6.009739e-02
dtype: float64


0.036883820837596115



#####################################
const         0.175071
optimism_5   -0.058245
optimism_7   -0.043866
dtype: float64


const         2.236733e-27
optimism_5    4.283548e-03
optimism_7    3.705145e-02
dtype: float64


0.059971527562936444



#####################################
const         0.214573
optimism_5   -0.062109
optimism_7   -0.0524

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  initial_X["dummy_minute_pub_date"] = df_optimism_reg_2["dummy_minute_pub_date"]


In [11]:
# REG 3

results_list = []

initial_X = df_optimism_reg_3[df_optimism.columns]
initial_X["dummy_minute_pub_date"] = df_optimism_reg_3["dummy_minute_pub_date"]

possible_y = returns_pub_date.columns.tolist()

for maturity in possible_y:
    Y = df_optimism_reg_3[maturity]
    X = initial_X.copy()

    p_value_threshold = 0.10

    while True:
        X = sm.add_constant(X)
        
        model = sm.OLS(Y, X).fit()

        pvalues = model.pvalues.drop('const')
        if pvalues.empty:
            break

        max_pvalue = pvalues.max()
        if max_pvalue > p_value_threshold:
            variable_to_remove = pvalues.idxmax()
            X = X.drop(variable_to_remove, axis=1)
        else:
            break
        
    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })



for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")



const        -3.542844
optimism_2   -9.092319
dtype: float64


const         9.275032e-82
optimism_2    7.605184e-06
dtype: float64


0.09754685634587679



#####################################
const        -3.100641
optimism_2   -6.009869
dtype: float64


const         2.238498e-81
optimism_2    6.711870e-04
dtype: float64


0.055401050420678



#####################################
const        -2.445574
optimism_2   -6.979859
dtype: float64


const         1.349228e-70
optimism_2    2.278317e-05
dtype: float64


0.08730528328700893



#####################################
const        -1.956972
optimism_2   -7.930460
dtype: float64


const         3.721540e-64
optimism_2    8.091625e-08
dtype: float64


0.13912156383490226



#####################################
const        -1.756111
optimism_0   -4.371397
optimism_2   -7.354123
optimism_7   -8.956278
dtype: float64


const         1.376801e-68
optimism_0    5.788541e-02
optimism_2    4.029277e-07
optimism_7    2.582023e-02
dtype

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  initial_X["dummy_minute_pub_date"] = df_optimism_reg_3["dummy_minute_pub_date"]


In [12]:
# REG 4

results_list = []

initial_X = df_optimism_reg_4[df_optimism.columns]
initial_X["dummy_minute_pub_date"] = df_optimism_reg_4["dummy_minute_pub_date"]

possible_y = returns_pub_date.columns.tolist()
for maturity in possible_y:
    Y = df_optimism_reg_4[maturity]
    X = initial_X.copy()

    p_value_threshold = 0.10

    while True:
        X = sm.add_constant(X)
        
        model = sm.OLS(Y, X).fit()

        pvalues = model.pvalues.drop('const')
        if pvalues.empty:
            break

        max_pvalue = pvalues.max()
        if max_pvalue > p_value_threshold:
            variable_to_remove = pvalues.idxmax()
            X = X.drop(variable_to_remove, axis=1)
        else:
            break
        
    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })



for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")



const         -3.819581
optimism_7    14.126117
dtype: float64


const         3.095666e-103
optimism_7     5.272086e-02
dtype: float64


0.014758335620363772



#####################################
const         -3.380146
optimism_3   -23.728233
dtype: float64


const         1.555290e-111
optimism_3     6.290662e-03
dtype: float64


0.03427934167167479



#####################################
const         -2.690788
optimism_3   -15.679154
optimism_7    10.753679
dtype: float64


const         4.689568e-90
optimism_3    5.633056e-02
optimism_7    6.842478e-02
dtype: float64


0.03203418176553818



#####################################
const         -2.228431
optimism_0    -9.867275
optimism_7    12.072357
dtype: float64


const         2.441357e-85
optimism_0    2.654051e-02
optimism_7    2.276194e-02
dtype: float64


0.044384106348338004



#####################################
const         -2.025988
optimism_0    -9.633993
optimism_3   -11.123409
dtype: float64


const         3

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  initial_X["dummy_minute_pub_date"] = df_optimism_reg_4["dummy_minute_pub_date"]


#### Regression on uncertainty index

In [13]:
df_uncertainty = pd.read_excel("./data/processed/index_uncertainty.xlsx", index_col=0)
df_uncertainty = df_uncertainty.drop(columns=['filename'])
df_uncertainty.columns = [f"optimism_{i}" for i in range(len(df_uncertainty.columns))]

df_uncertainty_dif = df_uncertainty.diff()

In [14]:
df_uncertainty_reg_1 = mw_volatility_5.copy()

df_uncertainty_bool = df_uncertainty.mask(df_uncertainty_dif >= 0, 0)
df_uncertainty_bool = df_uncertainty_bool.mask(df_uncertainty_dif < 0, 1)
df_uncertainty_bool = df_uncertainty_bool[1:]
df_uncertainty_reg_2 = pd.concat([mw_volatility_5, df_uncertainty_bool], axis=1)
df_uncertainty_reg_2 = df_uncertainty_reg_2.dropna()

df_uncertainty_reg_3 = pd.concat([mw_log_volatility_5, df_uncertainty], axis=1)
df_uncertainty_reg_3 = df_uncertainty_reg_3.dropna()

df_opt_x_bool = df_uncertainty_bool.multiply(df_uncertainty)
df_uncertainty_reg_4 = pd.concat([mw_log_volatility_5, df_opt_x_bool], axis=1) #DIFERENTE
df_uncertainty_reg_4 = df_uncertainty_reg_4.dropna()


In [15]:
# REG 1

results_list = []
possible_y = returns_pub_date.columns.tolist()

for maturity in possible_y:
    Y = df_uncertainty_reg_1[maturity]

    X = df_uncertainty_reg_1["dummy_minute_pub_date"]
    X = sm.add_constant(X)
    
    model = sm.OLS(Y, X).fit()

    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })


for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")



const                    0.027774
dummy_minute_pub_date    0.010449
dtype: float64


const                    0.000000
dummy_minute_pub_date    0.001691
dtype: float64


0.0015951776161778897



#####################################
const                    0.047433
dummy_minute_pub_date    0.010899
dtype: float64


const                    0.000000
dummy_minute_pub_date    0.043195
dtype: float64


0.0005563776356660721



#####################################
const                    0.079308
dummy_minute_pub_date    0.017865
dtype: float64


const                    0.000000
dummy_minute_pub_date    0.023203
dtype: float64


0.0007482883762447434



#####################################
const                    0.120733
dummy_minute_pub_date    0.018545
dtype: float64


const                    0.000000
dummy_minute_pub_date    0.075552
dtype: float64


0.0003889041450746422



#####################################
const                    0.154950
dummy_minute_pub_date    0.019478


In [16]:
# REG 2

results_list = []

initial_X = df_uncertainty_reg_2[df_uncertainty.columns]
initial_X["dummy_minute_pub_date"] = df_uncertainty_reg_2["dummy_minute_pub_date"]

possible_y = returns_pub_date.columns.tolist()

for maturity in possible_y:
    Y = df_uncertainty_reg_2[maturity]
    X = initial_X.copy()

    p_value_threshold = 0.10

    while True:
        X = sm.add_constant(X)
        
        model = sm.OLS(Y, X).fit()

        pvalues = model.pvalues.drop('const')
        if pvalues.empty:
            break

        max_pvalue = pvalues.max()
        if max_pvalue > p_value_threshold:
            variable_to_remove = pvalues.idxmax()
            X = X.drop(variable_to_remove, axis=1)
        else:
            break
        
    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })


for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")



const         0.045784
optimism_7   -0.026831
dtype: float64


const         2.882500e-14
optimism_7    1.035197e-02
dtype: float64


0.029625537515316935



#####################################
const         0.067536
optimism_7   -0.031805
dtype: float64


const         4.924320e-15
optimism_7    3.254628e-02
dtype: float64


0.019086689389539502



#####################################
const         0.109911
optimism_7   -0.043965
dtype: float64


const         6.158608e-21
optimism_7    2.375672e-02
dtype: float64


0.021952837610450415



#####################################
const         0.155406
optimism_7   -0.055778
dtype: float64


const         7.940155e-29
optimism_7    1.125938e-02
dtype: float64


0.02884376321001958



#####################################
const         0.191212
optimism_6   -0.057499
dtype: float64


const         1.628649e-23
optimism_6    6.502323e-02
dtype: float64


0.012906399063145835



#####################################


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  initial_X["dummy_minute_pub_date"] = df_uncertainty_reg_2["dummy_minute_pub_date"]


In [17]:
# REG 3
# GREAT RESULTS HERE

results_list = []

initial_X = df_uncertainty_reg_3[df_uncertainty.columns]
initial_X["dummy_minute_pub_date"] = df_uncertainty_reg_3["dummy_minute_pub_date"]

possible_y = returns_pub_date.columns.tolist()

for maturity in possible_y:
    Y = df_uncertainty_reg_3[maturity]
    X = initial_X.copy()

    p_value_threshold = 0.10

    while True:
        X = sm.add_constant(X)
        
        model = sm.OLS(Y, X).fit()

        pvalues = model.pvalues.drop('const')
        if pvalues.empty:
            break

        max_pvalue = pvalues.max()
        if max_pvalue > p_value_threshold:
            variable_to_remove = pvalues.idxmax()
            X = X.drop(variable_to_remove, axis=1)
        else:
            break
        
    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })


for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")


const         -4.402118
optimism_3    20.614162
optimism_7   -11.536074
dtype: float64


const         1.205157e-44
optimism_3    1.956935e-04
optimism_7    4.700684e-02
dtype: float64


0.157409787699908



#####################################
const         -3.876740
optimism_3    16.965276
dtype: float64


const         7.529282e-65
optimism_3    6.094732e-05
dtype: float64


0.0780671370160273



#####################################
const         -3.554695
optimism_0     8.606649
optimism_3    16.302408
dtype: float64


const         2.546638e-49
optimism_0    4.738710e-02
optimism_3    8.661788e-05
dtype: float64


0.1304602613396313



#####################################
const         -3.146559
optimism_0     8.125176
optimism_3    18.222245
dtype: float64


const         1.323531e-49
optimism_0    3.368592e-02
optimism_3    8.576634e-07
dtype: float64


0.18547571356912296



#####################################
const         -2.790723
optimism_0     7.541898
optimism_3    1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  initial_X["dummy_minute_pub_date"] = df_uncertainty_reg_3["dummy_minute_pub_date"]


In [18]:
# REG 4

results_list = []

initial_X = df_uncertainty_reg_4[df_uncertainty.columns]
initial_X["dummy_minute_pub_date"] = df_uncertainty_reg_4["dummy_minute_pub_date"]

possible_y = returns_pub_date.columns.tolist()
for maturity in possible_y:
    Y = df_uncertainty_reg_4[maturity]
    X = initial_X.copy()

    p_value_threshold = 0.10

    while True:
        X = sm.add_constant(X)
        
        model = sm.OLS(Y, X).fit()

        pvalues = model.pvalues.drop('const')
        if pvalues.empty:
            break

        max_pvalue = pvalues.max()
        if max_pvalue > p_value_threshold:
            variable_to_remove = pvalues.idxmax()
            X = X.drop(variable_to_remove, axis=1)
        else:
            break
        
    if len(X.columns) > 0:
        results_list.append({
            'Vertix': maturity,
            'Betas': model.params[X.columns],
            'P-values': model.pvalues[X.columns],
            'R_sqrd_adj': model.rsquared_adj
        })


for i in results_list:
    print(i["Betas"])
    print("\n")
    print(i["P-values"])
    print("\n")
    print(i["R_sqrd_adj"])
    print("\n\n")
    print("#####################################")


const         -4.039521
optimism_1     7.938323
optimism_3     7.366957
optimism_7   -16.188306
dtype: float64


const         1.279710e-81
optimism_1    3.619927e-02
optimism_3    7.125475e-02
optimism_7    3.387447e-02
dtype: float64


0.054977460181006466



#####################################
const         -3.369218
optimism_1     6.615029
optimism_7   -12.133292
dtype: float64


const         2.000668e-91
optimism_1    4.516376e-02
optimism_7    6.414354e-02
dtype: float64


0.027789388773432067



#####################################
const         -2.723838
optimism_2    29.628488
optimism_3     6.291684
optimism_5   -14.767881
optimism_7   -13.327592
dtype: float64


const         6.735415e-73
optimism_2    5.846922e-02
optimism_3    5.854537e-02
optimism_5    2.963246e-02
optimism_7    3.193925e-02
dtype: float64


0.0567579119770899



#####################################
const         -2.282214
optimism_2    27.177588
optimism_3     6.261380
optimism_5   -10.690222
optimi

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  initial_X["dummy_minute_pub_date"] = df_uncertainty_reg_4["dummy_minute_pub_date"]
