In [179]:
import os
# Switch the working directory so that the relative data paths defined below
# (they all start at the current directory) resolve.
# NOTE(review): absolute, machine-specific path — this call raises on any
# machine where the directory does not exist; confirm before sharing.
os.chdir("C:\\Users\\Thais\\Documents\\Python\\bcb-sentiment-analysis")

import datetime 

import pandas as pd
import numpy as np
from pandas.tseries.offsets import BDay
import scipy as sc
import statsmodels.api as sm
import pickle

# Input files, relative to the current working directory.
# The paths are assembled with os.path.join so the separator matches the host
# OS: on Windows the resulting strings are identical to the previous
# backslash literals, and on POSIX systems they become valid paths instead of
# a single file name containing backslashes.
DAILY_DATA = os.path.join(".", "data", "financial data", "daily.xlsx")
MINUTES_DATA = os.path.join(".", "data", "minutes", "copom_dates.xlsx")
MINUTES_SCORE = os.path.join(".", "data", "minutes", "minutes_scores_uncased_20230423.pkl")
TARGET_INFLATION_DATA = os.path.join(".", "data", "financial data", "historico_meta_inflacao.xlsx")

# NOTE(review): not referenced by any cell visible in this section — confirm
# where this cut-off date is consumed.
last_date = datetime.datetime(2023, 4, 11)

# 1 - Processing Data

### 1.1 Calculating Daily Historical Volatility

In [2]:
# Labels applied positionally to the data columns of the 'daily_di' sheet.
# NOTE(review): presumably DI tenors expressed in business days — confirm.
col_headers = [
    "21D", "42D", "63D", "84D", "105D", "126D", "147D", "168D",
    "189D", "210D", "231D", "252D", "273D", "294D", "504D", "756D",
    "1260D", "1512D", "1764D", "2016D", "2268D", "2520D", "2772D",
]

# The first 5 rows of the sheet are skipped before the header row is read.
di_sheet = pd.read_excel(DAILY_DATA, sheet_name='daily_di', skiprows=5)
daily_di = di_sheet.set_index('Dates')
daily_di.columns = col_headers

In [3]:
# Annualised rolling volatility of the daily log changes of `daily_di`.
# The annualisation factor is sqrt(TRADING_DAYS), with 252 trading days per year.
TRADING_DAYS = 252
annualization = np.sqrt(TRADING_DAYS)

# Log change versus the previous row; undefined entries (including the first
# row, which has no predecessor) are replaced by 0.
returns = np.log(daily_di / daily_di.shift(1)).fillna(0)


def _rolling_vol(window):
    """Return the annualised rolling standard deviation of `returns` over `window` rows."""
    return returns.rolling(window=window).std() * annualization


volatility_2D = _rolling_vol(2)
volatility_1W = _rolling_vol(5)
volatility_1M = _rolling_vol(22)
volatility_6M = _rolling_vol(126)
volatility_12M = _rolling_vol(TRADING_DAYS)

### 1.2 - IPCA series

In [9]:
def _read_inflation_sheet(sheet_name):
    """Read one sheet of TARGET_INFLATION_DATA and index it by its 'date' column."""
    return pd.read_excel(TARGET_INFLATION_DATA, sheet_name=sheet_name).set_index('date')


# Each sheet is parsed from disk once and then sliced, instead of re-reading
# the same sheet for every column.

# Daily
ipca_yoy = _read_inflation_sheet('ipca_yoy_ano_corrente')
ipca_yoy_desv_centro = ipca_yoy[['desvio_centro_perc']]
ipca_yoy_desv_interv = ipca_yoy[['desvio_intervalo_perc']]

# Days on which the IPCA was released
ipca_mom_desv_expect = _read_inflation_sheet('ipca_mom_ano_corrente')[['desvio_expectativa_perc']]

# Daily
ipca_hpm = _read_inflation_sheet('ipca_horizonte_PM')
ipca_hpm_desv_centro = ipca_hpm[['desvio_centro_perc']]
ipca_hpm_desv_interv = ipca_hpm[['desvio_intervalo_perc']]


### 1.3 - SELIC Surprise

In [213]:
# Sheet 'Plan1' of the workbook at MINUTES_DATA, indexed by its 'date' column.
selic_raw = (
    pd.read_excel(MINUTES_DATA, sheet_name='Plan1')
    .set_index('date')
)

In [194]:
def _by_decision_date(column):
    """Return `column` of `selic_raw` as a one-column frame indexed by 'decision_date'.

    Re-indexing on 'decision_date' discards the previous 'date' index, so only
    the requested column remains.
    """
    return selic_raw.set_index('decision_date')[[column]]


selic_surprise_v = _by_decision_date('selic_surprise_v')
selic_surprise_d = _by_decision_date('selic_surprise_d')
selic_frd = _by_decision_date('frd_guidance')

In [217]:
# Sheets 'd_ata' and 'd_ato' of the MINUTES_DATA workbook, each indexed by 'date'.
# NOTE(review): presumably dummy series for minutes-release and decision days — confirm.
d_ata, d_ato = (
    pd.read_excel(MINUTES_DATA, sheet_name=sheet).set_index('date')
    for sheet in ('d_ata', 'd_ato')
)

### 1.4 Minutes score

In [18]:
# Load the pickled scores and keep only the 'score' column, indexed by 'date'.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
scores_raw = pd.read_pickle(MINUTES_SCORE)
minute_score = scores_raw.reset_index().set_index('date')[['score']]

### 1.5 - Indicador de Condições Financeiras

In [24]:
# Labels applied positionally to the data columns of the 'FCI_CPI' sheet.
col_headers = [
    "GSBRFCI Index", "BZFCIBBC Index", "BZPIIPCM Index", "BZPIIPCY Index",
    "BCOIEYOY Index", "BCOITYOY Index", "BCOIEMOM Index", "BCOITMOM Index",
    "BBNCEEX1 Index", "BBNCP55 Index", "BBNCEXX2 Index",
]

# The first 6 rows of the sheet are skipped before the header row is read.
fci = pd.read_excel(DAILY_DATA, sheet_name='FCI_CPI', skiprows=6).set_index('Dates')
fci.columns = col_headers

# Only the 'GSBRFCI Index' column is carried forward as the financial conditions series.
# (A stray bare `ipca_yoy_desv_centro` expression that displayed an unrelated
# frame at the end of this cell was removed.)
fci_s = fci[['GSBRFCI Index']]

# 2 - Joining all in one dataframe

### Independent Variables

In [219]:
# Regressor frames, listed in the same order as the column labels assigned to
# the joined frame further down (the labels are applied positionally).
ls_df = [fci_s, minute_score, selic_frd, selic_surprise_v, selic_surprise_d, ipca_mom_desv_expect,
         ipca_yoy_desv_interv, ipca_yoy_desv_centro, ipca_hpm_desv_interv, ipca_hpm_desv_centro, d_ata, d_ato]

start_date = datetime.datetime(2001, 11, 9)  # start of one of the series
end_date = datetime.datetime(2022, 12, 13)  # last minutes

new_ls_df = []
for df in ls_df:
    # rename_axis returns a new frame, so the input frames keep their own
    # index name and this cell can be re-run without side effects on them.
    daily = df.rename_axis('date').resample('D').ffill()

    # Keep the [start_date, end_date] window, both ends inclusive.
    in_sample = (daily.index >= start_date) & (daily.index <= end_date)
    new_ls_df.append(daily[in_sample])

df_independent = pd.concat(new_ls_df, axis=1)

df_independent.columns = ['FCI', 'minutes_score', 'frd_guidance', 'selic_surprise_v', 'selic_surprise_d',
                          'desvio_expectativa_perc', 'desvio_intervalo_perc', 'desvio_centro_perc',
                          'desvio_intervalo_hpm_perc', 'desvio_centro_hpm_perc', 'd_ata', 'd_ato']

# Forward-fill first, then back-fill whatever is still missing at the start.
# DataFrame.ffill()/bfill() replace the deprecated fillna(method=...) spelling.
# NOTE(review): the back-fill copies the first later observation into earlier
# dates — confirm this look-ahead is acceptable for the regressions.
df_independent = df_independent.ffill().bfill()

### Dependent Variable

In [176]:
# Put the 2-day volatility on a calendar-day grid (forward-filled) and keep the
# [start_date, end_date] window, both ends inclusive.
df_vol = volatility_2D.resample('D').ffill()
in_window = (df_vol.index >= start_date) & (df_vol.index <= end_date)
df_dependent = df_vol[in_window]

# 3 - Regressions

## 3.1 - Sem controles

### 3.1.1 - vol_di = NLP + D_decisão + D_ata + Selic Surprise

In [None]:
# For each vertex

In [225]:
# define your dependent variable
y = df_dependent['42D']

# define your independent variables
x1 = df_independent['minutes_score']
x2 = df_independent['selic_surprise_v']
x3 = df_independent['d_ato']
x4 = df_independent['d_ata']

# create a matrix of your independent variables
X = sm.add_constant(pd.concat([x1, x2, x3, x4], axis=1))

# create the OLS model and fit it to your data
model = sm.OLS(y, X).fit()

# obtain a summary table in a structured format
summary_table = model.summary2()

# extract the p-values column from the table
p_values = summary_table.tables[1]['P>|t|']

# create a pandas dataframe to store the p-values
p_values_df = pd.DataFrame({'variable': p_values.index, 'p-value': p_values.values})


  x = pd.concat(x[::order], 1)


Unnamed: 0,variable,p-value
0,const,5.125085e-14
1,minutes_score,0.02650842
2,selic_surprise_v,0.04705338
3,d_ato,0.8995746
4,d_ata,0.6858036


In [222]:
p_values_df

Unnamed: 0,variable,p-value
0,const,2.417319e-14
1,minutes_score,0.02592174
2,selic_surprise_v,0.1131844
3,d_ato,0.9523867
4,d_ata,0.7663611


### 3.1.2 - vol_di = NLP + D_decisão + Selic Surprise

In [None]:
# For each vertex

### 3.1.3 - vol_di = NLP + D_ata + Selic Surprise

In [None]:
# For each vertex

## 3.2 - Com controles