In [67]:
import pandas as pd
import os
import logging
import warnings
import plotly.express as px
from matplotlib.colors import LinearSegmentedColormap
from datetime import datetime

In [68]:
# # Configurar logging, para desligar é só comentar as linhas abaixo
# logging.basicConfig(
#     level=logging.INFO,
#     format="%(asctime)s - %(levelname)s - %(message)s",
#     handlers=[
#         logging.FileHandler("processamento.log"),
#         logging.StreamHandler()
#     ]
# )

In [69]:
# Ignorar warnings do tipo FutureWarning
warnings.simplefilter(action='ignore', category=FutureWarning)
logging.info("Warnings do tipo FutureWarning ignorados.")

In [70]:
try:
    # Caminho para o primeiro arquivo (dados principais)
    base_dir = os.path.join("..", "excel", "out")
    file_name = "CDX_US_HY_spread_simple_analysis.xlsx"
    file_path = os.path.join(base_dir, file_name)
    logging.info(f"Lendo arquivo principal: {file_path}")
    regression = pd.read_excel(file_path)
    logging.info("Arquivo principal carregado com sucesso.")

    # Caminho para o segundo arquivo (legendas)
    base_dir_l = os.path.join("..", "excel", "in")
    file_name_l = "legendas.xlsx"
    file_path_l = os.path.join(base_dir_l, file_name_l)
    logging.info(f"Lendo arquivo de legendas: {file_path_l}")
    leg = pd.read_excel(file_path_l)
    logging.info("Arquivo de legendas carregado com sucesso.")

    # Renomear e adicionar coluna
    regression.columns.values[0] = "Codes"
    regression.insert(1, "Names", "")
    logging.info("Coluna 'Codes' renomeada e 'Names' inserida.")

    # Criar mapeamento e exibir
    mapping = dict(zip(leg["Codes"], leg["Names"]))
    logging.info(f"Mapa de legendas criado com {len(mapping)} entradas.")
    logging.debug(f"Mapping: {mapping}")  # Use DEBUG para não poluir o terminal

    # Preencher nomes com base nos códigos
    preenchidos = 0
    for idx, code in regression["Codes"].items():
        if code in mapping:
            regression.at[idx, "Names"] = mapping[code]
            preenchidos += 1

    logging.info(f"{preenchidos} códigos preenchidos com nomes correspondentes.")

except FileNotFoundError as e:
    logging.error(f"Arquivo não encontrado: {e}")
    raise
except Exception as e:
    logging.error(f"Erro durante o processamento: {e}")
    raise


In [71]:
# Adicionar a coluna 'Signal' ao DataFrame 'regression'
regression.insert(0, 'Signal', "")

# Formato AAAA-MM-DD_HH-MM-SS
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
output_path = f"../excel/out/regression_with_signal_{timestamp}.xlsx"
regression.to_excel(output_path, index=False)

logging.info(f"Exportando DataFrame 'regression' com a coluna 'Signal' para o caminho: {output_path}")

In [72]:
file_path = '../excel/out/regression_with_signal_2025-08-04_11-52-31.xlsx'
regression = pd.read_excel(file_path)

#display(regression)

In [73]:
# regression.head(5)

In [74]:
cmap_coef = LinearSegmentedColormap.from_list(
    "coef_cmap",
    ["#F8696B", "#FFEB84", "#63BE7B"]
)

In [75]:
# Função para destacar p-values < 0.05
def highlight_pval(v):
    return "color: red" if v < 0.05 else ""

In [76]:
regression_styled = (
    regression.style
      # 3-color nas colunas mean e std (usando um colormap padrão)
      .background_gradient(
          subset=["mean", "std"],
          #cmap="viridis"
          cmap=cmap_coef
      )
      # 3-color em R2
      .background_gradient(
          subset=["R2"],
          #cmap="Blues"
          cmap=cmap_coef
      )
      # 3-color custom em coef
      .background_gradient(
          subset=["coef"],
          cmap=cmap_coef
      )
      # fonte vermelha em p-values abaixo de 0.05
      .applymap(
          highlight_pval,
          subset=["pval_ADR", "pval_Coint", "pval"]
      )
      .hide(axis="index")
      .hide(axis="columns", subset=["Codes"])
      #.set_caption("Regression")
)

### QUAIS VARIAVEIS EXPLICAM BEM O COMPORTAMENTO DO CDX_US_HX_SPREAD AO LONGO DO TEMPO ?

In [77]:
#tabela 1
regression_styled

Signal,Names,series (m/y),start (m/y),last (m/y),mean,std,pval_ADR,ADF - UR at15.0%,pval_Coint,Cointegration to Y at15.0%,R2,coef,pval
+,2y Note Germany (%),9/1990,11/2001,8/2023,0.011,0.0165,0.494,yes UR,0.2373,no stat. resid,0.0283,0.1934,0.0063
-,2y Note France (%),8/1990,11/2001,8/2023,0.012,0.0163,0.557,yes UR,0.2288,no stat. resid,0.0391,0.2309,0.0013
+,2y Note Netherlands (%),5/1999,11/2001,8/2023,0.0113,0.0165,0.385,yes UR,0.2327,no stat. resid,0.0389,0.227,0.0013
-,2y Note Italy (%),11/1999,11/2001,8/2023,0.0179,0.0154,0.423,yes UR,0.2169,no stat. resid,0.1,0.3894,0.0
+,2y Note Spain (%),5/1993,11/2001,8/2023,0.0168,0.0163,0.319,yes UR,0.1942,no stat. resid,0.0831,0.3365,0.0
-,2y Note Portugal (%),5/1999,11/2001,8/2023,0.0239,0.0292,0.199,yes UR,0.2246,no stat. resid,0.0858,0.1906,0.0
+,2y Nom Europe All weighted Agg (%),9/1990,11/2001,8/2023,0.0136,0.0158,0.535,yes UR,0.2197,no stat. resid,0.0566,0.2866,0.0001
-,2y Nom Europe Spread All Weighted Agg (%),9/1990,11/2001,8/2023,0.0026,0.0036,0.122,no UR,0.254,no stat. resid,0.0751,1.4454,0.0
+,2y Nom Europe Simple Spread All Agg (%),5/1993,11/2001,8/2023,0.0046,0.0061,0.219,yes UR,0.2451,no stat. resid,0.0623,0.783,0.0
-,5y Note Germany (%),8/1990,11/2001,8/2023,0.015,0.0171,0.45,yes UR,0.2151,no stat. resid,0.0548,0.2603,0.0001


### Quanto da variação de Y é explicada pelas variáveis X do meu modelo?

In [78]:
from IPython.display import HTML
import plotly.io as pio
pio.renderers.default = "notebook_connected" 

# Filtrar Top 20 e remover valores inválidos
top_n = 20
df_top_20_regression = regression.sort_values(by='R2', ascending=False).dropna(subset=['Names']).head(top_n)

label_map = {
    "CDX HY BBG": "CDX HY",
    "JPM FX Vol": "JPM FX Vol",
    "VIX SPX": "VIX SPX",
    "ISM Service (ZS)": "ISM Srv",
    "CDX IG BBG": "CDX IG",
    "VIX Eurostoxx": "VIX Eurostoxx",
    "Global PMI Manufacturing (ZS)": "Gbl Mfg",
    "Eurozone Manufacturing PMI SA (ZS)": "EZ Mfg",
    "Ted Spread Europe (%)": "TED Spread",
    "MOVE": "MOVE",
    "ISM Manufacturing (ZS)": "ISM Mfg",
    "10y Real Note Germany (%)": "10y Germany",
    "Global PMI Composite (ZS)": "Gbl Comp",
    "US Retail Sales SA (6m %ave chg)": "US Retail 6m Avg %",
    "Eurozone Composite PMI SA (ZS)": "EZ Comp",
    "Global PMI Service (ZS)": "Gbl Srv",
    "US Industrial Production SA (6m %ave chg)": "US Ind Prod 6m Avg %",
    "GDP US Chained 2012 Dollars YoY SA (%)": "GDP US YoY",
    "Fed SPF Residential Investment 1y (Dispersion P75 - P25) (%)": "SPF Invest Dispersion",
    "Eurostat Industrial Production Eurozone SWDA (6m %ave chg)": "EZ Ind Prod 6m Avg %",
}

df_top_20_regression['Label'] = df_top_20_regression['Names'].map(label_map)

fig = px.bar(
    df_top_20_regression,
    x='R2',
    y='Label',
    orientation='h',
    hover_name='Names',
    text='R2',
    template='plotly_white',
    title='Top 20 séries por R² (com destaque de cor)',
    color='R2',  # define a intensidade de cor com base no R²
    color_continuous_scale=[(0, "#84c4e9"), (1, "#104eab")],  # você pode testar 'Viridis', 'Cividis', 'Plasma', etc.
)
fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')

# Layout refinado
fig.update_layout(
    yaxis=dict(autorange="reversed"),
    xaxis_range=[0, 1],
    margin=dict(t=60, b=40, l=180, r=20),
    coloraxis_showscale=False  # opcional: esconde a barra de escala lateral
)

HTML(fig.to_html(include_plotlyjs='cdn'))

In [79]:
#### PARTE 2 - Processamento de regs_summary - GERADO POR CDX_US_HY_spread_model.py

try:
    # Caminho para o primeiro arquivo (dados principais)
    base_dir = os.path.join("..", "excel", "out", 'US', 'Credit', 'HY', 't_0') 
    file_name = "sectors_regs_summary.xlsx"
    file_path = os.path.join(base_dir, file_name)
    logging.info(f"Lendo arquivo regs_summary: {file_path}")
    summary = pd.read_excel(file_path)
    logging.info("Arquivo regs_summary carregado com sucesso.")

    # Renomear e adicionar coluna
    summary.rename(columns={"idx": "Codes"}, inplace=True)
    summary.insert(summary.columns.get_loc("Codes") + 1, "Names", "")
    logging.info("Coluna 'Codes' renomeada e 'Names' inserida.")
    
    # adicionar coef e pval
    summary.insert(summary.columns.get_loc("Codes") + 1, "coef", summary['Codes'].str.endswith('(coef)'))
    summary.insert(summary.columns.get_loc("Codes") + 2, "pval", summary['Codes'].str.endswith('(pval)'))
    
    # Preenche coef e pval com True/False
    for idx, i in summary['coef'].items():
        if i == True:
            summary.at[idx, 'Codes'] = summary.at[idx, 'Codes'].replace(" (coef)", "")

    for idx, i in summary['pval'].items():
        if i == True:
            summary.at[idx, 'Codes'] = summary.at[idx - 1, 'Codes']
    
    # Criar mapeamento e exibir
    mapping = dict(zip(leg["Codes"], leg["Names"]))
    logging.info(f"Mapa de legendas criado com {len(mapping)} entradas.")
    logging.debug(f"Mapping: {mapping}") 

    # Preencher nomes com base nos códigos
    preenchidos = 0
    for idx, code in summary["Codes"].items():
        if code in mapping:
            summary.at[idx, "Names"] = mapping[code]
            preenchidos += 1

    logging.info(f"{preenchidos} códigos preenchidos com nomes correspondentes.")

except FileNotFoundError as e:
    logging.error(f"Arquivo não encontrado: {e}")
    raise
except Exception as e:
    logging.error(f"Erro durante o processamento: {e}")
    raise


In [80]:
df_coef = summary[
    (summary['coef'] == True) |
    ((summary['pval'] == False) & (summary['coef'] == False))
]

df_pval = summary[
    (summary['pval'] == True) |
    ((summary['pval'] == False) & (summary['coef'] == False))
]

In [81]:
last_7_codes = df_coef['Codes'].iloc[-7:]
df_coef.loc[df_coef.index[-7:], 'Names'] = last_7_codes.values
df_pval.loc[df_pval.index[-7:], 'Names'] = last_7_codes.values

In [82]:
coef = df_coef.copy()
pval = df_pval.copy()
coef_codes = df_coef.copy()
coef.drop(columns=['Unnamed: 0','Codes', 'coef', 'pval'], inplace=True)
pval.drop(columns=['Unnamed: 0','Codes', 'coef', 'pval'], inplace=True)

In [83]:
# coef

In [84]:
# pval

In [85]:
# TABELA 2

# Criar dois DataFrames a partir do df_coef
df_main = coef.iloc[:-7].copy()  # Todos menos as 7 últimas linhas
df_statistics = coef.iloc[-7:].copy()  # As 7 últimas linhas (dados da regressão como AIC, BIC, DW, R²)
df_statistics = df_statistics.drop(columns=["Signal"], errors="ignore")

cols_to_convert_main = df_main.columns.difference(['Signal', 'Names'])
df_main[cols_to_convert_main] = df_main[cols_to_convert_main].apply(pd.to_numeric, errors='coerce').round(3)

cols_to_convert_stats = df_statistics.columns.difference(['Names'])
df_statistics[cols_to_convert_stats] = df_statistics[cols_to_convert_stats].apply(pd.to_numeric, errors='coerce').round(3)

df_statistics['best'] = df_statistics.iloc[:, 1:].max(axis=1)
df_main = df_main.fillna('')
df_statistics = df_statistics.fillna('')

# display(df_main)
# display(df_statistics)

In [86]:
def style_df_main(df):
    def color_gradient(val):
        try:
            if pd.isna(val):
                return ''
            elif val > 0:
                return f'background-color: rgba(0, 128, 0, {min(val / 5, 1)})'
            else:
                return f'background-color: rgba(255, 0, 0, {min(abs(val) / 5, 1)})'
        except:
            return ''

    styled = df.style
    styled = styled.applymap(color_gradient, subset=df.columns.difference(['Signal', 'Names']))
    return styled

In [87]:
def style_df_stats(df):
    def style_row(row):
        name = df.loc[row.name, 'Names'] 
        styled = [''] * len(row)
        if name == 'R2':
            styled = [''] + ['background-color: #d0f0c0' if v >= 0.75 else 'background-color: #fdd' for v in row[1:]]
        elif name == 'DW':
            styled = [''] + ['background-color: #fdd' if (v < 1.5 or v > 2.5) else '' for v in row[1:]]
        elif name == 'LMpv':
            styled = [''] + ['background-color: #fdd' if v < 0.05 else '' for v in row[1:]]
        elif name in ['AIC', 'BIC']:
            min_val = row[1:].min()
            styled = [''] + ['background-color: #cce5ff' if v == min_val else '' for v in row[1:]]
        
        return styled

    return df.style.apply(style_row, axis=1)

O beta das regressões refere-se aos coeficientes de regressão (`coef`) que representam o impacto de cada variável independente na variável dependente. Esses valores estão presentes na coluna `coef` do DataFrame `regression` ou `df_top_20_regression`, dependendo do contexto.

Por exemplo, no DataFrame `df_top_20_regression`, os valores de beta (coeficientes) para as variáveis estão na coluna `coef`. Esses coeficientes indicam a magnitude e a direção do impacto de cada variável explicativa no comportamento do CDX_US_HY_spread.

In [88]:
# Exibir os DataFrames com formatação
display(style_df_main(df_main))

Unnamed: 0,Names,CDX_US_HY_spread,CDX_US_HY_spread1,CDX_US_HY_spread2,CDX_US_HY_spread3,CDX_US_HY_spread4,CDX_US_HY_spread5,CDX_US_HY_spread6,CDX_US_HY_spread7,CDX_US_HY_spread8,CDX_US_HY_spread9,CDX_US_HY_spread10,CDX_US_HY_spread11,CDX_US_HY_spread12,CDX_US_HY_spread13,CDX_US_HY_spread14,CDX_US_HY_spread15
0,Fed Sloos Tight Std for C&I Loans (Small Firms) (ZS) (Smoothed),,0.718,0.44,,,,,,,,,,,,,
2,Global PMI Composite (ZS),-0.825,,-0.605,,,,-0.384,-0.502,-0.517,-0.374,-0.342,-0.384,-0.384,-0.384,-0.384,-0.384
4,JPM FX Vol,,,,5.885,4.331,4.465,4.189,4.598,4.648,4.158,4.452,4.189,4.189,4.189,4.189,4.189
6,MOVE,,,,,,,0.011,0.016,0.017,0.01,,0.011,0.011,0.011,0.011,0.011
8,Ted Spread US (%),1.579,1.602,1.228,0.695,0.339,,,0.168,,0.123,0.187,,,,,
10,VIX SPX,,,,,0.787,0.837,0.424,,,0.418,0.492,0.424,0.424,0.424,0.424,0.424


### Dados Estatísticos

In [89]:
display(style_df_stats(df_statistics))

Unnamed: 0,Names,CDX_US_HY_spread,CDX_US_HY_spread1,CDX_US_HY_spread2,CDX_US_HY_spread3,CDX_US_HY_spread4,CDX_US_HY_spread5,CDX_US_HY_spread6,CDX_US_HY_spread7,CDX_US_HY_spread8,CDX_US_HY_spread9,CDX_US_HY_spread10,CDX_US_HY_spread11,CDX_US_HY_spread12,CDX_US_HY_spread13,CDX_US_HY_spread14,CDX_US_HY_spread15,best
12,intercept,0.037,0.038,0.039,-0.014,-0.014,-0.014,-0.011,-0.011,-0.011,-0.011,-0.009,-0.011,-0.011,-0.011,-0.011,-0.011,0.039
14,AIC,-1499.66,-1484.17,-1519.98,-1678.92,-1732.18,-1730.54,-1761.07,-1748.69,-1749.77,-1759.58,-1757.24,-1761.07,-1761.07,-1761.07,-1761.07,-1761.07,-1484.17
15,BIC,-1488.94,-1473.45,-1505.67,-1668.19,-1717.88,-1719.81,-1743.19,-1730.81,-1735.47,-1738.13,-1739.36,-1743.19,-1743.19,-1743.19,-1743.19,-1743.19,-1473.45
16,DW,0.2,0.19,0.2,0.37,0.46,0.44,0.43,0.42,0.41,0.43,0.44,0.43,0.43,0.43,0.43,0.43,0.46
17,LMpv,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18,MSE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19,R2,0.45,0.42,0.5,0.72,0.77,0.77,0.8,0.79,0.79,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8


In [90]:
# Parte 3 - terceira tabela
### pegar os valores das variaveis

notebook_dir = os.getcwd()
file_path = os.path.abspath(
    os.path.join(notebook_dir, '..', 'excel', 'in', 'out_DB_D.xlsx') # valores padronizados da regressao
)
try:
    serie_hist = pd.read_excel(file_path)
    logging.info(f"Arquivo lido com sucesso: {file_path}")
except Exception as e:
    logging.error(f"Falha ao ler o arquivo {file_path}: {e}")
    raise


$$
\text{Contribuição}_{i,t} = \beta_{i,t} \cdot \Delta x_i
$$


In [91]:
#serie_hist.head()

In [92]:
#Pegar as colunas
cols = serie_hist.columns.tolist()
# cols

In [93]:
#coef_codes['Codes']

In [94]:
# df_main é o df dos coeficientes 
comuns = list(set(coef_codes['Codes'].tolist()) & set(cols))
comuns.insert(0, "date")
#print(comuns)

In [95]:
df_filtered_serie = serie_hist.loc[serie_hist.index[-30:], comuns]
pd.set_option('display.max_rows', None)  # Mostra todas as linhas
pd.set_option('display.max_columns', None)  # Mostra todas as colunas
# Conta o número de linhas e colunas no dataframe
num_linhas, num_colunas = df_filtered_serie.shape

#display(df_filtered_serie)

# print(f"Número de linhas: {num_linhas}")
# print(f"Número de colunas: {num_colunas}")

In [96]:
delta_x = df_filtered_serie.iloc[-1] - df_filtered_serie.iloc[0]
# print(delta_x)

In [97]:
filtered_codes = coef_codes[coef_codes['Codes'].isin(comuns)].copy()
filtered_codes = filtered_codes[['Codes', 'Names']]
#display(filtered_codes)


In [98]:
# Cria dicionários para início, fim e delta
inicios_dict = {}
fins_dict = {}
deltas_dict = {}

# Itera sobre as variáveis filtradas dinamicamente
for code in filtered_codes['Codes']:
    if code in df_filtered_serie.columns:
        inicio = df_filtered_serie.iloc[0][code]
        fim = df_filtered_serie.iloc[-1][code]
        delta = fim - inicio
    else:
        inicio = fim = delta = None  # Caso a variável não esteja no df_filtered_serie
    
    inicios_dict[code] = inicio
    fins_dict[code] = fim
    deltas_dict[code] = delta


In [99]:
# Adiciona colunas ao DataFrame com base nos dicionáriosA
filtered_codes['inicio'] = filtered_codes['Codes'].map(inicios_dict)
filtered_codes['fim'] = filtered_codes['Codes'].map(fins_dict)
filtered_codes['delta'] = filtered_codes['Codes'].map(deltas_dict)

In [100]:
#display(filtered_codes)

In [101]:
#coef_codes

In [102]:
# Identifica colunas que começam com 'CDX_US_HY_spread'
coef_cols = [col for col in coef_codes.columns if col.startswith('CDX_US_HY_spread')]

# Seleciona a linha do intercept
intercept_row = coef_codes[coef_codes['Codes'] == 'intercept']

# Cria o dicionário com chaves numéricas e valores do intercept
if not intercept_row.empty:
    intercept = {idx + 1: intercept_row.iloc[0][col] for idx, col in enumerate(coef_cols)}
else:
    intercept = {}

#print(intercept)


In [103]:
for i in range(len(filtered_codes)):
    code = filtered_codes.iloc[i]['Codes']
    delta = deltas_dict.get(code, None)

    if delta is not None:

        linha_completa = coef_codes[coef_codes['Codes'] == code]

        if not linha_completa.empty:
            for idx, col in enumerate(coef_cols, start=1): 
                coef_value = linha_completa.iloc[0][col]
                if pd.notna(coef_value):
                    filtered_codes.loc[filtered_codes.index[i], f'{idx}'] = coef_value * delta

In [104]:
#filtered_codes

In [105]:
# Reorganiza o DataFrame
colunas_numericas = sorted([col for col in filtered_codes.columns if col.isdigit()], key=int)
colunas_fixas = [col for col in filtered_codes.columns if not col.isdigit()]
nova_ordem = colunas_fixas + colunas_numericas
filtered_codes = filtered_codes[nova_ordem]
filtered_codes

Unnamed: 0,Codes,Names,inicio,fim,delta,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
4,jpm_vol,JPM FX Vol,0.007482,0.006648,-0.000834,,,,-0.004908,-0.003613,-0.003724,-0.003494,-0.003835,-0.003877,-0.003468,-0.003713,-0.003494,-0.003494,-0.003494,-0.003494,-0.003494
6,move,MOVE,0.722172,0.768208,0.046036,,,,,,,0.000497,0.000718,0.000764,0.00047,,0.000497,0.000497,0.000497,0.000497,0.000497
8,ted_spread_us,Ted Spread US (%),0.00223,0.001814,-0.000416,-0.000656,-0.000666,-0.00051,-0.000289,-0.000141,,,-7e-05,,-5.1e-05,-7.8e-05,,,,,
10,vix,VIX SPX,0.01336,0.013893,0.000533,,,,,0.000419,0.000446,0.000226,,,0.000223,0.000262,0.000226,0.000226,0.000226,0.000226,0.000226


### Initial Prediction 
$$
\text{Initial Pred}_t = \sum_i \beta_{i,t} \cdot x_i(\text{inicio}) + \text{intercept}_t
$$

In [106]:
# df_filtered_serie

In [107]:
# print("Inícios:", inicios_dict)
# print("Fins:", fins_dict)
# print("Deltas:", deltas_dict)
# print("Intercept", intercept)

In [108]:
# coef_cols

In [109]:
# Cria um dicionário chamado 'initial_pred' para armazenar a previsão de cada período
# Exemplo: {1: 0.0, 2: 0.0, ..., até o número de colunas de coeficientes}
initial_pred = {}
for i in range(len(coef_cols)):
    initial_pred[i + 1] = 0.0  # os períodos começam em 1, não 0

# Agora vamos passar por cada variável que tem um valor inicial
for code in inicios_dict:
    x_inicial = inicios_dict[code]  # valor da variável no início
    # Procurar no DataFrame a linha que tem os coeficientes dessa variável
    linha = coef_codes[coef_codes['Codes'] == code]
    # Se não achou a linha, pula para a próxima variável
    if linha.empty:
        continue
    # Para cada coluna de coeficiente (um por período)
    for i in range(len(coef_cols)):
        col = coef_cols[i]  # nome da coluna, ex: 'CDX_US_HY_spread1'
        beta = linha.iloc[0][col]  # pega o valor do coeficiente
        # Se o coeficiente não for nulo (NaN), multiplica pelo valor inicial
        if pd.notna(beta):
            initial_pred[i + 1] += beta * x_inicial
# Depois de somar todos os coeficientes * x_inicial, adicionamos o intercepto de cada período
for i in initial_pred:
    if i in intercept:
        initial_pred[i] += intercept[i]
#print("Previsão inicial para cada período:", initial_pred)

### Final Prediction
$$
\text{Final Pred}_t = \sum_i \beta_{i,t} \cdot x_i(\text{fim}) + \text{intercept}_t
$$

In [110]:
final_pred = {idx + 1: 0.0 for idx in range(len(coef_cols))}
# Loop por cada variável e seus valores finais
for code, x_final in fins_dict.items():
    row = coef_codes[coef_codes['Codes'] == code]
    if row.empty:
        continue

    for idx, col in enumerate(coef_cols, start=1):
        beta = row.iloc[0][col]
        if pd.notna(beta):
            final_pred[idx] += beta * x_final

# Adiciona o intercepto (uma vez por período)
for idx in final_pred:
    final_pred[idx] += intercept.get(idx, 0)
#print("Previsão final para cada período:", final_pred)

### Period Variation
$$
\text{Period Variation}_t = (\text{Final Pred}_t - \text{Initial Pred}_t) \times 100
$$

In [111]:
period_variation = {}

for t in initial_pred:
    final_val = final_pred.get(t, 0)
    initial_val = initial_pred.get(t, 0)
    variation = (final_val - initial_val) * 100  # pontos percentuais
    period_variation[t] = variation
#print("Variação por período:", period_variation)

In [112]:
forecast = pd.DataFrame({
    'initial_pred': pd.Series(initial_pred),
    'final_pred': pd.Series(final_pred),
    'period_variation': pd.Series(period_variation)
}).T.round(4)  # .T transforma os períodos em colunas

display(forecast)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
initial_pred,0.0409,0.0417,0.0414,0.0311,0.0301,0.0301,0.0336,0.0353,0.0357,0.0335,0.0313,0.0336,0.0336,0.0336,0.0336,0.0336
final_pred,0.0403,0.041,0.0409,0.0259,0.0267,0.0268,0.0308,0.0322,0.0326,0.0307,0.0278,0.0308,0.0308,0.0308,0.0308,0.0308
period_variation,-0.0656,-0.0666,-0.051,-0.5197,-0.3334,-0.3278,-0.2771,-0.3187,-0.3113,-0.2827,-0.3528,-0.2771,-0.2771,-0.2771,-0.2771,-0.2771
