# Notebook para manutenção do código

In [2]:
import datetime
import seaborn as sns
import numpy as np
import pandas as pd
import yfinance as yf
import local_lib as lib

## local_lib

In [3]:
#@st.cache
def etl_tesouro_historic_price():
    url = 'https://www.tesourotransparente.gov.br/ckan/dataset/df56aa42-484a-4a59-8184-7676580c81e3/resource/796d2059-14e9-44e3-80c9-2d9e30b405c1/download/PrecoTaxaTesouroDireto.csv'
    df = pd.read_csv(url, sep=';', decimal=',')
    df['data'] = pd.to_datetime(df['Data Base'], format='%d/%m/%Y')
    df['ticker'] = df['Tipo Titulo'].astype(str) + ' ' + df['Data Vencimento'].str[6:]
    df['preco_hist'] = round(df['PU Base Manha'].astype(float), 2)
    return df

#-----------------------------------------------------------------------------------------------------
#@st.cache  
def etl_benchmark_historic_price():
    # CDI
    df_cdi = pd.read_json('http://api.bcb.gov.br/dados/serie/bcdata.sgs.12/dados?formato=json')
    df_cdi['data'] = pd.to_datetime(df_cdi['data'], format='%d/%m/%Y')
    df_cdi.columns = ['data','cdi']

    # IPCA
    df_ipca = pd.read_json('http://api.bcb.gov.br/dados/serie/bcdata.sgs.433/dados?formato=json')
    df_ipca['data'] = pd.to_datetime(df_ipca['data'], format='%d/%m/%Y')
    df_ipca.columns = ['data','ipca']
    df_ipca['ipca'] = round((1 + df_ipca['ipca']) ** (1/22) - 1, 6)

    # IBOV
    df_ibov = yf.download('^BVSP', interval='1d')['Adj Close'].reset_index(drop=False)
    df_ibov.columns = ['data','ibov']
    df_ibov['ibov'] = ((df_ibov['ibov'] / df_ibov['ibov'].shift(1) - 1) * 100).fillna(0).round(6)
        
    # S&P500
    df_sp500 = yf.download('^GSPC', interval='1d')['Adj Close'].reset_index(drop=False)
    df_sp500.columns = ['data','sp500']
    df_sp500['sp500'] = ((df_sp500['sp500'] / df_sp500['sp500'].shift(1) - 1) * 100).fillna(5).round(6)

    df_final = pd.merge(df_ibov, df_sp500, on='data', how='inner')
    df_final = pd.merge(df_final, df_cdi, on='data', how='inner')
    df_final = pd.merge(df_final, df_ipca, on='data', how='left')
    df_final['ipca'] = df_final['ipca'].fillna(method='ffill')
    df_final['data'] = pd.to_datetime(df_final['data'])

    return df_final

## Tesouro Direto

In [4]:
import pandas as pd
import yfinance as yf
import local_lib as lib

### Parte 1: Tabela Dinâmica

In [5]:
# Carregando os dados inputados pelo usuário.
df_tesouro = pd.read_csv('../data/manutencao/dados_pos_home.csv')
df_tesouro['data'] = pd.to_datetime(df_tesouro['data'], format='%Y-%m-%d')
df_tesouro.head()

Unnamed: 0,data,ticker,qt,preco_mov,vl_total
0,2021-06-30,Tesouro IPCA+ 2035,1.7,2045.17,3476.79
1,2022-07-01,Tesouro IPCA+ 2035,0.0,0.0,-3.14
2,2021-11-04,Tesouro IPCA+ 2045,3.3,1069.94,3530.8
3,2022-07-01,Tesouro IPCA+ 2045,0.0,0.0,-3.48
4,2020-07-10,Tesouro Prefixado 2023,1.63,903.07,1472.0


In [6]:
# Carregando os preços diários extraídos da API do Tesouro.
df_hist_tesouro = etl_tesouro_historic_price()
df_hist_tesouro.head()

Unnamed: 0,Tipo Titulo,Data Vencimento,Data Base,Taxa Compra Manha,Taxa Venda Manha,PU Compra Manha,PU Venda Manha,PU Base Manha,data,ticker,preco_hist
0,Tesouro IPCA+ com Juros Semestrais,15/08/2026,21/10/2022,5.59,5.71,4047.52,4029.61,4029.61,2022-10-21,Tesouro IPCA+ com Juros Semestrais 2026,4029.61
1,Tesouro Selic,01/03/2023,21/10/2022,0.01,0.02,12324.37,12317.63,12317.63,2022-10-21,Tesouro Selic 2023,12317.63
2,Tesouro Selic,01/09/2024,21/10/2022,0.01,0.02,12321.78,12313.19,12313.19,2022-10-21,Tesouro Selic 2024,12313.19
3,Tesouro Prefixado com Juros Semestrais,01/01/2031,21/10/2022,11.97,12.09,934.97,929.07,929.07,2022-10-21,Tesouro Prefixado com Juros Semestrais 2031,929.07
4,Tesouro IPCA+,15/05/2035,21/10/2022,5.78,5.9,1955.11,1926.52,1926.52,2022-10-21,Tesouro IPCA+ 2035,1926.52


In [7]:
# Juntando os dados do usuário com os dados da API.
df_tesouro_historico = lib.merge_historic_tesouro(df_hist_tesouro, df_tesouro)
df_tesouro_historico.head()

Unnamed: 0,Tipo Titulo,Data Vencimento,Data Base,Taxa Compra Manha,Taxa Venda Manha,PU Compra Manha,PU Venda Manha,PU Base Manha,data,ticker,preco_hist,qt,preco_mov,vl_total,qt_acum,vl_atualizado
300,Tesouro IPCA+,15/05/2035,30/06/2021,4.14,4.26,2035.06,2002.91,2002.19,2021-06-30,Tesouro IPCA+ 2035,2002.19,1.7,2045.17,3476.79,1.7,3403.72
301,Tesouro IPCA+,15/05/2035,01/07/2021,4.15,4.27,2033.07,2000.97,2000.26,2021-07-01,Tesouro IPCA+ 2035,2000.26,0.0,0.0,0.0,1.7,3400.44
302,Tesouro IPCA+,15/05/2035,02/07/2021,4.13,4.25,2039.99,2007.78,2006.29,2021-07-02,Tesouro IPCA+ 2035,2006.29,0.0,0.0,0.0,1.7,3410.69
303,Tesouro IPCA+,15/05/2035,05/07/2021,4.11,4.23,2046.13,2013.83,2013.11,2021-07-05,Tesouro IPCA+ 2035,2013.11,0.0,0.0,0.0,1.7,3422.29
304,Tesouro IPCA+,15/05/2035,06/07/2021,4.12,4.24,2044.14,2011.88,2011.16,2021-07-06,Tesouro IPCA+ 2035,2011.16,0.0,0.0,0.0,1.7,3418.97


In [8]:
# Passo extra para criar uma coluna identificando o último dia do mês de cada ativo (útil para os plots)
df_tesouro_historico = lib.create_column_last_day(df_tesouro_historico)
df_tesouro_historico.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_right['year'] = pd.to_datetime(df_right['data']).dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_right['month'] = pd.to_datetime(df_right['data']).dt.month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_right['day'] = pd.to_datetime(df_right['data']).dt.day


Unnamed: 0,Tipo Titulo,Data Vencimento,Data Base,Taxa Compra Manha,Taxa Venda Manha,PU Compra Manha,PU Venda Manha,PU Base Manha,data,ticker,preco_hist,qt,preco_mov,vl_total,qt_acum,vl_atualizado,dummy_ultimo_dia,dt_competencia
0,Tesouro IPCA+,15/05/2035,30/06/2021,4.14,4.26,2035.06,2002.91,2002.19,2021-06-30,Tesouro IPCA+ 2035,2002.19,1.7,2045.17,3476.79,1.7,3403.72,1.0,2021-06-01
1,Tesouro IPCA+,15/05/2035,01/07/2021,4.15,4.27,2033.07,2000.97,2000.26,2021-07-01,Tesouro IPCA+ 2035,2000.26,0.0,0.0,0.0,1.7,3400.44,0.0,2021-07-01
2,Tesouro IPCA+,15/05/2035,02/07/2021,4.13,4.25,2039.99,2007.78,2006.29,2021-07-02,Tesouro IPCA+ 2035,2006.29,0.0,0.0,0.0,1.7,3410.69,0.0,2021-07-01
3,Tesouro IPCA+,15/05/2035,05/07/2021,4.11,4.23,2046.13,2013.83,2013.11,2021-07-05,Tesouro IPCA+ 2035,2013.11,0.0,0.0,0.0,1.7,3422.29,0.0,2021-07-01
4,Tesouro IPCA+,15/05/2035,06/07/2021,4.12,4.24,2044.14,2011.88,2011.16,2021-07-06,Tesouro IPCA+ 2035,2011.16,0.0,0.0,0.0,1.7,3418.97,0.0,2021-07-01


In [9]:
# Selecionar os tickers a serem visualizados (desnecessário neste notebook)
'''
# Filtro de tickers.
list_ticker = st.multiselect('Escolha o(s) investimento(s):',
                                df_tesouro_historico['ticker'].unique().tolist(),
                                df_tesouro_historico['ticker'].unique().tolist())
df_tesouro_historico = df_tesouro_historico.loc[df_tesouro_historico['ticker'].isin(list_ticker)]
'''

"\n# Filtro de tickers.\nlist_ticker = st.multiselect('Escolha o(s) investimento(s):',\n                                df_tesouro_historico['ticker'].unique().tolist(),\n                                df_tesouro_historico['ticker'].unique().tolist())\ndf_tesouro_historico = df_tesouro_historico.loc[df_tesouro_historico['ticker'].isin(list_ticker)]\n"

In [10]:
# Preparar os dados exclusivamente para a Tabela Dinâmica (visão 1)
df_plot = df_tesouro_historico.loc[df_tesouro_historico['dummy_ultimo_dia'] == 1]
tab1, data_col = lib.custom_pivot_table(df_plot, col_value='vl_atualizado')
tab1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['dt_competencia'] = pd.to_datetime(df['dt_competencia']).dt.date


Unnamed: 0,ticker,2020-04-01,2020-05-01,2020-06-01,2020-07-01,2020-08-01,2020-09-01,2020-10-01,2020-11-01,2020-12-01,...,2022-03-01,2022-04-01,2022-05-01,2022-06-01,2022-07-01,2022-08-01,2022-09-01,2022-10-01,2022-11-01,2022-12-01
0,Tesouro IPCA+ 2035,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3246.56,3232.96,3231.14,3204.99,3095.68,3208.58,3286.29,3257.9,3240.27,3158.84
1,Tesouro IPCA+ 2045,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3674.05,3597.03,3554.6,3476.35,3255.32,3470.44,3618.65,3502.26,3445.2,3233.8
2,Tesouro Prefixado 2023,0.0,0.0,0.0,1487.99,1478.72,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Tesouro Prefixado 2026,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3326.1,3285.75,3272.25,3262.7,3290.0,3408.65,3470.55,3488.75,3410.9,3417.75
4,Tesouro Selic 2024,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7010.69,7052.97,7131.71,7207.41,7283.94,7372.73,7455.93,7534.35,7610.52,7669.58
5,Tesouro Selic 2025,5187.15,5198.62,5210.0,11292.98,11418.12,19338.97,37324.92,37337.55,37520.25,...,17112.02,17213.39,17402.66,17587.99,17776.07,17992.63,18195.73,18390.09,18578.96,18724.41
6,Tesouro Selic 2027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8097.88,8155.86,15569.61,15742.53,21239.5,21487.23,22218.01,22463.95,22688.01,22845.17
7,Total,5187.15,5198.62,5210.0,12780.97,12896.84,19338.97,37324.92,37337.55,37520.25,...,42467.3,42537.96,50161.97,50481.97,55940.51,56940.26,58245.16,58637.3,58973.86,59049.55


### Parte 2: Gráfico de Linha com Benchmark

In [11]:
# Extração das variações dos benchmarks (API do BC e yfinance)
df_hist_bench = etl_benchmark_historic_price()
df_hist_bench.head()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0,data,ibov,sp500,cdi,ipca
0,1993-04-27,0.0,1.031047,1.389667,
1,1993-04-28,-0.81633,0.002278,1.374333,
2,1993-04-29,-2.46913,0.198627,1.346333,
3,1993-04-30,1.687762,0.296199,1.302333,
4,1993-05-03,0.0,0.515684,1.305333,


In [12]:
# Junção dos dados inputados pelo usuário e os dados históricos do benchmarks
df_tesouro_historico_agg = lib.merge_historic_benchmark(df_tesouro_historico, df_hist_bench)
df_tesouro_historico_agg.head()

Unnamed: 0,data,ibov,sp500,cdi,ipca,vl_total,vl_atualizado
0,2020-04-14,1.372469,3.057259,0.014227,-0.016725,2960.86,2959.4
1,2020-04-15,-1.360144,-2.203044,0.014227,-0.016725,0.0,2959.83
2,2020-04-16,-1.292639,0.581669,0.014227,-0.016725,2221.28,5180.45
3,2020-04-17,1.513905,2.679359,0.014227,-0.016725,0.0,5181.19
4,2020-04-20,-0.021522,-1.788105,0.014227,-0.016725,0.0,5181.94


In [13]:
# Arrumando dados em um formato melhor para o gráfico.
tab2 = lib.custom_data_lineplot(df_tesouro_historico_agg, ['ibov', 'sp500', 'cdi', 'ipca'])
tab2.head()

Unnamed: 0,data,variable,value
0,2020-04-14,Carteira,2959.4
1,2020-04-15,Carteira,2959.83
2,2020-04-16,Carteira,5180.45
3,2020-04-17,Carteira,5181.19
4,2020-04-20,Carteira,5181.94


### Parte 3: KPI

In [23]:
date_interval = (datetime.datetime(2022, 7, 3), datetime.datetime(2022, 12, 1))

In [24]:
df_date = pd.DataFrame({'data': pd.date_range(df_tesouro_historico['data'].min().to_pydatetime() - datetime.timedelta(days=1),
                                                df_tesouro_historico['data'].max().to_pydatetime())})
df_kpi = pd.merge(df_date, df_tesouro_historico, on='data', how='left')
df_kpi = df_kpi.groupby('data').agg({'qt':'sum', 'qt_acum':'sum',  'vl_atualizado':'sum'}).reset_index()
df_kpi['vl_atualizado'] = np.where(df_kpi['qt_acum'] != 0, df_kpi['vl_atualizado'], np.nan)
df_kpi['vl_atualizado'] = df_kpi['vl_atualizado'].fillna(method='ffill').fillna(0)
df_kpi.head()

Unnamed: 0,data,qt,qt_acum,vl_atualizado
0,2020-04-13,0.0,0.0,0.0
1,2020-04-14,0.28,0.28,2959.4
2,2020-04-15,0.0,0.28,2959.83
3,2020-04-16,0.21,0.49,5180.45
4,2020-04-17,0.0,0.49,5181.19


In [28]:
# Cálculo dos aportes históricos.
vl_aporte = df_tesouro_historico.loc[(df_tesouro_historico['qt'] != 0) &
                                        (df_tesouro_historico['vl_total'] > 0) &
                                        (df_tesouro_historico['data'] <= date_interval[1]), 'vl_total'].sum()

vl_aporte_delta = df_tesouro_historico.loc[(df_tesouro_historico['qt'] != 0) &
                                            (df_tesouro_historico['vl_total'] > 0) &
                                            (df_tesouro_historico['data'].between(date_interval[0], date_interval[1])), 'vl_total'].sum()

# Cálculo dos valores resgatados.
vl_resgate = df_tesouro_historico.loc[(df_tesouro_historico['qt'] != 0) &
                                        (df_tesouro_historico['vl_total'] < 0) &
                                        (df_tesouro_historico['data'] <= date_interval[1]), 'vl_total'].sum()

vl_resgate_delta = df_tesouro_historico.loc[(df_tesouro_historico['qt'] != 0) &
                                            (df_tesouro_historico['vl_total'] < 0) &
                                            (df_tesouro_historico['data'].between(date_interval[0], date_interval[1])), 'vl_total'].sum()

# Cálculo do valor patrimonial.
vl_patrimonio = df_kpi.loc[df_kpi['data'] == date_interval[1], 'vl_atualizado'].sum()
vl_patrimonio_delta = vl_patrimonio - df_kpi.loc[df_kpi['data'] == date_interval[0], 'vl_atualizado'].sum()

# Cálculo do rendimento.
rendimento_nominal = round((vl_patrimonio - vl_resgate - vl_aporte) / vl_aporte * 100 , 1)
if  vl_aporte_delta != 0:
    rendimento_nominal_delta = round((vl_patrimonio_delta - vl_resgate_delta - vl_aporte_delta) / vl_aporte_delta * 100 , 1)
else:
    rendimento_nominal_delta = 0

vl_aporte, vl_aporte_delta, vl_resgate, vl_resgate_delta, vl_patrimonio, vl_patrimonio_delta, rendimento_nominal, rendimento_nominal_delta

(86689.84, 5779.64, -33237.02, 0.0, 58991.38, 8434.96, 6.4, 45.9)