# Notebook para manutenção do código

In [1]:
import datetime
import seaborn as sns
import numpy as np
import pandas as pd
import yfinance as yf
import local_lib as lib

## local_lib

In [2]:
#@st.cache
def etl_tesouro_historic_price():
    url = 'https://www.tesourotransparente.gov.br/ckan/dataset/df56aa42-484a-4a59-8184-7676580c81e3/resource/796d2059-14e9-44e3-80c9-2d9e30b405c1/download/PrecoTaxaTesouroDireto.csv'
    df = pd.read_csv(url, sep=';', decimal=',')
    df['data'] = pd.to_datetime(df['Data Base'], format='%d/%m/%Y')
    df['ticker'] = df['Tipo Titulo'].astype(str) + ' ' + df['Data Vencimento'].str[6:]
    df['preco_hist'] = round(df['PU Base Manha'].astype(float), 2)
    return df

#-----------------------------------------------------------------------------------------------------
#@st.cache  
def etl_benchmark_historic_price():
    # CDI
    df_cdi = pd.read_json('http://api.bcb.gov.br/dados/serie/bcdata.sgs.12/dados?formato=json')
    df_cdi['data'] = pd.to_datetime(df_cdi['data'], format='%d/%m/%Y')
    df_cdi.columns = ['data','cdi']

    # IPCA
    df_ipca = pd.read_json('http://api.bcb.gov.br/dados/serie/bcdata.sgs.433/dados?formato=json')
    df_ipca['data'] = pd.to_datetime(df_ipca['data'], format='%d/%m/%Y')
    df_ipca.columns = ['data','ipca']
    df_ipca['ipca'] = round((1 + df_ipca['ipca']) ** (1/22) - 1, 6)

    # IBOV
    df_ibov = yf.download('^BVSP', interval='1d')['Adj Close'].reset_index(drop=False)
    df_ibov.columns = ['data','ibov']
    df_ibov['ibov'] = ((df_ibov['ibov'] / df_ibov['ibov'].shift(1) - 1) * 100).fillna(0).round(6)
        
    # S&P500
    df_sp500 = yf.download('^GSPC', interval='1d')['Adj Close'].reset_index(drop=False)
    df_sp500.columns = ['data','sp500']
    df_sp500['sp500'] = ((df_sp500['sp500'] / df_sp500['sp500'].shift(1) - 1) * 100).fillna(5).round(6)

    df_final = pd.merge(df_ibov, df_sp500, on='data', how='inner')
    df_final = pd.merge(df_final, df_cdi, on='data', how='inner')
    df_final = pd.merge(df_final, df_ipca, on='data', how='left')
    df_final['ipca'] = df_final['ipca'].fillna(method='ffill')
    df_final['data'] = pd.to_datetime(df_final['data'])

    return df_final


# ------------------------------------------------------------------------------------------------------
#@st.cache
def etl_bolsa_historic_price(list_ticker_b3: list, start_date: str, end_date: str) -> np.array:

    # Utilizando a api do yf
    long_string = ' '.join([i + '.SA' for i in list_ticker_b3])
    df_price = yf.download(long_string, start=start_date, end=end_date, group_by='column')['Adj Close'].reset_index()

    # Ajustes na base
    df_price.columns = ['data'] + list(list_ticker_b3)    
    df_price['data'] = pd.to_datetime(df_price['data'])
    df_price = df_price.fillna(0).round(2)
    
    return df_price

## Home

In [3]:
import datetime
import json
import numpy as np
import pandas as pd
from PIL import Image
import os
import streamlit as st

In [4]:
# Extra script
uploaded_files = [
    '../data/kenji/movimentacao-2019.xlsx',
    '../data/kenji/movimentacao-2020.xlsx',
    '../data/kenji/movimentacao-2021.xlsx',
    '../data/kenji/movimentacao-2022.xlsx',
]

In [5]:
# Extração.
for i in uploaded_files:
    df = pd.read_excel(i, engine='openpyxl')

    # No primeiro caso criaremos um dataframe que consolidará todas as movimentações.
    if i == uploaded_files[0]:
        df_all = df
    else:
        df_all = pd.concat([df_all, df], axis=0, ignore_index=True).drop_duplicates(keep='last')

df_all.head()

  warn("Workbook contains no default style, apply openpyxl's default")
  warn("Workbook contains no default style, apply openpyxl's default")
  warn("Workbook contains no default style, apply openpyxl's default")


Unnamed: 0,Entrada/Saída,Data,Movimentação,Produto,Instituição,Quantidade,Preço unitário,Valor da Operação
0,Credito,17/12/2019,Transferência - Liquidação,CEBR3 - CIA ENERGETICA DE BRASILIA,MODAL DTVM LTDA,100.0,53.69,5369.0
1,Credito,17/12/2019,Transferência - Liquidação,ITSA4 - ITAUSA S/A,MODAL DTVM LTDA,100.0,13.6,1360.0
2,Debito,11/12/2019,Transferência - Liquidação,CCRO3 - CCR S.A.,MODAL DTVM LTDA,300.0,17.5,5250.0
3,Debito,09/12/2019,Transferência - Liquidação,USIM5 - USINAS SIDERURGICAS DE MINAS GERAIS S....,MODAL DTVM LTDA,100.0,8.8,880.0
4,Credito,06/12/2019,Transferência - Liquidação,MOVI3 - MOVIDA,MODAL DTVM LTDA,300.0,16.97,5091.0


In [6]:
# Tratamentos:
# (geral) Nome e dtype.
dict_dtype = {'credito_ou_debito': str,
                'data': str,
                'tp_movimento': str,
                'ativo': str,
                'instituicao': str,
                'qt_abs': float,
                'preco_mov': float,
                'vl_total_abs': float}

df_all.columns = list(dict_dtype.keys())
df_all['preco_mov'].replace('-', 0, inplace=True)
df_all['vl_total_abs'].replace('-', 0, inplace=True)
df_all = df_all.astype(dict_dtype)
df_all['data'] = pd.to_datetime(df_all['data'], format='%d/%m/%Y')
df_all.head()

Unnamed: 0,credito_ou_debito,data,tp_movimento,ativo,instituicao,qt_abs,preco_mov,vl_total_abs
0,Credito,2019-12-17,Transferência - Liquidação,CEBR3 - CIA ENERGETICA DE BRASILIA,MODAL DTVM LTDA,100.0,53.69,5369.0
1,Credito,2019-12-17,Transferência - Liquidação,ITSA4 - ITAUSA S/A,MODAL DTVM LTDA,100.0,13.6,1360.0
2,Debito,2019-12-11,Transferência - Liquidação,CCRO3 - CCR S.A.,MODAL DTVM LTDA,300.0,17.5,5250.0
3,Debito,2019-12-09,Transferência - Liquidação,USIM5 - USINAS SIDERURGICAS DE MINAS GERAIS S....,MODAL DTVM LTDA,100.0,8.8,880.0
4,Credito,2019-12-06,Transferência - Liquidação,MOVI3 - MOVIDA,MODAL DTVM LTDA,300.0,16.97,5091.0


In [7]:
# Check (apagar)
df_all.loc[df_all['ativo'].str.contains('B3SA3')].sort_values('data').head()
#df_all.loc[df_all['tp_movimento'].str.contains('Desdobro')].sort_values('data')

Unnamed: 0,credito_ou_debito,data,tp_movimento,ativo,instituicao,qt_abs,preco_mov,vl_total_abs
49,Credito,2020-08-03,Transferência - Liquidação,"B3SA3 - B3 S.A. – BRASIL, BOLSA, BALCÃO",MODAL DTVM LTDA,100.0,64.74,6474.0
32,Credito,2020-10-07,Juros Sobre Capital Próprio,"B3SA3 - B3 S.A. – BRASIL, BOLSA, BALCÃO",MODAL DTVM LTDA,100.0,0.15,12.57
33,Credito,2020-10-07,Dividendo,"B3SA3 - B3 S.A. – BRASIL, BOLSA, BALCÃO",MODAL DTVM LTDA,100.0,0.65,64.87
165,Credito,2021-01-08,Dividendo,"B3SA3 - B3 S.A. – BRASIL, BOLSA, BALCÃO",MODAL DTVM LTDA,100.0,0.41,41.04
164,Credito,2021-01-08,Juros Sobre Capital Próprio,"B3SA3 - B3 S.A. – BRASIL, BOLSA, BALCÃO",MODAL DTVM LTDA,100.0,0.15,12.49


In [8]:
# (geral) Nova variável: classificação do ativo.
df_all['tp_ativo'] = np.select(
    [
        (df_all['ativo'].str.upper()).str.contains('TESOURO'),
        df_all['ativo'].str.split(' - ', 0).str[0].str.len() == 5,
        df_all['ativo'].str.split(' - ', 0).str[0].str.len() == 6,
        df_all['ativo'].str.contains('DEB'),
        df_all['ativo'].str.contains('|'.join(['CDB', 'RDB', 'LCA', 'LCI']))
    ],
    [
        'Tipo 1: tesouro',
        'Tipo 2: ações',
        'Tipo 3: BDR',
        'Tipo 4: debêntures',
        'Tipo 5: renda fixa privada'
    ],'?'
)

# (geral) Nova variável: ticker.
df_all['ticker'] = np.select(
    [
        df_all['tp_ativo'] == 'Tipo 4: debêntures',
        df_all['tp_ativo'] == 'Tipo 5: renda fixa privada'
    ],
    [   
        df_all['ativo'].str[5:12],
        df_all['ativo'].str[5:17]
    ], df_all['ativo'].str.split(' - ').str[0]
)

# (bolsa) Ajuste específico de ações: zerar a quantidade de compra/venda em caso de dividendos e juros sobre capital próprio.
df_all['qt_abs'] = np.where(df_all['tp_movimento'].isin(['Transferência - Liquidação', 'Bonificação em Ativos', 'Desdobro']), df_all['qt_abs'], 0)

# (geral) Nova variável: variação na quantidade de ativos.
df_all['qt'] = df_all['qt_abs'] * np.where(df_all['credito_ou_debito'] == 'Credito', 1, -1)

# (geral) Nova variável: variação na quantidade no valor total.
df_all['vl_total'] = df_all['vl_total_abs'] * np.where(df_all['credito_ou_debito'] == 'Credito', 1, -1)

# (bolsa) Nova variável: flag se a negociação é um provento (dividendo, juros sobre capital próprio ou leilão)
df_all['evento'] = np.select(
    [
        df_all['tp_movimento'].isin(['Dividendo', 'Juros Sobre Capital Próprio', 'Fração em Ativos', 'Leilão de Fração']),
        df_all['tp_movimento'].isin(['Bonificação em Ativos']),
        df_all['tp_movimento'].isin(['Desdobro', 'Grupamento', ]),
        df_all['tp_movimento'].isin(['Transferência - Liquidação', 'Compra', 'Venda', 'COMPRA / VENDA', 'COMPRA/VENDA DEFINITIVA/CESSAO']),
        df_all['tp_movimento'].isin(['Cobrança de Taxa Semestral', 'Atualização'])
    ],
    [
        'dividendo_ou_jcp',
        'bonificacao',
        'split',
        'compra_ou_venda',
        'outros'
    ],
    '?')

df_all.head()

Unnamed: 0,credito_ou_debito,data,tp_movimento,ativo,instituicao,qt_abs,preco_mov,vl_total_abs,tp_ativo,ticker,qt,vl_total,evento
0,Credito,2019-12-17,Transferência - Liquidação,CEBR3 - CIA ENERGETICA DE BRASILIA,MODAL DTVM LTDA,100.0,53.69,5369.0,Tipo 2: ações,CEBR3,100.0,5369.0,compra_ou_venda
1,Credito,2019-12-17,Transferência - Liquidação,ITSA4 - ITAUSA S/A,MODAL DTVM LTDA,100.0,13.6,1360.0,Tipo 2: ações,ITSA4,100.0,1360.0,compra_ou_venda
2,Debito,2019-12-11,Transferência - Liquidação,CCRO3 - CCR S.A.,MODAL DTVM LTDA,300.0,17.5,5250.0,Tipo 2: ações,CCRO3,-300.0,-5250.0,compra_ou_venda
3,Debito,2019-12-09,Transferência - Liquidação,USIM5 - USINAS SIDERURGICAS DE MINAS GERAIS S....,MODAL DTVM LTDA,100.0,8.8,880.0,Tipo 2: ações,USIM5,-100.0,-880.0,compra_ou_venda
4,Credito,2019-12-06,Transferência - Liquidação,MOVI3 - MOVIDA,MODAL DTVM LTDA,300.0,16.97,5091.0,Tipo 2: ações,MOVI3,300.0,5091.0,compra_ou_venda


In [9]:
#Check (apagar)
df_all['tp_movimento'].unique()

#a =df_all.loc[df_all['ticker'] == 'ITSA4'].sort_values('data')
#a#['provento'] = np.where(a['tp_movimento'].isin(['Dividendo', 'Juros Sobre Capital Próprio', 'Fração em Ativos', 'Leilão de Fração']), 1, 0)
#a['provento'].value_counts()

#a['negociacao_ou_provento'] = np.where(a['tp_movimento'].str.isin(['Transferência - Liquidação']))
#a['qt_abs'] = np.where(a['tp_movimento'].isin(['Transferência - Liquidação', 'Bonificação em Ativos']), a['qt_abs'], 0)

#with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
#    display(a)

array(['Transferência - Liquidação', 'Dividendo',
       'Juros Sobre Capital Próprio', 'Desdobro', 'Compra', 'Venda',
       'Bonificação em Ativos', 'Atualização',
       'Cobrança de Taxa Semestral', 'COMPRA/VENDA DEFINITIVA/CESSAO',
       'Leilão de Fração', 'Fração em Ativos', 'Grupamento',
       'COMPRA / VENDA'], dtype=object)

In [10]:
# (f) Unificar múltiplas compras/vendas em diferentes corretoras.
# Essa etapa necessariamente é a última, pois aplicaremos cálculo sobre quantidade e preço.
# Ao final, teremos o preço médio de compras/venda 
# tp_movimento foi removido pois podem haver compras e vendas o mesmo dia (caso de daytrade)
#df_all = df_all.groupby(['tp_ativo','ticker','data']).agg({'qt':'sum', 'vl_total':'sum'}).reset_index(drop=False)
#df_all['preco_mov'] = np.where(df_all['qt'] != 0, round(df_all['vl_total'] / df_all['qt'], 2), 0)

In [11]:
# Futuro: criar uma visão geral de todos os investimento.s

In [10]:
# Filtrar apenas Tesouro para a página seguinte.
def only_tesouro(df):
    df = df.groupby(['tp_ativo','ticker','data']).agg({'qt':'sum', 'vl_total':'sum'}).reset_index(drop=False)
    df['preco_mov'] = np.where(df['qt'] != 0, round(df['vl_total'] / df['qt'], 2), 0)
    df = df.loc[df['tp_ativo'] == 'Tipo 1: tesouro'].sort_values(by=['ticker','data'], ascending=True)
    
    return df[['data', 'ticker', 'qt', 'preco_mov', 'vl_total']]

df_tesouro = only_tesouro(df_all)
df_tesouro.head()

Unnamed: 0,data,ticker,qt,preco_mov,vl_total
0,2021-06-30,Tesouro IPCA+ 2035,0.0,0.0,3476.79
1,2022-07-01,Tesouro IPCA+ 2035,0.0,0.0,-3.14
2,2021-11-04,Tesouro IPCA+ 2045,0.0,0.0,3530.8
3,2022-07-01,Tesouro IPCA+ 2045,0.0,0.0,-3.48
4,2020-07-10,Tesouro Prefixado 2023,0.0,0.0,1472.0


In [13]:
# Filtrar ações e BDR para a página seguinte.
def only_bolsa(df):
    df = df.loc[(df['tp_ativo'] == 'Tipo 2: ações') | (df['tp_ativo'] == 'Tipo 3: BDR')].sort_values(by=['ticker','data'], ascending=True)
    df = df[['data', 'ticker', 'tp_movimento', 'evento', 'qt', 'preco_mov', 'vl_total']]
    df = df.groupby(['data', 'ticker','evento']).agg({'qt':'sum', 'vl_total':'sum'}).reset_index(drop=False)
    return df
    
df_bolsa = only_bolsa(df_all)
df_bolsa.head()

Unnamed: 0,data,ticker,evento,qt,vl_total
0,2019-11-05,BPAN4,compra_ou_venda,600.0,5502.0
1,2019-11-06,MGLU3,compra_ou_venda,-100.0,-4499.0
2,2019-11-08,BPAN4,compra_ou_venda,-600.0,-5598.0
3,2019-11-08,CCRO3,compra_ou_venda,300.0,5046.0
4,2019-11-12,CYRE3,compra_ou_venda,100.0,2725.0


In [14]:
# Check (apagar)
a = df_bolsa.loc[df_bolsa['ticker'] == 'ITSA4'].sort_values('data')
a

Unnamed: 0,data,ticker,evento,qt,vl_total
9,2019-11-19,ITSA4,compra_ou_venda,100.0,1355.0
20,2019-12-17,ITSA4,compra_ou_venda,100.0,1360.0
21,2020-01-02,ITSA4,dividendo_ou_jcp,0.0,2.0
30,2020-03-06,ITSA4,dividendo_ou_jcp,0.0,82.67
37,2020-04-01,ITSA4,dividendo_ou_jcp,0.0,4.0
44,2020-07-01,ITSA4,dividendo_ou_jcp,0.0,4.0
50,2020-08-26,ITSA4,dividendo_ou_jcp,0.0,4.0
52,2020-10-01,ITSA4,dividendo_ou_jcp,0.0,4.0
58,2020-12-03,ITSA4,compra_ou_venda,159.0,1741.87
60,2020-12-15,ITSA4,compra_ou_venda,200.0,2260.0


## Tesouro Direto

In [15]:
import pandas as pd
import yfinance as yf
import local_lib as lib

### Parte 1: Tabela Dinâmica

In [16]:
# Carregando os dados inputados pelo usuário.
df_tesouro = pd.read_csv('../data/manutencao/dados_pos_home.csv')
df_tesouro['data'] = pd.to_datetime(df_tesouro['data'], format='%Y-%m-%d')
df_tesouro.head()

Unnamed: 0,data,ticker,qt,preco_mov,vl_total
0,2021-06-30,Tesouro IPCA+ 2035,1.7,2045.17,3476.79
1,2022-07-01,Tesouro IPCA+ 2035,0.0,0.0,-3.14
2,2021-11-04,Tesouro IPCA+ 2045,3.3,1069.94,3530.8
3,2022-07-01,Tesouro IPCA+ 2045,0.0,0.0,-3.48
4,2020-07-10,Tesouro Prefixado 2023,1.63,903.07,1472.0


In [17]:
# Carregando os preços diários extraídos da API do Tesouro.
df_hist_tesouro = etl_tesouro_historic_price()
df_hist_tesouro.head()

Unnamed: 0,Tipo Titulo,Data Vencimento,Data Base,Taxa Compra Manha,Taxa Venda Manha,PU Compra Manha,PU Venda Manha,PU Base Manha,data,ticker,preco_hist
0,Tesouro IGPM+ com Juros Semestrais,01/01/2031,17/11/2020,3.3,3.42,8730.95,8660.71,8652.01,2020-11-17,Tesouro IGPM+ com Juros Semestrais 2031,8652.01
1,Tesouro Prefixado,01/01/2021,17/11/2020,1.95,2.07,997.62,997.48,997.4,2020-11-17,Tesouro Prefixado 2021,997.4
2,Tesouro IPCA+,15/05/2045,17/11/2020,4.12,4.24,1257.75,1222.88,1222.48,2020-11-17,Tesouro IPCA+ 2045,1222.48
3,Tesouro IPCA+ com Juros Semestrais,15/05/2055,17/11/2020,4.27,4.39,4409.4,4323.18,4321.77,2020-11-17,Tesouro IPCA+ com Juros Semestrais 2055,4321.77
4,Tesouro IPCA+ com Juros Semestrais,15/08/2030,17/11/2020,3.37,3.49,4140.03,4103.46,4102.26,2020-11-17,Tesouro IPCA+ com Juros Semestrais 2030,4102.26


In [18]:
# Juntando os dados do usuário com os dados da API.
df_tesouro_historico = lib.merge_historic_tesouro(df_hist_tesouro, df_tesouro)
df_tesouro_historico.head()

Unnamed: 0,Tipo Titulo,Data Vencimento,Data Base,Taxa Compra Manha,Taxa Venda Manha,PU Compra Manha,PU Venda Manha,PU Base Manha,data,ticker,preco_hist,qt,preco_mov,vl_total,qt_acum,vl_atualizado
300,Tesouro IPCA+,15/05/2035,30/06/2021,4.14,4.26,2035.06,2002.91,2002.19,2021-06-30,Tesouro IPCA+ 2035,2002.19,1.7,2045.17,3476.79,1.7,3403.72
301,Tesouro IPCA+,15/05/2035,01/07/2021,4.15,4.27,2033.07,2000.97,2000.26,2021-07-01,Tesouro IPCA+ 2035,2000.26,0.0,0.0,0.0,1.7,3400.44
302,Tesouro IPCA+,15/05/2035,02/07/2021,4.13,4.25,2039.99,2007.78,2006.29,2021-07-02,Tesouro IPCA+ 2035,2006.29,0.0,0.0,0.0,1.7,3410.69
303,Tesouro IPCA+,15/05/2035,05/07/2021,4.11,4.23,2046.13,2013.83,2013.11,2021-07-05,Tesouro IPCA+ 2035,2013.11,0.0,0.0,0.0,1.7,3422.29
304,Tesouro IPCA+,15/05/2035,06/07/2021,4.12,4.24,2044.14,2011.88,2011.16,2021-07-06,Tesouro IPCA+ 2035,2011.16,0.0,0.0,0.0,1.7,3418.97


In [19]:
# Passo extra para criar uma coluna identificando o último dia do mês de cada ativo (útil para os plots)
df_tesouro_historico = lib.create_column_last_day(df_tesouro_historico)
df_tesouro_historico.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_right['year'] = pd.to_datetime(df_right['data']).dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_right['month'] = pd.to_datetime(df_right['data']).dt.month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_right['day'] = pd.to_datetime(df_right['data']).dt.day


Unnamed: 0,Tipo Titulo,Data Vencimento,Data Base,Taxa Compra Manha,Taxa Venda Manha,PU Compra Manha,PU Venda Manha,PU Base Manha,data,ticker,preco_hist,qt,preco_mov,vl_total,qt_acum,vl_atualizado,dummy_ultimo_dia,dt_competencia
0,Tesouro IPCA+,15/05/2035,30/06/2021,4.14,4.26,2035.06,2002.91,2002.19,2021-06-30,Tesouro IPCA+ 2035,2002.19,1.7,2045.17,3476.79,1.7,3403.72,1.0,2021-06-01
1,Tesouro IPCA+,15/05/2035,01/07/2021,4.15,4.27,2033.07,2000.97,2000.26,2021-07-01,Tesouro IPCA+ 2035,2000.26,0.0,0.0,0.0,1.7,3400.44,0.0,2021-07-01
2,Tesouro IPCA+,15/05/2035,02/07/2021,4.13,4.25,2039.99,2007.78,2006.29,2021-07-02,Tesouro IPCA+ 2035,2006.29,0.0,0.0,0.0,1.7,3410.69,0.0,2021-07-01
3,Tesouro IPCA+,15/05/2035,05/07/2021,4.11,4.23,2046.13,2013.83,2013.11,2021-07-05,Tesouro IPCA+ 2035,2013.11,0.0,0.0,0.0,1.7,3422.29,0.0,2021-07-01
4,Tesouro IPCA+,15/05/2035,06/07/2021,4.12,4.24,2044.14,2011.88,2011.16,2021-07-06,Tesouro IPCA+ 2035,2011.16,0.0,0.0,0.0,1.7,3418.97,0.0,2021-07-01


In [20]:
# Selecionar os tickers a serem visualizados (desnecessário neste notebook)
'''
# Filtro de tickers.
list_ticker = st.multiselect('Escolha o(s) investimento(s):',
                                df_tesouro_historico['ticker'].unique().tolist(),
                                df_tesouro_historico['ticker'].unique().tolist())
df_tesouro_historico = df_tesouro_historico.loc[df_tesouro_historico['ticker'].isin(list_ticker)]
'''

"\n# Filtro de tickers.\nlist_ticker = st.multiselect('Escolha o(s) investimento(s):',\n                                df_tesouro_historico['ticker'].unique().tolist(),\n                                df_tesouro_historico['ticker'].unique().tolist())\ndf_tesouro_historico = df_tesouro_historico.loc[df_tesouro_historico['ticker'].isin(list_ticker)]\n"

In [21]:
# Preparar os dados exclusivamente para a Tabela Dinâmica (visão 1)
df_plot = df_tesouro_historico.loc[df_tesouro_historico['dummy_ultimo_dia'] == 1]
tab1, data_col = lib.custom_pivot_table(df_plot, col_value='vl_atualizado')
tab1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['dt_competencia'] = pd.to_datetime(df['dt_competencia']).dt.date


Unnamed: 0,ticker,April/2020,May/2020,June/2020,July/2020,August/2020,September/2020,October/2020,November/2020,December/2020,...,March/2022,April/2022,May/2022,June/2022,July/2022,August/2022,September/2022,October/2022,November/2022,December/2022
0,Tesouro IPCA+ 2035,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3246.56,3232.96,3231.14,3204.99,3095.68,3208.58,3286.29,3257.9,3240.27,3231.85
1,Tesouro IPCA+ 2045,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3674.05,3597.03,3554.6,3476.35,3255.32,3470.44,3618.65,3502.26,3445.2,3350.75
2,Tesouro Prefixado 2023,0.0,0.0,0.0,1487.99,1478.72,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Tesouro Prefixado 2026,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3326.1,3285.75,3272.25,3262.7,3290.0,3408.65,3470.55,3488.75,3410.9,3453.75
4,Tesouro Selic 2024,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7010.69,7052.97,7131.71,7207.41,7283.94,7372.73,7455.93,7534.35,7610.52,7689.73
5,Tesouro Selic 2025,5187.15,5198.62,5210.0,11292.98,11418.12,19338.97,37324.92,37337.55,37520.25,...,17112.02,17213.39,17402.66,17587.99,17776.07,17992.63,18195.73,18390.09,18578.96,18773.63
6,Tesouro Selic 2027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8097.88,8155.86,15569.61,15742.53,21239.5,21487.23,22218.01,22463.95,22688.01,22904.54
7,Total,5187.15,5198.62,5210.0,12780.97,12896.84,19338.97,37324.92,37337.55,37520.25,...,42467.3,42537.96,50161.97,50481.97,55940.51,56940.26,58245.16,58637.3,58973.86,59404.25


### Parte 2: Gráfico de Linha com Benchmark

In [22]:
# Extração das variações dos benchmarks (API do BC e yfinance)
df_hist_bench = etl_benchmark_historic_price()
df_hist_bench.head()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0,data,ibov,sp500,cdi,ipca
0,1993-04-27,0.0,1.031047,1.389667,
1,1993-04-28,-0.81633,0.002278,1.374333,
2,1993-04-29,-2.46913,0.198627,1.346333,
3,1993-04-30,1.687762,0.296199,1.302333,
4,1993-05-03,0.0,0.515684,1.305333,


In [23]:
# Junção dos dados inputados pelo usuário e os dados históricos do benchmarks
df_tesouro_historico_agg = lib.merge_historic_benchmark(df_tesouro_historico, df_hist_bench)
df_tesouro_historico_agg.head()

Unnamed: 0,data,ibov,sp500,cdi,ipca,vl_total,vl_atualizado
0,2020-04-14,1.372469,3.057259,0.014227,-0.016725,2960.86,2959.4
1,2020-04-15,-1.360144,-2.203044,0.014227,-0.016725,0.0,2959.83
2,2020-04-16,-1.292639,0.581669,0.014227,-0.016725,2221.28,5180.45
3,2020-04-17,1.513905,2.679359,0.014227,-0.016725,0.0,5181.19
4,2020-04-20,-0.021522,-1.788105,0.014227,-0.016725,0.0,5181.94


In [24]:
# Arrumando dados em um formato melhor para o gráfico.
tab2 = lib.custom_data_lineplot(df_tesouro_historico_agg, ['ibov', 'sp500', 'cdi', 'ipca'])
tab2.head()

Unnamed: 0,data,variable,value
0,2020-04-14,Carteira,2959.4
1,2020-04-15,Carteira,2959.83
2,2020-04-16,Carteira,5180.45
3,2020-04-17,Carteira,5181.19
4,2020-04-20,Carteira,5181.94


### Parte 3: KPI

In [25]:
date_interval = (datetime.datetime(2022, 7, 3), datetime.datetime(2022, 12, 1))
date_interval

(datetime.datetime(2022, 7, 3, 0, 0), datetime.datetime(2022, 12, 1, 0, 0))

In [26]:
df_date = pd.DataFrame({'data': pd.date_range(df_tesouro_historico['data'].min().to_pydatetime() - datetime.timedelta(days=1),
                                                df_tesouro_historico['data'].max().to_pydatetime())})
df_kpi = pd.merge(df_date, df_tesouro_historico, on='data', how='left')
df_kpi = df_kpi.groupby('data').agg({'qt':'sum', 'qt_acum':'sum',  'vl_atualizado':'sum'}).reset_index()
df_kpi['vl_atualizado'] = np.where(df_kpi['qt_acum'] != 0, df_kpi['vl_atualizado'], np.nan)
df_kpi['vl_atualizado'] = df_kpi['vl_atualizado'].fillna(method='ffill').fillna(0)
df_kpi.head()

Unnamed: 0,data,qt,qt_acum,vl_atualizado
0,2020-04-13,0.0,0.0,0.0
1,2020-04-14,0.28,0.28,2959.4
2,2020-04-15,0.0,0.28,2959.83
3,2020-04-16,0.21,0.49,5180.45
4,2020-04-17,0.0,0.49,5181.19


In [27]:
# Cálculo dos aportes históricos.
vl_aporte = df_tesouro_historico.loc[(df_tesouro_historico['qt'] != 0) &
                                        (df_tesouro_historico['vl_total'] > 0) &
                                        (df_tesouro_historico['data'] <= date_interval[1]), 'vl_total'].sum()

vl_aporte_delta = df_tesouro_historico.loc[(df_tesouro_historico['qt'] != 0) &
                                            (df_tesouro_historico['vl_total'] > 0) &
                                            (df_tesouro_historico['data'].between(date_interval[0], date_interval[1])), 'vl_total'].sum()

# Cálculo dos valores resgatados.
vl_resgate = df_tesouro_historico.loc[(df_tesouro_historico['qt'] != 0) &
                                        (df_tesouro_historico['vl_total'] < 0) &
                                        (df_tesouro_historico['data'] <= date_interval[1]), 'vl_total'].sum()

vl_resgate_delta = df_tesouro_historico.loc[(df_tesouro_historico['qt'] != 0) &
                                            (df_tesouro_historico['vl_total'] < 0) &
                                            (df_tesouro_historico['data'].between(date_interval[0], date_interval[1])), 'vl_total'].sum()

# Cálculo do valor patrimonial.
vl_patrimonio = df_kpi.loc[df_kpi['data'] == date_interval[1], 'vl_atualizado'].sum()
vl_patrimonio_delta = vl_patrimonio - df_kpi.loc[df_kpi['data'] == date_interval[0], 'vl_atualizado'].sum()

# Cálculo do rendimento.
rendimento_nominal = round((vl_patrimonio - vl_resgate - vl_aporte) / vl_aporte * 100 , 1)
if  vl_aporte_delta != 0:
    rendimento_nominal_delta = round((vl_patrimonio_delta - vl_resgate_delta - vl_aporte_delta) / vl_aporte_delta * 100 , 1)
else:
    rendimento_nominal_delta = 0

vl_aporte, vl_aporte_delta, vl_resgate, vl_resgate_delta, vl_patrimonio, vl_patrimonio_delta, rendimento_nominal, rendimento_nominal_delta

(86689.84, 5779.64, -33237.02, 0.0, 58991.38, 8434.96, 6.4, 45.9)

## Bolsa

In [28]:
import pandas as pd
import yfinance as yf

In [29]:
df_bolsa['data'] = pd.to_datetime(df_bolsa['data'], format='%Y-%m-%d')
df_bolsa.head()

Unnamed: 0,data,ticker,evento,qt,vl_total
0,2019-11-05,BPAN4,compra_ou_venda,600.0,5502.0
1,2019-11-06,MGLU3,compra_ou_venda,-100.0,-4499.0
2,2019-11-08,BPAN4,compra_ou_venda,-600.0,-5598.0
3,2019-11-08,CCRO3,compra_ou_venda,300.0,5046.0
4,2019-11-12,CYRE3,compra_ou_venda,100.0,2725.0


In [30]:
# Salvar lá encima!
def etl_bolsa_historic_price(list_ticker_b3: list, start_date: str, end_date: str) -> np.array:

    # Utilizando a api do yf
    list_ticker_yf = [i + '.SA' for i in list_ticker_b3]
    long_string = ' '.join(list_ticker_yf)
    yf_df = yf.download(long_string, start=start_date, end=end_date, group_by='column', actions=True, interval='1d')
    
    # Obter o preço histórico e os eventos de agrupamento/desdobramento de ações
    df_price = yf_df['Close'].reset_index().sort_values('Date', ascending=True).round(2)#.fillna(method='ffill')

    # Ajustes gerais na base
    df_price.columns = ['data'] + list(list_ticker_b3)    
    df_price['data'] = pd.to_datetime(df_price['data'])
    df_price = pd.melt(df_price, id_vars=['data'], value_vars=list(list_ticker_b3), var_name='ticker', value_name='preco')
    
    return df_price

In [31]:
# Passo 1: extrair dados atualizados de cada ticker.
df_bolsa_historico = etl_bolsa_historic_price(list_ticker_b3=df_bolsa['ticker'].sort_values().unique(),
                                              start_date=df_bolsa['data'].min(),
                                              end_date=df_bolsa['data'].max())
df_bolsa_historico.head()

[*********************100%***********************]  20 of 20 completed


Unnamed: 0,data,ticker,preco
0,2019-11-05,B3SA3,16.43
1,2019-11-06,B3SA3,16.3
2,2019-11-07,B3SA3,16.47
3,2019-11-08,B3SA3,16.23
4,2019-11-11,B3SA3,16.7


In [32]:
"""
# Experimentando como usar o stock split
start_date = df_bolsa.loc[df_bolsa['ticker'] == 'B3SA3', 'data'].dt.date.min()
end_date = df_bolsa.loc[df_bolsa['ticker'] == 'B3SA3', 'data'].dt.date.max()
long_string = 'B3SA3.SA ITSA4.SA'

#start_date
yf_df = yf.download(long_string, start=start_date, end=end_date, group_by='column', actions=True, interval='1wk')
yf_df_close = yf_df['Close'].reset_index().sort_values('Date', ascending=False).round(2).fillna(method='ffill')
yf_df_split = yf_df['Stock Splits'].reset_index().sort_values('Date', ascending=False).cumsum().round(2).fillna(method='ffill').replace(0, 1)

display(df_bolsa.loc[df_bolsa['ticker'] == 'B3SA3'])
#display(yf_df)
display(yf_df_close.sort_values('Date'))
display(yf_df_split.sort_index())

for i in ['B3SA3.SA']:
    yf_df_close[i] = yf_df_close[i] * (yf_df_split[i])

df_price = yf_df_close.sort_values('Date', ascending=True)
display(df_price)
"""

"\n# Experimentando como usar o stock split\nstart_date = df_bolsa.loc[df_bolsa['ticker'] == 'B3SA3', 'data'].dt.date.min()\nend_date = df_bolsa.loc[df_bolsa['ticker'] == 'B3SA3', 'data'].dt.date.max()\nlong_string = 'B3SA3.SA ITSA4.SA'\n\n#start_date\nyf_df = yf.download(long_string, start=start_date, end=end_date, group_by='column', actions=True, interval='1wk')\nyf_df_close = yf_df['Close'].reset_index().sort_values('Date', ascending=False).round(2).fillna(method='ffill')\nyf_df_split = yf_df['Stock Splits'].reset_index().sort_values('Date', ascending=False).cumsum().round(2).fillna(method='ffill').replace(0, 1)\n\ndisplay(df_bolsa.loc[df_bolsa['ticker'] == 'B3SA3'])\n#display(yf_df)\ndisplay(yf_df_close.sort_values('Date'))\ndisplay(yf_df_split.sort_index())\n\nfor i in ['B3SA3.SA']:\n    yf_df_close[i] = yf_df_close[i] * (yf_df_split[i])\n\ndf_price = yf_df_close.sort_values('Date', ascending=True)\ndisplay(df_price)\n"

In [57]:
# Passo 2: juntar a carteira com os preços históricos.
df_bolsa2 = pd.merge(df_bolsa_historico, df_bolsa, on=['data', 'ticker'], how='left').fillna(0).sort_values(['ticker', 'data'])
df_bolsa2.head()

Unnamed: 0,data,ticker,preco,evento,qt,vl_total
0,2019-11-05,B3SA3,16.43,0,0.0,0.0
1,2019-11-06,B3SA3,16.3,0,0.0,0.0
2,2019-11-07,B3SA3,16.47,0,0.0,0.0
3,2019-11-08,B3SA3,16.23,0,0.0,0.0
4,2019-11-11,B3SA3,16.7,0,0.0,0.0


In [58]:
# Passo 3: calcular a quantidade acumulada.
for i in df_bolsa2['ticker'].sort_values().unique():
    df_bolsa2.loc[df_bolsa2['ticker'] == i, 'qt_acum'] = df_bolsa2.loc[df_bolsa2['ticker'] == i, 'qt'].cumsum(skipna=True)
    
df_bolsa_acum = df_bolsa2.loc[(df_bolsa2['qt'] != 0) | (df_bolsa2['qt_acum'] != 0)]

display(df_bolsa_acum.head(2))

Unnamed: 0,data,ticker,preco,evento,qt,vl_total,qt_acum
182,2020-08-03,B3SA3,21.3,compra_ou_venda,100.0,6474.0,100.0
183,2020-08-04,B3SA3,21.07,0,0.0,0.0,100.0


In [148]:
# Passo 4: corrigir preço histórico por agrupamento/desdobramento de ações.
def custom_price_adjustment_for_split(df):
    df = df.sort_values(['ticker', 'data'], ascending=False)
    for i in df['ticker'].unique():
        array_event = df.loc[df['ticker'] == i, 'evento']

        # Caso 1: não tem spit.
        if len(array_event[array_event == 'split']) == 0:
            df.loc[df_bolsa_acum['ticker'] == i, 'preco_fix'] = df.loc[df['ticker'] == i, 'preco']

        # Caso 2: tem split(s).
        else:
            list_index = array_event[array_event == 'split'].index.tolist()
            for j in list_index:
                df.loc[j-1, 'multiplicador_split'] = df.loc[j, 'qt_acum'] / df.loc[j - 1, 'qt_acum']
        df.loc[df['ticker'] == i, 'multiplicador_split'] = df.loc[df['ticker'] == i, 'multiplicador_split'].fillna(0).cumsum()

    # Corrigindo os preços por splits.
    df['multiplicador_split'] = df['multiplicador_split'].replace(0, 1)
    df['preco_fix'] = df['preco'] * df['multiplicador_split']
    df['vl_atualizado_fix'] = df['preco_fix'] * df['qt_acum']
    df = df.sort_values(['ticker', 'data'], ascending=True)
    return df

#with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#    display(df_preco_fix)

In [149]:
df_bolsa_split = custom_price_adjustment_for_split(df_bolsa_acum)
df_bolsa_split.head()

Unnamed: 0,data,ticker,preco,evento,qt,vl_total,qt_acum,multiplicador_split,preco_fix,vl_atualizado_fix
182,2020-08-03,B3SA3,21.3,compra_ou_venda,100.0,6474.0,100.0,3.0,63.9,6390.0
183,2020-08-04,B3SA3,21.07,0,0.0,0.0,100.0,3.0,63.21,6321.0
184,2020-08-05,B3SA3,21.17,0,0.0,0.0,100.0,3.0,63.51,6351.0
185,2020-08-06,B3SA3,21.35,0,0.0,0.0,100.0,3.0,64.05,6405.0
186,2020-08-07,B3SA3,20.85,0,0.0,0.0,100.0,3.0,62.55,6255.0


In [150]:
# Passo 5: criar uma coluna identificando o último dia do mês de cada ativo (útil para os plots)
df_bolsa_split = lib.create_column_last_day(df_bolsa_split)
df_bolsa_split.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_right['year'] = pd.to_datetime(df_right['data']).dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_right['month'] = pd.to_datetime(df_right['data']).dt.month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_right['day'] = pd.to_datetime(df_right['data']).dt.day


Unnamed: 0,data,ticker,preco,evento,qt,vl_total,qt_acum,multiplicador_split,preco_fix,vl_atualizado_fix,dummy_ultimo_dia,dt_competencia
0,2020-08-03,B3SA3,21.3,compra_ou_venda,100.0,6474.0,100.0,3.0,63.9,6390.0,0.0,2020-08-01
1,2020-08-04,B3SA3,21.07,0,0.0,0.0,100.0,3.0,63.21,6321.0,0.0,2020-08-01
2,2020-08-05,B3SA3,21.17,0,0.0,0.0,100.0,3.0,63.51,6351.0,0.0,2020-08-01
3,2020-08-06,B3SA3,21.35,0,0.0,0.0,100.0,3.0,64.05,6405.0,0.0,2020-08-01
4,2020-08-07,B3SA3,20.85,0,0.0,0.0,100.0,3.0,62.55,6255.0,0.0,2020-08-01


In [151]:
# Passo 6: criar tabela dinâmica customizada
df_plot = df_bolsa_split.loc[df_bolsa_split['dummy_ultimo_dia'] == 1]
df_plot, data_col = lib.custom_pivot_table(df_plot, col_value='vl_atualizado_fix')

df_plot.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['dt_competencia'] = pd.to_datetime(df['dt_competencia']).dt.date


Unnamed: 0,ticker,November/2019,December/2019,January/2020,February/2020,March/2020,April/2020,May/2020,June/2020,July/2020,...,February/2022,March/2022,April/2022,May/2022,June/2022,July/2022,August/2022,September/2022,October/2022,November/2022
0,B3SA3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4374.0,4713.0,3990.0,3831.0,3288.0,3327.0,3555.0,3915.0,4512.0,3765.0
1,BOVA11,0.0,0.0,0.0,0.0,2774.0,3088.4,3366.0,3664.8,3971.6,...,6425.1,6820.4,6100.6,6323.62,5605.0,5858.7,6243.97,6281.14,6616.85,6330.11
2,BPAN4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,CAML3,0.0,0.0,2697.0,2538.0,2337.0,2448.0,2979.0,3366.0,3693.0,...,2745.0,2895.0,2574.0,2877.0,2952.0,2745.0,3003.0,2847.0,3063.0,2793.0
4,CCRO3,5157.0,0.0,0.0,3266.0,2346.0,2470.0,2940.0,2900.0,2992.0,...,2352.0,2740.0,2482.0,2668.0,2506.0,2598.0,2758.0,2506.0,2592.0,2350.0


-----------

Print melhorado!

```python
with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
    display(df)
```

In [163]:
# Teste
a = ['a', 'b', 'c']
for index, value in enumerate(a):
    print(index, value)

0 a
1 b
2 c


In [166]:
a = ['a', 'b', 'c']
b = [4, 5, 6]    
for value_a, value_b in zip(a, b):
    print(value_a, value_b)

a 4
b 5
c 6
