# Webscraping B3 - Coleta da composição dos índices teóricos

<https://www.b3.com.br/pt_br/market-data-e-indices/indices/indices-amplos/indice-ibovespa-ibovespa-composicao-da-carteira.htm/>

In [4]:
import pandas as pd

In [5]:
class INDEX:
    IBOVESPA = 'ibov'
    IBRX_100 = 'ibrx'
    SMALL_CAPS = 'smll'
    DIVIDENDOS = 'idiv'
    FINANCEIRO = 'ifnc'
    INDUSTRIAL = 'indx'
    MATERIAIS_BASICOS = 'imat'
    UTILIDADE_PUBLICA = 'util'
    CONSUMO = 'icon'
    ENERGIA_ELETRIA = 'iee'
    IMOBILIARIO = 'imob'
    FIIS = 'ifix'
    MIDLARGE_CAPS = 'mlcx'
    COMMODITIES = 'icb'
    BDRS_GLOBAL = 'bdrx'
    SP500 = 'sp500'

In [16]:
def get_index_constituents(index: str = INDEX.IBOVESPA, to_list=False, yfinance_ticker=True):
    """
    Função que fara o wabscrap dos dados do índice selecionado

    :params
        index: O índice que se quer buscar a composição
        to_list: Marcar 'True' caso queira que a função retorna uma lista com os  tickers que
                 compõem o índice selecionado
        yfinance_ticker: Caso seja utilizado o YFinance para baixar os dados de cotação,
                         marcar 'True'

    return: Um dataframe com as informações dos tickers do índice selecionado ou uma lista
            somente com os tickers, a depender do parâmtro 'to_list'.
    """
    if str.lower(index) == INDEX.SP500:        
        overall = pd.read_html("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies")[0]
        ticker_list = overall.Symbol
        ticker_list = ticker_list.tolist()

        if yfinance_ticker:
            ticker_list = list(map(lambda x: x.replace('.', '-'), ticker_list))

        if to_list:
            return ticker_list
        
        return overall
    else:
        url = f'http://bvmf.bmfbovespa.com.br/indices/ResumoCarteiraTeorica.aspx?Indice={str.upper(index)}&idioma=pt-br)'
        overall = pd.read_html(url, decimal=',', thousands='.', index_col='Código')[0][:-1]
        ticker_list = overall.index.tolist()
        list(map(lambda x: x + '.SA', ticker_list))

        if yfinance_ticker:
            ticker_list = list(map(lambda x: x + '.SA', ticker_list))

        if to_list:
            return ticker_list

        # necessário retornar
        # [0], pois a tabela é o primeiro elemento da lista retornada
        #[:-1], para retornar todos os tickers da tabela
        return overall

# Ibovespa

In [17]:
ibov = get_index_constituents(INDEX.IBOVESPA)
ibov

Unnamed: 0_level_0,Ação,Tipo,Qtde. Teórica,Part. (%)
Código,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ABEV3,AMBEV S/A,ON,4355174839,2.953
ASAI3,ASSAI,ON NM,157635935,0.554
AZUL4,AZUL,PN N2,327283207,0.645
B3SA3,B3,ON NM,1930877944,5.167
BBAS3,BRASIL,ON ERJ NM,1283197221,1.760
...,...,...,...,...
VALE3,VALE,ON NM,2837320141,13.770
VIVT3,TELEF BRASIL,ON EJ,444219852,0.947
VVAR3,VIAVAREJO,ON NM,1595083594,0.951
WEGE3,WEG,ON ED NM,741148001,2.847


# IBRX 100

In [91]:
ibrx = get_index_constituents(INDEX.IBRX_100)
ibrx

Unnamed: 0_level_0,Ação,Tipo,Qtde. Teórica,Part. (%)
Código,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ABEV3,AMBEV S/A,ON,4355174839,2.544
ALPA4,ALPARGATAS,PN N1,168770200,0.253
ALSO3,ALIANSCSONAE,ON NM,127376805,0.123
AMAR3,LOJAS MARISA,ON NM,111476495,0.024
ASAI3,ASSAI,ON NM,157635935,0.477
...,...,...,...,...
VALE3,VALE,ON NM,5128642481,21.441
VIVT3,TELEF BRASIL,ON EJ,444219852,0.816
VVAR3,VIAVAREJO,ON NM,1595083594,0.819
WEGE3,WEG,ON ED NM,741148001,2.452


# Small caps

In [92]:
smll = get_index_constituents(INDEX.SMALL_CAPS)
smll

Unnamed: 0_level_0,Ação,Tipo,Qtde. Teórica,Part. (%)
Código,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AALR3,ALLIAR,ON NM,46716473,0.125
ABCB4,ABC BRASIL,PN N2,73745621,0.340
ALSO3,ALIANSCSONAE,ON NM,127376805,0.862
ALUP11,ALUPAR,UNT N2,143807403,0.989
AMAR3,LOJAS MARISA,ON NM,111476495,0.168
...,...,...,...,...
VIVA3,VIVARA S.A.,ON NM,93368726,0.661
VLID3,VALID,ON NM,82549842,0.176
VULC3,VULCABRAS,ON NM,74685291,0.159
WIZS3,WIZ S.A.,ON NM,77453769,0.149


# Concatenando índices

In [93]:
pd.concat([ibov, ibrx], keys=['IBOV', 'SMLL'], axis=1)

Unnamed: 0_level_0,IBOV,IBOV,IBOV,IBOV,SMLL,SMLL,SMLL,SMLL
Unnamed: 0_level_1,Ação,Tipo,Qtde. Teórica,Part. (%),Ação,Tipo,Qtde. Teórica,Part. (%)
Código,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
ABEV3,AMBEV S/A,ON,4355174839,2.953,AMBEV S/A,ON,4355174839,2.544
ASAI3,ASSAI,ON NM,157635935,0.554,ASSAI,ON NM,157635935,0.477
AZUL4,AZUL,PN N2,327283207,0.645,AZUL,PN N2,327283207,0.556
B3SA3,B3,ON NM,1930877944,5.167,B3,ON NM,2043531244,4.711
BBAS3,BRASIL,ON ERJ NM,1283197221,1.760,BRASIL,ON ERJ NM,1283197221,1.516
...,...,...,...,...,...,...,...,...
NEOE3,,,,,NEOENERGIA,ON NM,226115915,0.150
PSSA3,,,,,PORTO SEGURO,ON NM,93637072,0.171
RAPT4,,,,,RANDON PART,PN N1,177421406,0.095
SAPR11,,,,,SANEPAR,UNT N2,200991234,0.166
