In [1]:
# Importando bibliotecas

import requests # Biblioteca de conexão para links da web
import pandas as pd # Biblioteca de manipulação de dataframes
import datetime as dt # Biblioteca de manipulação com datas
import yfinance as yf # Biblioteca de extração de dados de tickers
from bs4 import BeautifulSoup # Biblioteca de webscrapping

In [2]:
# Web scraping of tickers

url = 'https://www.infomoney.com.br/cotacoes/empresas-b3/' # Page the ticker list is extracted from

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the notebook on an HTTP error instead of parsing an
# error page as if it were the ticker listing.
page = requests.get(url, timeout = 30)

page.raise_for_status()

response = page.text # Raw HTML of the page

soup = BeautifulSoup(response, 'html.parser') # Parse the page content

tags = soup.find_all('a') # Every anchor element; filtered into tickers by tickers_fun

def tickers_fun(anchor_tags = None):
    """Yield a '.SA'-suffixed symbol for each anchor whose text looks like a ticker.

    An anchor is accepted when it has an ``href`` attribute and its text,
    with surrounding whitespace removed, is 5 to 7 characters long. The
    length is measured on the stripped text (the same string that is
    yielded), so padding around a ticker neither hides it nor lets a
    shorter string through.

    Parameters
    ----------
    anchor_tags : iterable, optional
        Objects exposing ``has_attr(name)`` and ``get_text(strip=...)``.
        When omitted, the module-level ``tags`` list is used.

    Yields
    ------
    str
        The stripped anchor text followed by ``'.SA'``.
    """

    if anchor_tags is None:

        anchor_tags = tags

    for tag in anchor_tags:

        if not tag.has_attr('href'):

            continue

        ticker = tag.get_text(strip = True)

        if 5 <= len(ticker) <= 7:

            yield ticker + '.SA'

tickers_gen = tickers_fun()

# dict.fromkeys keeps the first occurrence of each symbol in page order and
# drops repeats, so a ticker linked more than once on the page is not
# downloaded twice and does not produce clashing columns later on.
tickers = list(dict.fromkeys(tickers_gen))

tickers

['AMOB3.SA',
 'BHIA3.SA',
 'RZAT11.SA',
 'GRWA11.SA',
 'CRAA11.SA',
 'ZAMP3.SA',
 'HGAG11.SA',
 'BBGO11.SA',
 'AGRX11.SA',
 'PLCA11.SA',
 'RURA11.SA',
 'SNAG11.SA',
 'GCRA11.SA',
 'VCRA11.SA',
 'KNCA11.SA',
 'NCRA11.SA',
 'CPTR11.SA',
 'FGAA11.SA',
 'EGAF11.SA',
 'VGIA11.SA',
 'LSAG11.SA',
 'N2ET34.SA',
 'M1TA34.SA',
 'FOOD11.SA',
 'AERI3F.SA',
 'AERI3.SA',
 'ICBR3.SA',
 'DOTZ3F.SA',
 'DOTZ3.SA',
 'GOLL3.SA',
 'VIIA3F.SA',
 'ARML3.SA',
 'MLAS3.SA',
 'CBAV3.SA',
 'TTEN3.SA',
 'BRBI11.SA',
 'NINJ3.SA',
 'ATEA3.SA',
 'MODL4.SA',
 'MODL11.SA',
 'MODL3.SA',
 'VITT3.SA',
 'KRSA3.SA',
 'CXSE3.SA',
 'RIOS3.SA',
 'HCAR3.SA',
 'GGPS3.SA',
 'MATD3.SA',
 'ALLD3.SA',
 'BLAU3.SA',
 'ATMP3.SA',
 'ASAI3.SA',
 'JSLG3.SA',
 'CMIN3.SA',
 'ELMD3.SA',
 'ORVR3.SA',
 'OPCT3.SA',
 'WEST3.SA',
 'CSED3.SA',
 'BMOB3.SA',
 'JALL3.SA',
 'MBLY3.SA',
 'ESPA3.SA',
 'VAMO3.SA',
 'INTB3.SA',
 'CJCT11.SA',
 'BMLC11.SA',
 'RECR11.SA',
 'URPR11.SA',
 'DEVA11.SA',
 'MFAI11.SA',
 'NGRD3.SA',
 'AVLL3.SA',
 'RRRP3.SA',
 'ENJU

In [None]:
# Build a dataframe with the closing prices of the stocks

def ticker_data_fun():
    """Yield one two-column frame (``'Date'``, ``<ticker>``) per ticker with data.

    For every symbol in the module-level ``tickers`` list the full price
    history is requested from yfinance, the ``'Date'`` column is reduced to
    plain ``datetime.date`` values, and the ``'Close'`` column is renamed to
    the ticker symbol.

    Symbols for which the history comes back empty are skipped. Yahoo
    reports several of the scraped symbols as delisted or rejects
    ``period='max'`` for them; those are presumed to come back as empty
    frames, which would only contribute an all-NaN column.

    Yields
    ------
    pandas.DataFrame
        Columns ``['Date', ticker]``.
    """

    for ticker in tickers:

        ticker_data = yf.Ticker(ticker).history(period = 'max').reset_index()

        if ticker_data.empty:

            continue

        # Vectorised equivalent of calling .date() on every timestamp
        ticker_data['Date'] = ticker_data['Date'].dt.date

        ticker_data = ticker_data.rename(columns = {'Close': ticker})

        yield ticker_data[['Date', ticker]]

ticker_data_gen = ticker_data_fun()

# Index every per-ticker frame by date so that all of them can be aligned in
# one pass. Merging inside a loop re-copies the growing frame once per ticker.
# A date repeated inside a single frame is collapsed to its last row because
# concat cannot align on a non-unique index.
ticker_frames = [
    ticker_data.drop_duplicates(subset = 'Date', keep = 'last').set_index('Date')
    for ticker_data in ticker_data_gen
]

# Outer alignment on the date index: one row per date seen in any frame,
# NaN where a ticker has no value for that date. Rows are sorted by date and
# 'Date' is restored as the first column.
tickers_df = (
    pd.concat(ticker_frames, axis = 1)
    .sort_index()
    .rename_axis('Date')
    .reset_index()
)

tickers_df

AGRX11.SA: Period 'max' is invalid, must be one of ['1d', '5d']
NCRA11.SA: Period 'max' is invalid, must be one of ['1d', '5d']
AERI3F.SA: Period 'max' is invalid, must be one of ['1d', '5d']
$ICBR3.SA: possibly delisted; no timezone found
DOTZ3F.SA: Period 'max' is invalid, must be one of ['1d', '5d']
$GOLL3.SA: possibly delisted; no price data found  (1d 1926-01-30 -> 2025-01-05) (Yahoo error = "No data found, symbol may be delisted")
$VIIA3F.SA: possibly delisted; no timezone found
$ATEA3.SA: possibly delisted; no timezone found
$MODL4.SA: possibly delisted; no timezone found
$MODL11.SA: possibly delisted; no timezone found
$MODL3.SA: possibly delisted; no timezone found
$RIOS3.SA: possibly delisted; no timezone found
$HCAR3.SA: possibly delisted; no timezone found
$RRRP3.SA: possibly delisted; no timezone found
$CONX3.SA: possibly delisted; no timezone found
$PASS3.SA: possibly delisted; no timezone found
$BOAS3.SA: possibly delisted; no timezone found
SIMH3F.SA: Period 'max' is in

In [64]:
# Data cleaning

# Keep rows dated strictly after 2020-01-01, drop every column that still has
# a missing value in that window, drop any row with a missing value, and use
# the date as the index.
tickers_df2 = (
    tickers_df
    .loc[lambda prices: prices['Date'] > dt.datetime.strptime('2020-01-01', r'%Y-%m-%d').date()]
    .dropna(axis = 1)
    .dropna()
    .set_index('Date')
)

tickers_df2

Unnamed: 0_level_0,BHIA3.SA,ZAMP3.SA,M1TA34.SA,ATMP3.SA,BMLC11.SA,AZZA3.SA,CEDO4.SA,NFLX34.SA,NIKE34.SA,MCDC34.SA,...,TAEE4.SA,TAEE3.SA,TAEE11.SA,SBSP3.SA,RNEW11.SA,GEPA4.SA,GEPA3.SA,CMIG4.SA,CMIG3.SA,AFLT3.SA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-02,256.571564,17.289736,30.005892,8.92,67.328812,57.763344,8.310000,26.608400,38.175407,36.082245,...,6.551213,6.471365,19.695971,54.307034,12.40,28.527172,27.581211,3.734344,4.701401,9.163268
2020-01-03,251.103287,16.990776,30.179716,8.94,68.844032,56.940868,8.700000,26.415600,38.857666,36.344936,...,6.475840,6.586583,19.670427,53.374260,12.43,29.132526,27.581211,3.693989,4.740730,9.155110
2020-01-06,251.103287,17.170153,30.818016,9.10,68.514641,56.588375,8.420000,27.219999,38.998463,37.026852,...,6.500964,6.618588,19.791769,52.558086,12.31,29.854296,27.581211,3.745106,4.719553,9.146949
2020-01-07,254.821716,17.279770,31.074478,8.98,69.186607,56.850487,8.510000,26.944000,39.428421,37.014259,...,6.400465,6.554579,19.555471,51.661194,12.49,29.021940,27.581211,3.882319,4.946455,9.106152
2020-01-08,253.728058,17.170153,31.273947,9.19,69.239304,56.037041,8.510000,27.640200,39.262096,37.485657,...,6.318810,6.554579,19.357489,50.898830,12.58,29.865942,27.598454,3.989937,5.073519,9.089832
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-26,2.770000,2.290000,133.350006,1.11,97.989998,28.830000,23.290001,114.709999,47.209999,91.699997,...,11.090000,11.000000,32.840000,88.559998,2.92,29.510000,27.000000,11.050000,14.650000,6.840000
2024-12-27,2.830000,2.270000,132.169998,1.11,97.989998,29.139999,23.290001,113.139999,47.279999,90.879997,...,11.060000,11.080000,32.869999,88.290001,2.98,30.000000,27.000000,11.040000,14.790000,7.300000
2024-12-30,2.890000,2.260000,131.500000,1.22,97.989998,29.580000,23.290001,111.000000,46.340000,89.839996,...,11.070000,10.910000,32.860001,88.500000,2.95,29.750000,29.000000,11.110000,14.630000,7.150000
2025-01-02,2.840000,2.100000,131.479996,1.22,97.889999,29.139999,23.290001,108.940002,46.200001,90.070000,...,10.980000,10.940000,32.919998,88.470001,2.82,29.750000,31.000000,11.080000,14.640000,7.490000


In [74]:
# Build the dataframe of returns

# Each value is the price divided by the price in the previous row. The first
# row has no previous row to divide by, so it is sliced off.
rate_df = tickers_df2.div(tickers_df2.shift(1)).iloc[1:]

rate_df

Unnamed: 0_level_0,BHIA3.SA,ZAMP3.SA,M1TA34.SA,ATMP3.SA,BMLC11.SA,AZZA3.SA,CEDO4.SA,NFLX34.SA,NIKE34.SA,MCDC34.SA,...,TAEE4.SA,TAEE3.SA,TAEE11.SA,SBSP3.SA,RNEW11.SA,GEPA4.SA,GEPA3.SA,CMIG4.SA,CMIG3.SA,AFLT3.SA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-03,0.978687,0.982709,1.005793,1.002242,1.022505,0.985761,1.046931,0.992754,1.017872,1.007280,...,0.988495,1.017804,0.998703,0.982824,1.002419,1.021220,1.000000,0.989193,1.008365,0.999110
2020-01-06,1.000000,1.010557,1.021150,1.017897,0.995215,0.993809,0.967816,1.030452,1.003623,1.018762,...,1.003880,1.004859,1.006169,0.984708,0.990346,1.024775,1.000000,1.013838,0.995533,0.999109
2020-01-07,1.014808,1.006384,1.008322,0.986813,1.009808,1.004632,1.010689,0.989860,1.011025,0.999660,...,0.984541,0.990329,0.988061,0.982935,1.014622,0.972119,1.000000,1.036638,1.048077,0.995540
2020-01-08,0.995708,0.993656,1.006419,1.023385,1.000762,0.985691,1.000000,1.025839,0.995782,1.012736,...,0.987242,1.000000,0.989876,0.985243,1.007206,1.029082,1.000625,1.027720,1.025688,0.998208
2020-01-09,1.005172,0.994776,1.010160,0.973885,0.984777,0.988710,1.022327,1.000166,1.010855,1.019871,...,1.000994,0.987304,0.987463,1.015330,0.981717,0.998830,1.000000,0.991908,0.981515,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-26,0.971930,0.995652,1.008699,1.009091,1.031582,0.982618,1.100662,1.021460,0.985801,1.010802,...,1.009099,1.011029,1.007671,1.001810,0.973333,0.999661,1.000000,1.013372,1.012146,0.954881
2024-12-27,1.021661,0.991266,0.991151,1.000000,1.000000,1.010753,1.000000,0.986313,1.001483,0.991058,...,0.997295,1.007273,1.000913,0.996951,1.020548,1.016605,1.000000,0.999095,1.009556,1.067251
2024-12-30,1.021201,0.995595,0.994931,1.099099,1.000000,1.015100,1.000000,0.981085,0.980118,0.988556,...,1.000904,0.984657,0.999696,1.002379,0.989933,0.991667,1.074074,1.006341,0.989182,0.979452
2025-01-02,0.982699,0.929204,0.999848,1.000000,0.998980,0.985125,1.000000,0.981441,0.996979,1.002560,...,0.991870,1.002750,1.001826,0.999661,0.955932,1.000000,1.068966,0.997300,1.000684,1.047552
