## Imports
---

In [1]:
import os
from datetime import datetime, timedelta
from io import BytesIO
from urllib.request import Request, urlopen

import pandas as pd
import yfinance as yf

## Configuration
---

In [2]:
# Length of the price-history window in days: 18 months counted as 30 days each.
LOOKBACK_DAYS = 18 * 30

# Earliest date to request, formatted as YYYY-MM-DD and measured back from the
# moment this cell runs (so the value changes from one day to the next).
start_date = (datetime.now() - timedelta(days=LOOKBACK_DAYS)).strftime('%Y-%m-%d')

## Scraping
---

In [3]:
# Page whose first HTML table lists the BDRs; that table is scraped below.
url = "https://investnews.com.br/financas/veja-a-lista-completa-dos-bdrs-disponiveis-para-pessoas-fisicas-na-b3/"
# Send a browser-like User-Agent with the request
# (presumably the site rejects urllib's default agent - TODO confirm).
headers = {"User-Agent": "Mozilla/5.0"}

# The context manager closes the HTTP connection as soon as the body is read,
# and the timeout keeps the cell from hanging forever on an unresponsive server.
with urlopen(Request(url, headers=headers), timeout=30) as response:
    html = response.read()

# read_html expects a URL, path or file-like object; wrapping the downloaded
# bytes in BytesIO follows the pandas guidance for literal HTML input.
bdr_raw = pd.read_html(BytesIO(html))[0]

bdr_raw.head()

Unnamed: 0,0,1,2,3
0,EMPRESA,CÓDIGO,SETOR,PAÍS DE ORIGEM
1,3M,MMMC34,Indústria diversificada,EUA
2,AB INBEV,ABUD34,Bebidas,Bélgica
3,ABB LTD,A1BB34,Industrial,Switzerland
4,ABBOTT,ABTT34,Farmacêuticos,EUA


## Data processing
---

In [4]:
# The scraped table carries its real header as the first data row (row 0), so
# that row is promoted to column labels and the remaining rows are kept.
# The result is built without assigning to bdr_raw.columns, so the raw frame
# shown by the scraping cell stays exactly as it was displayed.
header_row = bdr_raw.iloc[0]
bdr = (
    bdr_raw.iloc[1:]
    .set_axis(header_row, axis=1)
    .reset_index(drop=True)
)

## Transform
---

In [5]:
# Clean the scraped codes before building the ticker symbols: a missing code
# would otherwise turn into the bogus ticker "nan.SA", and stray whitespace or
# repeated rows would produce invalid or duplicate symbols.
codigos = bdr['CÓDIGO'].dropna().astype(str).str.strip()
codigos = codigos[codigos != ""].unique()  # unique() keeps first-seen order

# Each code gets the ".SA" suffix used when querying yfinance below.
tickers = [f"{codigo}.SA" for codigo in codigos]

In [6]:
# Download daily data for every ticker from start_date onwards and keep only
# the "Close" columns (one column per ticker).
# auto_adjust is passed explicitly: yfinance warns that its default changed to
# True, so pinning it keeps "Close" meaning the same thing across library
# versions and silences that warning.
prices = yf.download(tickers, start=start_date, auto_adjust=True)['Close']

prices.head()

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  708 of 708 completed

72 Failed downloads:
['C1HI34.SA', 'N1LS34.SA', 'ISBC34.SA', 'K1CS34.SA', 'FLTC34.SA', 'P1IO34.SA', 'Z2EN34.SA', 'B1BL34.SA', 'INGG3.SA', 'A1MB34.SA', 'LBRN34.SA', 'E1XC34MONDE.SA', 'S2QS34.SA', 'I1NF34.SA', 'W1RK34.SA', 'A1MX34.SA', 'NUBR33.SA', 'D1RE34.SA', 'SLBC34.SA', 'B1GN34.SA', 'C1TX34.SA', 'M1RO34.SA', 'V1AR34.SA', 'RDSA34.SA', 'PTCH34.SA', 'BHPG34.SA', 'C1HL34.SA', 'F1RI34.SA', 'S2TO34.SA', 'A1BM34.SA', 'TWTR34.SA', 'F1RC34.SA', 'X1LN34.SA', 'A1YX34.SA', 'E1FX34.SA', 'T2PX34.SA', 'CAJI34.SA', 'FBOK34.SA', 'W1BK34.SA', 'C1SU34.SA', 'P1KI34.SA', 'L1IN34.SA', 'A1BB34.SA', 'ATVI34.SA', 'A2MC34.SA', 'A1NT34.SA', 'S1PL34.SA', 'E1NI34.SA', 'L1BT34.SA', 'D1IS34.SA', 'C1XO34.SA', 'P1BC34.SA', 'C1ER34.SA', 'U1NL34.SA', 'TIFF34.SA', 'RDSA35.SA', 'C1OU34.SA', 'CRHP34.SA', 'LSXM34.SA', 'DCVY35.SA', 'A1LX34.SA', 'L1BT35.SA', 'C1EO34.SA', 'LSXM35.SA', 'E2PD34.SA', 'A1UA34.SA', 'M1XI34.SA', 'S1GE34.SA', 'A2VL34.SA', 'L1

Ticker,A1AP34.SA,A1BB34.SA,A1BM34.SA,A1CR34.SA,A1DI34.SA,A1DM34.SA,A1EE34.SA,A1EG34.SA,A1EN34.SA,A1EP34.SA,...,XRXB34.SA,YUMR34.SA,Z1BH34.SA,Z1BR34.SA,Z1IO34.SA,Z1OM34.SA,Z1TO34.SA,Z1TS34.SA,Z2EN34.SA,Z2LL34.SA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-12-04,16.713531,,,40.032116,406.985931,357.412659,185.639633,26.343971,244.749222,186.937866,...,60.927998,307.691071,170.943161,39.240002,162.140198,13.35,25.921345,55.953598,,15.76
2023-12-05,15.989411,,,40.032116,444.809601,355.9729,185.639633,26.343971,245.715652,186.937866,...,60.927998,307.691071,170.943161,39.240002,162.140198,13.32,25.921345,54.644051,,15.76
2023-12-06,16.527609,,,40.032116,444.809601,347.694489,185.639633,26.842133,245.715652,188.521683,...,60.927998,307.691071,170.943161,39.240002,162.140198,14.08,25.921345,55.060722,,15.76
2023-12-07,16.801603,,,43.593822,444.809601,352.305481,185.639633,26.842133,245.715652,188.521683,...,60.927998,307.691071,170.943161,39.240002,162.140198,14.11,25.921345,55.179771,,15.76
2023-12-08,16.772245,,,41.506889,451.149872,353.00592,185.639633,26.842133,245.715652,188.905655,...,60.927998,307.691071,170.943161,39.240002,162.140198,14.23,25.921345,55.953598,,15.76


In [7]:
# Discard every ticker column that holds no data at all (all values missing),
# such as the tickers whose download failed.
prices = prices.dropna(how='all', axis='columns')

## Load
---

In [8]:
# The results go to an "output" folder one level above the working directory;
# create it on first run and leave it alone if it is already there.
out_dir = os.path.join(os.pardir, "output")
os.makedirs(name=out_dir, exist_ok=True)

In [9]:
# Persist the price table as CSV inside the output folder.
file_path = os.path.join(out_dir, "bdr_prices.csv")
prices.to_csv(path_or_buf=file_path)

In [10]:
# Report where the CSV was written.
message = f"file saved successfully to: {file_path}"
print(message)

file saved successfully to: ../output/bdr_prices.csv
