In [1]:
import pandas as pd
import yfinance as yf

In [2]:
###Building the ticker for yfinance
df = pd.read_csv("../data/processed/stocks-basic-info.csv")

def get_main_ticker(tickers):
    tickers = tickers.split(";")
    
    for ticker in tickers:
        if ticker[4] == "4": return ticker
        
    for ticker in tickers:
        if ticker[4] == "3": return ticker
        
    return tickers[0]

def get_yf_ticker(tickers):
    ticker = get_main_ticker(tickers)
    return ticker + ".SA"

In [3]:
tickers = df["TICKERS"].iloc[1]

bbas3 = yf.Ticker(get_yf_ticker(tickers))

In [4]:
# get all stock info
bbas3.info

{'address1': 'EdifIcio Banco do Brasil',
 'address2': 'Quadra 5, Lote B Autarquias Norte Federal District',
 'city': 'Brasília',
 'state': 'DF',
 'country': 'Brazil',
 'phone': '55 80 0729 5285',
 'website': 'https://www.bb.com.br',
 'industry': 'Banks - Regional',
 'industryKey': 'banks-regional',
 'industryDisp': 'Banks - Regional',
 'sector': 'Financial Services',
 'sectorKey': 'financial-services',
 'sectorDisp': 'Financial Services',
 'longBusinessSummary': 'Banco do Brasil S.A., together with its subsidiaries, provides banking products and services for individuals, companies, and public sectors in Brazil and internationally. The company operates through Banking; Investments; Resource Management; Insurance, Pension, and Capitalization; Payments Method; and Other segments. The Banking segment offers various products and services, including deposits, credit operations, and other services to retail, wholesale, and government markets, as well as to micro-entrepreneurs and informal sec

In [5]:
# get historical market data
hist = bbas3.history(period="1y")

In [6]:
bbas3.history_metadata

{'currency': 'BRL',
 'symbol': 'BBAS3.SA',
 'exchangeName': 'SAO',
 'fullExchangeName': 'São Paulo',
 'instrumentType': 'EQUITY',
 'firstTradeDate': 946900800,
 'regularMarketTime': 1729022820,
 'hasPrePostMarketData': False,
 'gmtoffset': -10800,
 'timezone': 'BRT',
 'exchangeTimezoneName': 'America/Sao_Paulo',
 'regularMarketPrice': 26.47,
 'fiftyTwoWeekHigh': 0.0,
 'fiftyTwoWeekLow': 0.0,
 'regularMarketDayHigh': 0.0,
 'regularMarketDayLow': 0.0,
 'regularMarketVolume': 0,
 'longName': 'Banco do Brasil S.A.',
 'shortName': 'BRASIL      ON      NM',
 'chartPreviousClose': 24.41,
 'priceHint': 2,
 'currentTradingPeriod': {'pre': {'timezone': 'BRT',
   'start': 1729082700,
   'end': 1729083600,
   'gmtoffset': -10800},
  'regular': {'timezone': 'BRT',
   'start': 1729083600,
   'end': 1729108800,
   'gmtoffset': -10800},
  'post': {'timezone': 'BRT',
   'start': 1729108800,
   'end': 1729112400,
   'gmtoffset': -10800}},
 'dataGranularity': '1d',
 'range': '1y',
 'validRanges': ['1d',


In [7]:
bbas3.actions

Unnamed: 0_level_0,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-12-12 00:00:00-03:00,0.171153,0.0
2024-03-12 00:00:00-03:00,0.205018,0.0
2024-04-16 00:00:00-03:00,0.0,2.0
2024-06-12 00:00:00-03:00,0.457947,0.0
2024-06-14 00:00:00-03:00,0.20424,0.0
2024-08-22 00:00:00-03:00,0.474655,0.0
2024-09-12 00:00:00-03:00,0.186602,0.0


In [8]:
bbas3.dividends

Date
2023-12-12 00:00:00-03:00    0.171153
2024-03-12 00:00:00-03:00    0.205018
2024-06-12 00:00:00-03:00    0.457947
2024-06-14 00:00:00-03:00    0.204240
2024-08-22 00:00:00-03:00    0.474655
2024-09-12 00:00:00-03:00    0.186602
Name: Dividends, dtype: float64

In [24]:
bbas3.splits

Date
2024-04-16 00:00:00-03:00    2.0
Name: Stock Splits, dtype: float64

In [143]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

In [144]:
tk = get_main_ticker(tickers)
url = "https://www.dadosdemercado.com.br/acoes/" + tk + "/dividendos"
page = requests.get(url)

soup = BeautifulSoup(page.content, "html.parser")

In [145]:
cells = soup \
    .find(class_="table-container") \
    .find_all("td")
    
df_dividends = pd.DataFrame()
    
for row_idx in range(int(len(cells) / 5)):
    row_start = row_idx * 5
    row_end = row_start + 5
    row = cells[row_start : row_end]
    
    dividend = pd.DataFrame(
        {
        "DATE": row[2].text, 
        "VALUE": float(row[1].text.replace("*", "").replace(",", ".")), 
        "TYPE": row[0].text
        },
        index=[df_dividends.shape[0]]
        )
    
    df_dividends = pd.concat([df_dividends, dividend])
    
df_dividends["DATE"] = pd.to_datetime(df_dividends["DATE"], format="%d/%m/%Y")
df_dividends

Unnamed: 0,DATE,VALUE,TYPE
0,2024-09-11,0.186602,JCP
1,2024-08-21,0.314481,JCP
2,2024-08-21,0.151861,Dividendo
3,2024-08-21,0.005606,Dividendo
4,2024-08-21,0.002707,Dividendo
...,...,...,...
156,2007-12-25,0.071121,JCP
157,2007-11-13,0.042861,Dividendo
158,2007-09-21,0.067322,JCP
159,2007-08-14,0.019767,Dividendo


In [146]:
hist_clean = hist["Close"]
hist_clean.index = pd.to_datetime(hist_clean.index.date)
hist_clean.index.name = "DATE"
hist_clean = pd.merge(hist_clean, df_dividends.groupby("DATE")["VALUE"].sum(), how="left", on="DATE")
hist_clean = hist_clean.fillna(0)
hist_clean["Dividends_1y"] = hist_clean["VALUE"].rolling('365D').sum()
hist_clean = hist_clean.reset_index()
hist_clean["Dividend_Yield"] = hist_clean["Dividends_1y"] / hist_clean["Close"]
hist_clean

Unnamed: 0,DATE,Close,VALUE,Dividends_1y,Dividend_Yield
0,2022-10-04,17.311417,0.0,0.000000,0.000000
1,2022-10-05,17.589422,0.0,0.000000,0.000000
2,2022-10-06,17.902729,0.0,0.000000,0.000000
3,2022-10-07,17.942448,0.0,0.000000,0.000000
4,2022-10-10,17.682095,0.0,0.000000,0.000000
...,...,...,...,...,...
497,2024-09-30,27.180000,0.0,2.518359,0.092655
498,2024-10-01,27.080000,0.0,2.518359,0.092997
499,2024-10-02,27.150000,0.0,2.518359,0.092757
500,2024-10-03,26.900000,0.0,2.518359,0.093619
