In [238]:
import pandas as pd
import numpy as np
import yfinance as yf
import investpy as inv

In [239]:
br = inv.get_stocks(country="brazil")
br

Unnamed: 0,country,name,full_name,isin,currency,symbol
0,brazil,ABC BRASIL PN,Banco ABC Brasil SA,BRABCBACNPR4,BRL,ABCB4
1,brazil,BRASILAGRO ON,BrasilAgro - Co ON NM,BRAGROACNOR7,BRL,AGRO3
2,brazil,RUMO ON NM,RUMO Logistica Operadora Multimodal SA,BRRAILACNOR9,BRL,RAIL3
3,brazil,ALPARGATAS ON,Alpargatas SA,BRALPAACNOR0,BRL,ALPA3
4,brazil,ALPARGATAS PN,Alpargatas SA,BRALPAACNPR7,BRL,ALPA4
...,...,...,...,...,...,...
744,brazil,Integral Brei Reit,Fdo Inv Imob Fof Integral Brei Reit,BRIBFFCTF007,BRL,IBFF11
745,brazil,Vbi Cri,Fi Imobiliario Vbi Cri,BRCVBICTF001,BRL,CVBI11
746,brazil,Hedge Realty,Hedge Realty Devl Fdo Inv Imob Etf,BRHRDFCTF000,BRL,HRDF11
747,brazil,Rb Cap,Rb Cap Desenvolvimento Res Iii Fii,BRRSPDCTF006,BRL,RSPD11


In [240]:
# Reset the index to make Date a column
def transform_yf_data(data_stock):
    data_stock.reset_index(inplace=True)  
    data_stock['Date'] = data_stock['Date'].dt.strftime('%Y-%m-%d')
    data_stock.columns = data_stock.columns.droplevel(1)
    data_stock.columns.name = None 
    data_stock.set_index("Date", inplace = True)
    return data_stock


In [241]:
import glob

files = glob.glob("data/new_stocks/*.csv")
tickerss =[]
for file in files:
    # Extract the part after the last '/' and remove the '.csv' suffix
    ticker = file.split("\\")[-1][:-4]
    tickerss.append(ticker)
tickerss

['ABEV3.SA',
 'AZUL4.SA',
 'B3SA3.SA',
 'BBAS3.SA',
 'BBDC4.SA',
 'CMIG4.SA',
 'COGN3.SA',
 'CPLE6.SA',
 'CVCB3.SA',
 'FNOR11.SA',
 'GGBR4.SA',
 'HAPV3.SA',
 'ITSA4.SA',
 'ITUB4.SA',
 'LREN3.SA',
 'MGLU3.SA',
 'MRVE3.SA',
 'PDGR3.SA',
 'PETR3.SA',
 'PETR4.SA',
 'RAIL3.SA',
 'USIM5.SA',
 'VALE3.SA']

In [242]:

def fetch_stocks_above_volume(data, volume_threshold=10_000_000):
    # Get all the stocks
    # tickers = data['symbol'] + ".SA"
    tickers = data
    qualified_stocks = {}

    for ticker in tickers:
        try:
            data_stock = yf.download(ticker, start="2010-01-01", end="2024-11-15")
            data_stock = transform_yf_data(data_stock)
            # Filter for 2024 data
            data_stock_2024 = data_stock[data_stock.index > "2023-12-31"].copy()
            if data_stock_2024.empty:
                print(f"No 2024 data available for {ticker}")
                continue

            # Calculate average volume
            avg_volume = int(data_stock_2024['Volume'].mean())
            if avg_volume >= volume_threshold:
                print(f"{ticker} qualifies with avg volume: {avg_volume}")
                qualified_stocks[ticker] = data_stock
        except Exception as e:
            print(f"Failed to fetch {ticker}: {e}")

    return qualified_stocks

In [243]:

def calculate_volatility(ticker, data):
    # Filter for 2024 data
    data_stock = data[data.index > "2023-12-31"].copy()
    try:
        # Calculate log returns
        data_stock['Log Return'] = np.log(data_stock['Adj Close'] / data_stock['Adj Close'].shift(1))

        # Calculate annualized volatility
        volatility = np.std(data_stock['Log Return'].dropna()) * np.sqrt(252)
        return volatility
    except Exception as e:
        print(f"Failed to calculate volatility for {ticker}: {e}")
        return None


In [244]:
# Main workflow
volume_threshold = 10_000_000
stocks = fetch_stocks_above_volume(tickerss, volume_threshold)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


ABEV3.SA qualifies with avg volume: 27689635
AZUL4.SA qualifies with avg volume: 19454729


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


B3SA3.SA qualifies with avg volume: 42203145
BBAS3.SA qualifies with avg volume: 16670633


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

BBDC4.SA qualifies with avg volume: 38019779
CMIG4.SA qualifies with avg volume: 13622689
COGN3.SA qualifies with avg volume: 34866591



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


CPLE6.SA qualifies with avg volume: 14299732
CVCB3.SA qualifies with avg volume: 17802943
FNOR11.SA qualifies with avg volume: 10109470


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

GGBR4.SA qualifies with avg volume: 12141407
HAPV3.SA qualifies with avg volume: 64878583
ITSA4.SA qualifies with avg volume: 21161116



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


ITUB4.SA qualifies with avg volume: 23148391
LREN3.SA qualifies with avg volume: 16791762


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

MGLU3.SA qualifies with avg volume: 18905270
MRVE3.SA qualifies with avg volume: 15100688
PDGR3.SA qualifies with avg volume: 16484622



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


PETR3.SA qualifies with avg volume: 10877329
PETR4.SA qualifies with avg volume: 37207772
RAIL3.SA qualifies with avg volume: 11783735


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

USIM5.SA qualifies with avg volume: 13736536
VALE3.SA qualifies with avg volume: 23347270





In [245]:
import os
import json

results = []

for ticker, stock in stocks.items():
    vol = calculate_volatility(ticker, stock)
    if vol is not None:
        results.append({"Stock": ticker, "Volatility": vol})
        file_path = os.path.join("data/new_stocks/", f"{ticker}.csv")
        stock.to_csv(file_path)

file_path = "data/new_stocks/volatility.json"
with open(file_path, "w") as f:
    json.dump(results, f)
# Convert to DataFrame for better representation
df_results = pd.DataFrame(results)
print(df_results)

        Stock  Volatility
0    ABEV3.SA    0.178437
1    AZUL4.SA    0.753509
2    B3SA3.SA    0.270644
3    BBAS3.SA    0.160384
4    BBDC4.SA    0.278162
5    CMIG4.SA    0.250826
6    COGN3.SA    0.549224
7    CPLE6.SA    0.187297
8    CVCB3.SA    0.589290
9   FNOR11.SA    0.629313
10   GGBR4.SA    0.279886
11   HAPV3.SA    0.387972
12   ITSA4.SA    0.168577
13   ITUB4.SA    0.173786
14   LREN3.SA    0.405542
15   MGLU3.SA    0.583931
16   MRVE3.SA    0.461754
17   PDGR3.SA    2.373320
18   PETR3.SA    0.281009
19   PETR4.SA    0.254988
20   RAIL3.SA    0.244958
21   USIM5.SA    0.480460
22   VALE3.SA    0.226241
