# Analisador De Correlação Entre Ações

In [304]:
import yfinance as yf
from datetime import datetime, timedelta
import os
import pandas as pd

In [305]:
# Ticker symbols without an exchange suffix; get_history_from_yahoo appends
# ".SA" before querying Yahoo Finance.
# NOTE(review): presumably the assets already held in the portfolio; not
# referenced by the code visible in this part of the notebook -- confirm.
WALLET = ["PETR4", "AGRO3"]
# NOTE(review): presumably candidate assets to be compared against WALLET;
# also not referenced by the visible code -- confirm.
STOCKS_UNDER_EVALUATION = ["ITUB4", "PETZ3"]
# Length, in days, of the look-back window that get_history requests
# (counted backwards from today).
INTERVAL_DAYS = 365

In [306]:
def verify_file_data(start_date, end_date, history, max_staleness_days=3):
    """Decide whether a cached price history must be refreshed.

    Args:
        start_date: Start of the desired window. Returned unchanged when the
            cache is fresh or contains no rows.
        end_date: End of the desired window, as a ``datetime``.
        history: DataFrame holding the cached rows. Its ``Date`` column is
            parsed as ``YYYY-MM-DD`` text and the last row is treated as the
            most recent one.
        max_staleness_days: Largest gap, in whole days, tolerated between the
            last cached date and ``end_date`` before the cache counts as
            outdated. The default of 3 keeps the previous behaviour.

    Returns:
        A tuple ``(outdated_file, start_date)``. When the cache is outdated,
        ``start_date`` is the day after the last cached row, so only the
        missing tail has to be downloaded.

    Raises:
        ValueError: If the last ``Date`` value is not in ``YYYY-MM-DD`` form.
    """
    # A cache without rows (e.g. a header-only CSV) has no "latest" date;
    # treat it as outdated and ask for the whole original window instead of
    # failing with IndexError on iloc[-1].
    if history.empty:
        return True, start_date

    latest_date_str = history.iloc[-1].Date
    latest_date = datetime.strptime(latest_date_str, '%Y-%m-%d')

    difference = (end_date - latest_date).days
    outdated_file = difference > max_staleness_days

    if outdated_file:
        # Resume right after the last row already stored.
        start_date = latest_date + timedelta(days=1)

    return outdated_file, start_date

In [307]:
def parse_result(data):
    """Flatten a downloaded price table into a plain, rounded DataFrame.

    The index of ``data`` is turned into a regular column, the frame is
    rebuilt from its row records, every tuple column label is replaced by
    its first element, and values are rounded to two decimals.

    NOTE(review): the tuple labels presumably come from the two-level
    columns yfinance can return -- confirm against the installed version.

    Args:
        data: DataFrame to normalise (here, the result of ``yf.download``).

    Returns:
        A new DataFrame with the former index as a column and the values
        rounded to two decimals.
    """
    records = data.reset_index().to_dict(orient='records')
    frame = pd.DataFrame(records)

    # Keep only the first element of tuple labels; leave other labels as-is.
    flat_names = {
        label: label[0] if isinstance(label, tuple) else label
        for label in frame.columns
    }

    return frame.rename(columns=flat_names).round(2)

In [308]:
def get_history_from_yahoo(start_date, end_date, asset):
    """Download price history for ``asset`` and normalise it.

    The ticker is queried with the ".SA" suffix appended, and the date
    bounds are passed to ``yf.download`` as ``YYYY-MM-DD`` strings.

    Args:
        start_date: Start of the window; must support ``strftime``.
        end_date: End of the window; must support ``strftime``.
        asset: Ticker symbol without the ".SA" suffix, e.g. "PETZ3".

    Returns:
        The downloaded data after being passed through ``parse_result``.
    """
    date_format = '%Y-%m-%d'
    window = {
        "start": start_date.strftime(date_format),
        "end": end_date.strftime(date_format),
    }

    print('obtendo dados do "yahoo finances"')
    raw = yf.download(f"{asset}.SA", **window)
    return parse_result(raw)

In [309]:
def build_file_name(asset):
    """Return the relative path of the CSV cache file for ``asset``.

    The ticker is lower-cased, so "PETZ3" maps to
    "csv_files/petz3_historical_data.csv".
    """
    ticker = asset.lower()
    return "csv_files/{}_historical_data.csv".format(ticker)

In [310]:
def get_history(asset):
    """Return the price history for ``asset``, using a local CSV cache.

    The requested window is the last ``INTERVAL_DAYS`` days up to today.
    If a cache file exists it is loaded, and when ``verify_file_data``
    reports it as outdated only the missing tail is downloaded, appended to
    the file and concatenated to the returned frame. Without a cache file the
    whole window is downloaded and written to a new file.

    NOTE(review): ``Date`` holds strings when the data comes from the cache
    file, but is returned exactly as ``get_history_from_yahoo`` produced it
    on a fresh download -- callers should not rely on a single dtype.

    Args:
        asset: Ticker symbol without the ".SA" suffix, e.g. "PETZ3".

    Returns:
        DataFrame with the cached and/or freshly downloaded rows.
    """
    end_date = datetime.today()
    start_date = end_date - timedelta(days=INTERVAL_DAYS)

    file = build_file_name(asset)

    if os.path.exists(file):
        print('retornando arquivo local: ', file)
        historical_data = pd.read_csv(file)
        outdated_file, new_start_date = verify_file_data(start_date, end_date, historical_data)

        if outdated_file:
            new_historical_data = get_history_from_yahoo(new_start_date, end_date, asset)

            # Nothing new (e.g. no trading days in the gap): leave the cache
            # file untouched instead of appending an empty block.
            if not new_historical_data.empty:
                # Rows are appended without a header, so they must follow the
                # column order already present in the file; otherwise values
                # would land under the wrong column names.
                new_historical_data = new_historical_data.reindex(columns=historical_data.columns)
                new_historical_data.to_csv(file, mode='a', header=False, index=False)
                historical_data = pd.concat([historical_data, new_historical_data], ignore_index=True)

            if 'Date' in historical_data.columns:
                # Cached rows carry text dates while downloaded rows may carry
                # timestamps; normalise everything to YYYY-MM-DD strings.
                historical_data['Date'] = pd.to_datetime(historical_data['Date']).dt.strftime('%Y-%m-%d')
    else:
        historical_data = get_history_from_yahoo(start_date, end_date, asset)

        # Do not cache an empty download: a later run would find a file with
        # no usable rows instead of retrying the download.
        if not historical_data.empty:
            # to_csv does not create missing folders, so make sure the cache
            # directory exists before the first write.
            directory = os.path.dirname(file)
            if directory:
                os.makedirs(directory, exist_ok=True)
            historical_data.to_csv(file, index=False)

    return historical_data

In [312]:
# Manual check: load (or refresh) the cached history for a single ticker and
# let the notebook display the resulting DataFrame.
get_history("PETZ3")
# Other tickers that can be loaded the same way:
# get_history("PETR4")
# get_history("AGRO3")
# get_history("ITUB4")

2023-12-13 16:48:59.546426 2024-12-12 16:48:59.546426 csv_files/petz3_historical_data.csv
retornando arquivo local:  csv_files/petz3_historical_data.csv


Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume
0,2023-12-13,4.15,4.42,4.44,4.12,4.21,13530900
1,2023-12-14,3.95,4.20,4.73,4.19,4.48,21645100
2,2023-12-15,3.69,3.93,4.25,3.90,4.24,27631000
3,2023-12-18,3.63,3.86,4.01,3.81,3.99,19765500
4,2023-12-19,3.53,3.76,3.92,3.75,3.89,23051400
...,...,...,...,...,...,...,...
246,2024-12-05,4.20,4.20,4.27,4.15,4.21,6549900
247,2024-12-06,3.99,3.99,4.22,3.99,4.20,6327700
248,2024-12-09,3.81,3.81,4.10,3.75,3.98,10485900
249,2024-12-10,4.10,4.10,4.14,3.80,3.90,10448200
