# Analisador De Correlação Entre Ações

In [78]:
import yfinance as yf
from datetime import datetime, timedelta
import os
import pandas as pd

In [79]:
# Ticker symbols grouped as the "wallet"; presumably the assets already held.
# NOTE(review): not referenced by any function visible in this notebook.
WALLET = ["PETR4", "AGRO3"]
# Ticker symbols under evaluation; presumably candidates to be compared
# against WALLET. NOTE(review): also unused in the visible code.
STOCKS_UNDER_EVALUATION = ["ITUB4", "PETZ3"]
# Size, in days, of the look-back window that get_history requests when it
# has to download a history from scratch.
INTERVAL_DAYS = 365

In [80]:
def verify_file_data(start_date, end_date, history):
    """Decide whether a cached price history has to be refreshed.

    Args:
        start_date: Start of the requested window (``datetime``). It is
            returned unchanged unless the cache has rows and is outdated.
        end_date: End of the requested window (``datetime``).
        history: DataFrame read from the cache file. Its ``Date`` column is
            expected to hold ``'%Y-%m-%d'`` strings, and the last row is
            taken as the most recent one.

    Returns:
        A tuple ``(outdated_file, start_date)``. ``outdated_file`` is True
        when the newest cached row is more than 3 days older than
        ``end_date`` or when the cache has no rows at all. For an outdated
        cache with rows, ``start_date`` is the day after the newest cached
        row, so only the missing tail is downloaded.

    Raises:
        ValueError: If the last ``Date`` value is not in ``'%Y-%m-%d'`` form.
    """
    # A cache without rows has no "latest" date to compare against; report it
    # as outdated and keep the caller's start date so the whole requested
    # window gets downloaded instead of failing on iloc[-1].
    if history.empty:
        return True, start_date

    latest_date = datetime.strptime(history.iloc[-1].Date, '%Y-%m-%d')

    # Gaps of up to 3 days are tolerated before a refresh is triggered.
    outdated_file = (end_date - latest_date).days > 3

    if outdated_file:
        start_date = latest_date + timedelta(days=1)

    return outdated_file, start_date

In [81]:
def parse_result(data):
    """Turn a downloaded price table into a flat DataFrame rounded to 2 decimals.

    Args:
        data: DataFrame whose index is moved into a regular column by
            ``reset_index``.

    Returns:
        A new DataFrame in which every tuple column label has been replaced
        by its first element and all numeric values are rounded to two
        decimal places.
    """
    # NOTE(review): the round-trip through row records presumably converts
    # multi-level column labels into plain tuples so they can be renamed
    # below -- confirm before simplifying it away.
    records = data.reset_index().to_dict(orient='records')
    frame = pd.DataFrame(records)

    # Keep only the first element of tuple labels; leave other labels as-is.
    flat_names = {
        label: label[0] if isinstance(label, tuple) else label
        for label in frame.columns
    }

    return frame.rename(columns=flat_names).round(2)

In [82]:
def get_history_from_yahoo(start_date, end_date, asset):
    """Download the price history of ``asset`` through yfinance.

    Args:
        start_date: First day of the requested window (``datetime``).
        end_date: Last day of the requested window (``datetime``), passed to
            yfinance as ``end``.
        asset: Ticker symbol without suffix; ``".SA"`` is appended before
            the request is made.

    Returns:
        The downloaded table after being normalized by ``parse_result``.
    """
    date_format = '%Y-%m-%d'
    window = {
        'start': start_date.strftime(date_format),
        'end': end_date.strftime(date_format),
    }

    print('obtendo dados do "yahoo finances"')
    ticker = f"{asset}.SA"
    raw_prices = yf.download(ticker, **window)
    return parse_result(raw_prices)

In [83]:
def build_file_name(asset):
    """Return the relative path of the CSV cache file for ``asset``.

    The ticker is lower-cased, so ``"PETR4"`` maps to
    ``"csv_files/petr4_historical_data.csv"``.
    """
    ticker = asset.lower()
    return "csv_files/" + ticker + "_historical_data.csv"

In [84]:
def get_history(asset):
    """Return the price history of ``asset``, backed by a local CSV cache.

    If the cache file exists it is loaded and, when ``verify_file_data``
    reports it as outdated, only the missing tail is downloaded, appended to
    the file and to the returned frame. Otherwise the last ``INTERVAL_DAYS``
    days are downloaded and stored as a new cache file.

    Args:
        asset: Ticker symbol without the exchange suffix (e.g. ``"PETR4"``).

    Returns:
        DataFrame with the history. When a ``Date`` column is present it is
        normalized to ``'%Y-%m-%d'`` strings. The frame may be empty if
        nothing could be downloaded and no cache exists.
    """
    end_date = datetime.today()
    start_date = end_date - timedelta(days=INTERVAL_DAYS)

    file = build_file_name(asset)

    if os.path.exists(file):
        print('retornando arquivo local: ', file)
        historical_data = pd.read_csv(file)
        outdated_file, new_start_date = verify_file_data(start_date, end_date, historical_data)

        if outdated_file:
            new_historical_data = get_history_from_yahoo(new_start_date, end_date, asset)
            # An empty download has nothing to contribute; skipping it keeps
            # both the cache file and the in-memory frame untouched.
            if not new_historical_data.empty:
                new_historical_data.to_csv(file, mode='a', header=False, index=False)
                historical_data = pd.concat([historical_data, new_historical_data], ignore_index=True)

    else:
        historical_data = get_history_from_yahoo(start_date, end_date, asset)
        # Only persist real data: writing an empty download would leave a
        # cache file with no rows (and possibly no header) that later runs
        # would pick up instead of retrying the download.
        if not historical_data.empty:
            # The cache directory is not created anywhere else, so make sure
            # it exists before the first write.
            directory = os.path.dirname(file)
            if directory:
                os.makedirs(directory, exist_ok=True)
            historical_data.to_csv(file, index=False)

    # Cached rows carry dates as strings while fresh rows carry timestamps;
    # normalize everything to the same string format.
    if 'Date' in historical_data.columns:
        historical_data['Date'] = pd.to_datetime(historical_data['Date']).dt.strftime('%Y-%m-%d')

    return historical_data

In [85]:
def classify_correlation(correlation):
    """Map a correlation coefficient to a human-readable label.

    Args:
        correlation: Correlation coefficient as a float; may be NaN when the
            coefficient could not be computed.

    Returns:
        One of the following labels (in Portuguese, as shown to the user):
        ``"Forte correlação negativa"`` for values <= -0.7,
        ``"Fraca correlação negativa"`` for (-0.7, -0.3],
        ``"Não correlacionado"`` for (-0.3, 0.3),
        ``"Fraca correlação positiva"`` for [0.3, 0.7),
        ``"Forte correlação positiva"`` for values >= 0.7, and
        ``"Correlação indefinida"`` when the value is NaN.
    """
    import math

    # NaN compares False against every threshold, so without this guard it
    # would fall through to the last label and be reported as a strong
    # positive correlation.
    if math.isnan(correlation):
        return "Correlação indefinida"

    # Thresholds are checked in ascending order; each test only needs the
    # upper bound because lower values have already returned.
    if correlation <= -0.7:
        return "Forte correlação negativa"
    if correlation <= -0.3:
        return "Fraca correlação negativa"
    if correlation < 0.3:
        return "Não correlacionado"
    if correlation < 0.7:
        return "Fraca correlação positiva"
    return "Forte correlação positiva"

In [86]:
def calculate_correlation(asset1, asset2):
    """Compute and classify the correlation between two assets' closing prices.

    The histories of both assets are loaded with ``get_history``, joined on
    the dates they have in common, and the ``Close`` columns are correlated
    with pandas' ``Series.corr`` (Pearson by default).

    Args:
        asset1: Ticker symbol of the first asset.
        asset2: Ticker symbol of the second asset; may equal ``asset1``.

    Returns:
        Dict with ``'correlation'`` (float rounded to 2 decimals, NaN when it
        cannot be computed) and ``'result'`` (label from
        ``classify_correlation``).
    """
    history1 = get_history(asset1)
    history2 = get_history(asset2)

    # Position-based column names: naming the columns after the tickers makes
    # both sides identical when asset1 == asset2, and the merge would then
    # suffix them and break the lookups below.
    close1 = history1[['Date', 'Close']].rename(columns={'Close': 'close_1'})
    close2 = history2[['Date', 'Close']].rename(columns={'Close': 'close_2'})

    # Inner join (merge default): only dates present in both histories count.
    combined_data = pd.merge(close1, close2, on='Date')
    correlation = combined_data['close_1'].corr(combined_data['close_2'])
    correlation = float(round(correlation, 2))

    classification = classify_correlation(correlation)
    return {'correlation':  correlation, 'result': classification}

In [87]:
# Example run: correlation between the closing prices of CSNA3 and CPLE6.
calculate_correlation("CSNA3", "CPLE6")

retornando arquivo local:  csv_files/csna3_historical_data.csv
retornando arquivo local:  csv_files/cple6_historical_data.csv


{'correlation': 0.11, 'result': 'Não correlacionado'}

In [88]:
# get_history("PETZ3")
# get_history("PETR4")
# get_history("AGRO3")
# get_history("ITUB4")
# get_history("SANB11")