In [None]:
import sys
# When the first sys.path entry ends with '/src', put its parent directory in
# front so that `from src.calcEMA import *` below resolves and the helpers in
# this notebook, which build paths as sys.path[0] + '/src/data/...', find their files.
# Note: str.removesuffix requires Python 3.9+.
if sys.path[0].endswith('/src'):
    sys.path.insert(0, sys.path[0].removesuffix('/src'))
print('Path:', sys.path)

import yfinance as yf
import pandas as pd
import datetime
import pytz
import os

from src.calcEMA import *

In [None]:
def process(load_cache = True):
    """Load the price dataset, compute per-symbol indicators and save them.

    Steps:
      1. Load the dataset through ``load_dataset`` (cache or download).
      2. For every symbol returned by ``get_tickers``, pass that symbol's rows
         to ``run_calc_emas`` using the ``'adj_close'`` column.
      3. Print the rows of the most recent date via ``print_descontados``.
      4. Write the combined result to ``<sys.path[0]>/src/data/ibov.csv``
         using ``;`` as separator.

    Args:
        load_cache: forwarded to ``load_dataset``; when True an existing data
            file is reused instead of downloading.

    Returns:
        pd.DataFrame: the concatenated output of ``run_calc_emas`` for all
        symbols (an empty frame when there are no symbols).
    """
    data_file = sys.path[0] + '/src/data/ibov.csv'
    # NOTE(review): this path is written below with to_csv(sep=';') but is handed
    # to load_dataset, which reads an existing file with pd.read_json. Confirm the
    # cache format round-trips; a second run with load_cache=True may fail to parse.
    print('Carregando Dataset...')
    dataset = load_dataset(load_cache, data_file)
    print('Iniciando Calculo RSI e EMAs...')

    # run_calc_emas is not defined in this notebook (presumably provided by the
    # star import from src.calcEMA - verify). Collect the per-symbol frames and
    # concatenate once instead of re-copying a growing frame on each iteration.
    frames = [
        run_calc_emas(dataset[dataset['symbol'] == symbol], 'adj_close')
        for symbol in get_tickers()
    ]
    emas_dataset = pd.concat(frames) if frames else pd.DataFrame()

    print('Lista ordenada por *Ações com Desconto*:', emas_dataset.index.max())
    print_descontados(emas_dataset)

    emas_dataset.to_csv(
            data_file,
            sep=';',
            )
    return emas_dataset

In [None]:
def load_dataset(load_cache = True, data_file = './src/data/ibov.json') -> pd.DataFrame:
    """Return the price dataset, from a cached JSON file or a fresh download.

    Args:
        load_cache: when True and ``data_file`` exists, the file is read with
            ``pd.read_json(orient='records', date_unit='s')``; otherwise the
            data is downloaded.
        data_file: path of the cache file. The default is a relative path, so
            it is resolved against the current working directory.

    Returns:
        pd.DataFrame: the dataset. On the cache path it is indexed by the
        parsed ``date_time`` column (index named ``'date'``) and
        ``date_import`` is converted to datetime. On the download path it is
        whatever ``convert_downloaded_data`` returns.

    Side effects:
        Prints a ``DataFrame.info()`` summary of the loaded dataset.
    """
    if load_cache and os.path.exists(data_file):
        dataset = pd.read_json(data_file, orient='records', date_unit='s')
        dataset.index = pd.to_datetime(dataset['date_time'])
        dataset['date_import'] = pd.to_datetime(dataset['date_import'])
        dataset.index.name = 'date'
    else:
        # The ticker list is only needed for a download, so it is read here
        # rather than unconditionally (avoids a file read and a log line when
        # the cache is used).
        data = download_data('2013-01-01', get_tickers())
        dataset = convert_downloaded_data(data)

    # DataFrame.info() writes its summary itself and returns None; wrapping it
    # in print() would emit an extra "None" line.
    dataset.info()
    return dataset

In [None]:
def get_tickers() -> list:
    """Read the ticker list CSV and return its symbols with '.SA' appended.

    The file is ``<sys.path[0]>/src/data/tickers_list_to_analisys.csv`` and
    must contain a ``symbol`` column. The resolved path is printed.

    Returns:
        list: one entry per CSV row, each being the ``symbol`` value plus the
        ``'.SA'`` suffix.
    """
    filename = sys.path[0] + '/src/data/tickers_list_to_analisys.csv'
    print('Tickers List File:', filename)
    raw_symbols = pd.read_csv(filename)['symbol']
    suffixed = raw_symbols + '.SA'
    return suffixed.tolist()

In [None]:
def convert_downloaded_data(tickers_history: pd.DataFrame) -> pd.DataFrame:
    """Reshape a ticker-grouped (wide) price frame into one long frame.

    Args:
        tickers_history: frame whose columns are 2-tuples ``(symbol, field)``,
            e.g. the result of a download made with ``group_by='ticker'``.

    Returns:
        pd.DataFrame: the rows of every symbol stacked vertically, in the
        order the symbols first appear in the input columns, with

        * rows containing any missing value removed,
        * price columns renamed to ``open``/``high``/``low``/``close``/
          ``adj_close``/``volume``,
        * a ``symbol`` column,
        * the index named ``date`` and mirrored in a ``date_time`` column,
        * a ``date_import`` column holding the current UTC timestamp.
    """
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # output row order is reproducible (a set would make it vary between runs).
    symbols = list(dict.fromkeys(symbol for symbol, _ in tickers_history.columns))

    # Build every per-symbol frame first and concatenate once.
    frames = []
    for s in symbols:
        aux = tickers_history[s].copy()
        aux['symbol'] = s
        frames.append(aux)
    new_df = pd.concat(frames, axis=0) if frames else pd.DataFrame()

    new_df = new_df.dropna(how='any', axis=0).rename(columns={
        'Adj Close': 'adj_close',
        'Close': 'close',
        'High': 'high',
        'Low': 'low',
        'Open': 'open',
        'Volume': 'volume',
    })
    new_df.index.name = 'date'
    new_df['date_time'] = pd.to_datetime(new_df.index)
    # Timezone-aware "now" in UTC, stamped on every row.
    new_df['date_import'] = pd.Timestamp.now(tz='UTC')
    return new_df

In [None]:
def download_data(start_date='', tickers=[]) -> pd.DataFrame:
    """Download price history for ``tickers`` through ``yf.download``.

    Args:
        start_date: first date to request, as a string. An empty string means
            January 1st of the current year.
        tickers: symbols to request.

    Returns:
        The object returned by ``yf.download`` called with ``threads=20`` and
        ``group_by='ticker'``.
    """
    # Resolve the "empty means start of this year" default up front.
    start_date = (
        str(datetime.datetime.today().year) + '-01-01'
        if start_date == ''
        else start_date
    )

    print('Baixando dados [start_date]: ' + start_date)
    print('Symbols: ', tickers)
    return yf.download(tickers, start=start_date, threads=20, group_by='ticker')

In [None]:
def print_descontados(df: pd.DataFrame):
    """Print the rows of the latest index value, lowest ``ema_200p_diff`` first.

    Args:
        df: frame with an ``ema_200p_diff`` column; rows whose index equals
            ``df.index.max()`` are selected.
    """
    latest = df.index.max()
    latest_rows = df.loc[df.index == latest]
    print(latest_rows.sort_values('ema_200p_diff'))

## Validando Código

In [None]:
# Run the full pipeline with the default load_cache=True and keep the result
# for the inspection cells below.
dataset = process()

In [None]:
# Number of non-null values in each column, per symbol.
dataset.groupby(by='symbol').count()

In [None]:
# Export to 'teste.csv' (';'-separated) in the current working directory.
# index=False leaves the index out of the file.
dataset.to_csv('teste.csv', sep=';', index=False)

In [None]:
# Element-wise missing-value mask with the same shape as `dataset`.
# NOTE(review): `dataset.isna().sum()` would give a per-column summary instead
# of displaying the whole boolean frame.
dataset.isna()

# Testando RSI

In [24]:
def cRsi(df: pd.DataFrame, close_price='close', window=14):
    """Return a copy of ``df`` with a Relative Strength Index column ``rsi``.

    Computation, row by row on the ``close_price`` column:

    * ``change``: difference to the previous row;
    * ``gain``: ``change`` with negative values replaced by 0;
    * ``loss``: negated ``change`` with positive values replaced by 0;
    * ``avg_gain`` / ``avg_loss``: ``rma(values, window)`` of gains and losses;
    * ``rsi = 100 - 100 / (1 + avg_gain / avg_loss)``.

    ``rma`` is not defined in this notebook; presumably it comes from the star
    import of ``src.calcEMA`` - verify.

    Args:
        df: input rows; not modified.
        close_price: name of the price column to use.
        window: window length passed to ``rma``.

    Returns:
        pd.DataFrame: copy of ``df`` plus ``rsi``. The intermediate columns
        (``change``, ``gain``, ``loss``, ``avg_gain``, ``avg_loss``, ``rs``)
        are removed from the result. If the computation raises, the error is
        printed and ``rsi`` is set to 0.0 for every row (best effort, no
        exception is propagated).
    """
    helper_cols = ['change', 'gain', 'loss', 'avg_gain', 'avg_loss', 'rs']
    aux = df.copy()
    try:
        aux['change'] = aux[close_price].diff()
        aux['gain'] = aux['change'].mask(aux['change'] < 0, 0.0)
        # Positive changes become -0.0 so that the negation yields 0.0.
        aux['loss'] = -aux['change'].mask(aux['change'] > 0, -0.0)
        aux['avg_gain'] = rma(aux['gain'].to_numpy(), window)
        aux['avg_loss'] = rma(aux['loss'].to_numpy(), window)

        aux['rs'] = aux['avg_gain'] / aux['avg_loss']
        aux['rsi'] = 100 - (100 / (1 + aux['rs']))
    except Exception as error:
        # DataFrame.get returns None for a missing column, so reporting the
        # failure cannot itself raise KeyError and hide the original error.
        print('Erro no calculo do RSI> ', df.get('symbol'), ' - Data: ', df.get('date_time'))
        print(error)
        aux['rsi'] = 0.0

    # Helper columns may be only partially present after a failure.
    return aux.drop(columns=helper_cols, errors='ignore')

In [None]:
# Load with load_dataset's default data_file ('./src/data/ibov.json'), a path
# relative to the current working directory; the file is read if it exists,
# otherwise the data is downloaded.
data2 = load_dataset(load_cache=True)
data2

In [28]:
# Compute the RSI for every ticker that has more rows than the RSI window and
# stack the results in `data22`.
rsi_window = 14

# Read the ticker list once (each get_tickers() call re-reads the CSV and logs).
tickers = get_tickers()
print(tickers)

rsi_frames = []
for symbol in tickers:
    # Filter this symbol's rows once and reuse them.
    symbol_rows = data2[data2['symbol'] == symbol]
    n_rows = symbol_rows['symbol'].count()
    print(symbol, ':', n_rows)
    if n_rows > rsi_window:
        rsi_df = cRsi(symbol_rows, window=rsi_window)
        rsi_frames.append(rsi_df)
        print(rsi_df.tail(1)[['symbol', 'rsi']])

# Single concatenation instead of growing a DataFrame inside the loop.
data22 = pd.concat(rsi_frames) if rsi_frames else pd.DataFrame()
data22

Tickers List File: /home/marcelo/des/analise_ativos_mercado_financeiro/src/data/tickers_list_to_analisys.csv
['RRRP3.SA', 'ALPA4.SA', 'ABEV3.SA', 'AMER3.SA', 'ARZZ3.SA', 'ASAI3.SA', 'AZUL4.SA', 'B3SA3.SA', 'BPAN4.SA', 'BBSE3.SA', 'BRML3.SA', 'BBDC3.SA', 'BBDC4.SA', 'BRAP4.SA', 'BBAS3.SA', 'BRKM5.SA', 'BRFS3.SA', 'BPAC11.SA', 'CRFB3.SA', 'CCRO3.SA', 'CMIG4.SA', 'CIEL3.SA', 'COGN3.SA', 'CPLE6.SA', 'CSAN3.SA', 'CPFE3.SA', 'CMIN3.SA', 'CVCB3.SA', 'CYRE3.SA', 'DXCO3.SA', 'ECOR3.SA', 'ELET3.SA', 'ELET6.SA', 'EMBR3.SA', 'ENBR3.SA', 'ENGI11.SA', 'ENEV3.SA', 'EGIE3.SA', 'EQTL3.SA', 'EZTC3.SA', 'FLRY3.SA', 'GGBR4.SA', 'GOAU4.SA', 'GOLL4.SA', 'NTCO3.SA', 'SOMA3.SA', 'HAPV3.SA', 'HYPE3.SA', 'IGTI11.SA', 'ITSA4.SA', 'ITUB4.SA', 'JBSS3.SA', 'KLBN11.SA', 'RENT3.SA', 'LWSA3.SA', 'LREN3.SA', 'MGLU3.SA', 'MRFG3.SA', 'CASH3.SA', 'BEEF3.SA', 'MRVE3.SA', 'MULT3.SA', 'PCAR3.SA', 'PETR3.SA', 'PETR4.SA', 'PRIO3.SA', 'PETZ3.SA', 'QUAL3.SA', 'RADL3.SA', 'RAIZ4.SA', 'RDOR3.SA', 'RAIL3.SA', 'SBSP3.SA', 'SANB11.SA

Unnamed: 0_level_0,open,high,low,close,adj_close,volume,symbol,date_time,date_import,rsi
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-11-13,21.000000,21.350000,20.799999,20.799999,20.799999,470000.0,RRRP3.SA,2020-11-13,2023-06-25 16:09:44.713796+00:00,
2020-11-16,21.150000,21.250000,20.969999,21.000000,21.000000,175800.0,RRRP3.SA,2020-11-16,2023-06-25 16:09:44.713796+00:00,
2020-11-17,20.940001,20.990000,20.650000,20.990000,20.990000,256600.0,RRRP3.SA,2020-11-17,2023-06-25 16:09:44.713796+00:00,
2020-11-18,20.990000,21.070000,20.900000,20.900000,20.900000,96600.0,RRRP3.SA,2020-11-18,2023-06-25 16:09:44.713796+00:00,
2020-11-19,21.000000,21.040001,20.820000,20.900000,20.900000,84300.0,RRRP3.SA,2020-11-19,2023-06-25 16:09:44.713796+00:00,
...,...,...,...,...,...,...,...,...,...,...
2023-06-19,18.700001,19.270000,18.540001,19.000000,19.000000,4670900.0,YDUQ3.SA,2023-06-19,2023-06-25 16:09:44.713796+00:00,81.656978
2023-06-20,18.930000,19.459999,18.709999,19.459999,19.459999,5030300.0,YDUQ3.SA,2023-06-20,2023-06-25 16:09:44.713796+00:00,82.895336
2023-06-21,19.629999,21.180000,19.549999,20.350000,20.350000,12077100.0,YDUQ3.SA,2023-06-21,2023-06-25 16:09:44.713796+00:00,85.004688
2023-06-22,20.100000,20.270000,19.080000,20.030001,20.030001,6985900.0,YDUQ3.SA,2023-06-22,2023-06-25 16:09:44.713796+00:00,81.130657


In [25]:
# Spot-check the RSI for a single ticker.
# NOTE(review): `symbol` is not assigned in this cell; it relies on a value left
# in the kernel namespace (e.g. the loop variable of the RSI loop cell), so the
# ticker inspected depends on execution order.
__df = data2[data2['symbol'] == symbol]
print(__df['symbol'].count())
print(__df.tail(1))
rsi_df = cRsi(__df)
rsi_df

647
                 open  high    low      close  adj_close     volume    symbol  \
date                                                                            
2023-06-23  31.389999  31.4  30.73  31.059999  31.059999  5706700.0  RRRP3.SA   

            date_time                      date_import  
date                                                    
2023-06-23 2023-06-23 2023-06-25 16:09:44.713796+00:00  


Unnamed: 0_level_0,open,high,low,close,adj_close,volume,symbol,date_time,date_import,rsi
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-11-13,21.000000,21.350000,20.799999,20.799999,20.799999,470000.0,RRRP3.SA,2020-11-13,2023-06-25 16:09:44.713796+00:00,
2020-11-16,21.150000,21.250000,20.969999,21.000000,21.000000,175800.0,RRRP3.SA,2020-11-16,2023-06-25 16:09:44.713796+00:00,
2020-11-17,20.940001,20.990000,20.650000,20.990000,20.990000,256600.0,RRRP3.SA,2020-11-17,2023-06-25 16:09:44.713796+00:00,
2020-11-18,20.990000,21.070000,20.900000,20.900000,20.900000,96600.0,RRRP3.SA,2020-11-18,2023-06-25 16:09:44.713796+00:00,
2020-11-19,21.000000,21.040001,20.820000,20.900000,20.900000,84300.0,RRRP3.SA,2020-11-19,2023-06-25 16:09:44.713796+00:00,
...,...,...,...,...,...,...,...,...,...,...
2023-06-19,32.529999,32.990002,32.200001,32.779999,32.779999,3523100.0,RRRP3.SA,2023-06-19,2023-06-25 16:09:44.713796+00:00,56.970242
2023-06-20,32.700001,32.869999,32.049999,32.740002,32.740002,4954700.0,RRRP3.SA,2023-06-20,2023-06-25 16:09:44.713796+00:00,56.663526
2023-06-21,32.700001,32.939999,32.049999,32.639999,32.639999,4670300.0,RRRP3.SA,2023-06-21,2023-06-25 16:09:44.713796+00:00,55.853857
2023-06-22,32.090000,32.299999,31.150000,31.650000,31.650000,5925000.0,RRRP3.SA,2023-06-22,2023-06-25 16:09:44.713796+00:00,48.469946


In [None]:
# Most recent index value in the RSI results ("ultimo_dia" = "last day").
ultimo_dia = data22.index.max()

# Disabled: show only the rows of that last day.
# data22[data22.index == ultimo_dia]
data22.info()

In [None]:
# Direct download with the same yf.download arguments that download_data uses
# (threads=20, group_by='ticker'), starting at 2013-01-01.
# NOTE(review): download_data('2013-01-01', get_tickers()) would avoid duplicating this call.
data3 = yf.download(get_tickers(), start='2013-01-01',
                       threads=20, group_by='ticker')

In [None]:
# Display the raw download as returned by yf.download.
data3

In [None]:
# Reshape the raw download into the long format (one row per date and symbol).
new_data3 = convert_downloaded_data(data3)
new_data3

In [None]:
# Drop rows containing any missing value, in place.
# NOTE(review): convert_downloaded_data already calls dropna(how='any') before
# returning, so this is presumably a no-op - confirm and remove if so.
new_data3.dropna(how='any', axis=0, inplace=True)

new_data3

In [None]:
# Number of non-null values in each column, per symbol, for the fresh download.
new_data3.groupby(by='symbol').count()