In [83]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import os
import json
import datetime

from pmdarima.arima import ADFTest
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.stattools import adfuller
from pmdarima.arima import auto_arima

plt.style.use('fivethirtyeight')

In [84]:
def get_finance_data(ticker, period='max', interval='1d'):
    tkr = yf.Ticker(ticker)
    df = tkr.history(period=period, interval=interval)
    return df

def test_unit_root(s):
    adf = adfuller(s)
    return adf[0] > adf[4]['1%']

def stationary_test(s):
    adf_test = ADFTest(alpha=0.01)
    return not adf_test.should_diff(s)[1]

def get_arima_model(s, is_seasonal=False):
    is_stat = test_unit_root(s)
    arima_model = auto_arima(s, stationary=is_stat, start_p=0, start_d=0, start_q=0, max_p=10, max_d=10, max_q=10, start_P=0, start_D=0, 
                             start_Q=0, max_P=15, max_D=15, max_Q=15, m=15, seasonal=is_seasonal, error_action='warn', trace=True, 
                             suppress_warnings=True, stepwise=False, random_state=20, n_fits=50, n_jobs=-1)
    return arima_model

In [93]:
def run_arima_coach(yticker_list, days_force_update=0):

    n_steps = 2
    must_run = False
    hoje = datetime.datetime.now().strftime("%Y-%m-%d")
    
    if not os.path.isdir("files"):
        os.mkdir("files")
    
    for yticker in yticker_list:
        ticker = yticker
        if os.path.isfile("files/db.json"):
            with open('files/db.json','r+') as jfile:
                jdata = json.load(jfile)
                if (ticker in jdata.keys()):
                    print("Dados de "+ticker+" já existentes.")
                    if ("ARIMA" in jdata[ticker]):
                        print("Dados atulizados em "+jdata[ticker]["ARIMA"]["train_date"])
                        if ("train_date" in jdata[ticker]["ARIMA"]):
                            if (days_force_update > 0):
                                train_date = datetime.datetime.strptime(jdata[ticker]["ARIMA"]["train_date"], '%Y-%m-%d')
                                must_run = ((hoje - train_date).days) > days_force_update
                                print((hoje - train_date).days)
                        else:
                            must_run = True
                    else:
                        must_run = True
                else:
                    print("ticker "+ticker+" não encontrado")
                    must_run = True
                if must_run:
                    print("capturando dados do yFinance: "+yticker)
                    data = get_finance_data(yticker)
                    data.dropna(subset=['Low'], inplace=True)
                    train = data['Low'][:len(data)-n_steps+1]
                    test = data['Low'][-n_steps:]
                    jdata[ticker]={}
                    jdata[ticker].update({"yticker":yticker})
                    jfile.seek(0)
                    json.dump(jdata, jfile)
                    
        my_arima = []
        if "ARIMA" not in jdata[ticker]:
            print("rodando auto arima para "+ticker)
            arima_model = get_arima_model(train)
            my_arima.append(arima_model.order[0])
            my_arima.append(arima_model.order[1])
            my_arima.append(arima_model.order[2])
            my_arima.append(arima_model.seasonal_order[0])
            my_arima.append(arima_model.seasonal_order[1])
            my_arima.append(arima_model.seasonal_order[2])
            my_arima.append(arima_model.seasonal_order[3])
            with open('files/db.json','r+') as jfile:
                jdata[ticker].update({"ARIMA":{"parametros":my_arima,"train_date":hoje}})
                jfile.seek(0)
                json.dump(jdata, jfile)
        
        jfile.close()

In [94]:
teste = ["RNDP11.SA","OIBR3.SA","VILG11.SA","BBFI11B.SA","OIBR3.SA","PETR4.SA"]

run_arima_coach(teste, days_force_update=2)

Dados de RNDP11.SA já existentes.
Dados atulizados em 2021-01-03


TypeError: unsupported operand type(s) for -: 'str' and 'datetime.datetime'

In [33]:
data = get_finance_data("OIBR3.SA")
data.isnull().sum()

Open            0
High            0
Low             0
Close           0
Volume          0
Dividends       0
Stock Splits    0
dtype: int64

In [32]:
data.dropna(subset=["Low"], inplace=True)
print(data)

                 Open       High        Low      Close     Volume  Dividends  \
Date                                                                           
2002-06-06  61.394237  72.228542  61.394237  72.156342          1        0.0   
2002-06-07  69.339365  70.783951  69.339365  70.783951          0        0.0   
2002-06-10  68.617081  72.228554  68.617081  72.228554          3        0.0   
2002-06-11  72.156355  72.156355  70.061669  70.711761          0        0.0   
2002-06-12  69.700562  70.061661  68.617075  70.061661          2        0.0   
...               ...        ...        ...        ...        ...        ...   
2020-12-22   2.140000   2.170000   2.100000   2.110000  128774800        0.0   
2020-12-23   2.120000   2.190000   2.090000   2.160000  119614800        0.0   
2020-12-28   2.180000   2.230000   2.140000   2.220000  102316300        0.0   
2020-12-29   2.240000   2.260000   2.160000   2.180000  155718200        0.0   
2020-12-30   2.180000   2.210000   2.160