In [4]:
from sklearn.metrics import r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn import datasets, linear_model
import matplotlib.pyplot as plt
from datetime import datetime
import yfinance as yf
import pandas as pd
import numpy as np

SEED = 42
np.random.seed(SEED)
scaler = MinMaxScaler()
EMPRESA = 'EGIE3.SA'

In [48]:
from prophet import Prophet

def perform_prophet(df, df_teste):
    # Prepare the training data
    # print(f'{X_train = }')
    # print(f'{y_train = }')
    # train_data = pd.DataFrame({'ds': y_train.index, 'y': y_train.values})

    # Initialize and fit the Prophet model
    model = Prophet()
    model.fit(df)

    # Make predictions
    forecast = model.predict(df_teste)

    cd_lr = r2_score(df_teste['y'], forecast['yhat'].values)
    return (model, cd_lr)
    # # Return the predicted values
    # return forecast['yhat'].values

In [6]:
def tratar_dados(df):
    # Adicionando Médias Moveis de 5 e 21 dias
    df['mm5d'] = df['Close'].rolling(5).mean()
    df['mm21d'] = df['Close'].rolling(21).mean()
    df['Close'] = df['Close'].shift(-1)

    df.drop(columns=['Dividends','Stock Splits'], inplace=True)
    df.dropna(inplace=True)

    X = scaler.fit_transform(df.drop(columns=['Close']))
    y = df['Close']
    return df, X, y

In [7]:
def get_dados_empresa(empresa):
    dados_empresa = yf.Ticker(empresa).history(period='max')
    return tratar_dados(dados_empresa)

In [8]:
def split_treino_teste(X, y, qtd_validacao=0):
    qtd_linhas = len(X)
    qtd_linhas_treino = round(.75 * qtd_linhas)
    qtd_linhas_teste = qtd_linhas - qtd_validacao

    X_train = X[:qtd_linhas_treino]
    y_train = y[:qtd_linhas_treino]

    X_test = X[qtd_linhas_treino:qtd_linhas_teste]
    y_test = y[qtd_linhas_treino:qtd_linhas_teste]

    X_valid = X[qtd_linhas_teste:]
    y_valid = y[qtd_linhas_teste:]

    return (X_train, y_train, X_test, y_test, X_valid, y_valid)

In [9]:
def split_dados_empresa(empresa, qtd_validacao=0):
    dados, X, y = get_dados_empresa(empresa)
    X_train, y_train, X_test, y_test, X_valid, y_valid = split_treino_teste(X, y, qtd_validacao=qtd_validacao)
    return dados, X_train, y_train, X_test, y_test, X_valid, y_valid

In [55]:
def criar_treinar_modelo(empresa = None, X_train = None, y_train = None, X_test = None, y_test = None, X_valid = None, y_valid = None, modelo = None):
    if (X_train == None or y_train == None or X_test == None or y_test == None or X_valid == None or y_valid == None) and empresa == None:
        raise Exception("Deve-se passar algum parâmetro")

    if empresa != None:
        (_, X_train, y_train, X_test, y_test, X_valid, y_valid) = split_dados_empresa(empresa)

    df = pd.DataFrame({'ds': [pd.to_datetime(y).tz_localize(None) for y in y_train.index], 'y': y_train.values})
    df_teste = pd.DataFrame({'ds': [pd.to_datetime(y).tz_localize(None) for y in y_test.index], 'y': y_test.values})

    (lr, cd_lr) = perform_prophet(df, df_teste)
    # (nn, cd_nn) = perform_neural_network(X_train, y_train, X_test, y_test)
    print(f'{cd_lr = }')
    return lr


In [46]:

df_teste = pd.DataFrame({'ds': [pd.to_datetime(y).tz_localize(None) for y in y_test.index], 'y': y_test.values})

In [47]:
df_teste

array([[Timestamp('2018-12-05 00:00:00'), 25.19394302368164],
       [Timestamp('2018-12-06 00:00:00'), 25.341915130615234],
       [Timestamp('2018-12-07 00:00:00'), 25.068744659423828],
       ...,
       [Timestamp('2024-06-25 00:00:00'), 44.29999923706055],
       [Timestamp('2024-06-26 00:00:00'), 44.400001525878906],
       [Timestamp('2024-06-27 00:00:00'), 44.290000915527344]],
      dtype=object)

In [53]:
def prever_proximo_dia(modelo, empresa):
    dados = yf.Ticker(empresa).history(period='max')
    dados, X, y = tratar_dados(dados)
    dados = pd.DataFrame(X).tail(1)
    dados = dados.rename(columns={0: 'Open', 1: 'High', 2: 'Low', 3: 'Close', 4: 'Volume', 5: 'mm5d', 6: 'mm21d'})
    dados['ds'] = [pd.to_datetime('2024-07-19').tz_localize(None)]
    display(dados)
    return modelo.predict(dados)

In [56]:
(dados, X_train, y_train, X_test, y_test, X_valid, y_valid) = split_dados_empresa(EMPRESA,qtd_validacao=15)
modelo = criar_treinar_modelo(EMPRESA)

17:14:49 - cmdstanpy - INFO - Chain [1] start processing
17:14:51 - cmdstanpy - INFO - Chain [1] done processing


cd_lr = -6.1731176123851075


In [57]:

prever_proximo_dia(modelo, EMPRESA)

Unnamed: 0,Open,High,Low,Close,Volume,mm5d,ds
5579,1.0,0.99869,0.993616,0.025489,1.0,1.0,2024-07-19


Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2024-07-19,28.772767,28.018393,29.684418,28.772767,28.772767,0.09651,0.09651,0.09651,-0.056294,-0.056294,-0.056294,0.152805,0.152805,0.152805,0.0,0.0,0.0,28.869278


In [31]:
y_train.index[0]

Timestamp('2002-03-29 00:00:00-0300', tz='America/Sao_Paulo')

In [33]:
df = pd.DataFrame({'ds': [pd.to_datetime(y).tz_localize(None) for y in y_train.index], 'y': y_train.values})

In [34]:
df

Unnamed: 0,ds,y
0,2002-03-29,1.425817
1,2002-04-01,1.349091
2,2002-04-02,1.310729
3,2002-04-03,1.368273
4,2002-04-04,1.390651
...,...,...
4180,2018-11-28,24.954931
4181,2018-11-29,24.476883
4182,2018-11-30,24.641924
4183,2018-12-03,24.755741


In [None]:
y_pred = modelo.predict(X_valid)
# y_valid

# datas = y_valid.index
y_valid.reset_index(drop=True, inplace=True)


colunas = {
    'data': datas,
    'real': y_valid,
    'predicao': y_pred
}

colunas['real']
# plt.plot(colunas.items)

In [None]:
colunas = {
    'data': datas,
    'real': y_valid,
    'predicao': y_pred
}

plt.plot(colunas['data'], colunas['real'].shift(1), marker='o', label='real')
plt.plot(colunas['data'], colunas['predicao'], marker='o', label='predicao')
plt.legend()

In [None]:
prever_proximo_dia(modelo, EMPRESA)