In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import optuna
import os
import matplotlib.pyplot as plt
import random
import tensorflow as tf
from keras.models import Model
from keras.callbacks import History

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from joblib import dump
from typing import Callable, Dict, List

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = [10, 5]
plt.rc('figure', autolayout=True)

2024-01-12 08:34:34.375193: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from src.model import create_model
from src.callbacks import create_callbacks
from src.optimizer import optimizer
from src.variables import tickers, period, observation_window, SEED, n_trials_optuna, epochs, verbose

In [3]:
graphics: bool = True
verbose: int = 1
ticker = tickers[0]

In [4]:
random.seed(SEED)
np.random.seed(SEED)


print(f"training predictive model for the asset {ticker}...")

training predictive model for the asset ABEV3.SA...


In [5]:
data: pd.DataFrame = pd.read_csv(f'./results/data/{ticker}.csv', index_col='Date', parse_dates=True)
label: str = f"{ticker} period {data.index.min().strftime('%Y-%m-%d')} - {data.index.max().strftime('%Y-%m-%d')}"

In [6]:
label

'ABEV3.SA period 2014-01-09 - 2023-01-03'

In [7]:
df_train_valid: pd.DataFrame = data[data.index < pd.Timestamp(period['boundary'])][['Adj Close']]
df_test: pd.DataFrame = data[data.index >= pd.Timestamp(period['boundary'])][['Adj Close']]

In [8]:
df_train_valid

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2014-01-09,11.184703
2014-01-10,11.256698
2014-01-13,11.151981
2014-01-14,11.119260
2014-01-15,11.227573
...,...
2020-12-28,13.636804
2020-12-29,13.792456
2020-12-30,13.533036
2021-01-04,13.386031


In [9]:
df_test

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2021-01-06,13.948107
2021-01-07,13.878927
2021-01-08,14.294000
2021-01-11,13.922165
2021-01-12,14.527477
...,...
2022-12-27,13.798742
2022-12-28,13.827251
2022-12-29,13.798742
2023-01-02,13.485133


In [10]:
scaler: MinMaxScaler = MinMaxScaler()

In [11]:
scaled_df_train_valid: np.ndarray = scaler.fit_transform(df_train_valid)
scaled_df_test: np.ndarray = scaler.transform(df_test)

In [12]:
if not os.path.exists(f'./results/serialized objects/{ticker}'):
    os.makedirs(f'./results/serialized objects/{ticker}')

dump(scaler, f'./results/serialized objects/{ticker}/scaler - {label}.joblib')

['./results/serialized objects/ABEV3.SA/scaler - ABEV3.SA period 2014-01-09 - 2023-01-03.joblib']

In [13]:
if graphics:
    plt.title(f"Divisão de data Treino/Validação - {ticker}")
    plt.plot(df_train_valid.index, df_train_valid['Adj Close'], label = 'Dados de Treino e Validação') 
    plt.plot(df_test.index, df_test['Adj Close'], label = 'Dados de Teste') 
    plt.xticks(rotation=45)
    plt.legend(loc = 'best')

    if not os.path.exists(f'./results/graphics/{ticker}/{label}/Training'):
        os.makedirs(f'./results/graphics/{ticker}/{label}/Training')

    plt.savefig(f'./results/graphics/{ticker}/{label}/Training/Divisão de data Treino-Validação.png')
    plt.close()

In [14]:
X_train_valid: List[np.ndarray] = []
y_train_valid: List[np.ndarray] = []

for i in range(observation_window['stepsBack'], len(scaled_df_train_valid) - observation_window['stepsFoward']):
    X_train_valid.append(scaled_df_train_valid[i - observation_window['stepsBack']:i, 0])
    y_train_valid.append(scaled_df_train_valid[i:i + observation_window['stepsFoward'], 0])

X_train_valid: np.ndarray = np.array(X_train_valid)
y_train_valid: np.ndarray = np.array(y_train_valid)
X_train_valid: np.ndarray = np.reshape(X_train_valid, (X_train_valid.shape[0], X_train_valid.shape[1], 1))

X_train: np.ndarray
X_valid: np.ndarray
y_train: np.ndarray
y_valid: np.ndarray
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_valid,
    y_train_valid,
    test_size = 0.2,
    shuffle = True,
    random_state = SEED
)

In [15]:
X_test: List[np.ndarray] = []
y_test: List[np.ndarray] = []

for i in range(observation_window['stepsBack'], len(scaled_df_test) - observation_window['stepsFoward']):
    X_test.append(scaled_df_test[i - observation_window['stepsBack']:i, 0])
    y_test.append(scaled_df_test[i:i + observation_window['stepsFoward'], 0])

X_test: np.ndarray = np.array(X_test)
y_test: np.ndarray = np.array(y_test)
X_test: np.ndarray = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [16]:
if not os.path.exists(f'./sqlite/{ticker}'):
    os.makedirs(f'./sqlite/{ticker}')

In [17]:
study: optuna.study.Study = optuna.create_study(
    study_name=label,
    storage=f"sqlite:///./sqlite/{ticker}/{label}.db",
    load_if_exists=True,
    direction='minimize'
)

[I 2024-01-12 08:34:49,459] Using an existing study with name 'ABEV3.SA period 2014-01-09 - 2023-01-03' instead of creating a new one.


In [None]:
study.optimize(
    lambda trial: optimizer(
        trial,
        create_model,
        create_callbacks,
        X_train,
        y_train,
        X_valid,
        y_valid,
        ticker,
        label,
        epochs,
        observation_window,
        verbose
    ),
    n_trials = n_trials_optuna
)

2024-01-12 08:35:05.729014: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:65:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-01-12 08:35:05.775541: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:65:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-01-12 08:35:05.775610: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:65:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-01-12 08:35:05.779661: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:65:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-01-12 08:35:05.779740: I tensorflow/compile

Epoch 1/3


2024-01-12 08:35:17.903361: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600
2024-01-12 08:35:18.484627: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2024-01-12 08:35:18.507152: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f426802e270 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-01-12 08:35:18.507212: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3070, Compute Capability 8.6


In [18]:
model: Model = create_model(
    optim = study.best_params['optim'],
    layers = study.best_params['layers'],
    n_lstm = study.best_params['n_lstm'],
    dropoutFoward = study.best_params['dropoutFoward'],
    stepsBack = observation_window['stepsBack'],
    stepsFoward = observation_window['stepsFoward']
)

ValueError: Record does not exist.

In [None]:
history: History = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=study.best_params['batch_size'],
    validation_data = (X_valid, y_valid),
    callbacks = create_callbacks(ticker, label, False, verbose)
)

In [None]:
model.save(f'./results/trained models/{ticker}/{label}.h5')

In [None]:
if graphics:
    for i in range(observation_window['stepsBack'], len(scaled_df_test) - observation_window['stepsFoward']):
        X_temp: np.ndarray = scaled_df_test[i - observation_window['stepsBack']:i, 0]
        y_temp: np.ndarray = scaled_df_test[i:i + observation_window['stepsFoward'], 0]

        scaled_return_forecast: np.ndarray = model.predict(X_temp.reshape(1, -1, 1))

        adj_close_stepback: np.ndarray = scaler.inverse_transform(X_temp.reshape(-1, 1))
        adj_close_real: np.ndarray = scaler.inverse_transform(y_temp.reshape(-1, 1))
        adj_close_forecast: np.ndarray = scaler.inverse_transform(scaled_return_forecast)

        pd_adj_close_stepback: pd.Series = pd.Series(
            adj_close_stepback.reshape(-1),
            index=df_test.index[i - observation_window['stepsBack']:i])

        pd_adj_close_forecast: pd.Series = pd.Series(
            adj_close_forecast.reshape(-1),
            index=df_test.index[i:i + observation_window['stepsFoward']])

        pd_adj_close_real: pd.Series = pd.Series(
            adj_close_real.reshape(-1),
            index=df_test.index[i:i + observation_window['stepsFoward']])

        # Plot
        plt.title(f"Previsão de {ticker} - {pd_adj_close_stepback.index[-1].strftime('%Y-%m-%d')}")
        plt.plot(pd_adj_close_stepback, label = 'Adj Close (SteBack)') 
        plt.plot(pd_adj_close_forecast, label = 'Adj Close Forecast') 
        plt.plot(pd_adj_close_real, color = 'orange', label = 'Adj Close') 
        plt.xticks(rotation=45)
        plt.legend(loc = 'best')

        if not os.path.exists(f'./results/graphics/{ticker}/{label}/Training'):
            os.makedirs(f'./results/graphics/{ticker}/{label}/Training')

        plt.savefig(f'./results/graphics/{ticker}/{label}/Training/{pd_adj_close_stepback.index[-1].strftime("%Y-%m-%d")}.png')
        plt.close()