In [7]:
# Carrega as variáveis de embiente usadas pelo notebook. Estas variáveis devem ser criadas no arquivo .env, localizada no mesmo diretório do notebook
# As seguintes variáveis devem ser criadas:
# - export STR_CON=<string de conexão com o banco de dados relacional>
# - export RAW_DATA_PATH=<caminho onde os arquivos contendo o histórico de preço de cada ativo será armazenado>
# - export DATASET_PATH=<caminho onde os arquivos contendo o histórico de preço e os indicadores técnicos serão armazenados>
# - export MODELS_PATH=<caminnho onde os modelos treinados serão armazenados>
# - export TRAIN_DATASET=<caminho onde os datasets estandarizados e formatados para o treinamento serão armazenados>
%load_ext dotenv
%dotenv

# Garante que os scripts Python sejam recarregados em cada execução do notebook.
%load_ext autoreload
%autoreload 2

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
import pandas as pd
import numpy as np
from data_preparation import PreProcess

def _first_column_as_array(frame):
    """Stack the first-column entry of every row of `frame` into one NumPy array."""
    return np.array([row[0] for row in frame.to_numpy()])


# Read the train and test splits through the project's PreProcess helper.
preprocess = PreProcess()
df_train = preprocess.read_dataset_from_parquet(
    "data/train_dataset/indicadores_com_sinais/train_data.parquet"
)
df_test = preprocess.read_dataset_from_parquet(
    "data/train_dataset/indicadores_com_sinais/test_data.parquet"
)

# Only the rows whose label equals 1 feed the autoencoder arrays.
# NOTE(review): the first column is presumably the 'series' column — confirm.
positive_train = df_train.loc[df_train.label == 1]
positive_test = df_test.loc[df_test.label == 1]
X_Train = _first_column_as_array(positive_train)
X_Test = _first_column_as_array(positive_test)

In [22]:
# Inspect the dimensions of the training array; axes 1 and 2 are what the
# model's Input layer receives as the per-sample shape.
X_Train.shape

(21555, 45, 9)

In [23]:
from keras.layers import Input, LSTM, RepeatVector, Dense, LeakyReLU, TimeDistributed
from keras.models import Model, Sequential
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.utils import plot_model


# Per-sample input dimensions: axes 1 and 2 of the training array
# (axis 0 indexes the samples and is not part of the model input shape).
n_timesteps, n_features = X_Train.shape[1], X_Train.shape[2]

input_data = Input((n_timesteps, n_features))

# Encoder: two stacked LSTMs. The second one returns only its last output
# (16 units), which is the compressed code of the whole sequence.
# No `input_shape` kwarg is passed: in the functional API each layer takes its
# input shape from the tensor it is called on, and the previous value
# (X_Train.shape) wrongly included the sample axis.
encoder = LSTM(32, return_sequences=True)(input_data)
encoder = LSTM(16)(encoder)

# Repeat the code once per timestep so the decoder receives a sequence again.
bridge = RepeatVector(n_timesteps)(encoder)

# Decoder: mirrors the encoder and projects every timestep back to n_features.
decoder = LSTM(16, return_sequences=True)(bridge)
decoder = LSTM(32, return_sequences=True)(decoder)
decoder = TimeDistributed(Dense(n_features))(decoder)

autoencoder = Model(input_data, decoder)

autoencoder.summary()
plot_model(autoencoder, 'autoencoder_compress.png', show_shapes=True)

# Reconstruction is scored with mean absolute error.
autoencoder.compile(loss='mae',
                    optimizer='adam')

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 45, 9)]           0         
                                                                 
 lstm_4 (LSTM)               (None, 45, 32)            5376      
                                                                 
 lstm_5 (LSTM)               (None, 16)                3136      
                                                                 
 repeat_vector_1 (RepeatVect  (None, 45, 16)           0         
 or)                                                             
                                                                 
 lstm_6 (LSTM)               (None, 45, 16)            2112      
                                                                 
 lstm_7 (LSTM)               (None, 45, 32)            6272      
                                                           

In [26]:
from keras.callbacks import ReduceLROnPlateau

batch_size = 32
epochs = 200

# Stop training once val_loss has failed to improve by at least `min_delta`
# for `patience` consecutive epochs.
cp_early_stopping = EarlyStopping(monitor='val_loss',
                                  patience=10,
                                  min_delta=0.001,
                                  mode='min')

# Write a checkpoint every time val_loss reaches a new minimum; the epoch number
# and the val_loss value are embedded in the file name.
cp_model_checkpoint = ModelCheckpoint("data/tf_models/autoencoder/autoencoder_{epoch:02d}-{val_loss:.4f}.h5",
                                      monitor='val_loss',
                                      save_best_only=True,
                                      verbose=1,
                                      mode="min")

# The LR scheduler's patience must be shorter than EarlyStopping's. With both
# at 10 (and a looser min_delta here) early stopping almost always halted
# training at or before the epoch the learning rate would first be reduced,
# so the reduction never had a chance to take effect.
cb_reduce_lr_on_plateu = ReduceLROnPlateau(monitor="val_loss",
                                           factor=0.1,
                                           patience=5,
                                           verbose=0,
                                           mode="min",
                                           min_delta=0.0001,
                                           cooldown=0,
                                           min_lr=0.0000001)

# The autoencoder is trained to reproduce its own input (X -> X).
# NOTE(review): X_Test doubles as the validation set, so early stopping and
# checkpoint selection are tuned on the test split — consider a separate
# validation split.
# The History object is kept so the loss curves can be inspected afterwards.
history = autoencoder.fit(X_Train, X_Train,
                          batch_size=batch_size,
                          epochs=epochs,
                          validation_data=(X_Test, X_Test),
                          callbacks=[cp_early_stopping, cp_model_checkpoint, cb_reduce_lr_on_plateu])

# Reference from an earlier run: loss 0.04623

Epoch 1/200
Epoch 1: val_loss improved from inf to 0.04814, saving model to data/tf_models/autoencoder/autoencoder_01-0.0481.h5
Epoch 2/200
Epoch 2: val_loss did not improve from 0.04814
Epoch 3/200
Epoch 3: val_loss improved from 0.04814 to 0.04808, saving model to data/tf_models/autoencoder/autoencoder_03-0.0481.h5
Epoch 4/200
Epoch 4: val_loss improved from 0.04808 to 0.04797, saving model to data/tf_models/autoencoder/autoencoder_04-0.0480.h5
Epoch 5/200
Epoch 5: val_loss did not improve from 0.04797
Epoch 6/200
Epoch 6: val_loss improved from 0.04797 to 0.04794, saving model to data/tf_models/autoencoder/autoencoder_06-0.0479.h5
Epoch 7/200
Epoch 7: val_loss did not improve from 0.04794
Epoch 8/200
Epoch 8: val_loss improved from 0.04794 to 0.04743, saving model to data/tf_models/autoencoder/autoencoder_08-0.0474.h5
Epoch 9/200
Epoch 9: val_loss improved from 0.04743 to 0.04672, saving model to data/tf_models/autoencoder/autoencoder_09-0.0467.h5
Epoch 10/200
Epoch 10: val_loss imp

<keras.callbacks.History at 0x1617e1700>

In [6]:
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from keras.models import load_model

# Load the saved classifier and evaluate it on the test split.
model = load_model("data/tf_models/final_model/model-53-0.5709.h5")

df_test = preprocess.read_dataset_from_parquet("data/train_dataset/indicadores_com_sinais/test_data.parquet")

# First model input: the first column of every row, stacked into one array.
X_TS_Test = np.array([val[0] for val in df_test.values])

# Read the labels WITHOUT DataFrame.pop(): pop() removes the column from
# df_test in place, which made later cells that read df_test['label'] fail
# with a KeyError when the notebook is run top to bottom.
Y_test = df_test['label']

# Second model input: every column except the series and the label.
X_SIG_Test = df_test.loc[:, ~df_test.columns.isin(['series', 'label'])].values

# The model output is thresholded at 0.5 to obtain 0/1 predictions.
Y_test_predict = (model.predict([X_TS_Test, X_SIG_Test]).flatten() > 0.5).astype(int)

y_true = Y_test.values
print(f"Accuracy: {accuracy_score(y_true, Y_test_predict):.4f}")
print("Matriz de confusao:\n", confusion_matrix(y_true, Y_test_predict))
print(f"Precision: {precision_score(y_true, Y_test_predict):.4f}")
print(f"Recall: {recall_score(y_true, Y_test_predict):.4f}")
print(f"F1 Score: {f1_score(y_true, Y_test_predict):.4f}")

2023-01-16 19:17:53.760417: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Accuracy: 0.6111
Matriz de confusao:
 [[5427  499]
 [3299  541]]
Precision: 0.5202
Recall: 0.1409
F1 Score: 0.2217


In [None]:
from sklearn.linear_model import LogisticRegressionCV

# Logistic-regression baseline with 10-fold cross-validation, using all CPU cores.
# NOTE(review): X_Train_repr is not defined in the cells visible here — it is
# presumably a learned representation of the training series; confirm it exists
# on a fresh kernel and has one row per row of df_train.
train_labels = df_train['label'].values
logit = LogisticRegressionCV(cv=10, max_iter=500, n_jobs=-1)
logit.fit(X_Train_repr, train_labels)

In [None]:
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

# Predict on the test representation, then print accuracy, confusion matrix,
# precision, recall and F1 (in that order) against the true labels.
Y_test_predict = logit.predict(X_Test_repr)
y_true = df_test['label'].values
for metric in (accuracy_score, confusion_matrix, precision_score, recall_score, f1_score):
    print(metric(y_true, Y_test_predict))

0.6305157874092959
[[6343   70]
 [3698   87]]
0.554140127388535
0.02298546895640687
0.04414003044140031
