In [1]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
from tradingUtils import *
from indicadores import *
import labeling as lb
import backtesting
from backtesting import Backtest, Strategy
from backtesting.lib import crossover
from bokeh.io import output_notebook
output_notebook()
backtesting.set_bokeh_output(notebook=True)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def prepare_data(olhc, test_size=0.2, random_state=42, shuffle=True):
    """
    Build the feature matrix and label vector from price data and split them
    into training and test sets.

    Parameters:
    - olhc: price DataFrame passed to `agg_indicators` and `lb.labelData`
      (presumably OHLC candles -- confirm against those helpers).
    - test_size: fraction of the rows held out for testing (default 0.2).
    - random_state: seed forwarded to `train_test_split` (default 42).
    - shuffle: whether the rows are shuffled before splitting. The default
      (True) keeps the original behaviour. NOTE(review): if the rows are
      time-ordered, a shuffled split mixes later observations into the
      training set; pass `shuffle=False` for a chronological hold-out.

    Returns:
    - X_train, X_test, y_train, y_test: NumPy arrays produced by
      `train_test_split`.
    """
    # Compute the indicators and normalise them
    features = normalize_indicators(agg_indicators(olhc))

    # Label the data; 0.1 is forwarded unchanged to lb.labelData, which
    # defines its meaning
    labels = np.array(lb.labelData(olhc, 0.1)).ravel()

    # Attach the labels as the last column on a new frame (the frame returned
    # by the indicator helpers is left untouched), then drop rows with NaN
    dataset = features.assign(y=labels).dropna()

    # Convert once to NumPy: every column but the last is a feature,
    # the last column is the label
    values = np.array(dataset)
    X = values[:, :-1]
    y = values[:, -1]

    # Split the data into training and test sets
    return train_test_split(
        X, y, test_size=test_size, random_state=random_state, shuffle=shuffle
    )

## Fazendo a Rede Neural

In [3]:
def mlp(olhc, hidden_layers=(100, 100, 100), activation='logistic',
        solver='adam', max_iter=500, random_state=42):
    """
    Train an MLP neural-network classifier, print its classification report
    and return the fitted model.

    Parameters:
    - olhc: price DataFrame forwarded to `prepare_data`.
    - hidden_layers: tuple with the size of each hidden layer.
    - activation: activation function to use.
    - solver: optimisation algorithm to use.
    - max_iter: maximum number of iterations.
    - random_state: seed for random number generation.

    Returns:
    - The fitted `MLPClassifier`. The test-set classification report is
      printed, not returned.
    """
    # Prepare the data
    X_train, X_test, y_train, y_test = prepare_data(olhc)

    # Define the multi-layer network; the local is not called `mlp` so it
    # does not shadow this function's own name
    classifier = MLPClassifier(hidden_layer_sizes=hidden_layers,
                               activation=activation,
                               solver=solver,
                               max_iter=max_iter,
                               random_state=random_state)

    # Train the network
    classifier.fit(X_train, y_train)

    # Predict the classes of the held-out test set
    y_pred_mlp = classifier.predict(X_test)

    # Show the classification report for the MLP
    report = classification_report(y_test, y_pred_mlp)
    print(report)

    return classifier

In [4]:
# mlp(prio_data)

## Fazendo o modelo de Random Forest

In [5]:
def random_forest(olhc, n_estimators=100, max_depth=None, random_state=42):
    """
    Train a Random Forest classifier, print its classification report and
    return the fitted model.

    Parameters:
    - olhc: price DataFrame forwarded to `prepare_data`.
    - n_estimators: number of trees in the forest.
    - max_depth: maximum depth of the trees (None for unlimited).
    - random_state: seed for random number generation.

    Returns:
    - The fitted `RandomForestClassifier`. The test-set classification
      report is printed, not returned.
    """
    # Train/test arrays built from the price data
    X_train, X_test, y_train, y_test = prepare_data(olhc)

    # Build the forest and fit it in one step (`fit` returns the estimator)
    forest = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=random_state,
    ).fit(X_train, y_train)

    # Score the held-out set and show the classification report
    print(classification_report(y_test, forest.predict(X_test)))

    return forest

In [6]:
# random_forest(prio_data)

## Fazendo o Modelo Gradient Boosting

In [7]:
def gradient_boosting(olhc, random_state=42):
    """
    Train a Gradient Boosting classifier, print its classification report
    and return the fitted model.

    Parameters:
    - olhc: price DataFrame forwarded to `prepare_data`.
    - random_state: seed for random number generation.

    Returns:
    - The fitted `GradientBoostingClassifier`. The test-set classification
      report is printed, not returned.
    """
    # Train/test arrays built from the price data
    features_train, features_test, labels_train, labels_test = prepare_data(olhc)

    # Configure the booster and train it on the training split
    booster = GradientBoostingClassifier(random_state=random_state)
    booster.fit(features_train, labels_train)

    # Predict the held-out split and show the classification report
    predicted = booster.predict(features_test)
    print(classification_report(labels_test, predicted))

    return booster

In [8]:
# gradient_boosting(prio_data)

In [9]:
# # Faz previsões de probabilidade
# y_pred_probs = rf.predict_proba(X_test)

# # Identifica a classe com maior probabilidade
# y_pred_indices = np.argmax(y_pred_probs, axis=1)

# # Mapeia os índices para as classes de interesse
# class_mapping = {0: 0, 1: 1, 2: -1}  # ajuste conforme necessário para sua classificação
# y_pred = np.vectorize(class_mapping.get)(y_pred_indices)

# # Avalia o modelo
# print(classification_report(y_test, y_pred, target_names=["Desfazer Posição", "Compra", "Short"]))

# Backtesting

In [15]:
# Entry point: trains a model on every year except `year` and produces the
# signal frame used to backtest that year
def backtesting_model(year, olhc, model, **kwargs):
    """
    Train `model` on all rows outside `year` and attach its predictions for
    `year` to the price data as a "Signal" column.

    Parameters:
    - year: calendar year to hold out for the backtest; `olhc.index` must
      expose `.year` (e.g. a DatetimeIndex).
    - olhc: price DataFrame forwarded to `agg_indicators` and to `model`.
    - model: training function (such as `mlp`, `random_forest` or
      `gradient_boosting`) called as `model(data, **kwargs)` and returning a
      fitted estimator with a `predict` method.
    - **kwargs: extra keyword arguments forwarded to `model`.

    Returns:
    - A copy of the rows of `olhc` for `year` with an added "Signal" column
      holding the predictions; rows without a prediction keep 0.

    Raises:
    - ValueError: if no rows of `year` remain after the indicators are
      computed and NaN rows are dropped.

    NOTE(review): the features for `year` are normalised over the full
    history while the training features are normalised over the remaining
    years only -- confirm that `normalize_indicators` makes these comparable.
    """
    # Indicator features for the whole history, restricted to the target year
    features = normalize_indicators(agg_indicators(olhc)).dropna()
    data_backtest = features[features.index.year == year]
    if data_backtest.empty:
        # Fail before the (expensive) training step instead of inside predict
        raise ValueError(f"No usable rows for year {year} after computing indicators.")

    # Train the model on every other year; a separate name keeps the
    # `model` argument (the training function) distinct from the estimator
    data_train_and_test = olhc[olhc.index.year != year]
    fitted_model = model(data_train_and_test, **kwargs)

    # Compute the policy for the target year. It is indexed by the rows that
    # were actually predicted, so rows dropped for NaN cannot cause a length
    # mismatch against the raw price index
    pred = fitted_model.predict(data_backtest)
    policy = pd.Series(pred, index=data_backtest.index)
    print(policy)

    # Work on an explicit copy so adding "Signal" neither mutates `olhc` nor
    # triggers SettingWithCopyWarning
    olhc_year = olhc[olhc.index.year == year].copy()
    olhc_year["Signal"] = 0
    olhc_year.loc[policy.index, "Signal"] = policy

    return olhc_year

In [16]:
# Backtest 2023 with each model, in the order Random Forest, MLP, Gradient Boosting
dados_rf, dados_mlp, dados_gb = (
    backtesting_model(2023, prio_data, trainer)
    for trainer in (random_forest, mlp, gradient_boosting)
)

  self._psar[i] = high2
  self._psar[i] = high2


              precision    recall  f1-score   support

        -1.0       0.48      0.55      0.51       301
         0.0       0.12      0.03      0.05        30
         1.0       0.47      0.44      0.45       292

    accuracy                           0.47       623
   macro avg       0.36      0.34      0.34       623
weighted avg       0.46      0.47      0.46       623

Date
2023-01-02    1.0
2023-01-03    1.0
2023-01-04   -1.0
2023-01-05    1.0
2023-01-06    1.0
             ... 
2023-12-21    1.0
2023-12-22    1.0
2023-12-26    1.0
2023-12-27    1.0
2023-12-28    1.0
Length: 248, dtype: float64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  olhc_copy["Signal"] = 0
  self._psar[i] = high2
  self._psar[i] = high2
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  olhc_copy["Signal"] = 0
  self._psar[i] = high2


              precision    recall  f1-score   support

        -1.0       0.51      0.53      0.52       301
         0.0       0.21      0.13      0.16        30
         1.0       0.52      0.52      0.52       292

    accuracy                           0.51       623
   macro avg       0.42      0.40      0.40       623
weighted avg       0.50      0.51      0.51       623

Date
2023-01-02    1.0
2023-01-03   -1.0
2023-01-04   -1.0
2023-01-05    1.0
2023-01-06    1.0
             ... 
2023-12-21    1.0
2023-12-22    1.0
2023-12-26    1.0
2023-12-27    1.0
2023-12-28    1.0
Length: 248, dtype: float64


  self._psar[i] = high2


              precision    recall  f1-score   support

        -1.0       0.49      0.57      0.53       301
         0.0       0.22      0.07      0.10        30
         1.0       0.48      0.42      0.45       292

    accuracy                           0.48       623
   macro avg       0.40      0.36      0.36       623
weighted avg       0.47      0.48      0.47       623

Date
2023-01-02    1.0
2023-01-03    1.0
2023-01-04   -1.0
2023-01-05   -1.0
2023-01-06   -1.0
             ... 
2023-12-21    1.0
2023-12-22    1.0
2023-12-26    1.0
2023-12-27    1.0
2023-12-28    1.0
Length: 248, dtype: float64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  olhc_copy["Signal"] = 0


In [17]:
# Run the strategy over the Random Forest signal frame with 10,000 in starting cash
bt = Backtest(dados_rf, OurStrategy, cash=10_000)
stats = bt.run()

# Plot the backtest, then print the summary statistics
bt.plot()
print(stats)

  .resample(resample_rule, label='left')


Start                     2023-01-02 00:00:00
End                       2023-12-28 00:00:00
Duration                    360 days 00:00:00
Exposure Time [%]                   99.193548
Equity Final [$]                  9748.742335
Equity Peak [$]                  12116.340598
Return [%]                          -2.512577
Buy & Hold Return [%]               25.272039
Return (Ann.) [%]                    -2.55258
Volatility (Ann.) [%]               36.969501
Sharpe Ratio                              0.0
Sortino Ratio                             0.0
Calmar Ratio                              0.0
Max. Drawdown [%]                  -45.198958
Avg. Drawdown [%]                  -45.198958
Max. Drawdown Duration      350 days 00:00:00
Avg. Drawdown Duration      350 days 00:00:00
# Trades                                   67
Win Rate [%]                        46.268657
Best Trade [%]                      15.466816
Worst Trade [%]                    -10.356369
Avg. Trade [%]                    

In [18]:
# Run the strategy over the MLP signal frame with 10,000 in starting cash
bt = Backtest(dados_mlp, OurStrategy, cash=10_000)
stats = bt.run()

# Plot the backtest, then print the summary statistics
bt.plot()
print(stats)

  .resample(resample_rule, label='left')


Start                     2023-01-02 00:00:00
End                       2023-12-28 00:00:00
Duration                    360 days 00:00:00
Exposure Time [%]                   99.193548
Equity Final [$]                  5501.587054
Equity Peak [$]                  10486.860882
Return [%]                         -44.984129
Buy & Hold Return [%]               25.272039
Return (Ann.) [%]                  -45.511819
Volatility (Ann.) [%]               20.349884
Sharpe Ratio                              0.0
Sortino Ratio                             0.0
Calmar Ratio                              0.0
Max. Drawdown [%]                  -48.293516
Avg. Drawdown [%]                  -25.719656
Max. Drawdown Duration      352 days 00:00:00
Avg. Drawdown Duration      178 days 00:00:00
# Trades                                   64
Win Rate [%]                          42.1875
Best Trade [%]                      10.205845
Worst Trade [%]                    -11.081734
Avg. Trade [%]                    

In [19]:
# Run the strategy over the Gradient Boosting signal frame with 10,000 in starting cash
bt = Backtest(dados_gb, OurStrategy, cash=10_000)
stats = bt.run()

# Plot the backtest, then print the summary statistics
bt.plot()
print(stats)

  .resample(resample_rule, label='left')


Start                     2023-01-02 00:00:00
End                       2023-12-28 00:00:00
Duration                    360 days 00:00:00
Exposure Time [%]                   99.193548
Equity Final [$]                  9199.612279
Equity Peak [$]                  10208.739706
Return [%]                          -8.003877
Buy & Hold Return [%]               25.272039
Return (Ann.) [%]                   -8.127579
Volatility (Ann.) [%]               33.528452
Sharpe Ratio                              0.0
Sortino Ratio                             0.0
Calmar Ratio                              0.0
Max. Drawdown [%]                  -31.173738
Avg. Drawdown [%]                  -31.173738
Max. Drawdown Duration      357 days 00:00:00
Avg. Drawdown Duration      357 days 00:00:00
# Trades                                   69
Win Rate [%]                        47.826087
Best Trade [%]                      14.917118
Worst Trade [%]                      -9.04624
Avg. Trade [%]                    