# 🚀 Melhoria Avançada de Modelos para Previsão de Ativos
Neste notebook, vamos:
- Adicionar novas features ao dataset
- Criar um target com zona neutra para evitar ruído
- Treinar modelos RandomForest e XGBoost com mais profundidade
- Ajustar e empilhar camadas LSTM
- Comparar métricas de forma equilibrada

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
plt.style.use('ggplot')

In [2]:
ticker = 'AAPL'
df = pd.read_csv(f'../data/{ticker}_ativo_com_indicadores.csv')
df['Datetime'] = pd.to_datetime(df['Datetime'])
df.set_index('Datetime', inplace=True)
df.dropna(inplace=True)

In [3]:
# Novas features
df['returns'] = df['close'].pct_change()
df['returns_lag_3'] = df['returns'].shift(3)
df['returns_lag_5'] = df['returns'].shift(5)
df['price_diff'] = df['close'] - df['open']
df['vol_rolling'] = df['volume'].rolling(window=10).mean()
df.dropna(inplace=True)

In [4]:
# Target com zona neutra
df['target'] = df['close'].shift(-3)
df['target_class'] = np.where(df['target'] > df['close'] * 1.002, 1,
                         np.where(df['target'] < df['close'] * 0.998, 0, -1))
df = df[df['target_class'] != -1]

In [5]:
# Separar features e target
features = ['open', 'high', 'low', 'close', 'volume', 'rsi', 'macd', 'macd_signal',
            'sma_20', 'ema_20', 'bb_upper', 'bb_lower',
            'returns', 'returns_lag_3', 'returns_lag_5', 'price_diff', 'vol_rolling']
X = df[features]
y = df['target_class']
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)

In [6]:
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X_train, y_train)
print('Distribuição após SMOTE:', pd.Series(y_res).value_counts())

Distribuição após SMOTE: target_class
1    380
0    380
Name: count, dtype: int64


In [7]:
rf = RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=42)
rf.fit(X_res, y_res)
y_pred_rf = rf.predict(X_test)
print('🔍 RandomForest')
print(confusion_matrix(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))

🔍 RandomForest
[[71 19]
 [66 30]]
              precision    recall  f1-score   support

           0       0.52      0.79      0.63        90
           1       0.61      0.31      0.41        96

    accuracy                           0.54       186
   macro avg       0.57      0.55      0.52       186
weighted avg       0.57      0.54      0.52       186



In [8]:
xgb = XGBClassifier(n_estimators=200, max_depth=5, learning_rate=0.05,
                    use_label_encoder=False, eval_metric='logloss')
xgb.fit(X_res, y_res)
y_pred_xgb = xgb.predict(X_test)
print('🔍 XGBoost')
print(confusion_matrix(y_test, y_pred_xgb))
print(classification_report(y_test, y_pred_xgb))

🔍 XGBoost
[[75 15]
 [68 28]]
              precision    recall  f1-score   support

           0       0.52      0.83      0.64        90
           1       0.65      0.29      0.40        96

    accuracy                           0.55       186
   macro avg       0.59      0.56      0.52       186
weighted avg       0.59      0.55      0.52       186



Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [9]:
# Preparar dados para LSTM
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

def create_sequences(X, y, window_size=10):
    Xs, ys = [], []
    for i in range(len(X) - window_size):
        Xs.append(X[i:i+window_size])
        ys.append(y[i+window_size])
    return np.array(Xs), np.array(ys)

X_seq, y_seq = create_sequences(X_scaled, y.values)

In [10]:
split = int(len(X_seq) * 0.8)
X_train_lstm, X_test_lstm = X_seq[:split], X_seq[split:]
y_train_lstm, y_test_lstm = y_seq[:split], y_seq[split:]

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(32))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer=Adam(0.001), metrics=['accuracy'])
model.fit(X_train_lstm, y_train_lstm, epochs=20, batch_size=32, validation_data=(X_test_lstm, y_test_lstm), verbose=1)

  super().__init__(**kwargs)


Epoch 1/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 15ms/step - accuracy: 0.5166 - loss: 0.6943 - val_accuracy: 0.5380 - val_loss: 0.6919
Epoch 2/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5111 - loss: 0.6879 - val_accuracy: 0.5109 - val_loss: 0.6936
Epoch 3/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5747 - loss: 0.6874 - val_accuracy: 0.5109 - val_loss: 0.6950
Epoch 4/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5450 - loss: 0.6891 - val_accuracy: 0.5109 - val_loss: 0.6968
Epoch 5/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5427 - loss: 0.6928 - val_accuracy: 0.5109 - val_loss: 0.6916
Epoch 6/20
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5179 - loss: 0.6938 - val_accuracy: 0.5109 - val_loss: 0.6912
Epoch 7/20
[1m23/23[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x24bde7f9ac0>

In [11]:
loss, acc = model.evaluate(X_test_lstm, y_test_lstm, verbose=0)
print(f"✅ Acurácia do LSTM: {acc:.2f}")

✅ Acurácia do LSTM: 0.46
