In [1]:
"""
========================================
SCRIPT 1: COLETA DE DADOS
Vers√£o Simplificada - APENAS dados DI√ÅRIOS
========================================
"""

import yfinance as yf
import pandas as pd
from datetime import datetime
import os

class StockDataCollector:
    """Coletor simplificado - apenas dados di√°rios"""
    
    def __init__(self, symbol='SPY'):
        self.symbol = symbol
        self.df = None
        
    def collect_data(self, start_date='2010-01-01'):
        """
        Coleta dados di√°rios desde start_date at√© hoje
        
        Par√¢metros:
        - start_date: Data inicial (padr√£o: 2010-01-01)
                     Use '2000-01-01' para mais dados
                     Use '2015-01-01' para menos dados
        """
        print(f"\n{'='*80}")
        print(f"COLETANDO DADOS: {self.symbol}")
        print(f"{'='*80}")
        print(f"üìä Intervalo: DI√ÅRIO (1d)")
        print(f"üìÖ Per√≠odo: {start_date} at√© hoje")
        
        # Coletar dados
        end_date = datetime.now().strftime('%Y-%m-%d')
        
        self.df = yf.download(
            self.symbol,
            start=start_date,
            end=end_date,
            interval='1d',
            progress=False,
            auto_adjust=True  # Ajusta automaticamente por splits/dividendos
        )
        
        # Verificar se conseguiu dados
        if len(self.df) == 0:
            print(f"\n‚ùå ERRO: Nenhum dado coletado para {self.symbol}")
            print(f"   Verifique se o ticker est√° correto")
            return None
        
        print(f"\n‚úÖ Coleta bem-sucedida!")
        print(f"   üìä Total de dias: {len(self.df)}")
        print(f"   üìÖ Per√≠odo: {self.df.index[0].date()} at√© {self.df.index[-1].date()}")
        print(f"   üìà Colunas: {list(self.df.columns)}")
        
        return self.df
    
    def save_data(self, filename=None):
        """Salva dados coletados"""
        if self.df is None:
            print("‚ö†Ô∏è Nenhum dado para salvar. Execute collect_data() primeiro.")
            return None
        
        if filename is None:
            filename = f'{self.symbol.lower()}_raw.csv'
        
        filepath = os.path.join('/kaggle/working/', filename)
        self.df.to_csv(filepath)
        
        print(f"\nüíæ Dados salvos: {filepath}")
        return filepath
    
    def get_info(self):
        """Mostra informa√ß√µes detalhadas dos dados"""
        if self.df is None:
            print("‚ö†Ô∏è Nenhum dado dispon√≠vel")
            return
        
        print(f"\n{'='*80}")
        print(f"INFORMA√á√ïES DO DATASET: {self.symbol}")
        print(f"{'='*80}")
        print(f"\nüìä Dimens√µes: {self.df.shape[0]} linhas √ó {self.df.shape[1]} colunas")
        print(f"üìÖ Per√≠odo: {self.df.index[0].date()} at√© {self.df.index[-1].date()}")
        print(f"‚è∞ Dura√ß√£o: {(self.df.index[-1] - self.df.index[0]).days} dias")
        
        print(f"\nüìà Primeiras 3 linhas:")
        print(self.df.head(3))
        
        print(f"\nüìà √öltimas 3 linhas:")
        print(self.df.tail(3))
        
        print(f"\nüìä Estat√≠sticas:")
        print(self.df.describe())
        
        print(f"\n‚ö†Ô∏è Valores ausentes:")
        missing = self.df.isnull().sum()
        if missing.sum() > 0:
            print(missing[missing > 0])
        else:
            print("   ‚úÖ Nenhum valor ausente")
        
        print(f"{'='*80}\n")


def main():
    """Fun√ß√£o principal - executa coleta completa"""
    
    # Configura√ß√£o
    SYMBOL = 'SPY'  # ‚Üê MUDE AQUI: 'SPY', 'GOLD', 'MSFT', 'AAPL', etc.
    START_DATE = '2010-01-01'  # ‚Üê MUDE AQUI para mais/menos dados
    
    print("\n" + "="*80)
    print("COLETA DE DADOS - VERS√ÉO SIMPLIFICADA")
    print("="*80)
    print(f"\nConfigura√ß√£o:")
    print(f"   Ticker: {SYMBOL}")
    print(f"   Data inicial: {START_DATE}")
    print(f"   Intervalo: Di√°rio (1d)")
    
    # Criar coletor
    collector = StockDataCollector(symbol=SYMBOL)
    
    # Coletar dados
    df = collector.collect_data(start_date=START_DATE)
    
    if df is None:
        return None
    
    # Mostrar informa√ß√µes
    collector.get_info()
    
    # Salvar
    filepath = collector.save_data()
    
    print("\n" + "="*80)
    print("‚úÖ COLETA CONCLU√çDA COM SUCESSO!")
    print("="*80)
    print(f"üìÅ Arquivo salvo: {filepath}")
    print(f"üìä Total de linhas: {len(df)}")
    print(f"\n‚û°Ô∏è Pr√≥ximo passo: Execute o script de pr√©-processamento")
    
    return collector


if __name__ == "__main__":
    collector = main()


COLETA DE DADOS - VERS√ÉO SIMPLIFICADA

Configura√ß√£o:
   Ticker: SPY
   Data inicial: 2010-01-01
   Intervalo: Di√°rio (1d)

COLETANDO DADOS: SPY
üìä Intervalo: DI√ÅRIO (1d)
üìÖ Per√≠odo: 2010-01-01 at√© hoje

‚úÖ Coleta bem-sucedida!
   üìä Total de dias: 4050
   üìÖ Per√≠odo: 2010-01-04 at√© 2026-02-09
   üìà Colunas: [('Close', 'SPY'), ('High', 'SPY'), ('Low', 'SPY'), ('Open', 'SPY'), ('Volume', 'SPY')]

INFORMA√á√ïES DO DATASET: SPY

üìä Dimens√µes: 4050 linhas √ó 5 colunas
üìÖ Per√≠odo: 2010-01-04 at√© 2026-02-09
‚è∞ Dura√ß√£o: 5880 dias

üìà Primeiras 3 linhas:
Price           Close       High        Low       Open     Volume
Ticker            SPY        SPY        SPY        SPY        SPY
Date                                                             
2010-01-04  85.027992  85.073007  83.662503  84.307735  118944600
2010-01-05  85.253052  85.290567  84.667843  84.975456  111579900
2010-01-06  85.313080  85.523154  85.103005  85.170527  116074400

üìà √öltimas 3 li

In [2]:
# Verificar GPU
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("GPU Devices: ", tf.config.list_physical_devices('GPU'))

# Se quiser for√ßar uso de GPU
if len(tf.config.list_physical_devices('GPU')) > 0:
    print("‚úÖ GPU detectada! TensorFlow usar√° GPU automaticamente.")
else:
    print("‚ö†Ô∏è Nenhuma GPU detectada. Usando CPU.")

NameError: name 'tf' is not defined

In [None]:
"""
========================================
SCRIPT 2: PR√â-PROCESSAMENTO (CORRIGIDO)
Vers√£o Limpa - SEM data leakage
+ Convers√£o de tipos
========================================
"""

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import pickle
import os


class StockDataPreprocessor:
    """Pr√©-processador limpo e auditado"""
    
    def __init__(self, df):
        self.df = df.copy()
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.feature_columns = []
    
    def convert_to_numeric(self):
        """
        ‚úÖ Converte todas as colunas para num√©rico
        Resolve problema de dados lidos como string
        """
        print("\nüîÑ Convertendo colunas para num√©rico...")
        
        numeric_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
        
        for col in numeric_columns:
            if col in self.df.columns:
                # Converter para num√©rico, erros viram NaN
                self.df[col] = pd.to_numeric(self.df[col], errors='coerce')
        
        # Verificar tipos
        print(f"   ‚úì Tipos ap√≥s convers√£o:")
        for col in numeric_columns:
            if col in self.df.columns:
                print(f"      {col}: {self.df[col].dtype}")
        
        # Verificar NaN ap√≥s convers√£o
        nan_count = self.df[numeric_columns].isnull().sum()
        if nan_count.sum() > 0:
            print(f"\n   ‚ö†Ô∏è NaNs ap√≥s convers√£o (valores n√£o-num√©ricos):")
            print(nan_count[nan_count > 0])
        else:
            print(f"   ‚úì Sem NaNs ap√≥s convers√£o")
        
        return self.df
        
    def handle_missing_values(self):
        """
        ‚úÖ Trata valores ausentes SEM usar informa√ß√£o futura
        """
        print("\nüîß Tratando valores ausentes...")
        
        print(f"Valores ausentes ANTES:")
        nan_count = self.df.isnull().sum()
        if nan_count.sum() > 0:
            print(nan_count[nan_count > 0])
        else:
            print("   ‚úÖ Nenhum")
        
        # Forward fill apenas
        self.df = self.df.ffill()
        
        # Remover NaN restantes do in√≠cio
        if self.df.isnull().any().any():
            rows_before = len(self.df)
            self.df = self.df.dropna()
            print(f"   ‚úì Removidas {rows_before - len(self.df)} linhas iniciais com NaN")
        
        print(f"\nValores ausentes AP√ìS:")
        nan_count = self.df.isnull().sum()
        if nan_count.sum() > 0:
            print(nan_count[nan_count > 0])
        else:
            print("   ‚úÖ Nenhum")
        
        return self.df
    
    def create_technical_indicators(self):
        """Cria indicadores t√©cnicos"""
        print("\nüìà Criando indicadores t√©cnicos...")
        
        # Moving Averages
        self.df['MA_7'] = self.df['Close'].rolling(window=7).mean()
        self.df['MA_21'] = self.df['Close'].rolling(window=21).mean()
        self.df['MA_50'] = self.df['Close'].rolling(window=50).mean()
        
        # Exponential Moving Averages
        self.df['EMA_12'] = self.df['Close'].ewm(span=12, adjust=False).mean()
        self.df['EMA_26'] = self.df['Close'].ewm(span=26, adjust=False).mean()
        
        # MACD
        self.df['MACD'] = self.df['EMA_12'] - self.df['EMA_26']
        self.df['MACD_Signal'] = self.df['MACD'].ewm(span=9, adjust=False).mean()
        
        # RSI
        delta = self.df['Close'].diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
        rs = gain / loss
        self.df['RSI'] = 100 - (100 / (1 + rs))
        
        # Bollinger Bands
        self.df['BB_Middle'] = self.df['Close'].rolling(window=20).mean()
        bb_std = self.df['Close'].rolling(window=20).std()
        self.df['BB_Upper'] = self.df['BB_Middle'] + (bb_std * 2)
        self.df['BB_Lower'] = self.df['BB_Middle'] - (bb_std * 2)
        
        # Volume
        self.df['Volume_MA_7'] = self.df['Volume'].rolling(window=7).mean()
        
        # Price changes
        self.df['Price_Change'] = self.df['Close'].pct_change()
        self.df['High_Low_Pct'] = (self.df['High'] - self.df['Low']) / self.df['Close']
        
        # Volatility
        self.df['Volatility'] = self.df['Close'].rolling(window=20).std()
        self.df['ATR'] = self.df['High'] - self.df['Low']
        
        # Remover NaN
        rows_before = len(self.df)
        self.df = self.df.dropna()
        removed = rows_before - len(self.df)
        
        print(f"   ‚úì Indicadores criados: {len(self.df.columns)} colunas")
        print(f"   ‚úì Removidas {removed} linhas (warm-up)")
        print(f"   ‚úì Registros finais: {len(self.df)}")
        
        return self.df
    
    def select_features(self):
        """Seleciona features"""
        self.feature_columns = [
            'Open', 'High', 'Low', 'Close', 'Volume',
            'MA_7', 'MA_21', 'MA_50',
            'EMA_12', 'EMA_26',
            'MACD', 'MACD_Signal',
            'RSI',
            'BB_Upper', 'BB_Middle', 'BB_Lower',
            'Volume_MA_7',
            'Price_Change', 'High_Low_Pct',
            'Volatility', 'ATR'
        ]
        
        print(f"\nüéØ Features selecionadas: {len(self.feature_columns)}")
        for i, feat in enumerate(self.feature_columns, 1):
            print(f"   {i:2d}. {feat}")
        
        return self.feature_columns
    
    def create_sequences(self, df, seq_length=60, target_column='Close'):
        """Cria sequ√™ncias LSTM"""
        print(f"\nüî¢ Criando sequ√™ncias...")
        print(f"   Lookback: {seq_length} timesteps")
        
        data = df.values
        target_idx = self.feature_columns.index(target_column)
        
        X, y = [], []
        
        for i in range(seq_length, len(data)):
            X.append(data[i-seq_length:i])
            y.append(data[i, target_idx])
        
        X = np.array(X)
        y = np.array(y)
        
        print(f"   ‚úì X shape: {X.shape}")
        print(f"   ‚úì y shape: {y.shape}")
        print(f"   ‚úì Sequ√™ncias: {len(X)}")
        
        return X, y
    
    def split_and_normalize(self, X, y, train_split=0.65, val_split=0.20):
        """Split temporal + normaliza√ß√£o"""
        print(f"\n‚úÇÔ∏è Dividindo dados...")
        
        train_size = int(len(X) * train_split)
        val_size = int(len(X) * val_split)
        
        X_train = X[:train_size]
        y_train = y[:train_size]
        
        X_val = X[train_size:train_size+val_size]
        y_val = y[train_size:train_size+val_size]
        
        X_test = X[train_size+val_size:]
        y_test = y[train_size+val_size:]
        
        print(f"   ‚úì Treino: {len(X_train)} ({train_split*100:.0f}%)")
        print(f"   ‚úì Val: {len(X_val)} ({val_split*100:.0f}%)")
        print(f"   ‚úì Teste: {len(X_test)} ({(1-train_split-val_split)*100:.0f}%)")
        
        # Normaliza√ß√£o
        print(f"\nüîÑ Normalizando...")
        print(f"   ‚úÖ Fit APENAS no treino")
        
        n_samples_train, n_timesteps, n_features = X_train.shape
        X_train_reshaped = X_train.reshape(-1, n_features)
        
        self.scaler.fit(X_train_reshaped)
        
        X_train_scaled = self.scaler.transform(X_train_reshaped).reshape(n_samples_train, n_timesteps, n_features)
        X_val_scaled = self.scaler.transform(X_val.reshape(-1, n_features)).reshape(len(X_val), n_timesteps, n_features)
        X_test_scaled = self.scaler.transform(X_test.reshape(-1, n_features)).reshape(len(X_test), n_timesteps, n_features)
        
        # Normalizar y
        close_idx = self.feature_columns.index('Close')
        
        y_train_full = np.zeros((len(y_train), n_features))
        y_val_full = np.zeros((len(y_val), n_features))
        y_test_full = np.zeros((len(y_test), n_features))
        
        y_train_full[:, close_idx] = y_train
        y_val_full[:, close_idx] = y_val
        y_test_full[:, close_idx] = y_test
        
        y_train_scaled = self.scaler.transform(y_train_full)[:, close_idx]
        y_val_scaled = self.scaler.transform(y_val_full)[:, close_idx]
        y_test_scaled = self.scaler.transform(y_test_full)[:, close_idx]
        
        print(f"   ‚úÖ Normaliza√ß√£o completa")
        
        return (X_train_scaled, y_train_scaled), (X_val_scaled, y_val_scaled), (X_test_scaled, y_test_scaled)
    
    def save_preprocessor(self, filename='preprocessor.pkl'):
        """Salva preprocessor"""
        filepath = os.path.join('/kaggle/working/', filename)
        
        preprocessor_data = {
            'scaler': self.scaler,
            'feature_columns': self.feature_columns
        }
        
        with open(filepath, 'wb') as f:
            pickle.dump(preprocessor_data, f)
        
        print(f"\nüíæ Preprocessor salvo: {filepath}")
        return filepath


def main():
    """Pipeline completo"""
    
    print("\n" + "="*80)
    print("PR√â-PROCESSAMENTO - 100% LIVRE DE DATA LEAKAGE")
    print("="*80)
    
    # Configura√ß√£o
    INPUT_FILE = 'spy_raw.csv'
    
    # Carregar
    print(f"\nüìÇ Carregando: /kaggle/working/{INPUT_FILE}")
    data_path = os.path.join('/kaggle/working/', INPUT_FILE)
    
    if not os.path.exists(data_path):
        print(f"‚ùå Arquivo n√£o encontrado: {data_path}")
        return None
    
    df = pd.read_csv(data_path, index_col=0, parse_dates=True)
    print(f"   ‚úì Carregados {len(df)} registros")
    
    # Preprocessor
    preprocessor = StockDataPreprocessor(df)
    
    # Pipeline
    print(f"\n{'='*80}")
    print("EXECUTANDO PIPELINE")
    print(f"{'='*80}")
    
    preprocessor.convert_to_numeric()  # ‚Üê NOVA LINHA!
    preprocessor.handle_missing_values()
    preprocessor.create_technical_indicators()
    preprocessor.select_features()
    
    df_features = preprocessor.df[preprocessor.feature_columns]
    
    X, y = preprocessor.create_sequences(df_features, seq_length=60)
    
    train_data, val_data, test_data = preprocessor.split_and_normalize(
        X, y, train_split=0.65, val_split=0.20
    )
    
    # Salvar
    print(f"\nüíæ Salvando dados...")
    np.save('/kaggle/working/X_train.npy', train_data[0])
    np.save('/kaggle/working/y_train.npy', train_data[1])
    np.save('/kaggle/working/X_val.npy', val_data[0])
    np.save('/kaggle/working/y_val.npy', val_data[1])
    np.save('/kaggle/working/X_test.npy', test_data[0])
    np.save('/kaggle/working/y_test.npy', test_data[1])
    print(f"   ‚úì 6 arquivos .npy salvos")
    
    preprocessor.save_preprocessor()
    
    print(f"\n{'='*80}")
    print("‚úÖ PR√â-PROCESSAMENTO CONCLU√çDO")
    print(f"{'='*80}")
    print(f"\nüìã Checklist:")
    print(f"   ‚úÖ Convers√£o para num√©rico")
    print(f"   ‚úÖ Missing values (forward fill)")
    print(f"   ‚úÖ Indicadores (janela passada)")
    print(f"   ‚úÖ Sequ√™ncias (sem leakage)")
    print(f"   ‚úÖ Split (temporal)")
    print(f"   ‚úÖ Normaliza√ß√£o (fit no treino)")
    
    return preprocessor, train_data, val_data, test_data


if __name__ == "__main__":
    result = main()

In [None]:
# Verificar GPU
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("GPU Devices: ", tf.config.list_physical_devices('GPU'))

# Se quiser for√ßar uso de GPU
if len(tf.config.list_physical_devices('GPU')) > 0:
    print("‚úÖ GPU detectada! TensorFlow usar√° GPU automaticamente.")
else:
    print("‚ö†Ô∏è Nenhuma GPU detectada. Usando CPU.")

In [None]:
"""
========================================
SCRIPT 3: MODELAGEM LSTM
Vers√£o Final - Treinamento e Avalia√ß√£o
========================================
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import os
import pickle

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

from sklearn.metrics import mean_absolute_error, mean_squared_error

# Seed para reprodutibilidade
np.random.seed(42)
tf.random.set_seed(42)


class LSTMStockPredictor:
    """Modelo LSTM para predi√ß√£o de a√ß√µes"""
    
    def __init__(self, input_shape):
        self.input_shape = input_shape
        self.model = None
        self.history = None
        
    def build_model(self, lstm_units=[128, 64, 32], dropout_rate=0.2, learning_rate=0.001):
        """Constr√≥i arquitetura LSTM"""
        print("\nüèóÔ∏è Construindo modelo LSTM...")
        
        model = Sequential()
        
        # Primeira LSTM (retorna sequ√™ncias)
        model.add(LSTM(units=lstm_units[0], return_sequences=True, input_shape=self.input_shape))
        model.add(Dropout(dropout_rate))
        
        # LSTMs intermedi√°rias
        for units in lstm_units[1:-1]:
            model.add(LSTM(units=units, return_sequences=True))
            model.add(Dropout(dropout_rate))
        
        # √öltima LSTM (n√£o retorna sequ√™ncias)
        model.add(LSTM(units=lstm_units[-1], return_sequences=False))
        model.add(Dropout(dropout_rate))
        
        # Dense layers
        model.add(Dense(units=25, activation='relu'))
        model.add(Dropout(dropout_rate))
        model.add(Dense(units=1))
        
        # Compilar
        optimizer = Adam(learning_rate=learning_rate)
        model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])
        
        self.model = model
        
        print(f"   ‚úì Modelo constru√≠do!")
        print(f"\nüìã Arquitetura:")
        model.summary()
        
        return model
    
    def train(self, X_train, y_train, X_val, y_val, epochs=100, batch_size=32, verbose=1):
        """Treina o modelo"""
        print("\nüéì Iniciando treinamento...")
        
        # Callbacks
        early_stopping = EarlyStopping(
            monitor='val_loss',
            patience=15,
            restore_best_weights=True,
            verbose=1
        )
        
        model_checkpoint = ModelCheckpoint(
            '/kaggle/working/best_model.keras',
            monitor='val_loss',
            save_best_only=True,
            verbose=1
        )
        
        reduce_lr = ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-7,
            verbose=1
        )
        
        # Treinar
        history = self.model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[early_stopping, model_checkpoint, reduce_lr],
            verbose=verbose
        )
        
        self.history = history
        print("\n‚úÖ Treinamento conclu√≠do!")
        
        return history
    
    def evaluate(self, X_test, y_test, scaler, feature_columns):
        """Avalia o modelo e calcula m√©tricas"""
        print("\nüìä Avaliando modelo...")
        
        # Predi√ß√µes
        y_pred = self.model.predict(X_test)
        
        # Desnormalizar
        close_idx = feature_columns.index('Close')
        
        y_test_full = np.zeros((len(y_test), len(feature_columns)))
        y_pred_full = np.zeros((len(y_pred), len(feature_columns)))
        
        y_test_full[:, close_idx] = y_test
        y_pred_full[:, close_idx] = y_pred.flatten()
        
        y_test_original = scaler.inverse_transform(y_test_full)[:, close_idx]
        y_pred_original = scaler.inverse_transform(y_pred_full)[:, close_idx]
        
        # M√©tricas
        mae = mean_absolute_error(y_test_original, y_pred_original)
        rmse = np.sqrt(mean_squared_error(y_test_original, y_pred_original))
        mape = np.mean(np.abs((y_test_original - y_pred_original) / y_test_original)) * 100
        
        print(f"\nüìà M√âTRICAS DE AVALIA√á√ÉO:")
        print(f"   MAE (Mean Absolute Error): ${mae:.4f}")
        print(f"   RMSE (Root Mean Square Error): ${rmse:.4f}")
        print(f"   MAPE (Mean Absolute Percentage Error): {mape:.2f}%")
        
        return {
            'y_test': y_test_original,
            'y_pred': y_pred_original,
            'mae': mae,
            'rmse': rmse,
            'mape': mape
        }
    
    def plot_training_history(self):
        """Plota hist√≥rico de treinamento"""
        if self.history is None:
            print("‚ö†Ô∏è Modelo ainda n√£o foi treinado!")
            return
        
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
        
        # Loss
        ax1.plot(self.history.history['loss'], label='Treino')
        ax1.plot(self.history.history['val_loss'], label='Valida√ß√£o')
        ax1.set_title('Loss durante Treinamento')
        ax1.set_xlabel('√âpoca')
        ax1.set_ylabel('Loss (MSE)')
        ax1.legend()
        ax1.grid(True)
        
        # MAE
        ax2.plot(self.history.history['mae'], label='Treino')
        ax2.plot(self.history.history['val_mae'], label='Valida√ß√£o')
        ax2.set_title('MAE durante Treinamento')
        ax2.set_xlabel('√âpoca')
        ax2.set_ylabel('MAE')
        ax2.legend()
        ax2.grid(True)
        
        plt.tight_layout()
        plt.savefig('/kaggle/working/training_history.png', dpi=300, bbox_inches='tight')
        print(f"üíæ Gr√°fico salvo: /kaggle/working/training_history.png")
        plt.close()
    
    def plot_predictions(self, evaluation_results, num_points=500):
        """Plota predi√ß√µes vs valores reais"""
        y_test = evaluation_results['y_test'][-num_points:]
        y_pred = evaluation_results['y_pred'][-num_points:]
        
        plt.figure(figsize=(15, 6))
        plt.plot(y_test, label='Valor Real', linewidth=2, alpha=0.8)
        plt.plot(y_pred, label='Predi√ß√£o', linewidth=2, alpha=0.8)
        plt.title(f'Predi√ß√£o vs Valor Real (√∫ltimos {num_points} pontos)', fontsize=14)
        plt.xlabel('Timestep')
        plt.ylabel('Pre√ßo de Fechamento ($)')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.savefig('/kaggle/working/predictions_vs_real.png', dpi=300, bbox_inches='tight')
        print(f"üíæ Gr√°fico salvo: /kaggle/working/predictions_vs_real.png")
        plt.close()
    
    def plot_error_distribution(self, evaluation_results):
        """Plota distribui√ß√£o dos erros"""
        y_test = evaluation_results['y_test']
        y_pred = evaluation_results['y_pred']
        errors = y_test - y_pred
        
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
        
        # Histograma
        ax1.hist(errors, bins=50, edgecolor='black', alpha=0.7)
        ax1.set_title('Distribui√ß√£o dos Erros')
        ax1.set_xlabel('Erro ($)')
        ax1.set_ylabel('Frequ√™ncia')
        ax1.axvline(x=0, color='r', linestyle='--', linewidth=2)
        ax1.grid(True, alpha=0.3)
        
        # Scatter
        ax2.scatter(y_test, y_pred, alpha=0.5)
        ax2.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 
                 'r--', linewidth=2, label='Predi√ß√£o Perfeita')
        ax2.set_title('Predi√ß√£o vs Valor Real')
        ax2.set_xlabel('Valor Real ($)')
        ax2.set_ylabel('Predi√ß√£o ($)')
        ax2.legend()
        ax2.grid(True, alpha=0.3)
        
        plt.tight_layout()
        plt.savefig('/kaggle/working/error_analysis.png', dpi=300, bbox_inches='tight')
        print(f"üíæ Gr√°fico salvo: /kaggle/working/error_analysis.png")
        plt.close()
    
    def save_model(self, filename='lstm_model.keras'):
        """Salva modelo"""
        if self.model is None:
            print("‚ö†Ô∏è Nenhum modelo para salvar!")
            return
        
        filepath = os.path.join('/kaggle/working/', filename)
        self.model.save(filepath)
        print(f"\nüíæ Modelo salvo: {filepath}")
        return filepath


def main():
    """Executa treinamento completo"""
    
    print("\n" + "="*80)
    print("MODELAGEM LSTM")
    print("="*80)
    
    # Carregar dados
    print("\nüìÇ Carregando dados pr√©-processados...")
    
    try:
        X_train = np.load('/kaggle/working/X_train.npy')
        y_train = np.load('/kaggle/working/y_train.npy')
        X_val = np.load('/kaggle/working/X_val.npy')
        y_val = np.load('/kaggle/working/y_val.npy')
        X_test = np.load('/kaggle/working/X_test.npy')
        y_test = np.load('/kaggle/working/y_test.npy')
        
        with open('/kaggle/working/preprocessor.pkl', 'rb') as f:
            preprocessor_data = pickle.load(f)
        
        scaler = preprocessor_data['scaler']
        feature_columns = preprocessor_data['feature_columns']
        
        print(f"   ‚úì Dados carregados!")
        print(f"   ‚úì X_train: {X_train.shape}")
        print(f"   ‚úì X_val: {X_val.shape}")
        print(f"   ‚úì X_test: {X_test.shape}")
        
    except FileNotFoundError as e:
        print(f"‚ùå Erro: {e}")
        print("   Execute o script de pr√©-processamento primeiro!")
        return None
    
    # Criar modelo
    input_shape = (X_train.shape[1], X_train.shape[2])
    predictor = LSTMStockPredictor(input_shape)
    
    # Construir
    predictor.build_model(
        lstm_units=[128, 64, 32],
        dropout_rate=0.2,
        learning_rate=0.001
    )
    
    # Treinar
    history = predictor.train(
        X_train, y_train,
        X_val, y_val,
        epochs=100,
        batch_size=32,
        verbose=1
    )
    
    # Visualizar hist√≥rico
    predictor.plot_training_history()
    
    # Avaliar
    evaluation_results = predictor.evaluate(X_test, y_test, scaler, feature_columns)
    
    # Visualizar predi√ß√µes
    predictor.plot_predictions(evaluation_results, num_points=500)
    predictor.plot_error_distribution(evaluation_results)
    
    # Salvar modelo
    predictor.save_model('lstm_stock_model.keras')
    
    # Salvar m√©tricas
    metrics_df = pd.DataFrame([{
        'MAE': evaluation_results['mae'],
        'RMSE': evaluation_results['rmse'],
        'MAPE': evaluation_results['mape'],
        'Timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    }])
    metrics_df.to_csv('/kaggle/working/model_metrics.csv', index=False)
    print(f"üíæ M√©tricas salvas: /kaggle/working/model_metrics.csv")
    
    print("\n" + "="*80)
    print("‚úÖ MODELAGEM COMPLETA!")
    print("="*80)
    print(f"\nüìÅ Arquivos gerados:")
    print(f"   ‚Ä¢ lstm_stock_model.keras")
    print(f"   ‚Ä¢ best_model.keras")
    print(f"   ‚Ä¢ training_history.png")
    print(f"   ‚Ä¢ predictions_vs_real.png")
    print(f"   ‚Ä¢ error_analysis.png")
    print(f"   ‚Ä¢ model_metrics.csv")
    
    return predictor, evaluation_results


if __name__ == "__main__":
    result = main()