# Tech Challenge – Fase 4: LSTM + API (Notebook Robusto)

## Fase 0 – Configurações

In [None]:

# Ticker symbol and date range used as defaults when loading price data.
SYMBOL = "AAPL"
START_DATE = "2018-01-01"
END_DATE = None
USE_YFINANCE = True  # overridden automatically below if data/sample.csv exists

# Windowing and split parameters.
LOOKBACK = 60      # number of past time steps in each input window
TEST_SIZE = 0.2    # fraction of windows held out for testing
VAL_SIZE = 0.1     # fraction of the remaining (train+val) windows used for validation
RANDOM_SEED = 42

# Output directories for the trained model and auxiliary artifacts.
MODELS_DIR = "models"
ARTIFACTS_DIR = "artifacts"

import os
# Create the output directories up front; exist_ok avoids failing on re-runs.
os.makedirs(MODELS_DIR, exist_ok=True)
os.makedirs(ARTIFACTS_DIR, exist_ok=True)

# If a local CSV exists, switch to offline mode automatically.
if os.path.exists("data/sample.csv"):
    print("Detectado data/sample.csv → usando modo OFFLINE (USE_YFINANCE=False)")
    USE_YFINANCE = False


## Fase 1 – Coleta & Pré-processamento (robusto)

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

# Seed NumPy's global RNG for reproducibility.
# NOTE(review): this seeds NumPy only; TensorFlow/Keras weight initialisation
# presumably uses its own RNG and would need a separate seed — confirm.
np.random.seed(RANDOM_SEED)

def _read_local_csv(csv_path="data/sample.csv"):
    """Read the local price CSV, or return None when the file is absent.

    Args:
        csv_path: Path of the CSV file; it must provide "Date" and "Close"
            columns ("Date" is parsed as datetime).

    Returns:
        A DataFrame sorted by "Date" with a fresh 0..n-1 index, or None if
        ``csv_path`` does not exist.

    Raises:
        ValueError: If the CSV has no "Close" column.
    """
    # Guard clause: a missing file is an expected situation, not an error.
    if not os.path.exists(csv_path):
        return None

    frame = pd.read_csv(csv_path, parse_dates=["Date"])
    if "Close" not in frame.columns:
        raise ValueError("CSV local não possui coluna 'Close'")

    ordered = frame.sort_values("Date")
    return ordered.reset_index(drop=True)

def load_data(symbol=SYMBOL, start=START_DATE, end=END_DATE, use_yf=USE_YFINANCE):
    """Load price data, with fallbacks between yfinance and the local CSV.

    Resolution order:
    1) If ``use_yf`` is False and the local CSV exists, return the CSV.
    2) If ``use_yf`` is True, try yfinance through two code paths:
       ``yf.download`` (2 attempts), then ``yf.Ticker(symbol).history``
       (2 attempts).
    3) If that fails, fall back to the local CSV.

    Note: the defaults are bound when this function is defined, so the
    module-level values (including any automatic override of USE_YFINANCE)
    must be final before this definition runs.

    Args:
        symbol: Ticker symbol passed to yfinance.
        start: Start date passed to yfinance.
        end: End date passed to yfinance (None is passed through unchanged).
        use_yf: Whether to attempt a yfinance download.

    Returns:
        A DataFrame with "Date" and "Close" columns, sorted by "Date", with a
        fresh 0..n-1 index.

    Raises:
        FileNotFoundError: If no data could be obtained from yfinance and the
            local CSV does not exist.
        Exception: The original import error is re-raised when yfinance cannot
            be imported and there is no local CSV to fall back to.
    """
    # The local CSV is read eagerly, even when use_yf is True.
    local = _read_local_csv()
    if local is not None and not use_yf:
        print("Usando CSV local (offline).")
        return local

    if use_yf:
        # Import lazily so the offline path works without yfinance installed.
        try:
            import yfinance as yf
        except Exception as e:
            print("yfinance indisponível:", e)
            local = _read_local_csv()
            if local is not None:
                print("Fallback para CSV local.")
                return local
            raise

        # Attempt A: yf.download
        for attempt in range(2):
            try:
                df = yf.download(
                    symbol, start=start, end=end, interval="1d",
                    progress=False, group_by="ticker", auto_adjust=False, threads=False
                )
                if isinstance(df, pd.DataFrame) and not df.empty:
                    # Flatten MultiIndex columns: select the ticker's
                    # sub-frame if present, otherwise keep the last level.
                    if isinstance(df.columns, pd.MultiIndex):
                        if symbol in df.columns.levels[0]:
                            df = df[symbol]
                        else:
                            df.columns = [c[-1] for c in df.columns]
                    # Turn the index into a regular "Date" column.
                    df = df.rename_axis("Date").reset_index()
                    if "Close" in df.columns and not df.empty:
                        print(f"Baixado com yf.download (tentativa {attempt+1}).")
                        return df.sort_values("Date").reset_index(drop=True)
            except Exception as e:
                # Best effort: report the failure and move on to the next attempt.
                print(f"yf.download falhou (tentativa {attempt+1}):", e)

        # Attempt B: Ticker().history
        for attempt in range(2):
            try:
                tkr = yf.Ticker(symbol)
                df = tkr.history(start=start, end=end, interval="1d", auto_adjust=False)
                if isinstance(df, pd.DataFrame) and not df.empty:
                    df = df.rename_axis("Date").reset_index()
                    if "Close" in df.columns:
                        print(f"Baixado com Ticker().history (tentativa {attempt+1}).")
                        return df.sort_values("Date").reset_index(drop=True)
            except Exception as e:
                # Best effort: report the failure and move on to the next attempt.
                print(f"Ticker().history falhou (tentativa {attempt+1}):", e)

        print("Aviso: não foi possível baixar via yfinance.")

    # Final fallback: local CSV (re-read from disk).
    local = _read_local_csv()
    if local is not None:
        print("Fallback: usando CSV local em data/sample.csv.")
        return local

    raise FileNotFoundError("Sem dados: yfinance falhou e não há arquivo local em data/sample.csv")

# Load the price history using load_data's default arguments.
df = load_data()
# Ensure chronological order and a clean 0..n-1 index before windowing.
df = df.sort_values("Date").reset_index(drop=True)
df.head()


### Visualização rápida do Close

In [None]:

# Line plot of the "Close" column against "Date" for the configured symbol.
plt.figure()
plt.plot(df["Date"], df["Close"])
plt.title(f"Fechamento – {SYMBOL}")
plt.xlabel("Data")
plt.ylabel("Close")
plt.show()


## Fase 2 – EDA & Preparação de Janelas

In [None]:

from sklearn.preprocessing import MinMaxScaler

prices = df[["Close"]].copy()

# Fit the scaler on the training period only. Fitting on the whole series
# would leak the min/max of the validation and test periods into training.
# The cutoff mirrors the arithmetic of the temporal split applied to the
# windowed dataset: window k covers rows k .. k+LOOKBACK-1 and its target is
# row k+LOOKBACK, so n_train training windows touch rows 0 .. n_train+LOOKBACK-1.
# Keep this in sync with the split if that logic changes.
_n_windows = max(len(prices) - LOOKBACK, 0)
_n_trainval = _n_windows - int(_n_windows * TEST_SIZE)
_n_train = _n_trainval - int(_n_trainval * VAL_SIZE)
# Degenerate case (too little data for any training window): use every row.
_n_fit = _n_train + LOOKBACK if _n_train > 0 else len(prices)

scaler = MinMaxScaler()
scaler.fit(prices.values[:_n_fit])
# Values outside the training range map outside [0, 1]; this is expected.
prices_scaled = scaler.transform(prices.values)

def make_windowed_dataset(series, lookback=60):
    """Build supervised (window, next value) pairs from a sequence.

    For every position ``end`` from ``lookback`` to ``len(series) - 1`` the
    input is ``series[end - lookback:end]`` and the target is ``series[end]``.

    Args:
        series: Sliceable sequence of values, one per time step.
        lookback: Number of past steps in each input window.

    Returns:
        Tuple ``(X, y)`` where ``X`` is reshaped to ``(-1, lookback, 1)`` and
        ``y`` to ``(-1, 1)``.
    """
    target_positions = range(lookback, len(series))
    windows = [series[end - lookback:end] for end in target_positions]
    targets = [series[end] for end in target_positions]

    X = np.array(windows).reshape(-1, lookback, 1)
    y = np.array(targets).reshape(-1, 1)
    return X, y

# Build the windowed dataset from the scaled prices; X is reshaped to
# (n_windows, LOOKBACK, 1) and y to (n_windows, 1) by make_windowed_dataset.
X, y = make_windowed_dataset(prices_scaled, lookback=LOOKBACK)
len(X), X.shape, y.shape


### Split temporal

In [None]:

def time_series_split(X, y, test_size=0.2, val_size=0.1):
    """Split X and y into contiguous train / validation / test segments.

    The order of the samples is preserved (no shuffling): the test segment is
    taken from the end, and the validation segment from the end of what
    remains.

    Args:
        X: Sliceable sequence of inputs.
        y: Sliceable sequence of targets, aligned with ``X``.
        test_size: Fraction of all samples assigned to the test segment.
        val_size: Fraction of the non-test samples assigned to validation.

    Returns:
        ``((X_train, y_train), (X_val, y_val), (X_test, y_test))``.
    """
    total = len(X)
    n_trainval = total - int(total * test_size)
    n_val = int(n_trainval * val_size)

    # Segment boundaries: [0, train_end) | [train_end, val_end) | [val_end, end)
    train_end = n_trainval - n_val
    val_end = train_end + n_val

    segments = (
        slice(None, train_end),
        slice(train_end, val_end),
        slice(val_end, None),
    )
    return tuple((X[seg], y[seg]) for seg in segments)

# Chronological train/validation/test split using the configured fractions.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = time_series_split(X, y, TEST_SIZE, VAL_SIZE)
(len(X_train), len(X_val), len(X_test))


## Fase 3 – Modelagem LSTM

In [None]:

from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

def build_lstm_model(lookback=60):
    """Create and compile the stacked-LSTM regressor.

    Architecture: LSTM(64, returning sequences) -> Dropout(0.2) -> LSTM(32)
    -> Dense(1), compiled with the Adam optimizer and MSE loss.

    Args:
        lookback: Number of time steps per input window; the model expects
            inputs of shape ``(lookback, 1)``.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    net = Sequential()
    # The first recurrent layer returns the full sequence so the second LSTM
    # receives one vector per time step.
    net.add(LSTM(64, return_sequences=True, input_shape=(lookback, 1)))
    net.add(Dropout(0.2))
    net.add(LSTM(32))
    net.add(Dense(1))
    net.compile(optimizer="adam", loss="mse")
    return net

# Instantiate the model for the configured window length and print its layers.
model = build_lstm_model(lookback=LOOKBACK)
model.summary()


## Fase 4 – Treino & Avaliação

In [None]:

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.metrics import mean_absolute_error, mean_squared_error

EPOCHS = 10  # easy to lower for a quick smoke test
BATCH_SIZE = 64

# Stop when val_loss has not improved for 5 epochs and restore the best weights.
es = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
# Halve the learning rate when val_loss has not improved for 3 epochs.
rlr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, verbose=1)
# Save only the best model (by val_loss) to MODELS_DIR/checkpoint.keras.
ckpt = ModelCheckpoint(filepath=os.path.join(MODELS_DIR, "checkpoint.keras"),
                       monitor="val_loss", save_best_only=True)

# Train on the training split, monitoring the validation split.
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[es, rlr, ckpt],
    verbose=1
)

# Evaluation on the test split
yhat_test_scaled = model.predict(X_test)
# Undo the scaling so the metrics are expressed in price units.
y_test_inv = scaler.inverse_transform(y_test)
yhat_test_inv = scaler.inverse_transform(yhat_test_scaled)

mae = mean_absolute_error(y_test_inv, yhat_test_inv)
# RMSE computed as sqrt(MSE): the `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# so this form works on both old and new versions.
rmse = float(np.sqrt(mean_squared_error(y_test_inv, yhat_test_inv)))
# The small epsilon guards against division by zero in the percentage error.
mape = np.mean(np.abs((y_test_inv - yhat_test_inv) / (y_test_inv + 1e-8))) * 100

print(f"MAE : {mae:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAPE: {mape:.2f}%")


### Real vs. Previsto (últimos 200 pontos)

In [None]:

# Overlay the last 200 actual and predicted test values (in price units).
plt.figure()
plt.plot(y_test_inv[-200:], label="Real")
plt.plot(yhat_test_inv[-200:], label="Previsto")
plt.legend()
plt.title("Teste – Real vs Previsto")
plt.show()


## Fase 5 – Salvamento do Modelo & Artefatos

In [None]:

import joblib
# Persist the fitted scaler and the trained model.
joblib.dump(scaler, os.path.join(ARTIFACTS_DIR, "scaler.joblib"))
model.save(os.path.join(MODELS_DIR, "lstm_model.keras"))
# NOTE(review): the message hard-codes the default directory names; it will be
# inaccurate if ARTIFACTS_DIR / MODELS_DIR are changed.
print("Salvo artifacts/scaler.joblib e models/lstm_model.keras")
