Here’s a self-contained template that pulls 5-minute OHLCV data, trains an LSTM to predict next-period returns, turns those predictions into long/flat signals, and backtests the signals with **vectorbt**. You can drop this into a Jupyter notebook or a `.py` file—just adjust the `SYMBOL`, date range, look-back window, etc., as needed.

---

### What’s happening

1. **Data download**:
   We grab 5-min OHLCV for `SYMBOL` from Yahoo Finance (note: Yahoo only serves up to \~60 days of intraday data).

2. **Preprocessing**:

   * **StandardScaler** normalizes the close price for more stable LSTM training.
   * We slide a window of `LOOKBACK` bars to build each input sequence.
   * The target is the *next* 5-min return.

3. **Model**:
   A two-layer LSTM with dropout, optimized on mean squared error.

4. **Signals**:
   We go **long** when the model predicts a positive return, flat otherwise.

5. **Backtest**:
   Using **vectorbt** at a 5-minute frequency, with 0.1% round-trip fees.

---

### Next Steps

* **Expand features**: add volume, RSI, momentum, or other technicals into your sequences.
* **Hyperparameter tuning**: experiment with look-back length, LSTM units, batch size.
* **Walk-forward validation**: roll your train/test windows to avoid overfitting.
* **Stop-loss / take-profit** rules: integrate via custom exit signals.

Let me know if you run into any issues or want to extend this (e.g., classification thresholding, multi-symbol portfolios, live execution hooks)!


In [12]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.callbacks import EarlyStopping
import vectorbt as vbt

# ────────── 1. PARAMETERS ──────────
SYMBOL     = "AAPL"     # ticker to backtest
PERIOD     = "60d"      # last 60 days of 5m data
INTERVAL   = "5m"
LOOKBACK   = 20         # bars per LSTM input sequence
TEST_RATIO = 0.2
EPOCHS     = 20
BATCH_SIZE = 64

# ────────── 2. FETCH INTRADAY DATA ──────────
data = yf.download(
    SYMBOL,
    period=PERIOD,
    interval=INTERVAL,
    progress=False
)
if data.empty:
    raise ValueError("No intraday data – check your symbol/interval!")
close_series = data["Close"].dropna()  # pandas Series (1-D)
print(f"Fetched {len(close_series)} bars from {close_series.index.min().date()} to {close_series.index.max().date()}")

# ────────── 3. SCALE CLOSE PRICE ──────────
scaler    = StandardScaler()
# Use .values.reshape(-1,1) to give scaler a 2-D array
close_s   = scaler.fit_transform(close_series.values.reshape(-1, 1)).ravel()

# Convert to numpy arrays
close_arr  = close_series.values        # shape (N,)
scaled_arr = close_s                    # shape (N,)

# ────────── 4. BUILD SEQUENCES & TARGETS ──────────
seqs, targets, idxs = [], [], []
for i in range(LOOKBACK, len(close_arr) - 1):
    seqs.append(scaled_arr[i - LOOKBACK : i])               # last LOOKBACK scalars
    targets.append((close_arr[i + 1] - close_arr[i]) / close_arr[i])  # next-bar return
    idxs.append(close_series.index[i])                      # timestamp

X = np.array(seqs).reshape(-1, LOOKBACK, 1)  # (samples, timesteps, features)
y = np.array(targets)                        # (samples,)
idxs = pd.DatetimeIndex(idxs)

# ────────── 5. TRAIN/TEST SPLIT ──────────
n_samples = len(X)
split     = int(n_samples * (1 - TEST_RATIO))

X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]
idx_train, idx_test = idxs[:split], idxs[split:]

# ────────── 6. DEFINE LSTM MODEL ──────────
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(LOOKBACK, 1)),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(1)
])
model.compile(optimizer="adam", loss="mse")

# ────────── 7. TRAIN WITH EARLY STOPPING ──────────
es = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[es],
    verbose=1
)

# ────────── 8. PREDICT & FLATTEN ──────────
y_pred_raw = model.predict(X_test)
print(f"Raw predictions shape: {y_pred_raw.shape}")
y_pred = y_pred_raw.flatten()  # Ensure 1D array
print(f"Flattened predictions shape: {y_pred.shape}")

# ────────── 9. BUILD SIGNALS ──────────
# Align raw closes with our sequences:
close_seq  = close_arr[LOOKBACK : -1]   # drop first LOOKBACK and last bar
close_test = close_seq[split:]          # test portion

# Verify both are 1-D and same length
print("Test len:", len(idx_test), 
      "predictions:", y_pred.shape, 
      "closes:", close_test.shape)

# Ensure all arrays have matching lengths
min_len = min(len(idx_test), len(y_pred), len(close_test))
idx_test_aligned = idx_test[:min_len]
y_pred_aligned = y_pred[:min_len]
close_test_aligned = close_test[:min_len]

print(f"Aligned lengths - idx: {len(idx_test_aligned)}, pred: {len(y_pred_aligned)}, close: {len(close_test_aligned)}")

signals = pd.DataFrame({
    "Close":    close_test_aligned,
    "pred_ret": y_pred_aligned
}, index=idx_test_aligned)

entries = signals["pred_ret"] > 0
exits   = ~entries  # exit whenever pred_ret ≤ 0

# ────────── 10. BACKTEST WITH VECTORBT ──────────
pf = vbt.Portfolio.from_signals(
    close=signals["Close"],
    entries=entries,
    exits=exits,
    init_cash=100_000,
    fees=0.001,
    freq="5T"
)

# ────────── 11. PERFORMANCE & PLOT ──────────
print("\n=== Performance Summary ===")
print(f"Total Return      : {pf.total_return() * 100:.2f}%")
print(f"Annualized Return : {pf.annualized_return() * 100:.2f}%")
print(f"Sharpe Ratio      : {pf.sharpe_ratio():.2f}")
print(f"Max Drawdown      : {pf.max_drawdown() * 100:.2f}%")

pf.plot().show()


  data = yf.download(


Fetched 4642 bars from 2025-04-11 to 2025-07-09
Epoch 1/20


  super().__init__(**kwargs)


[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 0.0013 - val_loss: 8.6706e-06
Epoch 2/20
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 0.0013 - val_loss: 8.6706e-06
Epoch 2/20
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1.4651e-04 - val_loss: 2.7440e-06
Epoch 3/20
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1.4651e-04 - val_loss: 2.7440e-06
Epoch 3/20
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 7.9805e-05 - val_loss: 5.7172e-06
Epoch 4/20
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 7.9805e-05 - val_loss: 5.7172e-06
Epoch 4/20
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 4.6943e-05 - val_loss: 5.8353e-06
Epoch 5/20
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 4.6943e-05 - val_loss: 5.8353e-06
Epo

ValueError: Data must be 1-dimensional, got ndarray of shape (925, 1) instead