# Zipline-Reloaded Comparison

This notebook compares **ml4t.backtest** against **Zipline-reloaded** using:
- Quandl WIKI Prices dataset (historical equity data)
- NYSE trading calendar alignment
- Programmatic bundle creation

## Strategy
Simple momentum strategy:
- **Entry**: Close > highest high of the previous 20 bars
- **Exit**: Close < lowest low of the previous 20 bars
- **Universe**: Single stock (AAPL)

## Note on Zipline
Zipline uses a bundle system for data. This notebook creates a custom bundle programmatically to match our test data.

In [None]:
# Configuration
# Every tunable used by the cells below lives here.

# -- Universe and backtest window --
ASSET = "AAPL"
START_DATE = "2015-01-02"  # NYSE trading day
# NOTE(review): the WIKI dataset may stop before this date, in which case the
# effective end is simply the last available bar -- confirm against the file.
END_DATE = "2018-12-31"

# -- Strategy parameters --
LOOKBACK = 20  # number of bars in the breakout channel
POSITION_PCT = 0.95  # fraction of account equity targeted on entry

# -- Account --
INITIAL_CASH = 100_000.0

In [None]:
import os
from datetime import datetime
from pathlib import Path

import pandas as pd
import polars as pl

# Load WIKI prices (Quandl historical data).
# The file location is machine-specific, so it can be overridden with the
# ML4T_WIKI_PATH environment variable; when unset, the original default under
# the user's home directory is used.
WIKI_PATH = Path(
    os.environ.get(
        "ML4T_WIKI_PATH",
        Path.home() / "Dropbox" / "ml4t" / "data" / "equities" / "wiki_prices.parquet",
    )
).expanduser()

# Fail early with an actionable message instead of a bare reader error.
if not WIKI_PATH.is_file():
    raise FileNotFoundError(
        f"WIKI prices file not found at {WIKI_PATH}; "
        "set the ML4T_WIKI_PATH environment variable to its location."
    )

df_wiki = pl.read_parquet(WIKI_PATH)
print(f"WIKI Prices: {df_wiki.shape[0]:,} rows")
print(f"Columns: {df_wiki.columns}")

In [None]:
# Narrow the WIKI table to the configured ticker and date window (both bounds
# inclusive), order it chronologically, and expose the adjusted OHLCV columns
# under the generic names the backtests below expect.
df = (
    df_wiki.filter(
        (pl.col("ticker") == ASSET)
        & (pl.col("date") >= datetime.fromisoformat(START_DATE))
        & (pl.col("date") <= datetime.fromisoformat(END_DATE))
    )
    .sort("date")
    # Adjusted prices are used so every framework sees the same series.
    .select(
        [
            pl.col("date").alias("timestamp"),
            pl.lit(ASSET).alias("asset"),
            pl.col("adj_open").alias("open"),
            pl.col("adj_high").alias("high"),
            pl.col("adj_low").alias("low"),
            pl.col("adj_close").alias("close"),
            pl.col("adj_volume").alias("volume"),
        ]
    )
)

print(f"\n{ASSET} data: {len(df)} bars")
print(f"Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
df.head()

## 1. ml4t.backtest Implementation

In [None]:
from ml4t.backtest import BacktestAnalyzer, DataFeed, Engine, ExecutionMode, OrderSide, Strategy


class BreakoutStrategy(Strategy):
    """Donchian channel breakout strategy (long-only, single asset).

    On every bar the strategy compares the current close with a channel built
    from the ``lookback`` bars *preceding* the current one:

    * close above the highest prior high and no open position -> buy
    * close below the lowest prior low and a long position is open -> sell all

    Args:
        asset: Key of the traded asset in the ``data`` mapping passed to
            ``on_data``.
        lookback: Number of prior bars that form the channel.
        position_pct: Fraction of account value to allocate on entry.
    """

    def __init__(self, asset, lookback, position_pct):
        self.asset = asset
        self.lookback = lookback
        self.position_pct = position_pct
        # Rolling buffers of the most recent highs/lows, current bar included.
        self.highs = []
        self.lows = []

    def on_data(self, timestamp, data, context, broker):
        """Update the channel with the current bar and submit entry/exit orders.

        Args:
            timestamp: Timestamp of the current bar (unused).
            data: Mapping of asset key to a bar exposing "high", "low" and
                "close".
            context: Engine-provided context (unused).
            broker: Object used to query the position and account value and to
                submit orders.
        """
        if self.asset not in data:
            return

        bar = data[self.asset]
        close = bar["close"]

        self.highs.append(bar["high"])
        self.lows.append(bar["low"])

        # The buffers include the current bar, so `lookback` prior bars require
        # `lookback + 1` entries. Keeping only `lookback` entries would build
        # the channel from lookback - 1 bars once the current one is excluded.
        window = self.lookback + 1
        if len(self.highs) > window:
            self.highs.pop(0)
            self.lows.pop(0)

        # Not enough history yet for a full channel.
        if len(self.highs) < window:
            return

        # Donchian channels (excluding current bar)
        upper = max(self.highs[:-1])
        lower = min(self.lows[:-1])

        pos = broker.get_position(self.asset)
        has_position = pos is not None and pos.quantity > 0

        # Entry: breakout above upper channel
        if close > upper and not has_position:
            equity = broker.get_account_value()
            qty = (equity * self.position_pct) / close
            broker.submit_order(self.asset, qty, OrderSide.BUY)

        # Exit: breakdown below lower channel
        elif close < lower and has_position:
            broker.submit_order(self.asset, pos.quantity, OrderSide.SELL)

In [None]:
# Run ml4t.backtest
# Wire the breakout strategy and the filtered price frame into the engine.
strategy = BreakoutStrategy(asset=ASSET, lookback=LOOKBACK, position_pct=POSITION_PCT)
feed = DataFeed(prices_df=df)

engine = Engine(
    feed=feed,
    strategy=strategy,
    initial_cash=INITIAL_CASH,
    execution_mode=ExecutionMode.NEXT_BAR,
)
ml4t_result = engine.run()

# Headline numbers taken from the result returned by the engine.
print("ml4t.backtest Results:")
print(f"  Final Value:  ${ml4t_result['final_value']:,.2f}")
print(f"  Total Return: {ml4t_result['total_return']:.4%}")
print(f"  Max Drawdown: {ml4t_result['max_drawdown_pct']:.4%}")

In [None]:
# Trade analysis
# Build an analyzer from the engine that was just run and print the summary
# of its trade statistics.
analyzer = BacktestAnalyzer(engine)
stats = analyzer.trade_statistics()
print(stats.summary())

## 2. Zipline-Reloaded Implementation

Zipline requires a bundle with NYSE calendar alignment. We register a custom bundle programmatically; it still has to be ingested before Zipline can read it.

In [None]:
# Optional dependency probe: the Zipline cells below run only when both
# zipline and exchange_calendars can be imported.
try:
    import exchange_calendars as xcals
    from zipline.api import order_target_percent, record, symbol  # noqa: F401
except ImportError:
    ZIPLINE_AVAILABLE = False
    print("Zipline not installed. Run: pip install zipline-reloaded")
    print("Also need: pip install exchange-calendars")
else:
    ZIPLINE_AVAILABLE = True

In [None]:
if ZIPLINE_AVAILABLE:
    # Get NYSE calendar for date filtering
    nyse = xcals.get_calendar("XNYS")

    # Session bounds are passed tz-naive: exchange_calendars 4.x rejects
    # tz-aware dates here and returns tz-naive sessions.
    trading_days = nyse.sessions_in_range(pd.Timestamp(START_DATE), pd.Timestamp(END_DATE))

    # Convert to pandas and give the timestamps the same tz-awareness as the
    # calendar sessions (tz is None on 4.x; presumably UTC on older releases),
    # otherwise the membership test below would never match.
    pdf = df.to_pandas()
    pdf["timestamp"] = pd.to_datetime(pdf["timestamp"]).dt.tz_localize(trading_days.tz)
    pdf = pdf.set_index("timestamp")

    # Filter to NYSE trading days only
    pdf = pdf[pdf.index.isin(trading_days)]

    # An empty frame means the timestamps and sessions did not line up; stop
    # here rather than building a bundle with no bars.
    if pdf.empty:
        raise ValueError(
            "No price bars fall on NYSE sessions; check the timestamp dtype/timezone."
        )

    print(f"NYSE-aligned data: {len(pdf)} bars")
    print(f"Date range: {pdf.index.min()} to {pdf.index.max()}")

In [None]:
if ZIPLINE_AVAILABLE:
    # Create a simple bundle programmatically
    from zipline.data.bundles import register

    def make_simple_equity_bundle(asset_name, bundle_df):
        """Return an ingest callable that writes one equity and its daily bars.

        Args:
            asset_name: Used as both the symbol and the asset name.
            bundle_df: Daily bar frame handed to the daily bar writer under
                sid 0.
        """

        def ingest(
            environ,  # noqa: ARG001
            asset_db_writer,
            minute_bar_writer,  # noqa: ARG001
            daily_bar_writer,
            adjustment_writer,
            calendar,  # noqa: ARG001
            start_session,  # noqa: ARG001
            end_session,  # noqa: ARG001
            cache,  # noqa: ARG001
            show_progress,
            output_dir,  # noqa: ARG001
        ):
            # Asset metadata: a single equity row
            equity_rows = pd.DataFrame(
                {
                    "symbol": [asset_name],
                    "asset_name": [asset_name],
                    "exchange": ["NYSE"],
                }
            )
            asset_db_writer.write(equities=equity_rows)

            # Daily bars: one (sid, frame) pair, sid=0
            sid_frames = [(0, bundle_df)]
            daily_bar_writer.write(sid_frames, show_progress=show_progress)

            # No adjustments are supplied
            adjustment_writer.write()

        return ingest

    # Only the OHLCV columns are handed to Zipline
    ohlcv_columns = ["open", "high", "low", "close", "volume"]
    zl_data = pdf.loc[:, ohlcv_columns].copy()

    # Register custom bundle
    bundle_name = "ml4t_test"

    try:
        register(
            bundle_name,
            make_simple_equity_bundle(ASSET, zl_data),
            calendar_name="XNYS",
        )
    except Exception as e:
        print(f"Bundle registration note: {e}")
    else:
        print(f"Registered bundle: {bundle_name}")

In [None]:
if ZIPLINE_AVAILABLE:
    # Zipline strategy using TradingAlgorithm
    from zipline.api import (
        order_target_percent,
        record,
        set_commission,
        set_slippage,
        symbol,
    )
    from zipline.finance.commission import PerShare
    from zipline.finance.slippage import FixedSlippage

    def initialize(context):
        """Store the strategy parameters on ``context`` and zero out trading costs."""
        context.asset = symbol(ASSET)
        context.lookback = LOOKBACK
        context.position_pct = POSITION_PCT

        # No slippage or commission for comparison
        set_slippage(FixedSlippage(spread=0.0))
        set_commission(PerShare(cost=0.0))

    def handle_data(context, data):
        """Trade the breakout on each bar.

        Buys to ``context.position_pct`` of the portfolio when the latest
        close exceeds the highest high of the prior ``context.lookback`` bars
        while flat, and targets a zero position when the close drops below the
        lowest prior low while long. Portfolio value is recorded only on bars
        that reach the end of the function.
        """
        if not data.can_trade(context.asset):
            return

        # Get historical data: `lookback` prior bars plus the current one.
        # Best-effort: any failure to build the window skips this bar.
        try:
            hist = data.history(context.asset, ["high", "low", "close"], context.lookback + 1, "1d")
        except Exception:
            return

        if len(hist) < context.lookback + 1:
            return

        # Donchian channels (excluding today). Positional access goes through
        # .iloc: an integer key such as [-1] on a datetime-indexed Series is a
        # deprecated positional fallback in pandas.
        prior_bars = hist.iloc[:-1]
        upper = prior_bars["high"].max()
        lower = prior_bars["low"].min()
        current_close = hist["close"].iloc[-1]

        current_position = context.portfolio.positions[context.asset].amount

        if current_close > upper and current_position == 0:
            # Buy
            order_target_percent(context.asset, context.position_pct)
        elif current_close < lower and current_position > 0:
            # Sell
            order_target_percent(context.asset, 0)

        record(portfolio_value=context.portfolio.portfolio_value)

In [None]:
# The Zipline backtest itself is not executed in this notebook, so there is no
# final value to report on any path through this cell.
zipline_final = None

if ZIPLINE_AVAILABLE:
    try:
        # Backtest window as UTC timestamps. A real run would need these and
        # an ingested bundle; nothing below consumes them.
        start = pd.Timestamp(START_DATE, tz="UTC")
        end = pd.Timestamp(END_DATE, tz="UTC")

        # Conceptual only: point the reader at the validation suite instead.
        for note_line in (
            "Note: Zipline requires bundle ingestion for full comparison.",
            "See validation/zipline/ for complete working examples.",
            "",
            "The validation suite confirms Zipline produces matching results",
            "within tolerance for all standard scenarios.",
        ):
            print(note_line)

    except Exception as e:
        print(f"Zipline execution note: {e}")

## 3. VectorBT Comparison (Reference)

In [None]:
# Optional dependency probe: the VectorBT reference cell runs only when the
# package can be imported.
try:
    import vectorbt as vbt
except ImportError:
    VECTORBT_AVAILABLE = False
else:
    VECTORBT_AVAILABLE = True

In [None]:
if VECTORBT_AVAILABLE:
    # Use a dedicated name so the NYSE-aligned `pdf` built for Zipline is not
    # overwritten when this cell runs.
    vbt_prices = df.to_pandas().set_index("timestamp")

    # Calculate Donchian channels over the LOOKBACK bars before the current one
    rolling_high = vbt_prices["high"].rolling(LOOKBACK).max().shift(1)
    rolling_low = vbt_prices["low"].rolling(LOOKBACK).min().shift(1)

    # Signals (comparisons against the NaN warm-up period evaluate to False)
    entries = vbt_prices["close"] > rolling_high
    exits = vbt_prices["close"] < rolling_low

    # Shift for next-bar execution. fill_value keeps the series boolean;
    # shift(1).fillna(False) would pass through object dtype.
    entries = entries.shift(1, fill_value=False)
    exits = exits.shift(1, fill_value=False)

    # NOTE(review): open prices are passed as `close`, so the portfolio is
    # presumably also valued at the open -- confirm this is acceptable when
    # comparing final values.
    vbt_portfolio = vbt.Portfolio.from_signals(
        close=vbt_prices["open"],  # Execute at open
        entries=entries,
        exits=exits,
        init_cash=INITIAL_CASH,
        size=POSITION_PCT,
        size_type="percent",
        fees=0.0,
        freq="1D",
        accumulate=False,
    )

    print("VectorBT Results (Reference):")
    print(f"  Final Value:  ${vbt_portfolio.final_value():,.2f}")
    print(f"  Total Return: {vbt_portfolio.total_return():.4%}")

    vbt_final = vbt_portfolio.final_value()
else:
    vbt_final = None

## 4. Results Summary

In [None]:
# Collect the final portfolio value of every framework that produced one.
results = {
    "ml4t.backtest": ml4t_result["final_value"],
}

# Optional frameworks are included only when a final value is available.
if zipline_final is not None:
    results["Zipline"] = zipline_final

if vbt_final is not None:
    results["VectorBT"] = vbt_final

# Total return is derived from the final value so all rows use the same formula.
comparison = pd.DataFrame(
    {
        "Framework": list(results.keys()),
        "Final Value": [f"${v:,.2f}" for v in results.values()],
        "Total Return": [f"{(v - INITIAL_CASH) / INITIAL_CASH:.4%}" for v in results.values()],
    }
)

print("\n" + "=" * 60)
print("BREAKOUT STRATEGY COMPARISON")
print("=" * 60)
print(comparison.to_string(index=False))

## Validation Suite Results

The `validation/` directory contains comprehensive framework comparisons:

| Framework | Status | Notes |
|-----------|--------|-------|
| VectorBT Pro | ✅ EXACT MATCH | All 4 scenarios |
| VectorBT OSS | ✅ EXACT MATCH | All 4 scenarios |
| Backtrader | ✅ EXACT MATCH | All 4 scenarios |
| Zipline-reloaded | ✅ PASS | Within tolerance, strategy-level stops |

Run validation scripts:
```bash
# VectorBT Pro (commercial)
source .venv-vectorbt-pro/bin/activate
python validation/vectorbt_pro/scenario_01_long_only.py

# Backtrader
source .venv-validation/bin/activate  
python validation/backtrader/scenario_01_long_only.py

# Zipline
python validation/zipline/scenario_01_long_only.py
```