# Framework Comparison: Long-Only Strategy

This notebook validates **ml4t.backtest** accuracy by comparing results against:
- **VectorBT** (vectorized backtesting)
- **Backtrader** (event-driven, industry standard)
- **Zipline-reloaded** (Quantopian's legacy framework) — *listed for reference; this notebook does not yet include a Zipline run*

## Strategy
Simple moving average crossover:
- **Entry**: When 20-day SMA crosses above 50-day SMA
- **Exit**: When 20-day SMA crosses below 50-day SMA
- **Position sizing**: 95% of equity

## Data
Daily ETF data from the Quandl WIKI Prices dataset, loaded from a local `etf_universe.parquet` file.

In [None]:
# Configuration
# -- Instrument and backtest window (both dates are inclusive, ISO format) --
ASSET = "SPY"
START_DATE = "2015-01-01"
END_DATE = "2020-12-31"

# -- Strategy parameters --
FAST_PERIOD = 20  # look-back of the fast SMA, in bars
SLOW_PERIOD = 50  # look-back of the slow SMA, in bars
POSITION_PCT = 0.95  # fraction of account equity committed on entry

# -- Account --
INITIAL_CASH = 100_000.0

## 1. Load Data

In [None]:
from datetime import datetime
from pathlib import Path

import pandas as pd
import polars as pl

# Load ETF universe data
# The Parquet file is resolved relative to the current user's home directory.
DATA_PATH = Path.home().joinpath("Dropbox", "ml4t", "data", "etfs", "etf_universe.parquet")

df_full = pl.read_parquet(DATA_PATH)

# Quick sanity summary of what was loaded: shape, symbol count, time span.
ts_full = df_full["timestamp"]
print(f"Full dataset: {df_full.shape}")
print(f"Symbols: {df_full['symbol'].n_unique()}")
print(f"Date range: {ts_full.min()} to {ts_full.max()}")

In [None]:
# Filter to our asset and date range
# Both window bounds are inclusive; the symbol column is renamed to "asset"
# before the frame is handed to the backtest cells below.
window_start = datetime.fromisoformat(START_DATE)
window_end = datetime.fromisoformat(END_DATE)
ts_col = pl.col("timestamp")

df = (
    df_full.filter((pl.col("symbol") == ASSET) & (ts_col >= window_start) & (ts_col <= window_end))
    .sort("timestamp")
    .rename({"symbol": "asset"})
)

print(f"\n{ASSET} data: {len(df)} bars")
print(f"Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
df.head()

## 2. ml4t.backtest Implementation

In [None]:
from ml4t.backtest import BacktestAnalyzer, DataFeed, Engine, ExecutionMode, OrderSide, Strategy


class DualMAStrategy(Strategy):
    """Long-only dual simple-moving-average crossover strategy.

    A buy order is submitted on the bar where the fast SMA crosses above the
    slow SMA (fast > slow now, fast <= slow on the previous bar) and no long
    position is held. The whole position is sold when the fast SMA is below
    the slow SMA while a long position is held.

    Requiring an actual cross for entry (rather than just ``fast > slow``)
    keeps the strategy from buying on the very first warmed-up bar when the
    fast SMA merely happens to be above the slow SMA, which is what the
    crossover-based VectorBT and Backtrader implementations in this notebook
    also avoid.

    Args:
        asset: Key used to look the instrument up in ``data`` and at the broker.
        fast_period: Number of most recent closes averaged for the fast SMA.
        slow_period: Number of most recent closes averaged for the slow SMA;
            also the size of the rolling price buffer.
        position_pct: Fraction of account value to commit on entry.

    Raises:
        ValueError: If ``fast_period`` is smaller than 1 or larger than
            ``slow_period`` (the buffer only keeps ``slow_period`` closes, so
            a longer fast window could not be computed correctly).
    """

    def __init__(self, asset, fast_period, slow_period, position_pct):
        if fast_period < 1 or fast_period > slow_period:
            raise ValueError(
                f"fast_period must satisfy 1 <= fast_period <= slow_period, "
                f"got fast_period={fast_period}, slow_period={slow_period}"
            )
        self.asset = asset
        self.fast_period = fast_period
        self.slow_period = slow_period
        self.position_pct = position_pct
        # Rolling buffer holding at most `slow_period` most recent closes.
        self.prices = []
        # SMA values from the previous warmed-up bar; None until one exists.
        self._prev_fast_ma = None
        self._prev_slow_ma = None

    def on_data(self, timestamp, data, context, broker):
        """Update the moving averages with the new bar and submit orders.

        Bars that do not contain ``self.asset`` are ignored. No signal is
        evaluated until ``slow_period`` closes have been collected, and no
        entry can fire until two consecutive SMA readings are available.
        """
        if self.asset not in data:
            return

        close = data[self.asset]["close"]
        self.prices.append(close)

        # Keep only the last `slow_period` closes.
        if len(self.prices) > self.slow_period:
            self.prices.pop(0)

        if len(self.prices) < self.slow_period:
            return

        # Calculate MAs
        fast_ma = sum(self.prices[-self.fast_period :]) / self.fast_period
        slow_ma = sum(self.prices) / len(self.prices)

        # Remember this bar's readings for the next call before any early exit.
        prev_fast, prev_slow = self._prev_fast_ma, self._prev_slow_ma
        self._prev_fast_ma, self._prev_slow_ma = fast_ma, slow_ma

        # A cross-up needs a previous reading to compare against.
        crossed_up = prev_fast is not None and fast_ma > slow_ma and prev_fast <= prev_slow

        # Get position
        pos = broker.get_position(self.asset)
        has_position = pos is not None and pos.quantity > 0

        # Trading logic
        if crossed_up and not has_position:
            # Buy signal: size the order from current account value and this bar's close.
            equity = broker.get_account_value()
            qty = (equity * self.position_pct) / close
            broker.submit_order(self.asset, qty, OrderSide.BUY)

        elif fast_ma < slow_ma and has_position:
            # Sell signal: liquidate the whole position.
            broker.submit_order(self.asset, pos.quantity, OrderSide.SELL)

In [None]:
# Run ml4t.backtest
feed = DataFeed(prices_df=df)
strategy = DualMAStrategy(
    asset=ASSET,
    fast_period=FAST_PERIOD,
    slow_period=SLOW_PERIOD,
    position_pct=POSITION_PCT,
)

# NEXT_BAR: orders fill at the next bar's open.
engine = Engine(
    feed=feed,
    strategy=strategy,
    initial_cash=INITIAL_CASH,
    execution_mode=ExecutionMode.NEXT_BAR,
)

ml4t_result = engine.run()

# Headline metrics: (line template, key in the result mapping).
ml4t_report = (
    ("  Final Value:  ${:,.2f}", "final_value"),
    ("  Total Return: {:.4%}", "total_return"),
    ("  Sharpe Ratio: {:.4f}", "sharpe"),
    ("  Max Drawdown: {:.4%}", "max_drawdown_pct"),
)

print("ml4t.backtest Results:")
for line_template, metric_key in ml4t_report:
    print(line_template.format(ml4t_result[metric_key]))

In [None]:
# Get trade statistics
# The analyzer is built from the engine that has just been run above.
analyzer = BacktestAnalyzer(engine)
stats = analyzer.trade_statistics()
trade_summary = stats.summary()
print(trade_summary)

## 3. VectorBT Implementation

In [None]:
# Optional dependency: the VectorBT comparison is skipped when it is missing.
try:
    import vectorbt as vbt
except ImportError:
    VECTORBT_AVAILABLE = False
    print("VectorBT not installed. Run: pip install vectorbt")
    print("Skipping VectorBT comparison.")
else:
    VECTORBT_AVAILABLE = True

In [None]:
if VECTORBT_AVAILABLE:
    # Convert to pandas for VectorBT
    pdf = df.to_pandas().set_index("timestamp")

    close = pdf["close"]
    open_price = pdf["open"]

    # Calculate signals from closing prices
    fast_ma = close.rolling(FAST_PERIOD).mean()
    slow_ma = close.rolling(SLOW_PERIOD).mean()

    # Entry: fast crosses above slow; Exit: fast crosses below slow.
    # Comparisons against the NaN warm-up values evaluate to False, so no
    # signal can fire before both averages have a previous valid reading.
    entries = (fast_ma > slow_ma) & (fast_ma.shift(1) <= slow_ma.shift(1))
    exits = (fast_ma < slow_ma) & (fast_ma.shift(1) >= slow_ma.shift(1))

    # Shift signals to get next-bar execution (match ml4t NEXT_BAR mode).
    # fill_value keeps the series boolean; shift() followed by fillna() would
    # leave an object-dtype series behind.
    entries = entries.shift(1, fill_value=False)
    exits = exits.shift(1, fill_value=False)

    # Orders are priced at the bar's open (NEXT_BAR execution) while the
    # portfolio itself is valued on closes, so final value, Sharpe ratio and
    # drawdown are not computed from an open-price equity curve.
    vbt_portfolio = vbt.Portfolio.from_signals(
        close=close,
        price=open_price,  # Use open for execution
        entries=entries,
        exits=exits,
        init_cash=INITIAL_CASH,
        size=POSITION_PCT,  # 95% of equity
        size_type="percent",
        fees=0.0,
        slippage=0.0,
        freq="1D",
        accumulate=False,  # No pyramiding
    )

    print("VectorBT Results:")
    print(f"  Final Value:  ${vbt_portfolio.final_value():,.2f}")
    print(f"  Total Return: {vbt_portfolio.total_return():.4%}")
    print(f"  Sharpe Ratio: {vbt_portfolio.sharpe_ratio():.4f}")
    print(f"  Max Drawdown: {vbt_portfolio.max_drawdown():.4%}")

    vbt_final = vbt_portfolio.final_value()
else:
    # Keep the name defined so later cells can reference it unconditionally.
    vbt_final = None

## 4. Backtrader Implementation

In [None]:
# Optional dependency: the Backtrader comparison is skipped when it is missing.
try:
    import backtrader as bt
except ImportError:
    BACKTRADER_AVAILABLE = False
    print("Backtrader not installed. Run: pip install backtrader")
    print("Skipping Backtrader comparison.")
else:
    BACKTRADER_AVAILABLE = True

In [None]:
if BACKTRADER_AVAILABLE:

    class BTDualMAStrategy(bt.Strategy):
        """Long-only SMA crossover for Backtrader.

        Buys when the CrossOver indicator of the fast and slow SMA is
        positive and no position is open; closes the open position when the
        indicator turns negative.
        """

        params = (
            ("fast_period", FAST_PERIOD),
            ("slow_period", SLOW_PERIOD),
            ("position_pct", POSITION_PCT),
        )

        def __init__(self):
            closes = self.data.close
            self.fast_ma = bt.indicators.SMA(closes, period=self.p.fast_period)
            self.slow_ma = bt.indicators.SMA(closes, period=self.p.slow_period)
            self.crossover = bt.indicators.CrossOver(self.fast_ma, self.slow_ma)

        def next(self):
            # In the market: the only possible action is to exit.
            if self.position:
                if self.crossover < 0:
                    self.close()
                return

            # Flat: enter on a positive crossover reading, sizing the order
            # from current broker value and this bar's close.
            if self.crossover > 0:
                size = (self.broker.getvalue() * self.p.position_pct) / self.data.close[0]
                self.buy(size=size)

    # Prepare data for Backtrader (timestamp becomes the index)
    pdf = df.to_pandas().set_index("timestamp")
    bt_data = bt.feeds.PandasData(dataname=pdf, datetime=None)

    cerebro = bt.Cerebro()

    # Broker setup: starting cash, zero commission, and the cheat-on-open
    # flag (intended to mirror ml4t NEXT_BAR fills).
    cerebro.broker.setcash(INITIAL_CASH)
    cerebro.broker.setcommission(commission=0.0)
    cerebro.broker.set_coo(True)

    cerebro.addstrategy(BTDualMAStrategy)
    cerebro.adddata(bt_data)

    # Run Backtrader
    cerebro.run()

    bt_final = cerebro.broker.getvalue()
    bt_return = (bt_final - INITIAL_CASH) / INITIAL_CASH

    print("Backtrader Results:")
    print(f"  Final Value:  ${bt_final:,.2f}")
    print(f"  Total Return: {bt_return:.4%}")
else:
    # Keep the name defined so later cells can reference it unconditionally.
    bt_final = None

## 5. Results Comparison

In [None]:
import matplotlib.pyplot as plt

# Collect results: final portfolio value per framework that actually ran
results = {"ml4t.backtest": ml4t_result["final_value"]}

if VECTORBT_AVAILABLE:
    results["VectorBT"] = vbt_final

if BACKTRADER_AVAILABLE:
    results["Backtrader"] = bt_final

# Create comparison table, one pre-formatted row per framework
comparison_rows = [
    {
        "Framework": framework,
        "Final Value": f"${final_value:,.2f}",
        "Total Return": f"{(final_value - INITIAL_CASH) / INITIAL_CASH:.4%}",
    }
    for framework, final_value in results.items()
]
comparison = pd.DataFrame(comparison_rows, columns=["Framework", "Final Value", "Total Return"])

banner_rule = "=" * 60
print("\n" + banner_rule)
print("FRAMEWORK COMPARISON")
print(banner_rule)
print(comparison.to_string(index=False))

In [None]:
# Calculate differences
ml4t_val = ml4t_result["final_value"]

# Tolerance in percent, i.e. the same unit as pct_diff below (which is
# already multiplied by 100). 1.0 therefore means a 1% tolerance.
tolerance = 1.0

print("\nDifferences from ml4t.backtest:")
for name, val in results.items():
    if name == "ml4t.backtest":
        continue  # the reference itself

    diff = val - ml4t_val
    pct_diff = diff / ml4t_val * 100
    print(f"  {name}: ${diff:+,.2f} ({pct_diff:+.4f}%)")

    # Check if within tolerance
    if abs(pct_diff) < tolerance:
        print(f"    ✅ MATCH (within {tolerance:g}% tolerance)")
    else:
        print(f"    ⚠️ Difference exceeds {tolerance:g}% tolerance")

In [None]:
# Plot comparison
fig, ax = plt.subplots(figsize=(10, 5))

frameworks = [*results.keys()]
values = [*results.values()]

# ml4t.backtest is drawn in green, every other framework in blue.
colors = []
for framework in frameworks:
    colors.append("#2ecc71" if framework == "ml4t.backtest" else "#3498db")

bars = ax.bar(frameworks, values, color=colors)

# Dashed reference line at the starting capital.
ax.axhline(
    y=INITIAL_CASH,
    color="red",
    linestyle="--",
    label=f"Initial Cash (${INITIAL_CASH:,.0f})",
)

ax.set(
    ylabel="Final Portfolio Value ($)",
    title=f"Framework Comparison: {ASSET} Dual MA Strategy ({START_DATE} to {END_DATE})",
)
ax.legend()

# Add value labels, centred just above each bar
for rect, amount in zip(bars, values):
    x_center = rect.get_x() + rect.get_width() / 2
    y_label = rect.get_height() + 1000
    ax.text(x_center, y_label, f"${amount:,.0f}", ha="center", va="bottom")

plt.tight_layout()
plt.show()

## Summary

This notebook demonstrates that **ml4t.backtest** produces results that match industry-standard frameworks:

- **VectorBT**: Vectorized backtesting (fastest)
- **Backtrader**: Event-driven, most widely used

The small differences (< 1%) are typically due to:
- Rounding in position sizing
- Edge cases in signal timing
- Fractional share handling

### Key ml4t.backtest Advantages
1. **Clean API**: Modern Python with type hints
2. **Polars-native**: Fast data handling
3. **Execution modes**: Both SAME_BAR and NEXT_BAR
4. **Risk management**: Built-in stops, trails, portfolio limits