In [6]:
from zipline import run_algorithm
from zipline.api import order_target_percent, symbol
import pandas as pd
import matplotlib.pyplot as plt

In [7]:
# Keep both timestamps timezone-naive: with UTC-aware values we get
# `AttributeError: 'datetime.timezone' object has no attribute 'key'`.
start_date, end_date = (pd.Timestamp(day) for day in ("2003-01-01", "2017-12-31"))

In [8]:
def initialize(context):
    """Prepare the trading universe and look-back window on ``context``.

    Sets two attributes:

    * ``context.dji_symbols`` - list with the result of ``symbol()`` for each
      hard-coded ticker below, in the order listed.
    * ``context.index_average_window`` - the value 100, used by
      ``handle_data`` as the number of daily bars to average over.
    """
    tickers = (
        "AAPL", "AXP", "BA", "CAT", "CSCO", "CVX", "DD", "DIS", "GE", "GS",
        "HD", "IBM", "INTC", "JNJ", "JPM", "KO", "MCD", "MMM", "MRK", "MSFT",
        "NKE", "PFE", "PG", "TRV", "UNH", "UTX", "VZ", "WMT", "XOM",
    )
    context.dji_symbols = list(map(symbol, tickers))
    context.index_average_window = 100

In [9]:
def handle_data(context, data):
    """Rebalance on every bar: hold stocks trading above their own average.

    For each asset in ``context.dji_symbols`` the latest close is compared
    with the mean close over the last ``context.index_average_window`` daily
    bars. Assets above their mean get an equal target weight of
    ``1 / len(context.dji_symbols)``; all others get a target weight of 0.

    Orders are only placed for assets that ``data.can_trade`` reports as
    tradeable, and each asset is ordered at most once per call.

    Parameters
    ----------
    context : object carrying ``dji_symbols`` and ``index_average_window``
        (both set in ``initialize``).
    data : object providing ``history(assets, field, bar_count, frequency)``
        and ``can_trade(asset)``.
    """
    stock_hist = data.history(
        context.dji_symbols, 'close', context.index_average_window, '1d'
    )

    # One boolean per asset: is the most recent close above the window mean?
    # A NaN close compares as False, so such assets get a zero target weight.
    above_mean = stock_hist.iloc[-1] > stock_hist.mean()

    # Equal weight based on the full universe size, so the portfolio is only
    # fully invested when every stock is above its mean.
    equal_weight = 1 / len(context.dji_symbols)

    for stock, is_above in above_mean.items():
        # Place the order only behind the tradeability guard; an additional
        # unguarded call would order untradeable assets and double every order.
        if data.can_trade(stock):
            order_target_percent(stock, equal_weight if is_above else 0.0)

In [10]:
def analyze(context, perf):
    """Draw a three-panel summary figure from the backtest results.

    Panels, top to bottom:

    1. ``perf["portfolio_value"]`` on a logarithmic y-axis ("Equity Curve").
    2. ``perf["gross_leverage"]`` ("Exposure").
    3. ``perf["returns"]`` ("Returns").

    ``context`` is accepted but not used here.
    """
    figure = plt.figure(figsize=(12, 8))

    # Top panel: equity curve, log scale, no grid.
    equity_ax = figure.add_subplot(311)
    equity_ax.set_title("Strategy Results")
    equity_ax.semilogy(
        perf["portfolio_value"], linestyle="-", label="Equity Curve", linewidth=3.0
    )
    equity_ax.legend()
    equity_ax.grid(False)

    # Middle and bottom panels share the same layout and differ only in the
    # plotted column, legend label and line style.
    lower_panels = (
        (312, "gross_leverage", "Exposure", "-"),
        (313, "returns", "Returns", "-."),
    )
    for position, column, legend_label, line_style in lower_panels:
        panel = figure.add_subplot(position)
        panel.plot(perf[column], label=legend_label, linestyle=line_style, linewidth=1.0)
        panel.legend()
        panel.grid(True)

In [11]:
# Run the backtest; `result` holds whatever `run_algorithm` returns.
result = run_algorithm(
    # Simulation period
    start=start_date,
    end=end_date,
    # Data source
    bundle="quandl",
    data_frequency="daily",
    # Starting cash
    capital_base=10_000,
    # Strategy callbacks defined in the cells above
    initialize=initialize,
    handle_data=handle_data,
    analyze=analyze,
)

There is a major issue with this backtest: survivorship (look-ahead) bias. We are using the constituents of the DJI as of the time the book was written, but the index did not have the same constituents back in 2003, when this simulation starts.

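Stocks are added to an index because they have performed well in the past. By trading today's constituents, we are selecting stocks that we already know did well over the backtest period, so the strategy is almost guaranteed to look good in hindsight.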