# Title

TODO: write up a description of the project.

Installation notes (TODO: confirm these are the steps to follow):
https://github.com/stefan-jansen/machine-learning-for-trading/tree/main/installation

### Import libraries

In [1]:
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
import zipline
from zipline import run_algorithm
from zipline.api import order_percent, order_target, order_value, order, record, symbol
from zipline.finance import commission, slippage

%load_ext zipline
%matplotlib inline

### Download stock data from 2011 to 2021

In [2]:
def download_stock_data(ticker, start_date, end_date):
    """Download daily bars for one ticker and shape them for CSV export.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.download``.
    start_date, end_date : str
        Date bounds forwarded to ``yf.download`` as ``start`` / ``end``.

    Returns
    -------
    pandas.DataFrame
        The downloaded frame with the index moved into a column, the
        ``Adj Close`` column removed when present, all column names
        lower-cased, and constant ``dividend`` (0) and ``split`` (1) columns
        appended.
    """
    prices = yf.download(ticker, start=start_date, end=end_date)

    # Some yfinance releases return two-level (field, ticker) columns even for
    # a single ticker; keep only the field level so the CSV gets one header row.
    if isinstance(prices.columns, pd.MultiIndex):
        prices.columns = prices.columns.get_level_values(0)

    prices = prices.reset_index()
    # 'Adj Close' is not always returned (e.g. when prices are auto-adjusted),
    # so a missing column must not abort the download.
    prices = prices.drop(columns=['Adj Close'], errors='ignore')
    prices = prices.rename(str.lower, axis='columns')
    prices['dividend'] = 0
    prices['split'] = 1
    return prices

In [3]:
# Date window for the price download. These are plain strings and get their
# own names: `start_date` / `end_date` are bound further down as tz-aware
# Timestamps for the backtests, and sharing the names would let a re-run of
# this cell silently replace those Timestamps with strings.
download_start_date = '2011-01-01'
download_end_date = '2020-12-31'

# df_tsla = download_stock_data('TSLA', download_start_date, download_end_date)
# df_aapl = download_stock_data('AAPL', download_start_date, download_end_date)
# df_nflx = download_stock_data('NFLX', download_start_date, download_end_date)
# df_hsbc = download_stock_data('HSBC', download_start_date, download_end_date)
# df_gs = download_stock_data('GS', download_start_date, download_end_date)
# df_gme = download_stock_data('GME', download_start_date, download_end_date)
# df_ge = download_stock_data('GE', download_start_date, download_end_date)
# df_lyg = download_stock_data('LYG', download_start_date, download_end_date)
df_shldq = download_stock_data('SHLDQ', download_start_date, download_end_date)

[*********************100%***********************]  1 of 1 completed


### Save data to CSV file

In [4]:
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before writing (needed on a fresh checkout).
os.makedirs('daily', exist_ok=True)

# df_tsla.to_csv('daily/tsla.csv', index=False)
# df_aapl.to_csv('daily/aapl.csv', index=False)
# df_nflx.to_csv('daily/nflx.csv', index=False)
# df_hsbc.to_csv('daily/hsbc.csv', index=False)
# df_gs.to_csv('daily/gs.csv', index=False)
# df_gme.to_csv('daily/gme.csv', index=False)
# df_ge.to_csv('daily/ge.csv', index=False)
# df_lyg.to_csv('daily/lyg.csv', index=False)
df_shldq.to_csv('daily/shldq.csv', index=False)

### Ingest data
Edit `~/.zipline/extension.py` as described in the Zipline docs: https://zipline.ml4trading.io/bundles.html#ingesting-data-from-csv-files

Note: Zipline seems to handle one custom bundle at a time. Ingest data one by one.

In [5]:
# Ingest one custom bundle per run via the zipline CLI; only the bundle for
# the ticker currently under study is active, the others are kept for reference.
# !zipline ingest -b tsla-prices-2011-2021
# !zipline ingest -b aapl-prices-2011-2021
# !zipline ingest -b nflx-prices-2011-2021
# !zipline ingest -b hsbc-prices-2011-2021
# !zipline ingest -b gs-prices-2011-2021
# !zipline ingest -b gme-prices-2011-2021
# !zipline ingest -b ge-prices-2011-2021
# !zipline ingest -b lyg-prices-2011-2021
!zipline ingest -b shldq-prices-2011-2021

[2021-11-06 16:23:03.328939] INFO: zipline.data.bundles.core: Ingesting shldq-prices-2011-2021.
[?25lLoading custom pricing data:   [------------------------------------]    0% | shldq: sid 0
[?25lLoading custom pricing data:   [####################################]  100%[?25h
[?25lMerging daily equity files:  [####################################]   [?25h


In [6]:
# List the registered bundles with their ingestion timestamps to confirm the ingest above succeeded.
!zipline bundles

csvdir <no ingestions>
quandl 2021-10-29 00:52:00.736804
quandl 2021-10-29 00:42:42.964339
quandl 2021-10-28 00:23:28.895431
quantopian-quandl <no ingestions>
shldq-prices-2011-2021 2021-11-06 16:23:03.015244


## Backtesting Trading Strategies

In [10]:
# Backtest window (UTC) and starting capital shared by every strategy below.
start_date = pd.Timestamp("2011-1-1", tz="UTC")
end_date = pd.Timestamp("2019-12-31", tz="UTC")
principal = 10_000

In [11]:
# Asset under test and the name of its ingested bundle.
ticker = 'SHLDQ'
bundle_name = '{}-prices-2011-2021'.format(ticker.lower())

In [20]:
# plotting function
def plot(title, results, columns_to_plot):
    """Plot a backtest as three stacked panels and print the final portfolio value.

    Panels, top to bottom: portfolio value, the columns named in
    ``columns_to_plot`` with buy/sell markers, and daily returns.

    Parameters
    ----------
    title : str
        Figure title.
    results : pandas.DataFrame
        Backtest output. The function reads its ``portfolio_value``,
        ``transactions``, ``price`` and ``returns`` columns, plus every column
        listed in ``columns_to_plot``.
    columns_to_plot : list of str
        Columns of ``results`` drawn on the middle panel.
    """
    fig, ax = plt.subplots(3, 1, sharex=True, figsize=[16, 9])

    # portfolio value
    results.portfolio_value.plot(ax=ax[0])
    ax[0].set_ylabel('portfolio value in $')

    # asset
    results[columns_to_plot].plot(ax=ax[1])
    ax[1].set_ylabel('price in $')

    # mark transactions: keep only the rows with at least one transaction and
    # classify each row by the sign of its first transaction's amount
    perf_trans = results.loc[[t != [] for t in results.transactions]]
    buys = perf_trans.loc[[t[0]['amount'] > 0 for t in perf_trans.transactions]]
    sells = perf_trans.loc[[t[0]['amount'] < 0 for t in perf_trans.transactions]]
    ax[1].plot(buys.index, results.price.loc[buys.index], '^', markersize=10, color='g', label='buy')
    ax[1].plot(sells.index, results.price.loc[sells.index], 'v', markersize=10, color='r', label='sell')
    ax[1].legend()

    # daily returns
    results.returns.plot(ax=ax[2])
    ax[2].set_ylabel('daily returns')

    fig.suptitle(title, fontsize=16)
    plt.legend()
    plt.show()

    # .iloc makes the "last row" lookup explicitly positional; a bare [-1] on
    # a label-indexed Series relies on a deprecated positional fallback.
    final_value = results.portfolio_value.iloc[-1]
    print('Final portfolio value (including cash): ${}'.format(np.round(final_value, 2)))


### Buy and hold

In [12]:
def initialize(context):
    """Resolve the traded asset and mark the one-off purchase as still pending."""
    context.has_ordered = False
    context.asset = symbol(ticker)


def handle_data(context, data):
    """Record the current price and, on the first call only, order `principal` dollars of the asset."""
    current_price = data.current(context.asset, 'price')
    record(price=current_price)

    # Guard clause: the single purchase has already been submitted.
    if context.has_ordered:
        return

    order_value(context.asset, principal)
    context.has_ordered = True


bah_results = run_algorithm(
    capital_base=principal,
    bundle=bundle_name,
    data_frequency="daily",
    start=start_date,
    end=end_date,
    initialize=initialize,
    handle_data=handle_data,
)

# plot(f'Buy and Hold - {ticker}', bah_results, ['price'])

### Dollar cost averaging


In [13]:
# handle_data runs once per daily bar, so one trading week is 5 calls.
TRADING_DAYS_PER_WEEK = 5

# Weekly budget: the principal spread evenly over 3 years of 52 weekly buys.
amount_to_invest_weekly = principal / (3 * 52)


def initialize(context):
    """Set up the bar counter, the traded asset and the running budget/spend totals."""
    context.days = 0
    context.asset = symbol(ticker)
    # Budget available for the next purchase; grows while a single share is unaffordable.
    context.amount_to_invest_weekly = amount_to_invest_weekly
    # Total cost of the orders actually submitted so far.
    context.amount_invested = 0


def handle_data(context, data):
    """Record the price on every bar and buy whole shares once per trading week.

    On a buy day the current budget is converted to a whole number of shares.
    If not even one share is affordable the budget is carried forward and
    topped up by one more weekly amount. An order is submitted only while the
    cumulative cost stays below `principal`.
    """
    price = data.current(context.asset, 'price')
    # Record before any early exit so every bar has a price in the results.
    record(price=price)

    is_buy_day = context.days % TRADING_DAYS_PER_WEEK == 0
    context.days += 1
    if not is_buy_day:
        return

    n_stocks_to_buy = math.floor(context.amount_to_invest_weekly / price)
    if n_stocks_to_buy == 0:
        # Cannot afford one share yet: roll this week's budget into the next.
        context.amount_to_invest_weekly += amount_to_invest_weekly
        return

    order_cost = n_stocks_to_buy * price
    # Count the cost as invested only when the order is actually placed.
    if context.amount_invested + order_cost < principal:
        order(context.asset, n_stocks_to_buy)
        context.amount_invested += order_cost
        context.amount_to_invest_weekly = amount_to_invest_weekly


dca_results = run_algorithm(
    start=start_date,
    end=end_date,
    initialize=initialize,
    handle_data=handle_data,
    capital_base=principal,
    bundle=bundle_name,
    data_frequency="daily",
)

# plot(f'Dollar Cost Averaging - {ticker}', dca_results, ['price'])


### Simple moving average

In [14]:
look_back_days = 20


def initialize(context):
    """Set up the bar counter used for the warm-up period and resolve the traded asset."""
    context.days = 0
    context.asset = symbol(ticker)


def handle_data(context, data):
    """Trade when the price crosses its `look_back_days` simple moving average.

    Nothing is traded or recorded until `look_back_days` bars have been seen.
    After that, a move from below to above the average buys and a move from
    above to below closes the position.
    """
    context.days += 1
    if context.days < look_back_days:
        return

    price_history = data.history(context.asset, fields="price", bar_count=look_back_days, frequency="1d")

    # calculate moving average
    ma = price_history.mean()

    # The history Series is label-indexed, so take the last two bars by
    # position with .iloc instead of relying on integer-key fallback.
    previous_price = price_history.iloc[-2]
    latest_price = price_history.iloc[-1]

    # trading logic (both prices are compared against the current average)
    # NOTE(review): order_percent presumably adds to an existing position rather
    # than targeting a weight; confirm that repeated cross-up signals cannot over-buy.
    if previous_price < ma and latest_price > ma:
        # cross up
        order_percent(context.asset, 1.0)
    elif previous_price > ma and latest_price < ma:
        # cross down
        order_target(context.asset, 0)

    record(price=data.current(context.asset, 'price'),
           moving_average=ma)


sma_results = run_algorithm(
    start=start_date,
    end=end_date,
    initialize=initialize,
    handle_data=handle_data,
    capital_base=principal,
    bundle=bundle_name,
    data_frequency="daily",
)

# plot(f'Simple Moving Average - {ticker}', sma_results, ['price', 'moving_average'])

  np.divide(
  np.divide(average_annual_return, annualized_downside_risk, out=out)


### Moving Average Crossover

In [15]:
SLOW_MA_PERIODS = 100
FAST_MA_PERIODS = 20


def initialize(context):
    """Resolve the traded asset and start flat with a zeroed bar counter."""
    context.asset = symbol(ticker)
    context.days = 0
    context.has_position = False


def handle_data(context, data):
    """Go long when the fast average is above the slow one; exit when it falls below.

    Bars seen before `SLOW_MA_PERIODS` have elapsed are skipped entirely
    (no orders, no recorded values).
    """
    context.days += 1
    still_warming_up = context.days < SLOW_MA_PERIODS
    if still_warming_up:
        return

    def _mean_price(window):
        # Average price over the trailing `window` daily bars.
        return data.history(context.asset, 'price', bar_count=window, frequency="1d").mean()

    fast_ma = _mean_price(FAST_MA_PERIODS)
    slow_ma = _mean_price(SLOW_MA_PERIODS)

    # trading logic, keyed on whether a position is currently held
    if context.has_position:
        if fast_ma < slow_ma:
            order_target(context.asset, 0)
            context.has_position = False
    elif fast_ma > slow_ma:
        order_percent(context.asset, 1.0)
        context.has_position = True

    latest_price = data.current(context.asset, 'price')
    record(price=latest_price, fast_ma=fast_ma, slow_ma=slow_ma)


mac_results = run_algorithm(
    capital_base=principal,
    bundle=bundle_name,
    data_frequency="daily",
    start=start_date,
    end=end_date,
    initialize=initialize,
    handle_data=handle_data,
)

# plot(f'Moving Average Crossover Strategy - {ticker}', mac_results, ['price', 'fast_ma', 'slow_ma'])

### Save results to CSV file

In [16]:
# DataFrame.to_csv does not create missing folders, so create the per-ticker
# output directory before writing.
results_dir = f'results/{ticker.lower()}'
os.makedirs(results_dir, exist_ok=True)

# One CSV per strategy, named after the strategy's short code.
strategy_results = {
    'bah': bah_results,
    'dca': dca_results,
    'sma': sma_results,
    'mac': mac_results,
}
for strategy_code, strategy_frame in strategy_results.items():
    strategy_frame.to_csv(f'{results_dir}/{strategy_code}.csv', index=False)