In [2]:
import os
import sys
from datetime import datetime
from os.path import abspath
import pandas as pd
from pandas_datareader import data as pdr
import yfinance as yf
import matplotlib.pyplot as plt
import quantstats as qs
from hurst import compute_Hc

# Let yfinance patch pandas_datareader (imported above as `pdr`).
# NOTE(review): presumably this is what makes `pdr.get_data_yahoo(...)` in
# `plots` work — confirm the installed yfinance version still ships this hook.
yf.pdr_override()

# Zipline imports
from zipline.utils.run_algo import load_extensions
from zipline.data import bundles
from zipline.data.data_portal import DataPortal
from zipline.utils.calendar_utils import get_calendar

from zipline.api import set_max_leverage, schedule_function, set_benchmark,set_commission
from zipline.finance.commission import PerContract, PerDollar, PerShare, PerTrade
from zipline.finance.commission import CommissionModel
from zipline.finance.slippage import VolumeShareSlippage, FixedSlippage
from zipline.finance.commission import PerShare, PerTrade, PerDollar
from zipline.api import set_slippage, set_commission
from zipline.data.bundles import register, unregister, ingest
from zipline.data.bundles.csvdir import csvdir_equities
from zipline.utils.calendar_utils import register_calendar, get_calendar
from zipline.api import (order, 
                         order_target,
                         order_value,
                         record, 
                         symbol,
                         get_datetime,
                         order_target_percent,
                         order_target_value,
                         set_benchmark,
                         get_open_orders)
from zipline import run_algorithm
from zipline.utils.calendar_utils import get_calendar
from zipline.api import order_target, record, date_rules, time_rules, symbol # type: ignore

# Silence warning output for the rest of the session.
# The filters are registered in this exact order; `Warning` is the base class
# of the other three categories.
import warnings

for _ignored_category in (Warning, RuntimeWarning, DeprecationWarning, UserWarning):
    warnings.filterwarnings('ignore', category=_ignored_category)

# Load zipline extensions using the current process environment.
# NOTE(review): `default=True` presumably pulls in the user's default
# extension file and `strict=True` makes load errors fatal — confirm against
# the zipline version in use.
load_extensions(
    default=True,
    extensions=[],
    strict=True,
    environ=os.environ,
)
# IPython magics: render matplotlib figures inline and enable the autoreload
# extension in mode 2.
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Put the parent directory of the notebook's working directory on the import path.
ROOT_DIR = abspath('../')
sys.path.append(ROOT_DIR)

# Todo list
1. Filter stocks using the Hurst exponent (it is good at finding mean-reverting time series)
2. Build a Bollinger Bands strategy
3. Use QuantStats to evaluate the Strategy

In [3]:
# Project root: the parent of the notebook's working directory.
ROOT_DIR = abspath('../')
# Only register the root once; an unconditional append adds a duplicate
# sys.path entry every time this cell is (re-)run.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

# Input files, resolved relative to the project root.
TICKER_FILE_PATH = f"{ROOT_DIR}/data/sp500_tickers.csv"
PARQUET_FILE_PATH = f"{ROOT_DIR}/data/sp500_stock_data.parquet"

# Sample window used when slicing price history.
START_DATE = datetime(2018, 1, 1)
END_DATE = datetime(2023, 12, 31)


def calculate_years() -> int:
    """Return the number of whole 365-day years between START_DATE and END_DATE.

    The module-level constants are read at call time, so rebinding
    START_DATE / END_DATE later changes what this function returns.
    The fractional part of the year count is truncated.
    """
    return int((END_DATE - START_DATE).days / 365)


YEARS = calculate_years()

In [4]:
def load_tickers():
    """Read the ticker list and the stored price data from disk.

    Returns a 2-tuple: the 'Ticker' column of the CSV at TICKER_FILE_PATH as
    a plain list, and the frame read from the parquet file at
    PARQUET_FILE_PATH.
    """
    ticker_table = pd.read_csv(TICKER_FILE_PATH)
    price_history = pd.read_parquet(PARQUET_FILE_PATH)
    symbols = ticker_table['Ticker'].tolist()
    return symbols, price_history

def is_not_null(close_data: pd.Series) -> bool:
    """Tell whether `close_data` is long enough to be analysed.

    The series passes when it holds at least 251.5 observations per year of
    the window reported by `calculate_years()`.
    """
    required_rows = 251.5 * calculate_years()
    return len(close_data) >= required_rows

def calculate_hurst_exponent(close_data: pd.Series) -> float:
    """Return the Hurst exponent reported by `compute_Hc` for a price series.

    `compute_Hc` is called with kind="price" and simplified=True; it yields
    three values, of which only the first (H) is returned.
    """
    hurst_value, _, _ = compute_Hc(close_data, kind="price", simplified=True)
    return hurst_value

def hurst_filter(threshold: float = 0.5):
    """Return tickers whose Hurst exponent is at most `threshold`.

    For every ticker from `load_tickers()` that is present in the price data,
    the 'close' values between START_DATE and END_DATE are taken with missing
    values dropped. Series rejected by `is_not_null` are skipped. The
    surviving tickers are returned as a list ordered by ascending Hurst
    exponent.
    """
    tickers, prices = load_tickers()

    scored = []
    for name in tickers:
        if name not in prices:
            continue

        closes = prices[name][START_DATE:END_DATE]["close"].dropna()
        if not is_not_null(closes):
            continue

        exponent = calculate_hurst_exponent(closes)
        if exponent <= threshold:
            scored.append((name, exponent))

    # Lowest exponent first
    ranked = sorted(scored, key=lambda pair: pair[1])
    return [name for name, _ in ranked]
    

In [5]:
# Run the Hurst screen with its default threshold (0.5).
hurst_filtered = hurst_filter()

In [6]:
# Tickers that passed the screen, ordered by ascending Hurst exponent.
hurst_filtered

['BDX',
 'NI',
 'K',
 'ED',
 'GEN',
 'AMGN',
 'CL',
 'BIIB',
 'EVRG',
 'GILD',
 'SJM',
 'ATO',
 'PNW',
 'EIX',
 'DLR',
 'KMB',
 'INCY',
 'AEP',
 'CME',
 'CPB',
 'CNC',
 'JKHY',
 'FLT',
 'BF-B']

In [None]:
# Backtest window and starting capital.
# NOTE(review): this rebinds the START_DATE / END_DATE globals set in the
# earlier configuration cell (END_DATE was 2023-12-31 there). Functions that
# read them at call time, such as calculate_years() and hurst_filter(), will
# see these values once this cell has run.
START_DATE = pd.Timestamp('2018-01-01')
END_DATE = pd.Timestamp('2023-01-01')

BASE_CAPITAL = 100_000

# calculate_years() is already defined in the earlier configuration cell and
# reads the globals at call time, so it is reused here instead of being
# declared a second time.
YEARS = calculate_years()

In [None]:
def _to_naive_dates(index):
    """Return `index` as a tz-naive DatetimeIndex truncated to midnight."""
    dates = pd.to_datetime(index)
    # tz_convert raises on tz-naive data, so only strip a timezone that exists.
    if dates.tz is not None:
        dates = dates.tz_convert(None)
    return dates.normalize()


def plots(results):
    """Render the QuantStats full report for a backtest against the S&P 500.

    Parameters
    ----------
    results : pandas.DataFrame
        Backtest output with a datetime index and a 'returns' column.
        Assumes one row per trading day — TODO confirm for intraday runs.

    The benchmark is the percentage change of the '^GSPC' 'Adj Close' series
    downloaded for the span of `results.index`. `results` itself is left
    untouched: the returns are copied and re-indexed by tz-naive calendar
    date, so the function can be called repeatedly on the same frame. Rows are
    matched to the benchmark by date instead of by position.
    """
    start = results.index[0]
    end = results.index[-1]
    benchmark = pdr.get_data_yahoo('^GSPC', start=start, end=end)['Adj Close'].pct_change()
    benchmark.index = _to_naive_dates(benchmark.index)

    # Work on a copy so the caller's frame keeps its original index.
    strategy_returns = results['returns'].copy()
    strategy_returns.index = _to_naive_dates(strategy_returns.index)

    # Any remaining date alignment is delegated to quantstats via match_dates.
    qs.reports.full(strategy_returns, benchmark=benchmark, match_dates=True, figsize=(8, 4))