# 11. Simulating screeners
In backtesting, we sometimes want to test a strategy on all stocks. However, this is computationally expensive. A better approach is to run preliminary screens and save the results in the <code>output/screens</code> folder. Then, when backtesting, we only use the tickers from the screens. This also resembles live trading, where the output from the screeners is used instead of downloading 5000+ symbols and scanning them.

Warning: The screener results should not suffer from look-ahead bias. If you e.g. want to calculate the 14-day volume on day 1, you only have this available at the end of day 1. To account for this, sometimes you need to subtract 1 day when doing the calculations.

In [1]:
from data import get_data
from tickers import get_tickers, get_id
from times import  first_trading_date_after_equal, first_trading_date_after, last_trading_date_before, \
    last_trading_date_before_equal, get_market_calendar, get_market_dates

from datetime import datetime, date, timedelta, time
from dateutil.relativedelta import relativedelta
import mplfinance as mpf
import pandas as pd
import json

# Relative path to the Polygon data directory.
DATA_PATH = "../data/polygon/"

# Date range handed to the data and calendar helpers by the screens below.
START_DATE = date(year=2003, month=10, day=1)
END_DATE = date(year=2024, month=4, day=1)

**Top liquid stocks**

(I now use fundamentals instead)

In backtesting, instead of screening all stocks, for most systems I only need e.g. the S&P500 stocks. However, I don't care about the exact holdings of the S&P500. The only reason to choose the index is its volume and liquidity, and for that purpose turnover is far more informative.

So to create the T100, T500, T1500 and T3000 indices, I simply select the stocks with the highest turnover in each 6-month period.

In [2]:
# tickers = get_tickers()
# tickers = tickers[tickers['type'] == 'CS'] # No ETFs, ADRs and indices
# dates_and_IDs = {} # {'2022-01-01': ['AAPL', 'MSFT', ...], '2022-07-01': ['NVDA', 'AMD', ...], ...}

# # Loop through half years
# for end_datetime in pd.date_range(start= START_DATE ,end = END_DATE,freq='6M', inclusive='right'):
#     start_date = end_datetime.date() - relativedelta(months=6)
#     # Get trading dates
#     end_date = last_trading_date_before_equal(end_datetime.date())
#     end_date_to_query = last_trading_date_before(end_date) # Subtract 1 day to account for look-ahead bias
#     start_date = first_trading_date_after_equal(start_date)

#     all_bars = []
#     # Find IDs with data in the specific half year that are not delisted before the end of that half year
#     for i, row in tickers[tickers['end_date'] >= end_date_to_query].iterrows():
#         bars = get_data(row['ID'], start_date, end_date_to_query, columns=['close', 'turnover'])
#         bars['ID'] = row['ID']
#         all_bars.append(bars)
        
#     # Calculate turnover
#     all_tickers = pd.concat(all_bars)
#     all_tickers = all_tickers.groupby("ID").agg({"turnover": "sum", "close": "mean"})
#     all_tickers.sort_values(by="turnover", ascending=False, inplace=True)
#     dates_and_IDs[end_date.isoformat()] = list(all_tickers.head(500).index)
    
#     print(end_datetime)

# # Store to json
# with open('../output/screens/T500.json', 'w') as f: 
#     json.dump(dates_and_IDs, f)

In [3]:
# with open('../output/screens/T500.json', 'r') as f: 
#     T500 = json.load(f)

# T500[date(2019, 6, 28).isoformat()][:5]

**More than 20% intraday droppers**
* Common stocks only
* Turnover for the day has to be at least $5M
* Original (unadjusted) price has to be at least $1

In [None]:
# Screen: stocks that dropped more than 20% from the 9:30 open to the last
# regular-session bar of the day. The result maps every market date (ISO string)
# to the list of IDs that matched on that date and is written to
# ../output/screens/INTRADAY_MINUS_20PCT.json.
tickers = get_tickers()
tickers = tickers[tickers['type'].isin(['CS'])]  # Common stocks only

# Bar size (in minutes) requested from get_data. The same value is used to locate
# the last regular-session bar, so the two can never drift apart.
BAR_MINUTES = 5

# Get list of market dates within our range
dates = get_market_dates(START_DATE, END_DATE)
dates_and_IDs = {day.isoformat(): [] for day in dates}  # Create dictionary with empty lists

# Get market calendar in order to handle early closes
market_calendar = get_market_calendar(format='datetime')[['regular_close']]

# Timestamp of the last regular-session bar of every day (one bar width before the close).
# The previous offset of 4 minutes pointed at e.g. 15:56, which is not on a 5-minute
# grid, so the reindex below would not match any row.
# NOTE(review): assumes bars are stamped with their start time on a regular
# 5-minute grid -- confirm against get_data.
last_bar_times = market_calendar['regular_close'] - timedelta(minutes=BAR_MINUTES)

for index, row in tickers.iterrows():
    ticker_id = row['ID']  # Not named `id`, which would shadow the builtin

    bars = get_data(ticker_id, START_DATE, END_DATE, timeframe=BAR_MINUTES, extended_hours=False,
                    columns=['open', 'close', 'turnover', 'close_original'])

    if bars.empty:
        continue

    # For every day, get the open price of the 9:30 bar
    day_open = bars[['open']].between_time('9:30', '9:30')
    day_open.index = day_open.index.date

    # For every day, get the close of the last regular-session bar (handles early closes).
    # Days without that bar become NaN after the reindex and are dropped.
    day_close = bars[['close', 'close_original']].reindex(last_bar_times).dropna()
    day_close.index = day_close.index.date

    # Get the turnover per day (ignoring early closes...)
    day_turnover = bars[['turnover']].between_time('9:30', '15:55')
    day_turnover.index = day_turnover.index.date
    day_turnover = day_turnover.groupby(day_turnover.index).agg({"turnover": "sum"})

    # Calculate the drop and append to dates_and_IDs.
    # Days missing any of the three inputs hold NaN and fail every comparison below.
    day_stats = pd.concat([day_open, day_close, day_turnover], axis='columns')
    day_stats['change'] = (day_stats['close'] / day_stats['open'] - 1) * 100  # Base 100
    big_drop = day_stats[(day_stats['change'] < -20)
                         & (day_stats['close_original'] >= 1.00)
                         & (day_stats['turnover'] >= 5_000_000)]

    for day in big_drop.index:
        dates_and_IDs[day.isoformat()].append(ticker_id)

    # Progress indicator
    if index % 50 == 0:
        print(index)

# Store to json
with open('../output/screens/INTRADAY_MINUS_20PCT.json', 'w') as f:
    json.dump(dates_and_IDs, f)

**Top gappers at market open**
* Gap at least 30% (previous day regular close until 9:30 AM (9:30 AM is the close of the bar at 9:25 for 5-min and 9:29 for 1-min))
* Pre-market turnover at least $1M
* Unadjusted price at least $1

In [None]:
# Screen: stocks that gap up at least 30% from the previous daily close to the
# last pre-market bar, with at least $1M pre-market turnover and an unadjusted
# price of at least $1. The result maps every market date (ISO string) to the
# list of IDs that matched and is written to ../output/screens/GAP_30PCT.json.
tickers = get_tickers()
tickers = tickers[tickers['type'].isin(['CS', 'ADRC'])]

# Get list of market dates within our range
dates = get_market_dates(START_DATE, END_DATE)
dates_and_IDs = {day.isoformat(): [] for day in dates}  # Create dictionary with empty lists

for index, row in tickers.iterrows():
    ticker_id = row['ID']  # Not named `id`, which would shadow the builtin

    m5 = get_data(ticker_id, START_DATE, END_DATE, timeframe=5, extended_hours=True,
                  columns=['close', 'turnover', 'close_original'])

    # Check before loading the daily bars so skipped tickers cost only one load
    if m5.empty:
        continue

    d1 = get_data(ticker_id, START_DATE, END_DATE, timeframe='daily', extended_hours=False,
                  columns=['close'])

    # Without daily bars there is no previous close to measure the gap against
    if d1.empty:
        continue

    # To get the previous close
    d1['prev_close'] = d1['close'].shift(1)
    d1.index = d1.index.date

    # To get current open and pre-market turnover: keep only bars between 4:00 and 9:29
    m5 = m5.between_time('4:00', '9:29')
    m5.index = m5.index.date
    d1_premkt = m5.groupby(m5.index).agg({"close": "last",  # This close is the 9:30 time
                                          "close_original": "last",
                                          "turnover": "sum"})

    # Calculate gap (merge keeps only dates present in both frames)
    gaps = d1_premkt.merge(d1[['prev_close']], left_index=True, right_index=True)
    gaps['gap'] = (gaps['close'] / gaps['prev_close'] - 1) * 100  # Base 100

    big_gap = gaps[(gaps['gap'] >= 30)
                   & (gaps['close_original'] >= 1.00)
                   & (gaps['turnover'] >= 1e6)]

    for day in big_gap.index:
        dates_and_IDs[day.isoformat()].append(ticker_id)

    # Progress indicator
    if index % 50 == 0:
        print(index)

# Store to json
with open('../output/screens/GAP_30PCT.json', 'w') as f:
    json.dump(dates_and_IDs, f)