In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
from scipy.stats import norm
import pandas_market_calendars as mcal
from scipy import stats

In [2]:
def bns_jump_test(returns, significance_level=0.01):
    n = len(returns)
    mu = returns.mean(axis=0)
    sigma = returns.std(axis=0)

    z_stat = np.sqrt(n) * (np.abs(mu) - 0.5 * sigma ** 2) / sigma

    critical_value = norm.ppf(1 - significance_level)
    
    return z_stat > critical_value, z_stat, np.argmax(returns)

In [3]:
def realized_quadratic_variation(returns):
    return np.sum(returns**2)

def realized_bipower_variation(returns, beta=0.5):
    return np.sum(np.abs(returns[:-1]) ** beta * np.abs(returns[1:]) ** beta)

def bns_jump_test2(returns, significance_level=0.01):
    n = len(returns)
    qv = realized_quadratic_variation(returns)
    bv = realized_bipower_variation(returns)
    
    # Test statistic
    mu = (bv / n) * (1 + (2 ** (-1)) * ((8 / (3 * n)) ** 0.5))
    test_stat = (qv - mu * n) / (bv * (1 - (1 / n)) ** (-1))
    
    # Critical value
    critical_value = stats.norm.ppf(1 - significance_level)
    
    # If test_stat > critical_value, we reject the null hypothesis (no jumps) and conclude that there is a jump.
    return test_stat > critical_value, test_stat, np.argmax(returns)

In [4]:
def get_price_data(tickers, start_date, end_date, interval):
    stock_data = {}
    for ticker in tickers:
        try:
            stock_data[ticker] = yf.download(ticker, start=start_date, end=end_date, progress=False, interval=interval)
        except:
            print("Error: Could not download data for ticker " + ticker)
            return None
    return stock_data

In [5]:
def get_return_data(tickers, yesterday_data, today_data):
    return_data = {}
    for ticker in tickers:
        price_data = yesterday_data[ticker]['Close'].values
        np.append(price_data, today_data[ticker]['Close'].values[0])
        returns = np.diff(np.log(price_data))
        return_data[ticker] = returns
    return return_data

In [6]:
def get_BNS_test_data(tickers, returns):
    bns_test_data = {}
    for ticker in tickers:
        bns_test_data[ticker] = bns_jump_test(returns[ticker])
    return bns_test_data

In [7]:
def get_overnight_jumps(data, returns):
    overnight_jumps = []
    for key, value in data.items():
        is_jump, test_stat, jump_index = value
        if is_jump:
            if jump_index == len(returns[key]) - 1:
                print(f"{key} jumped at the end of the day with a test statistic of {test_stat}")
                overnight_jumps.append((key, test_stat, returns[key][len(returns[key])-1]))
    overnight_jumps.sort(key=lambda x: x[1], reverse=True)
    return overnight_jumps

In [8]:
def get_trades(overnight_jumps, today_data, today_date, portfolio):
    potential_trade_list = [None] * min(10, len(overnight_jumps))
    return_total = sum([abs(x[2]) for x in overnight_jumps])
    for i in range(min(10, len(overnight_jumps))):
        ticker, test_stat, jump = overnight_jumps[i]
        trade_weight = abs(jump) / return_total
        potential_trade_list[i] = (ticker, trade_weight, jump)

    trade_list = [None] * len(potential_trade_list)
    for i in range(len(potential_trade_list)):
        ticker, trade_weight, jump = potential_trade_list[i]
        if jump < 0:
            # take a long postion
            sell_price = today_data[ticker]['Close'].values[0]
            buy_price = today_data[ticker]['Close'].values[2] 
            profit = (sell_price - buy_price) * trade_weight * portfolio / sell_price
            trade_list[i] = (ticker, profit)
        elif jump > 0:
            # take a short position
            buy_price = today_data[ticker]['Close'].values[0]
            sell_price = today_data[ticker]['Close'].values[2]
            profit = (sell_price - buy_price) * trade_weight * portfolio / sell_price
            trade_list[i] = (ticker, profit)
        else:
            print(f'Error: On {today_date}, {ticker} had a jump of 0.')
    return trade_list

In [9]:
# Create a trading day calendar
nyse = mcal.get_calendar('NYSE')

start_date = '2022-01-01'
end_date = '2022-6-31'
interval = "1h" 

portfolio = 10000
total_trades = {}


early = nyse.schedule(start_date=start_date, end_date=end_date, tz='America/New_York')

# TODO dataset of sp500 on minute intervals for the last ten years
sp500_tickers = ['MMM','AOS','AMZN','ABBV','ACN','ATVI','ADM','ADBE','ADP','AAP','AES','AFL','A','APD','AKAM','ALK','ALB','ARE','ALGN']

for i in range(1, len(early)-1):
    

    yesterday_date = early.iloc[i-1]['market_close'].date()
    today_date = early.iloc[i]['market_close'].date()
    tomorrow_date = early.iloc[i+1]['market_close'].date()

    

    
    yesterday_data = get_price_data(sp500_tickers, yesterday_date, today_date, interval)
    if yesterday_data == None:
        continue
    today_data = get_price_data(sp500_tickers, today_date, tomorrow_date, interval)
    if today_data == None:
        continue
    for ticker in sp500_tickers:
        if len(today_data[ticker]) != 7:
            print(f"Error: {ticker} data is missing for {yesterday_date}")
            continue

    returns = get_return_data(sp500_tickers, yesterday_data, today_data)

    bns_test_data = get_BNS_test_data(sp500_tickers, returns)

    overnight_jumps = get_overnight_jumps(bns_test_data, returns)

    trade_list = get_trades(overnight_jumps, today_data, today_date, portfolio)

    total_trades[today_date] = trade_list
    total_profit = sum([x[1] for x in trade_list])
    print(f"On {today_date}, the total return was {total_profit}.")

On 2022-01-04, the total return was 0.
On 2022-01-05, the total return was 0.
ADBE jumped at the end of the day with a test statistic of 4.593757360089037
On 2022-01-06, the total return was 40.96712918590703.
On 2022-01-07, the total return was 0.
On 2022-01-10, the total return was 0.
On 2022-01-11, the total return was 0.
On 2022-01-12, the total return was 0.
On 2022-01-13, the total return was 0.
On 2022-01-14, the total return was 0.
On 2022-01-18, the total return was 0.
On 2022-01-19, the total return was 0.
On 2022-01-20, the total return was 0.
On 2022-01-21, the total return was 0.
ABBV jumped at the end of the day with a test statistic of 2.6212144262568446
On 2022-01-24, the total return was 233.83206455357725.
On 2022-01-25, the total return was 0.
On 2022-01-26, the total return was 0.
On 2022-01-27, the total return was 0.
On 2022-01-28, the total return was 0.
On 2022-01-31, the total return was 0.
On 2022-02-01, the total return was 0.
On 2022-02-02, the total return 

In [13]:
total_profits = 0
for key, value in total_trades.items():
    if len(value) > 0:
        print(f"On {key}, the trades were {value}.")
        for trade in value:
            total_profits += trade[1]

On 2022-01-06, the trades were [('ADBE', 40.96712918590703)].
On 2022-01-24, the trades were [('ABBV', 233.83206455357725)].
On 2022-02-14, the trades were [('ALGN', -74.09353125869906), ('ACN', 20.156308337834222)].
On 2022-02-15, the trades were [('ABBV', 13.107231829899877)].
On 2022-02-24, the trades were [('AAP', 91.07625960914798)].
On 2022-02-25, the trades were [('A', 104.90561431308963)].
On 2022-03-08, the trades were [('ATVI', -84.53509255809684)].
On 2022-03-18, the trades were [('AMZN', 111.19384420734559)].
On 2022-03-21, the trades were [('AAP', 45.150770475476996)].
On 2022-04-05, the trades were [('AMZN', -70.49685051212116)].
On 2022-04-07, the trades were [('ARE', -30.533999571648035)].
On 2022-04-13, the trades were [('A', 81.23131855361196), ('ADBE', 12.733448288889821)].
On 2022-05-12, the trades were [('ADBE', -162.61637088604164)].
On 2022-05-13, the trades were [('ABBV', -35.89395464943969)].
On 2022-05-19, the trades were [('AOS', 6.81313178903892), ('ARE', 2.

In [20]:
print(f"The total profits were {total_profits}.")
print(f"The total return was {total_profits / portfolio}%.")
print(f"The total return of the S&P 500 was -18.11%") #TODO: create function to calc return of S&P 500
print(f"Our strategy beats the S&P 500 by {total_profits / portfolio - (-18.11)}%.")

The total profits were -219.2117623892016.
The total return was -0.02192117623892016%.
The total return of the S&P 500 was -18.11%
Our strategy beats the S&P 500 by 18.08807882376108%.
