In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
from scipy.stats import norm

In [2]:
def bns_jump_test(data, intraday_returns=390, significance_level=0.05):
    overnight_returns = np.log(data["Close"].iloc[:-1].values / data["Open"].values[1:])
    intraday_returns = np.log(data["Close"].values / data["Open"].values)

    data_size = len(overnight_returns)
    mean = overnight_returns.mean()
    std_dev = overnight_returns.std()
    z_stat = (overnight_returns - mean) / std_dev

    p_values = 2 * (1 - norm.cdf(np.abs(z_stat)))
    return p_values < significance_level

In [18]:
def significant_over_gaps(stock_data, jump_test_results, significance_level=0.05):
    significant_gaps = {}
    
    for ticker, jumps in jump_test_results.items():
        if np.any(jumps):
            overnight_returns = np.log(stock_data[ticker]["Close"].iloc[:-1].values / stock_data[ticker]["Open"].values[1:])
            intraday_volatility = stock_data[ticker]["Close"].pct_change().rolling(window=5).std().dropna()
            threshold = intraday_volatility * np.sqrt(5)
            # Align the lengths of the overnight_returns and threshold arrays
            min_len = min(len(overnight_returns), len(threshold))
            significant_gap = np.abs(overnight_returns[:min_len]) > threshold[:min_len]
            if np.any(significant_gap):
                significant_gaps[ticker] = True
    
    return significant_gaps

In [None]:
def select_top_stocks(significant_gaps, stock_data, num_stocks=10):
    z_statistics = {}
    for ticker, gap in significant_gaps.items():
        if gap:
            overnight_returns = np.log(stock_data[ticker]["Close"].iloc[:-1].values / stock_data[ticker]["Open"].values[1:])
            mean = overnight_returns.mean()
            std_dev = overnight_returns.std()
            z_stat = (overnight_returns - mean) / std_dev
            z_statistics[ticker] = np.max(z_stat)

    sorted_z_statistics = sorted(z_statistics.items(), key=lambda x: x[1], reverse=True)
    return [ticker for ticker, _ in sorted_z_statistics[:num_stocks]]

In [None]:
def execute_trading_strategy(top_stocks, stock_data):
    trading_results = {}
    
    for ticker in top_stocks:
        data = stock_data[ticker].copy()
        overnight_returns = np.log(data["Close"].iloc[:-1].values / data["Open"].values[1:])
        trading_signals = np.sign(overnight_returns)
        
        data['TradingSignal'] = np.nan
        data.loc[data.index[1:], 'TradingSignal'] = trading_signals
        
        data['Position'] = data['TradingSignal'].shift(1)
        data['TradeReturns'] = data['Close'].pct_change() * data['Position']
        data['CumulativeReturns'] = (1 + data['TradeReturns']).cumprod()
        trading_results[ticker] = data.iloc[1:]

    return trading_results

In [34]:
# 1. Get historical stock data for the S&P 500 constituents
# sp500_tickers = pd.read_csv("sp500_tickers.csv")["Symbol"].tolist()
sp500_tickers = ['MMM','AOS','ABT','ABBV','ACN','ATVI','ADM','ADBE','ADP','AAP','AES','AFL','A','APD','AKAM','ALK','ALB','ARE','ALGN']

start_date = "2023-04-27"
end_date = "2023-05-04"

stock_data = {}
for ticker in sp500_tickers:
    stock_data[ticker] = yf.download(ticker, start=start_date, end=end_date, progress=False, interval='1m')

In [35]:
# 2. Execute the BNS jump test
jump_test_results = {}
for ticker, data in stock_data.items():
    
    jump_test_results[ticker] = bns_jump_test(data)

In [36]:
# 3. Apply the jump identification method of Andersen et al. (2010)
significant_gaps = significant_over_gaps(stock_data, jump_test_results)

In [37]:
# 5. Select the most suitable shares based on the highest z-statistic
top_stocks = select_top_stocks(significant_gaps, stock_data)
print("Top 10 stocks:", top_stocks)

Top 10 stocks: ['AFL', 'ADBE', 'ATVI', 'ABBV', 'ADM', 'AES', 'ALK', 'ACN', 'AKAM', 'APD']


In [38]:
# 6. Execute the trading strategy
trading_results = execute_trading_strategy(top_stocks, stock_data)

for ticker, result in trading_results.items():
    print(f"{ticker} Cumulative Returns: {result['CumulativeReturns'].iloc[-1]}")

AFL Cumulative Returns: 0.9881459895552699
ADBE Cumulative Returns: 0.9828369975858309
ATVI Cumulative Returns: 0.996769562326911
ABBV Cumulative Returns: 0.9963954187573587
ADM Cumulative Returns: 0.9741175425680317
AES Cumulative Returns: 0.9843591239436876
ALK Cumulative Returns: 0.9432417652750251
ACN Cumulative Returns: 1.0089635451569408
AKAM Cumulative Returns: 0.9966987638936992
APD Cumulative Returns: 0.9845379127976476


In [45]:
# Set the stock symbol and download historical data
symbol = "AAPL"
stock = yf.Ticker(symbol)
data = stock.history(period="max")

# Calculate daily returns and variance
returns = data["Close"].pct_change()
variance = np.var(returns)

# Calculate expected returns using a simple moving average model
window_size = 20
sma = data["Close"].rolling(window_size).mean()
expected_returns = sma.pct_change()[window_size:]

# Calculate the BNS jump test statistic
last_price = data["Close"][-1]
last_return = returns[-1]
expected_return = expected_returns[-1]
jump_statistic = (last_return - expected_return) / np.sqrt(variance)

# Calculate the p-value for the jump statistic
p_value = norm.sf(abs(jump_statistic)) * 2

# Print the results
print("BNS jump statistic for {}: {}".format(symbol, jump_statistic))
print("P-value for BNS jump test: {}".format(p_value))

BNS jump statistic for AAPL: -0.37333054854278047
P-value for BNS jump test: 0.7089024428598971


In [57]:
start_date = "2023-05-01"
end_date = "2023-05-03"
data = yf.download(['MMM'], start=start_date, end=end_date, progress=False, interval='1m')

In [58]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-05-01 09:30:00-04:00,106.339996,106.500000,106.339996,106.440002,106.440002,68256
2023-05-01 09:31:00-04:00,106.430000,106.540001,106.379997,106.379997,106.379997,5448
2023-05-01 09:32:00-04:00,106.375000,106.389999,106.375000,106.389999,106.389999,1336
2023-05-01 09:33:00-04:00,106.440002,106.549896,106.379997,106.540001,106.540001,2567
2023-05-01 09:34:00-04:00,106.480003,106.550003,106.474899,106.521400,106.521400,2431
...,...,...,...,...,...,...
2023-05-02 15:55:00-04:00,103.070000,103.120003,103.050003,103.059998,103.059998,26276
2023-05-02 15:56:00-04:00,103.059998,103.110001,103.040001,103.059998,103.059998,36495
2023-05-02 15:57:00-04:00,103.059998,103.160004,103.050003,103.125000,103.125000,35296
2023-05-02 15:58:00-04:00,103.129799,103.129799,103.070000,103.078499,103.078499,35406


In [62]:
# Set the stock symbol and download historical data
symbol = "AAPL"
stock = yf.Ticker(symbol)
data = stock.history(period="1mo")

# Get the close prices from 30 days ago to the present day
close_prices = data["Close"].iloc[:30][::-1]

# Calculate daily returns and variance
returns = np.log(data["Open"] / close_prices)
variance = np.var(returns)

# Calculate expected returns using a simple moving average model
window_size = 20
sma = data["Open"].rolling(window_size).mean()
expected_returns = np.log(sma / close_prices.iloc[window_size:])[::-1]

# Calculate the BNS jump test statistic for each day
jump_statistics = []
p_values = []
for i in range(len(data)):
    last_price = data["Open"][i]
    last_return = returns[i]
    expected_return = expected_returns[i]
    jump_statistic = (last_return - expected_return) / np.sqrt(variance)
    jump_statistics.append(jump_statistic)
    p_value = norm.sf(abs(jump_statistic)) * 2
    p_values.append(p_value)

# Print the results
for i in range(len(jump_statistics)):
    print("BNS jump statistic for {} day(s) ago: {}".format(i+1, jump_statistics[i]))
    print("P-value for BNS jump test: {}\n".format(p_values[i]))


BNS jump statistic for 1 day(s) ago: nan
P-value for BNS jump test: nan

BNS jump statistic for 2 day(s) ago: nan
P-value for BNS jump test: nan

BNS jump statistic for 3 day(s) ago: nan
P-value for BNS jump test: nan

BNS jump statistic for 4 day(s) ago: nan
P-value for BNS jump test: nan

BNS jump statistic for 5 day(s) ago: nan
P-value for BNS jump test: nan

BNS jump statistic for 6 day(s) ago: nan
P-value for BNS jump test: nan

BNS jump statistic for 7 day(s) ago: nan
P-value for BNS jump test: nan

BNS jump statistic for 8 day(s) ago: nan
P-value for BNS jump test: nan

BNS jump statistic for 9 day(s) ago: nan
P-value for BNS jump test: nan

BNS jump statistic for 10 day(s) ago: nan
P-value for BNS jump test: nan

BNS jump statistic for 11 day(s) ago: nan
P-value for BNS jump test: nan

BNS jump statistic for 12 day(s) ago: nan
P-value for BNS jump test: nan

BNS jump statistic for 13 day(s) ago: nan
P-value for BNS jump test: nan

BNS jump statistic for 14 day(s) ago: nan
P-val

In [None]:
def bns_jump_test(returns, overnight_returns):
    jump_test_results = []
    for stock, intraday_returns in returns.items():
        overnight_return = overnight_returns[stock]
        test_result = # Perform BNS jump test based on both intraday_returns and overnight_return
        jump_test_results.append((stock, test_result))
    return jump_test_results

def jump_identification(jump_test_results):
    # Apply Andersen et al. (2010) method to identify significant overnight gaps
    significant_gaps = []
    for stock, test_result in jump_test_results:
        if # check if stock has significant overnight gap using Andersen et al. (2010) method:
            significant_gaps.append(stock)
    return significant_gaps

def select_top_stocks(significant_gaps, z_statistics):
    # Select top 10 stocks with the highest z-statistic
    top_stocks = sorted(significant_gaps, key=lambda x: z_statistics[x], reverse=True)[:10]
    return top_stocks

def trading_strategy(top_stocks, overnight_returns):
    trades = []
    for stock in top_stocks:
        if overnight_returns[stock] < 0:
            trade_action = "long"
        else:
            trade_action = "short"
        
        # Reverse the trade after 120 minutes
        trade_reversal = {"stock": stock, "action": trade_action, "reverse_after_minutes": 120}
        trades.append(trade_reversal)
    
    return trades

def main():
    # Load S&P 500 stock constituents data
    intraday_returns = # Load intraday returns for each stock
    overnight_returns = # Load overnight returns for each stock
    z_statistics = # Load z-statistics of Huang and Tauchen (2005) for each stock
    
    # Formation period
    jump_test_results = bns_jump_test(intraday_returns, overnight_returns)
    significant_gaps = jump_identification(jump_test_results)
    top_stocks = select_top_stocks(significant_gaps, z_statistics)
    
    # Trading period
    trades = trading_strategy(top_stocks, overnight_returns)
    
    # Execute trades
    for trade in trades:
        execute_trade(trade)

if __name__ == "__main__":
    main()

In [83]:
yesterday_date = "2023-05-01"
current_date = "2023-05-02"
tomorrow_date = "2023-05-03"
yesterday_data = yf.download("MMM", start=yesterday_date, end=current_date, interval="1m")
current_data = yf.download("MMM", start=current_date, end=tomorrow_date,  interval="1m")

price_data = yesterday_data["Close"].values
np.append(price_data, current_data["Open"].values[0])
price_data

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


array([106.44000244, 106.37999725, 106.38999939, 106.54000092,
       106.52140045, 106.540802  , 106.51999664, 106.62000275,
       106.76499939, 106.73770142, 106.70749664, 106.63559723,
       106.55000305, 106.55999756, 106.48999786, 106.59999847,
       106.47499847, 106.59999847, 106.73999786, 106.66999817,
       106.72499847, 106.74500275, 106.71700287, 106.76000214,
       106.76499939, 106.72000122, 106.76999664, 106.66999817,
       106.68000031, 106.68000031, 106.51000214, 106.4957962 ,
       106.47000122, 106.48000336, 106.52999878, 106.45999908,
       106.47380066, 106.40000153, 106.43000031, 106.54000092,
       106.54000092, 106.44999695, 106.48940277, 106.48999786,
       106.55000305, 106.58999634, 106.57499695, 106.56999969,
       106.51999664, 106.43000031, 106.33000183, 106.20999908,
       106.19999695, 106.16500092, 106.13999939, 106.08000183,
       106.13999939, 106.12000275, 106.13500214, 106.12000275,
       106.26999664, 106.14499664, 106.19999695, 106.22

In [104]:
yesterday_data['Close'].pct_change().values

array([            nan, -5.63746586e-04,  9.40227156e-05,  1.40992130e-03,
       -1.74586669e-04,  1.82137582e-04, -1.95280667e-04,  9.38848166e-04,
        1.35993847e-03, -2.55682797e-04, -2.82981295e-04, -6.73799089e-04,
       -8.02679213e-04,  9.38010938e-05, -6.56904058e-04,  1.03296659e-03,
       -1.17260790e-03,  1.17398452e-03,  1.31331512e-03, -6.55796292e-04,
        5.15611757e-04,  1.87437552e-04, -2.62306218e-04,  4.02927991e-04,
        4.68082926e-05, -4.21469294e-04,  4.68472843e-04, -9.36578414e-04,
        9.37670985e-05,  0.00000000e+00, -1.59353364e-03, -1.33376512e-04,
       -2.42215973e-04,  9.39432339e-05,  4.69528745e-04, -6.57089042e-04,
        1.29640943e-04, -6.93120118e-04,  2.81943410e-04,  1.03354891e-03,
        0.00000000e+00, -8.44790375e-04,  3.70181530e-04,  5.58828163e-06,
        5.63481916e-04,  3.75347583e-04, -1.40720426e-04, -4.68895478e-05,
       -4.69203828e-04, -8.44877401e-04, -9.39570364e-04, -1.12858784e-03,
       -9.41732070e-05, -

In [103]:
returns = np.log(price_data[1:] / price_data[:-1])
returns

array([-5.63905551e-04,  9.40182958e-05,  1.40892829e-03, -1.74601911e-04,
        1.82120997e-04, -1.95299737e-04,  9.38407724e-04,  1.35901459e-03,
       -2.55715489e-04, -2.83021342e-04, -6.74026194e-04, -8.03001532e-04,
        9.37966948e-05, -6.57119914e-04,  1.03243345e-03, -1.17329594e-03,
        1.17329594e-03,  1.31245347e-03, -6.56011421e-04,  5.15478875e-04,
        1.87419988e-04, -2.62340626e-04,  4.02846837e-04,  4.68071971e-05,
       -4.21558137e-04,  4.68363144e-04, -9.37017278e-04,  9.37627027e-05,
        0.00000000e+00, -1.59480466e-03, -1.33385408e-04, -2.42245312e-04,
        9.39388215e-05,  4.69418551e-04, -6.57305019e-04,  1.29632541e-04,
       -6.93360437e-04,  2.81903672e-04,  1.03301516e-03,  0.00000000e+00,
       -8.45147411e-04,  3.70113030e-04,  5.58826602e-06,  5.63323219e-04,
        3.75277158e-04, -1.40730328e-04, -4.68906472e-05, -4.69313938e-04,
       -8.45234511e-04, -9.40012037e-04, -1.12922517e-03, -9.41776416e-05,
       -3.29583815e-04, -

In [105]:
returns = np.diff(np.log(price_data))
returns

array([-5.63905551e-04,  9.40182958e-05,  1.40892829e-03, -1.74601911e-04,
        1.82120997e-04, -1.95299737e-04,  9.38407724e-04,  1.35901459e-03,
       -2.55715489e-04, -2.83021342e-04, -6.74026194e-04, -8.03001532e-04,
        9.37966948e-05, -6.57119914e-04,  1.03243345e-03, -1.17329594e-03,
        1.17329594e-03,  1.31245347e-03, -6.56011421e-04,  5.15478875e-04,
        1.87419988e-04, -2.62340626e-04,  4.02846837e-04,  4.68071971e-05,
       -4.21558137e-04,  4.68363144e-04, -9.37017278e-04,  9.37627027e-05,
        0.00000000e+00, -1.59480466e-03, -1.33385408e-04, -2.42245312e-04,
        9.39388215e-05,  4.69418551e-04, -6.57305019e-04,  1.29632541e-04,
       -6.93360437e-04,  2.81903672e-04,  1.03301516e-03,  0.00000000e+00,
       -8.45147411e-04,  3.70113030e-04,  5.58826602e-06,  5.63323219e-04,
        3.75277158e-04, -1.40730328e-04, -4.68906472e-05, -4.69313938e-04,
       -8.45234511e-04, -9.40012037e-04, -1.12922517e-03, -9.41776416e-05,
       -3.29583815e-04, -

In [95]:
def bns_jump_test(returns, significance_level=0.01):
    n = len(returns)
    mu = returns.mean(axis=0)
    sigma = returns.std(axis=0)

    z_stat = np.sqrt(n) * (np.abs(mu) - 0.5 * sigma ** 2) / sigma

    critical_value = norm.ppf(1 - significance_level)

    return z_stat > critical_value, z_stat

In [96]:
bns_jump_test(price_data)

True

In [97]:
np.argmax(price_data)

26