In [None]:
import pandas as pd 
import numpy as np
import yfinance as yf

The rules for the strategy are:
1. Select all stocks near the market open whose returns from their
previous day’s lows to today’s opens are lower than one standard

deviation. The standard deviation is computed using the daily close-
to-close returns of the last 90 days. These are the stocks that “gapped

down.”
2. Narrow down this list of stocks by requiring their open prices to be
higher than the 20-day moving average of the closing prices.

3. Buy the 10 stocks within this list that have the lowest returns from their
previous day’s lows. If the list has fewer than 10 stocks, then buy the
entire list.

4. Liquidate all positions at the market close.

In [None]:

topN = 10
entryZscore  = 1
lookback = 20 




In [None]:
#1

import yfinance as yf
import pandas as pd

# Get the list of tickers for S&P 500
sp500_tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]['Symbol']

# Get historical data for each ticker
data = {}
returns = {}
for ticker in sp500_tickers:
    try:
        stock = yf.Ticker(ticker)
        hist_data = stock.history(period='5y')
        data[ticker] = hist_data[['Open', 'Low', 'Close']]
        returns[ticker] = hist_data['Close'].pct_change()
    except:
        print(f"Error retrieving data for {ticker}")

# Concatenate the data into a single DataFrame
df = pd.concat(data.values(), keys=data.keys())

# Extract the open, low, and close prices
op = df['Open'].unstack()
lo = df['Low'].unstack()
cl = df['Close'].unstack()

# Display the shape of the arrays
print(f"op shape: {op.shape}, lo shape: {lo.shape}, cl shape: {cl.shape}")

In [None]:
sp_returns = pd.concat(returns.values(),keys=returns.keys())
sp_returns = pd.DataFrame(sp_returns)

In [None]:
sp_returns

In [None]:
moving_std_deviaiton = sp_returns.rolling(90).std().shift(1)

moving_std_deviaiton = moving_std_deviaiton.rename(columns={"Close":"Returns"})

moving_std_deviaiton



In [None]:
moving_std_deviaiton

In [None]:
sp_df = pd.concat([df,moving_std_deviaiton],axis = 1)
sp_df

In [None]:
'''Shifting the rolling standard deviation by one day is often done to align the calculated standard deviation with the returns data in a way that allows for meaningful analysis. Here's why this is commonly done:

1. **Avoiding Lookahead Bias**: When calculating a rolling statistic (like a rolling standard deviation), you are essentially using data up to a certain point in time to make calculations for that point and potentially future points. Shifting the result by one day ensures that the calculated standard deviation corresponds to the returns for the next day, avoiding a lookahead bias where the statistic is used to make decisions based on future data.

2. **Aligning Data for Analysis**: In many cases, you want to compare the current day's returns or other metrics with the historical data. Shifting the rolling standard deviation by one day aligns it with the returns data, making it easier to analyze relationships between volatility (as measured by the standard deviation) and returns.

3. **Consistency in Time Series Analysis**: Shifting the result by one day maintains consistency in time series analysis. For example, if you are calculating correlations or other metrics between different time series (e.g., returns and standard deviation), aligning the data correctly ensures that the analysis is meaningful and avoids introducing errors or biases.

Overall, shifting the rolling standard deviation by one day is a common practice to ensure that the calculated statistic is aligned with the data it is intended to analyze, avoiding biases and ensuring consistency in time series analysis.'''

buyPrice = sp_df['Low'].shift(1)*(1-entryZscore*sp_df['Returns'])

buyPrice.shape





In [123]:

'''This line of code calculates a metric called `op_lo` which represents the percentage change from the previous day's low price to the current day's open price for each stock. Here's a breakdown:

- `sp_df['Open']`: This refers to the open prices of the stocks in the DataFrame `sp_df`.

- `sp_df['Low'].shift(1)`: This shifts the low prices (`Low`) of the stocks by one day. This is done to use the low prices from the previous day.

- `(sp_df['Open'] - sp_df['Low'].shift(1))`: This calculates the difference between the current day's open price and the previous day's low price for each stock.

- `(sp_df['Open'] - sp_df['Low'].shift(1))/sp_df['Low'].shift(1)`: Finally, this division calculates the percentage change from the previous day's low price to the current day's open price. This is done by dividing the difference calculated above by the previous day's low price and multiplying by 100 to express the result as a percentage.

Overall, the `op_lo` metric provides insight into how much the price has moved from the previous day's low to the current day's open, expressed as a percentage of the previous day's low price. This can be useful in analyzing intraday price movements and volatility.'''

op_lo = (sp_df['Open'] - sp_df['Low'].shift(1))/sp_df['Low'].shift(1)

op_lo = op_lo




In [None]:
df['Open'].shape

In [None]:

'''This line of code calculates the rolling mean of the closing prices (`Close`) of the stocks over a specified lookback period, and then shifts the result by one day. Let's break it down:

- `df['Close']`: This refers to the closing prices of the stocks in the DataFrame `df`.

- `.rolling(lookback)`: This method calculates a rolling window of the specified `lookback` period (e.g., 20 days) over the data. For each day, it includes the closing prices of the current day and the previous `lookback-1` days.

- `.mean()`: This calculates the mean (average) of the closing prices within each rolling window. So, for each day, you get the average closing price over the previous `lookback` days.

- `.shift(1)`: This shifts the result by one day. The purpose of this shift is to align the rolling mean with the returns for the next day. For example, the rolling mean calculated for day 100 will be aligned with the returns for day 101.

Reasoning:

In the context of the other codes you mentioned, this line of code is used to calculate a moving average of the closing prices over a specified lookback period. The moving average is then used as a reference point or threshold for making trading decisions. Comparing the current day's open price to the previous day's low price and the moving average allows for the identification of stocks whose prices are potentially at attractive levels for buying.'''


moving_average = df['Close'].rolling(lookback).mean().shift(1)
moving_average.shape

In [None]:
pnl = pd.Series(index=df['Close'].index, dtype=float)
positionTable = pd.DataFrame(index=df['Close'].index, columns=df[['Close']].columns)

In [124]:
op_lo.to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,0
Unnamed: 0_level_1,Date,Unnamed: 2_level_1
MMM,2019-06-21 00:00:00-04:00,
MMM,2019-06-24 00:00:00-04:00,0.004171
MMM,2019-06-25 00:00:00-04:00,-0.000519
MMM,2019-06-26 00:00:00-04:00,0.006247
MMM,2019-06-27 00:00:00-04:00,0.004601
...,...,...
ZTS,2024-06-13 00:00:00-04:00,0.006121
ZTS,2024-06-14 00:00:00-04:00,-0.002108
ZTS,2024-06-17 00:00:00-04:00,-0.003493
ZTS,2024-06-18 00:00:00-04:00,0.018408


In [None]:
import pandas as pd

# Assuming these are your existing DataFrames
# df, op_lo, buyPrice, moving_average

# Initialize an empty list to store the boolean masks
stocks_to_buy_list = []

# Iterate over the range of indices
for t in range(1, len(df['Close'])):
    stocks_to_buy_t = op_lo.iloc[[t]].notnull() & (df['Open'].iloc[t] < buyPrice.iloc[t]) & (df['Open'].iloc[t] > moving_average.iloc[t])
    stocks_to_buy_list.append(stocks_to_buy_t)
    print(stocks_to_buy_t)

# Concatenate the list of boolean masks into a single DataFrame
stocks_to_buy_df = pd.concat(stocks_to_buy_list, axis=1)

# Transpose the DataFrame if needed
#stocks_to_buy_df = stocks_to_buy_df.T

print(stocks_to_buy_list)

In [None]:
type(stocks_to_buy_list)


In [103]:
stocks_to_buy_list

[     Date                     
 MMM  2019-06-24 00:00:00-04:00    False
 dtype: bool,
      Date                     
 MMM  2019-06-25 00:00:00-04:00    False
 dtype: bool,
      Date                     
 MMM  2019-06-26 00:00:00-04:00    False
 dtype: bool,
      Date                     
 MMM  2019-06-27 00:00:00-04:00    False
 dtype: bool,
      Date                     
 MMM  2019-06-28 00:00:00-04:00    False
 dtype: bool,
      Date                     
 MMM  2019-07-01 00:00:00-04:00    False
 dtype: bool,
      Date                     
 MMM  2019-07-02 00:00:00-04:00    False
 dtype: bool,
      Date                     
 MMM  2019-07-03 00:00:00-04:00    False
 dtype: bool,
      Date                     
 MMM  2019-07-05 00:00:00-04:00    False
 dtype: bool,
      Date                     
 MMM  2019-07-08 00:00:00-04:00    False
 dtype: bool,
      Date                     
 MMM  2019-07-09 00:00:00-04:00    False
 dtype: bool,
      Date                     
 MMM  2019-

In [127]:
stocks = []
for value in stocks_to_buy_list:
    if value.any():
        stocks.append(value)
        print(value)


     Date                     
MMM  2020-09-21 00:00:00-04:00    True
dtype: bool
     Date                     
MMM  2022-10-25 00:00:00-04:00    True
dtype: bool
     Date                     
ABT  2020-01-27 00:00:00-05:00    True
dtype: bool
     Date                     
ABT  2022-10-19 00:00:00-04:00    True
dtype: bool
      Date                     
ABBV  2022-02-22 00:00:00-05:00    True
dtype: bool
      Date                     
ABBV  2022-02-24 00:00:00-05:00    True
dtype: bool
      Date                     
ABBV  2022-10-28 00:00:00-04:00    True
dtype: bool
      Date                     
ABBV  2023-04-26 00:00:00-04:00    True
dtype: bool
     Date                     
ACN  2021-07-19 00:00:00-04:00    True
dtype: bool
      Date                     
ADBE  2020-01-27 00:00:00-05:00    True
dtype: bool
      Date                     
ADBE  2021-11-23 00:00:00-05:00    True
dtype: bool
     Date                     
AMD  2021-07-08 00:00:00-04:00    True
dtype: bool
    

In [128]:
df = pd.DataFrame(stocks)

df

Unnamed: 0_level_0,MMM,MMM,ABT,ABT,ABBV,ABBV,ABBV,ABBV,ACN,ADBE,...,WY,WMB,WMB,WTW,XYL,XYL,YUM,ZBH,ZBH,ZBH
Date,2020-09-21 00:00:00-04:00,2022-10-25 00:00:00-04:00,2020-01-27 00:00:00-05:00,2022-10-19 00:00:00-04:00,2022-02-22 00:00:00-05:00,2022-02-24 00:00:00-05:00,2022-10-28 00:00:00-04:00,2023-04-26 00:00:00-04:00,2021-07-19 00:00:00-04:00,2020-01-27 00:00:00-05:00,...,2022-10-28 00:00:00-04:00,2021-09-20 00:00:00-04:00,2022-04-25 00:00:00-04:00,2020-02-06 00:00:00-05:00,2019-10-31 00:00:00-04:00,2020-02-24 00:00:00-05:00,2022-09-13 00:00:00-04:00,2020-08-04 00:00:00-04:00,2021-05-04 00:00:00-04:00,2023-11-07 00:00:00-05:00
0,True,,,,,,,,,,...,,,,,,,,,,
1,,True,,,,,,,,,...,,,,,,,,,,
2,,,True,,,,,,,,...,,,,,,,,,,
3,,,,True,,,,,,,...,,,,,,,,,,
4,,,,,True,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
748,,,,,,,,,,,...,,,,,,True,,,,
749,,,,,,,,,,,...,,,,,,,True,,,
750,,,,,,,,,,,...,,,,,,,,True,,
751,,,,,,,,,,,...,,,,,,,,,True,


In [129]:
op_lo_df = pd.concat([op_lo,df],axis=1)

NotImplementedError: Can only union MultiIndex with MultiIndex or Index of tuples, try mi.to_flat_index().union(other) instead.

In [131]:
ww = op_lo_df.dropna()

ww

Unnamed: 0,0,"(MMM, 2020-09-21 00:00:00-04:00)","(MMM, 2022-10-25 00:00:00-04:00)","(ABT, 2020-01-27 00:00:00-05:00)","(ABT, 2022-10-19 00:00:00-04:00)","(ABBV, 2022-02-22 00:00:00-05:00)","(ABBV, 2022-02-24 00:00:00-05:00)","(ABBV, 2022-10-28 00:00:00-04:00)","(ABBV, 2023-04-26 00:00:00-04:00)","(ACN, 2021-07-19 00:00:00-04:00)",...,"(WY, 2022-10-28 00:00:00-04:00)","(WMB, 2021-09-20 00:00:00-04:00)","(WMB, 2022-04-25 00:00:00-04:00)","(WTW, 2020-02-06 00:00:00-05:00)","(XYL, 2019-10-31 00:00:00-04:00)","(XYL, 2020-02-24 00:00:00-05:00)","(YUM, 2022-09-13 00:00:00-04:00)","(ZBH, 2020-08-04 00:00:00-04:00)","(ZBH, 2021-05-04 00:00:00-04:00)","(ZBH, 2023-11-07 00:00:00-05:00)"


In [None]:
for t in range(1, len(df)):
    # Selecting the entire row for the current day 't'
    op_lo_today = op_lo.iloc[t, :]
    open_today = df['Open'].iloc[t, :]
    buy_price_today = buyPrice.iloc[t, :]
    moving_avg_today = moving_average.iloc[t, :]
    
    # Creating a boolean array for stocks to buy
    stocks_to_buy = op_lo_today.notnull() & (open_today < buy_price_today) & (open_today > moving_avg_today)
    
    # Selecting the stocks to buy and sorting them
    sort_data = op_lo_today[stocks_to_buy].sort_values()
    
    # Selecting the top N stocks
    sort_data = sort_data.head(min(topN, len(sort_data)))
    
    # Updating the position table for the selected stocks
    positionTable.iloc[t, sort_data.index] = 1
