Momentum Trading Strategy implemented in Python on a 500 Stocks universe [Beginner friendly]
By AlgoVibes (Youtube)
https://www.youtube.com/watch?v=L2nhNvIAyBI

Description:<br>
Get list of S&P500 stocks<br>
Download price data for all<br>
Concatenate into one dataframe<br>
Calculate percent change<br>
Resample to monthly period<br>

To Do:<br>
    v3: Add daily trailing stop. After stop triggers, don't get back in until next month<br>
        Alternatively, get back in after things go positive<br>
        Implementation:<br>
            Option 1:  Don't resample data to monthly, just get a monthly index, then loop through monthly periods.<br>
            Keep track of highest price as you iterate through daily prices. If price ever drops by more than stop amount, set rest of prices = cash return (0).<br>
    v3.1<br>
        Add new dataframe to keep track of our regime filters<br>
        Add "regime filter" to help determine if you should be in cash. <br>
        Filters:<br>
            SPY < 200 day MA<br>
            Market breadth increasing / shrinking<br>
    v3.2:  Iterate through possible weighting combinations using itertools.<br>
    v3.3:  Better stock selection:<br>
        Calculate 30-day volume for all stocks in universe at beginning of start_date<br>
        Take highest 100<br>
        Calculate volume changes for top 100 stocks by volume, then select the ones with highest average volume increase over 30 day period (unusual volume)<br>
    v3.3.1 Get more data so that there is more data for warmup indicators, then trim df for desired timeframe.
        

In [None]:
#Import libraries
import os
import sys
import datetime as dt
import yfinance as yf
import pandas as pd
import numpy as np
from pandas.tseries.offsets import MonthEnd, DateOffset
%matplotlib inline
import quantstats as qs
qs.extend_pandas() # # extend pandas functionality with metrics, etc.
HOME_DIR = os.path.expanduser('~/')
sys.path.append(f"{HOME_DIR}Documents/Algo/Stock Price DB/")
#from get_ticker_data_from_db_v2 import process_ticker
from StockPriceData import process_ticker

In [None]:
# Notebook-wide settings read by the data-loading and backtest cells below.
data_folder = f'{HOME_DIR}Documents/Algo/Data'  # Folder where the price/return CSV caches are written and read
print(data_folder)
start_date = '2000-01-01'  # First date requested from process_ticker (alternative: '1962-01-02')
end_date = '2022-08-23'  # Last date requested from process_ticker
get_latest_data = True  # True: rebuild the data via process_ticker (slow) and refresh the CSVs; False: read the previously created CSVs
tickers = ['AAPL','AMZN','NFLX','AMD','NVDA','MSFT','WMT','IBM','KO','PSX','PG','BAC','WFC']  # Stock universe the strategy picks from
benchmark = '^GSPC'  # Ticker passed to process_ticker for the benchmark series


In [None]:
# NOTE: when we get price df, we may want to replace all values below a certain threshold with 0 so that our algo doesn't
# trade them since they may appear to have great returns at times even though they're uninvestible at the time such as when they first list.
# We should replace small values from beginning of df until the price meets our threshold. We probably want to leave in prices when they go below our threshold later.
def get_prices(tickers):
    """Return a DataFrame of adjusted close prices with one column per ticker.

    When the notebook-level flag ``get_latest_data`` is truthy, every ticker is
    fetched with ``process_ticker(ticker, start_date, end_date)``, the
    ``'Adj_Close'`` series are concatenated column-wise and the result is
    written to ``{data_folder}/all_sp500_prices.csv``. Otherwise that CSV is
    read back with its ``Date`` column parsed as the index.

    Args:
        tickers: Iterable of ticker symbols to fetch. Not used when the
            cached CSV is read instead.

    Returns:
        DataFrame of prices; columns are the tickers that returned data.

    Raises:
        ValueError: If fetching was requested but no ticker returned any data.
    """
    if not get_latest_data:
        return pd.read_csv(f"{data_folder}/all_sp500_prices.csv", index_col='Date', parse_dates=True)

    prices, symbols = [], []
    for ticker in tickers:
        print(f'Processing {ticker}')
        try:
            df = process_ticker(ticker, start_date, end_date)['Adj_Close']
        except Exception as exc:
            # Best effort: skip tickers that cannot be loaded, but say why
            # (a bare ``except`` would also swallow KeyboardInterrupt).
            print(f'Unable to download data for {ticker}: {exc!r}')
        else:
            if not df.empty:
                print(f'Appending {ticker} to prices and symbols lists')
                prices.append(df)
                symbols.append(ticker)

    if not prices:
        # pd.concat([]) would fail with a far less helpful message.
        raise ValueError('No price data could be downloaded for any of the requested tickers')

    all_prices = pd.concat(prices, axis=1)  # One column per ticker
    all_prices.columns = symbols  # Name the columns after their tickers
    all_prices.to_csv(f"{data_folder}/all_sp500_prices.csv", index=True)  # Index is date
    return all_prices

# Load the adjusted close prices for every ticker in the universe.
all_prices = get_prices(tickers)
# Don't use get_prices() for the benchmark: when fetching fresh data it would overwrite all_sp500_prices.csv.

In [None]:
# Load benchmark adjusted close prices: fetch them via process_ticker and cache them to CSV, or read the cached CSV.
# NOTE: this cache file lives in the current working directory, not in data_folder like the other CSVs.
if get_latest_data == True:
    benchmark_prices = pd.DataFrame(process_ticker(benchmark,'2000-01-01',end_date)['Adj_Close']) # Grab more data than we need since we'll be losing some to indicator warmup
    benchmark_prices.to_csv('benchmark_prices.csv')
else:
    benchmark_prices = pd.read_csv('benchmark_prices.csv', index_col=['Date'], parse_dates=True)
benchmark_prices.head(3) # Last expression of the cell: shows the first three rows

In [None]:
# Trim all_prices so that it ends on the last date for which every ticker has a price.
#prices
#symbols
#all_prices.index
all_prices.head()  # Not displayed: only the last expression of a cell is shown
all_prices.tail(10)  # Not displayed either
#all_prices[all_prices > 0]
latest_date_for_all = all_prices.dropna().index[-1] # dropna() keeps only rows without any NaN, so this is the latest date where all tickers have data
all_prices = all_prices.loc[:latest_date_for_all,:] # Trim df to the last date where all tickers had data (label slice, end included)
all_prices.head()

In [None]:
# See how much data we have for each ticker: first and last date with a positive price.
# Every column is inspected; slicing with [:-1] would silently leave out the last ticker.
for column in all_prices.columns:
    prices = all_prices[column][all_prices[column] > 0]
    if prices.empty:
        # Without this guard prices.index[0] raises IndexError for a ticker with no positive prices.
        print(f'{column}:\tno positive prices')
        continue
    print(f'{column}:\t{prices.index[0]} - {prices.index[-1]}')

In [None]:
# Build the daily and monthly return frames (freshly computed and cached to CSV, or read back from the CSV caches).
if get_latest_data == True:
    all_daily_ret = all_prices.pct_change()
    all_daily_ret['CASH'] = 0 # Zero-return alternative to hold when every stock's return is negative
    all_daily_ret.to_csv(f"{data_folder}/all_daily_ret.csv", index=True) #Index is date
    # Compound the daily returns inside each 'M' resample bin into one monthly return
    all_mtl_ret = all_prices.pct_change().resample('M').agg(lambda x : (x + 1).prod() -1)
    all_mtl_ret['CASH'] = 0 # Zero-return alternative to hold when every stock's return is negative
    all_mtl_ret.to_csv(f"{data_folder}/all_mtl_ret.csv", index=True) #Index is date
else:
    all_daily_ret = pd.read_csv(f"{data_folder}/all_daily_ret.csv", index_col='Date', parse_dates=True) #Index is date
    all_mtl_ret = pd.read_csv(f"{data_folder}/all_mtl_ret.csv", index_col='Date', parse_dates=True) #Index is date
# IMPORTANT: We need to remove NaN and inf values from the percentage return dfs, since stocks that trade at very low
# prices or zero produce extreme return values when prices go from 0 to anything or vice versa.
# They are replaced with 0, i.e. treated as a flat period.
all_daily_ret = all_daily_ret.replace([np.inf, -np.inf, np.nan], 0)
all_mtl_ret = all_mtl_ret.replace([np.inf, -np.inf, np.nan], 0)
# Idea: create a new prices df of only companies that have prices going back to the earliest date
#earliest_prices = all_prices.loc[:, all_prices.iloc[0] > 0]


In [None]:
# Quick look at the daily returns; only the last expression (the tail) is displayed.
all_daily_ret.index
all_daily_ret.tail()

In [None]:
# Quick look at the monthly returns; only the last expression (the tail) is displayed.
all_mtl_ret.index
all_mtl_ret.tail()

In [None]:
# Benchmark returns are computed from the benchmark index prices (benchmark_prices).
# The commented-out alternatives would instead use an equal-weighted average of the returns of our own universe.
benchmark_returns = benchmark_prices.pct_change().dropna() # Daily benchmark returns from the index
#benchmark_returns = all_prices.pct_change().dropna().mean(axis=1) # Benchmark using average returns for stocks in our universe
# Compound the daily benchmark returns inside each 'M' resample bin into one monthly return
benchmark_monthly_returns = benchmark_prices.pct_change().resample('M').agg(lambda x : (x + 1).prod() -1)
#benchmark_monthly_returns = all_mtl_ret.mean(axis=1)
#all_prices.tail().pct_change()#.dropna(axis=0)

In [None]:
# Quick look at the benchmark returns; only the last expression (monthly head) is displayed.
benchmark_returns
benchmark_monthly_returns.head()

Put it all together into a function that takes a lookback period (instead of just 12 months)

In [None]:
def regime_indicators(df, benchmark_prices):
    """Print and plot the 200-row rolling mean of the benchmark prices.

    Args:
        df: Frame whose index seeds the (still unused) regime-filter frame.
        benchmark_prices: Benchmark prices to smooth.

    Returns:
        None. The head of the rolling mean is printed and the series is plotted.
    """
    window = 200
    # Placeholder for the planned regime filters; nothing is stored in it yet.
    regime_frame = pd.DataFrame(index=df.index)
    # Rows before the first complete window are NaN and are dropped.
    benchmark_ma = benchmark_prices.rolling(window).mean().dropna()
    print(benchmark_ma.head())
    benchmark_ma.plot(label='SPY MA', figsize=(16, 8))
    
# Print and plot the benchmark's rolling average; all_prices only supplies the index.
regime_indicators(all_prices,benchmark_prices)

In [None]:
def trailing_stop_indiv(period_daily_ret, stop_loss):
    """Apply an independent trailing stop to each stock's daily returns.

    For every column, the running sum of the daily returns is tracked together
    with its highest value so far (starting from 0). As soon as the running
    sum is at or below ``peak - stop_loss``, all returns from the following
    row to the end of the frame are set to 0, which is the return we would
    earn if the stock were sold on the next day.

    Args:
        period_daily_ret: DataFrame of daily returns, one column per stock
            (rows are assumed to be in chronological order).
        stop_loss: Allowed drop of the summed return below its peak, in the
            same units as the returns (e.g. 0.35).

    Returns:
        A new DataFrame of the same shape with the post-stop returns zeroed.
        The input frame is not modified.
    """
    # Work on a copy so the caller's frame (often a slice of a bigger one) is untouched.
    stopped_ret = period_daily_ret.copy()
    for stock_idx in range(stopped_ret.shape[1]):
        # The tracking state must restart for every stock; carrying it over
        # would let one stock's gains or losses trigger another stock's stop.
        cum_ret = 0
        peak_cum_return = 0
        trailing_stop = -stop_loss
        daily_values = period_daily_ret.iloc[:, stock_idx].to_numpy()
        # The last row is skipped: there is nothing after it to zero out.
        for row, period_ret in enumerate(daily_values[:-1]):
            cum_ret += period_ret
            if cum_ret > peak_cum_return:
                # New high: ratchet the stop level up with it.
                peak_cum_return = cum_ret
                trailing_stop = peak_cum_return - stop_loss
            elif cum_ret <= trailing_stop:
                # Stop hit: out of the stock from the next day onwards.
                stopped_ret.iloc[row + 1:, stock_idx] = 0
                break
    return stopped_ret


In [None]:
def get_stats(return_series):
    """Summarise a return series with three QuantStats metrics.

    Returns:
        Tuple ``(tot_ret, sharpe_ratio, max_dd)`` holding the results of
        ``qs.stats.comp``, ``qs.stats.sharpe`` and ``qs.stats.max_drawdown``,
        each rounded to two decimals.
    """
    metrics = (qs.stats.comp, qs.stats.sharpe, qs.stats.max_drawdown)
    return tuple(round(metric(return_series), 2) for metric in metrics)

In [None]:
def mom_long(all_mtl_ret, lookback, top_n=2):
    """Backtest a long-only monthly momentum strategy.

    At each month-end the assets are ranked by their compounded return over
    the trailing ``lookback`` months. The ``top_n`` best assets with a
    non-negative lookback return are held, equally weighted, during the
    following month. Summary statistics are printed via ``get_stats``.

    Args:
        all_mtl_ret: DataFrame of monthly returns, one column per asset,
            indexed by month-end dates.
        lookback: Number of months in the ranking window.
        top_n: Number of winners held each month (default 2).

    Returns:
        Series of the portfolio's monthly returns. Each value is keyed by the
        month-end on which the winners were selected, i.e. one month before
        the month in which that return was earned.
    """
    # Compounded return over the trailing `lookback` months; incomplete windows are dropped.
    # See https://stackoverflow.com/questions/67168187/cannot-called-a-function-using-agg-method-in-pandas
    all_mtl_ret_lb = all_mtl_ret.rolling(lookback).agg(lambda x: (x + 1).prod() - 1).dropna()
    rets = {}
    # The final row is left out: there is no following month to hold its winners in.
    for period_end, lookback_ret in all_mtl_ret_lb.iloc[:-1].iterrows():
        curr = lookback_ret[lookback_ret.ge(0)]  # Only assets with a lookback RETURN >= 0
        win = curr.nlargest(top_n)
        # Returns of the selected assets in the month after the ranking date
        win_ret = all_mtl_ret.loc[period_end + MonthEnd(1), win.index]
        rets[period_end] = win_ret.mean()  # Equal-weighted portfolio return
    ret_series = pd.Series(rets, dtype='float64')
    tot_ret, sharpe_ratio, max_dd = get_stats(ret_series)
    print(f'Lookback:  {lookback},  Tot Ret: {tot_ret},  Sharpe Ratio:  {sharpe_ratio},  Max DD:  {max_dd}')
    return ret_series
    
# Run the plain momentum backtest with a 12-month lookback and compute its summary statistics.
mom_return_series = mom_long(all_mtl_ret, 12)
tot_ret,sharpe_ratio,max_dd = get_stats(mom_return_series)


daily momentum strategy:<br>
    parameters:  daily returns df, lookback period<br>
    1. copy daily returns df index only<br>
    2. resample index to monthly - end of month<br>
    3. iterate through monthly index:<br>
        set index counter = 0<br>
        set month start date index<br>
        set end date to start date index + 1<br>
        

In [None]:
def mom_long_w_stop(all_daily_ret, lookback, stop_loss, top_n=2):
    """Backtest monthly momentum with an optional per-stock daily trailing stop.

    Daily returns are compounded into monthly returns, and at each month-end
    the assets are ranked by their compounded return over the trailing
    ``lookback`` months. The ``top_n`` best assets with a non-negative
    lookback return are held, equally weighted, during the following month.
    When ``stop_loss`` is non-zero, ``trailing_stop_indiv`` is applied to the
    holding month's daily returns before they are compounded.

    Args:
        all_daily_ret: DataFrame of daily returns, one column per asset,
            with a datetime index.
        lookback: Number of months in the ranking window.
        stop_loss: Trailing-stop distance handed to ``trailing_stop_indiv``;
            0 disables the stop.
        top_n: Number of winners held each month (default 2).

    Returns:
        Series of the portfolio's monthly returns (empty when there are not
        enough months to rank and hold). Each value is keyed by the month-end
        on which the winners were selected, i.e. one month before the month
        in which that return was earned.
    """
    all_mtl_ret = all_daily_ret.resample('M').agg(lambda x: (x + 1).prod() - 1)
    # Compounded return over the trailing `lookback` months; incomplete windows are dropped.
    all_mtl_ret_lb = all_mtl_ret.rolling(lookback).agg(lambda x: (x + 1).prod() - 1).dropna()
    rets = {}
    # The final row is left out: there is no following month to hold its winners in.
    for last_period_end, lookback_ret in all_mtl_ret_lb.iloc[:-1].iterrows():
        curr = lookback_ret[lookback_ret.ge(0)]  # Only assets with a lookback return >= 0
        win = curr.nlargest(top_n)
        period_end = last_period_end + MonthEnd(1)  # End of the holding month

        if stop_loss != 0:
            # Start one day after the ranking date so the months do not overlap.
            period_start = last_period_end + DateOffset(days=1)
            # Copy the slice: the stop logic must never write into all_daily_ret.
            period_daily_ret = all_daily_ret.loc[period_start:period_end, win.index].copy()
            period_daily_ret_modified = trailing_stop_indiv(period_daily_ret, stop_loss)
            # Compound the stopped daily returns so this branch is on the same
            # basis as the monthly returns used when no stop is applied.
            win_ret = (period_daily_ret_modified + 1).prod() - 1
        else:
            win_ret = all_mtl_ret.loc[period_end, win.index]

        rets[last_period_end] = win_ret.mean()  # Equal-weighted portfolio return

    # Built once after the loop, so it also exists when the loop never runs.
    return pd.Series(rets, dtype='float64')
    
# Test run: 12-month lookback with a 0.35 trailing stop, followed by its summary statistics.
monthly_returns = mom_long_w_stop(all_daily_ret, 12,  0.35) # Test
tot_ret,sharpe_ratio,max_dd = get_stats(monthly_returns)
print(f'Tot ret:  {tot_ret}\tsharpe_ratio:  {sharpe_ratio}\tmax_dd:  {max_dd}')


In [None]:
def calc_rolling_returns(returns_df, window):
    """Placeholder: rolling average of ``returns_df`` over ``window`` periods.

    Not implemented yet; both arguments are ignored and ``None`` is returned.
    """
    return None
    

In [None]:
# Summary statistics and a line plot of the test run's monthly returns.
print(monthly_returns.describe())
monthly_returns.plot()


In [None]:
# Plot the benchmark's monthly returns for visual comparison.
benchmark_monthly_returns.plot()

In [None]:
# Calculate momentum results for several different lookback periods and stop-loss levels
monthly_periods = None #120 # Specify "None" if you don't want to roll over the entire dataset
lookback_periods = [3,6,12]
stop_losses = range(0,50,5) # start, stop, step -- whole percentages, converted to fractions in the loop (0 = no stop)
#stop_losses = [30]
# Improvement: keep track of each lookback period's return series in a dictionary so it can be referenced later
# to compare different weightings: (period 1 return * weight) + (period 2 return * weight), etc.
# A weighted average is computed by multiplying each value by its relative proportion and adding the products.
# E.g. for a portfolio of 55% stocks, 40% bonds and 5% cash, those weights are multiplied by each asset's
# annual performance to get the weighted average return.

#all_mtl_ret_limited = all_mtl_ret.loc['2000-09-01':].copy()
all_mtl_ret_limited = all_mtl_ret.iloc[:monthly_periods,:] # Trim df to the required number of periods (optional)
print(f'Returns for {len(all_mtl_ret_limited)} months ({all_mtl_ret_limited.index[0].date()} - {all_mtl_ret_limited.index[-1].date()})')
# NOTE: all_mtl_ret_limited is not passed to the backtest; the sweep below always runs on the full all_daily_ret.

# Collect one record per parameter combination and build the frame once at the end.
# Repeatedly concatenating onto an empty frame is slow and leaves the column dtypes
# dependent on how pandas treats empty entries.
stat_rows = []
for lookback in lookback_periods:
    for stop_loss_pct in stop_losses:
        stop_loss = stop_loss_pct / 100
        return_series = mom_long_w_stop(all_daily_ret, lookback, stop_loss)
        tot_ret, sharpe_ratio, max_dd = get_stats(return_series)
        stat_rows.append({'lookback': lookback, 'stop': stop_loss, 'tot_ret': tot_ret, 'sharpe': sharpe_ratio, 'max_dd': max_dd})
all_return_data = pd.DataFrame(stat_rows, columns=['lookback', 'stop', 'tot_ret', 'sharpe', 'max_dd'])


In [None]:
# Rank the parameter combinations by Sharpe ratio (best first) and show the top ten.
# NOTE: the sort is in place, so all_return_data keeps this order (and its shuffled index labels) in later cells.
#print(f'all_return_data:\n  {all_return_data}')
all_return_data.sort_values(by='sharpe', ascending=False,inplace=True)
print(all_return_data[:10])
#TO DO
# Calculate returns over rolling monthly periods and average.

In [None]:
def get_best_returns(criteria='tot_ret'):
    """Re-run the best parameter combination for every lookback period.

    For each distinct lookback in the notebook-level ``all_return_data``, the
    row with the highest ``criteria`` value is located, printed, and its stop
    loss is used to re-run ``mom_long_w_stop`` on ``all_daily_ret`` (the
    return series were not kept during the sweep).

    Args:
        criteria: Column of ``all_return_data`` to maximise.

    Returns:
        Dict mapping each lookback period to the return series of its best
        combination.
    """
    best_returns = {}
    for lookback in all_return_data['lookback'].unique():
        # Results for this lookback only, re-indexed 0..n-1
        lookback_results = all_return_data[all_return_data['lookback'] == lookback].reset_index(drop=True)
        max_index = lookback_results[criteria].astype(float).idxmax()
        # Read the stop loss from the SAME subset the index refers to; looking
        # it up positionally in the full (sorted) frame returns another row.
        stop_loss = lookback_results.loc[max_index, 'stop']
        print(f'Best return combo for {lookback} month lookback:')
        print(lookback_results.loc[[max_index]])
        best_returns[lookback] = mom_long_w_stop(all_daily_ret, lookback, stop_loss)

    return best_returns

best_returns = get_best_returns('tot_ret') # Return series with the highest total return for each lookback period

In [None]:
# Display the best return series found for the 12-month lookback.
best_returns[12]

In [None]:
# Summary statistics of the benchmark's monthly returns, for comparison with the strategy.
tot_ret_b,sharpe_ratio_b,max_dd_b = get_stats(benchmark_monthly_returns)
print(f'Benchmark return periods:  {benchmark_monthly_returns.shape[0]}')
print(f'Benchmark tot_ret: {tot_ret_b}\t sharpe: {sharpe_ratio_b}\tmax_dd: {max_dd_b}')


In [None]:
# Look at returns over various timeframes to see how strategy performs over different periods of time, instead of just looking at total period
# Split return data into chunks and iterate over chunks to evaluate min, max, avg returns for the periods

In [None]:
# This section combines the best return series of the lookback periods that were passed to our strategy.
# Each series is multiplied by its weight and stored in a new dataframe, and the weighted
# columns are then summed into one blended return series.
#period_weights = [.3333, .3333, .3333]
period_weights = [.5, 0, .5] # One weight per entry of lookback_periods, in the same order
if len(period_weights) != len(lookback_periods):
    raise ValueError('period_weights needs exactly one weight per lookback period')
weighted_returns_index = all_mtl_ret_limited.index # Create new df from index of existing to hold weighted returns
weighted_returns = pd.DataFrame(index = weighted_returns_index)
# Pair each weight with its lookback explicitly. best_returns follows the row order of the
# sorted results frame, so walking its keys next to a counter can give a weight to the wrong lookback.
for lookback, period_weight in zip(lookback_periods, period_weights):
    weighted_returns[lookback] = best_returns[lookback] * period_weight
# Months that a series does not cover contribute 0
weighted_returns = weighted_returns.replace([np.inf, -np.inf, np.nan], 0)
returns = weighted_returns.sum(axis=1) # Add up weighted returns to get total return


In [None]:
# Pass the blended returns and the benchmark's monthly returns to QuantStats to get the return metrics
#qs.reports.metrics(returns, '^GSPC', mode='basic')
qs.reports.metrics(returns, benchmark_monthly_returns, mode='basic') # Compare returns to benchmark
#qs.reports.metrics(returns, best_returns[3], mode='basic') # Compare weighted returns to one of the lookback periods

In [None]:
# QuantStats plots of the blended returns against the benchmark's monthly returns.
qs.reports.plots(returns, benchmark_monthly_returns, mode='full')
#qs.reports.plots(returns, best_returns[3], mode='full')