# Simple mean reversion algorithm using components of the S&P 500

With and without transaction costs.

## Strategy Description:

Hypothesis h(n): stocks whose day t - 1 moves deviate most from the market average should move in the opposite direction on day t.

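Buy the stocks with the worst previous-day moves and short those with the best previous-day moves, where each move is measured relative to the average return of all stocks on that day and position sizes are proportional to that deviation.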

In [56]:
# imports
import numpy as np
import pandas as pd
# Backtest window (both ends inclusive), written as YYYYMMDD integers so the
# bounds can be compared directly with the integer Date index of the price data.
startDate = 2006_01_01
endDate = 2006_12_31

In [57]:
# Load the daily closing prices of the SPX components: one row per date, one column per ticker.
df = (
    pd.read_table('SPX_20071123.txt')
    .astype({'Date': 'int'})  # keep dates as plain integers
    .set_index('Date')        # index the prices by date
    .sort_index()             # make sure rows are in chronological order
)
df.head(10)

Unnamed: 0_level_0,A,AA,AAPL,ABC,ABI,ABK,ABT,ACAS,ACE,ACS,...,XEL,XL,XLNX,XOM,XRX,XTO,YHOO,YUM,ZION,ZMH
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
19991124,41.125,20.3772,23.6563,5.998,,31.1549,22.8971,7.6786,12.8492,19.0,...,14.8614,38.5249,43.811,35.1539,26.7738,2.2456,57.75,10.0716,51.6312,
19991126,41.1875,20.6142,23.7656,5.998,,31.1183,22.5998,7.632,12.8492,18.7188,...,14.9079,38.5249,43.5356,34.6541,26.7738,2.2215,56.7188,9.8871,51.6826,
19991129,42.0,20.5352,23.6406,5.9675,,30.6418,23.0086,7.5855,12.2246,18.7188,...,14.8614,38.2478,46.8092,35.265,26.2989,2.0403,56.5313,9.964,50.7074,
19991130,42.1875,20.6932,24.4688,6.0284,,31.9613,22.5998,7.4925,12.1353,18.7188,...,15.0926,37.6935,43.811,35.2372,25.7052,1.9679,53.1875,10.21,53.017,
19991201,43.0,21.1078,25.7656,6.0588,,33.574,22.2652,7.539,12.2692,18.7188,...,15.046,37.7859,43.7804,36.3201,25.3584,1.9317,57.2188,10.0101,53.6329,
19991202,44.125,22.1346,27.5469,5.9066,,32.5477,22.1537,7.7019,12.403,18.5625,...,15.046,37.2315,46.3197,36.3201,25.4182,1.9317,61.4531,9.9179,54.0947,
19991203,45.0,22.4703,28.75,5.7239,,33.3541,22.2652,7.6786,12.6707,18.875,...,15.1849,37.8783,43.1991,36.7089,24.8201,1.9075,63.25,9.841,55.4805,
19991206,45.75,22.6283,29.0938,5.8153,,33.464,21.6333,7.7717,12.3138,18.6563,...,15.2306,37.5087,44.5452,37.2643,24.9995,1.8713,70.2031,9.841,54.0947,
19991207,45.25,23.0034,29.4531,5.7544,,33.4274,21.2244,7.632,12.1799,18.8438,...,15.1849,37.2315,43.4744,37.2643,24.4014,1.8955,87.0,9.9025,52.2471,
19991208,45.0625,22.8257,27.5156,5.7544,,32.5477,20.8899,7.5157,12.0014,18.4063,...,14.9537,36.9082,43.1379,37.3476,23.3847,1.823,79.9063,9.964,50.81,


In [58]:
# Day-over-day percentage change of every price column.
daily_returns = df.pct_change()
# Market return for each date: the plain (equal-weighted) average across all columns.
mkt_daily_returns = daily_returns.mean(axis='columns')

In [59]:
# Contrarian weights: the negative of each stock's return relative to the market
# return, rescaled so that the absolute weights on each day sum to one.
excess = np.array(daily_returns) - np.array(mkt_daily_returns)[:, np.newaxis]
weights = -excess
weights_sum = np.nansum(np.abs(weights), axis=1)  # per-day sum of |weight|, NaNs ignored
no_signal = weights_sum == 0  # days on which there is nothing to normalise
weights[no_signal, :] = 0  # hold no positions on those days
weights_sum[no_signal] = 1  # avoids a division by zero on the next line
weights = weights / weights_sum[:, np.newaxis]

In [60]:
# Daily P&L: the previous row's weights multiplied by the current row's returns,
# summed across stocks (NaN products are ignored).
lagged_weights = np.array(pd.DataFrame(weights).shift(1))  # row i now holds the weights of row i - 1
pnl_all_days = np.nansum(lagged_weights * np.array(daily_returns), axis=1)
# Keep only the dates inside [startDate, endDate].
in_window = (df.index >= startDate) & (df.index <= endDate)
daily_pnl = pnl_all_days[in_window]

In [61]:
# Annualised Sharpe ratio of the daily P&L: mean over standard deviation,
# scaled by sqrt(252). Nothing is subtracted for a risk-free rate.
annualisation = np.sqrt(252)
sharpe_ratio = annualisation * daily_pnl.mean() / daily_pnl.std()
print(f'Sharpe without transaction costs: {sharpe_ratio}')

Sharpe without transaction costs: 0.9577856810103857


Calculation with transaction costs:

In [62]:
# Calculate the sharpe ratio with transaction costs
transaction_costs = 0.0005  # cost charged per unit of absolute weight change (0.0005 = 5 basis points)
in_window = np.logical_and(df.index >= startDate, df.index <= endDate)  # dates inside [startDate, endDate]
# Slice into a new name instead of overwriting `weights`. Overwriting made this
# cell non-idempotent: on a second run `weights` no longer had one row per date
# in `df`, so the full-length boolean mask could not index it any more.
weights_in_window = weights[in_window]
# Turnover per day = sum over stocks of |w_t - w_(t-1)|. The shift leaves NaN in
# the first in-window row, which nansum counts as zero, so no cost is charged
# on the first day of the window.
previous_weights = np.array(pd.DataFrame(weights_in_window).shift())
turnover = np.nansum(np.abs(weights_in_window - previous_weights), axis=1)
daily_pnl_with_costs = daily_pnl - turnover * transaction_costs  # P&L net of trading costs
sharpe_ratio_with_costs = np.sqrt(252) * np.mean(daily_pnl_with_costs) / np.std(daily_pnl_with_costs)  # annualised, same convention as the no-cost figure
print(f'Sharpe with transaction costs: {sharpe_ratio_with_costs}')

Sharpe with transaction costs: -2.161743371896227


Conclusion: once transaction costs are included, the Sharpe ratio turns strongly negative (about -2.16), so this strategy is unprofitable as implemented.

## Part Two: Modification of entry criterion

Enter at the open of day t rather than at the close of day t.

In [63]:
# Load the second price file for the SPX components. NOTE(review): presumably these are
# daily opening prices, going by the `_op` file name and the section text above - confirm
# against the data source.
df = (
    pd.read_table('SPX_op_20071123.txt')
    .astype({'Date': 'int'})  # keep dates as plain integers
    .set_index('Date')        # index the prices by date
    .sort_index()             # make sure rows are in chronological order
)
df.head(10)

Unnamed: 0_level_0,A,AA,AAPL,ABC,ABI,ABK,ABT,ACAS,ACE,ACS,...,XEL,XL,XLNX,XOM,XRX,XTO,YHOO,YUM,ZION,ZMH
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
19991124,40.125,20.4562,23.25,5.9675,,31.3748,22.7112,7.5855,13.3399,18.75,...,14.8156,38.4787,43.1379,34.5986,26.7145,2.2577,56.5625,10.0716,51.2719,
19991126,40.875,20.6932,23.6875,5.998,,30.9349,22.5998,7.5855,12.76,18.9063,...,14.8614,38.5711,43.9945,34.8762,26.8925,2.2456,58.25,9.9025,51.5286,
19991129,41.0,20.5747,23.5625,5.998,,30.825,22.4139,7.6553,12.8492,18.75,...,14.9079,38.6635,44.2699,34.5986,26.5957,2.1973,57.3125,9.8564,51.5542,
19991130,42.0,20.5352,24.5313,5.998,,30.825,22.5998,7.5623,12.2246,18.75,...,14.8614,38.6635,47.2987,35.5427,25.8834,2.1973,55.8594,9.8564,50.7074,
19991201,42.1875,21.167,25.125,5.9675,,30.7517,22.5998,7.5623,12.0907,18.5938,...,15.0926,37.6935,44.2316,35.2094,25.478,1.98,57.5,10.1639,52.9657,
19991202,43.75,21.246,25.7813,5.998,,31.9246,22.4139,7.632,12.3138,18.7188,...,15.046,37.832,44.0251,36.82,25.5976,1.9921,57.5938,10.0255,53.3762,
19991203,44.9375,22.3123,28.0469,5.8457,,33.3541,22.5625,7.632,12.4923,18.625,...,15.046,37.3239,47.5435,36.4313,25.7172,1.9317,62.4961,9.9333,54.1461,
19991206,45.25,22.6283,28.6406,5.8457,,33.3541,22.0793,7.7484,12.4923,18.625,...,15.1849,37.3239,44.79,36.5423,24.8201,1.9317,63.25,9.841,55.1725,
19991207,45.75,22.5888,29.1406,5.7239,,33.7206,21.7448,7.7484,12.5815,18.7188,...,15.2306,37.4625,44.943,37.3198,25.0593,1.8955,74.0,9.841,54.1718,
19991208,45.25,22.9837,29.0625,5.8153,,33.7206,21.4102,7.5855,12.0907,18.7188,...,15.1849,37.4625,43.1073,37.1532,24.8799,1.8472,80.4688,9.8717,52.5037,


In [64]:
# Day-over-day percentage change of every price column.
daily_returns = df.pct_change()
# Market return for each date: the plain (equal-weighted) average across all columns.
mkt_daily_returns = daily_returns.mean(axis='columns')

In [65]:
# Contrarian weights: the negative of each stock's return relative to the market
# return, rescaled so that the absolute weights on each day sum to one.
excess = np.array(daily_returns) - np.array(mkt_daily_returns)[:, np.newaxis]
weights = -excess
weights_sum = np.nansum(np.abs(weights), axis=1)  # per-day sum of |weight|, NaNs ignored
no_signal = weights_sum == 0  # days on which there is nothing to normalise
weights[no_signal, :] = 0  # hold no positions on those days
weights_sum[no_signal] = 1  # avoids a division by zero on the next line
weights = weights / weights_sum[:, np.newaxis]

In [66]:
# Daily P&L: the previous row's weights multiplied by the current row's returns,
# summed across stocks (NaN products are ignored).
lagged_weights = np.array(pd.DataFrame(weights).shift(1))  # row i now holds the weights of row i - 1
pnl_all_days = np.nansum(lagged_weights * np.array(daily_returns), axis=1)
# Keep only the dates inside [startDate, endDate].
in_window = (df.index >= startDate) & (df.index <= endDate)
daily_pnl = pnl_all_days[in_window]

In [67]:
# Annualised Sharpe ratio of the daily P&L: mean over standard deviation,
# scaled by sqrt(252). Nothing is subtracted for a risk-free rate.
annualisation = np.sqrt(252)
sharpe_ratio = annualisation * daily_pnl.mean() / daily_pnl.std()
print(f'Sharpe without transaction costs: {sharpe_ratio}')

Sharpe without transaction costs: 2.381759409645484


In [68]:
# Calculate the sharpe ratio with transaction costs
transaction_costs = 0.0005  # cost charged per unit of absolute weight change (0.0005 = 5 basis points)
in_window = np.logical_and(df.index >= startDate, df.index <= endDate)  # dates inside [startDate, endDate]
# Slice into a new name instead of overwriting `weights`. Overwriting made this
# cell non-idempotent: on a second run `weights` no longer had one row per date
# in `df`, so the full-length boolean mask could not index it any more.
weights_in_window = weights[in_window]
# Turnover per day = sum over stocks of |w_t - w_(t-1)|. The shift leaves NaN in
# the first in-window row, which nansum counts as zero, so no cost is charged
# on the first day of the window.
previous_weights = np.array(pd.DataFrame(weights_in_window).shift())
turnover = np.nansum(np.abs(weights_in_window - previous_weights), axis=1)
daily_pnl_with_costs = daily_pnl - turnover * transaction_costs  # P&L net of trading costs
sharpe_ratio_with_costs = np.sqrt(252) * np.mean(daily_pnl_with_costs) / np.std(daily_pnl_with_costs)  # annualised, same convention as the no-cost figure
print(f'Sharpe with transaction costs: {sharpe_ratio_with_costs}')

Sharpe with transaction costs: 1.3996944546182988
