In [32]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import plotly.graph_objects as go

### Simple Mean Reversion

The idea of the strategy is to long securities that underperformed and short securities that overperformed.

According to the thread, the strategy should perform well in bull markets, and very badly in bear markets.

In [51]:
def calc_return(X, targets):
    longs = X<0
    # longs = ((1-(X/19))+1)*longs/60
    shorts = X>0
    # shorts = -((X-40)/19+1)*shorts/60
    return (targets*(longs+shorts)).sum(axis=1)

#### Data preparation

In [52]:
paths = ['/Users/mcardonasanchez/Desktop/Online Courses/The-Data-Science-Course-2021-All-Resources/Other/data/stock_prices.csv']
stocks = pd.concat([pd.read_csv(e) for e in paths])
stocks.Date = pd.to_datetime(stocks.Date)
stocks['Return'] = 100*stocks.Close.pct_change()
targets = pd.pivot(stocks, index = 'Date', values = 'Return', columns = 'SecuritiesCode').iloc[1:]
targets.head()

SecuritiesCode,1301,1332,1333,1375,1376,1377,1379,1381,1407,1413,...,9982,9983,9984,9987,9989,9990,9991,9993,9994,9997
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-05,251.928021,-79.254931,472.183099,,-52.676923,114.889467,-35.612708,-9.304511,-54.145078,,...,-70.625395,9059.139785,-80.857009,-52.10352,108.962868,-84.485294,-10.979463,59.272405,-1.559889,-54.612337
2017-01-06,241.645885,-79.014599,468.695652,,-52.446483,113.18328,-35.475113,-10.612436,-53.870293,,...,-70.492826,8297.463002,-78.945116,-53.246443,114.83376,-85.261905,-8.966074,61.668146,-3.12843,-54.220963
2017-01-10,240.09901,-79.548763,475.622776,,-51.684699,113.051823,-36.336336,-8.018868,-55.179487,,...,-70.799752,8114.437367,-78.229517,-54.232459,120.233463,-85.689046,-5.679012,63.089005,-6.367041,-54.057143
2017-01-11,241.41791,-79.854281,475.949367,,-50.926217,106.014075,-34.503106,-8.250356,-55.710594,,...,-70.907968,8086.836518,-77.969398,-54.267216,121.106821,-86.030268,-4.0,59.548611,-4.787813,-54.342857
2017-01-12,241.802253,-80.080557,484.558824,,-52.169811,121.56476,-37.863501,-7.593123,-55.503876,,...,-70.781638,7970.063694,-77.821626,-55.278766,119.893899,-85.802171,-2.548853,59.721011,-5.949782,-54.962275


#### Create train data

In [53]:
# rolling factor, averaging on 2 values
roll = 2
train = targets.rolling(roll).mean().shift(2).iloc[3:]
train.head()

SecuritiesCode,1301,1332,1333,1375,1376,1377,1379,1381,1407,1413,...,9982,9983,9984,9987,9989,9990,9991,9993,9994,9997
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-11,246.786953,-79.134765,470.439375,,-52.561703,114.036373,-35.543911,-9.958473,-54.007685,,...,-70.55911,8678.301394,-79.901062,-52.674981,111.898314,-84.873599,-9.972769,60.470275,-2.344159,-54.41665
2017-01-12,240.872448,-79.281681,472.159214,,-52.065591,113.117552,-35.905725,-9.315652,-54.52489,,...,-70.646289,8205.950185,-78.587316,-53.739451,117.533611,-85.475475,-7.322543,62.378575,-4.747736,-54.139053
2017-01-13,240.75846,-79.701522,475.786071,,-51.305458,109.532949,-35.419721,-8.134612,-55.445041,,...,-70.85386,8100.636943,-78.099458,-54.249837,120.670142,-85.859657,-4.839506,61.318808,-5.577427,-54.2
2017-01-16,241.610082,-79.967419,480.254095,,-51.548014,113.789418,-36.183304,-7.921739,-55.607235,,...,-70.844803,8028.450106,-77.895512,-54.772991,120.50036,-85.91622,-3.274427,59.634811,-5.368797,-54.652566
2017-01-17,246.287724,-79.882306,484.322811,,-52.276856,119.202406,-37.79232,-7.166807,-55.526848,,...,-70.761189,7988.829315,-77.881669,-55.34376,122.116262,-86.002501,-2.129127,59.51268,-5.594978,-54.929294


#### Model evaluation

In [54]:
# the values are sorted day by day such than values that overperformed are ranked badly
X = np.argsort(np.argsort(train))
y = targets.loc[X.index]

# benchmark, averaging all securities for a given day
bm = y.mean(axis=1)

# return of our strategy 
r = calc_return(X,y)

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x = bm.index,
        y = bm.cumsum().values,
        name = "Benchmark",
        marker = {"color":"black"}
    )
)

fig.add_trace(
    go.Scatter(
        x = r.index,
        y = r.cumsum().values,
        name = "Long Short Strategy",
        marker = {"color":"green"}
    )
)

fig.update_layout(template="presentation", title = "Long-Short strategy cumulative return vs Benchmark")

#### Sharpe Ratios

In [9]:
print(f"sharp ratio, benchmark: {round(bm.mean()/bm.std()*252**0.5,3)}")
print(f"sharp ratio, Long - Short strategy: {round(r.mean()/r.std()*252**0.5,3)}")

sharp ratio, benchmark: 0.638
sharp ratio, Long - Short strategy: 0.939


In [19]:
dividends = pd.pivot(stocks, index='Date', values = 'ExpectedDividend', columns='SecuritiesCode')
dvs = dividends.fillna(0)
train2 = train.copy()
subdvds = dvs.loc[train2.index]
train2[subdvds!=0] = train2[subdvds.shift(-1)!=0]+0.5

X = np.argsort(np.argsort(train2))
y = targets.loc[X.index]

# benchmark, averaging all securities for a given day
bm = y.mean(axis=1)

# return of our strategy
r = calc_return(X,y)

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x = bm.index,
        y = bm.cumsum().values,
        name = "Benchmark",
        marker = {"color":"black"}
    )
)

fig.add_trace(
    go.Scatter(
        x = r.index,
        y = r.cumsum().values,
        name = "Long Short Strategy",
        marker = {"color":"green"}
    )
)

fig.update_layout(template="presentation", title = "Long-Short strategy cumulative return vs Benchmark")

In [20]:
print(f"sharp ratio, benchmark: {round(bm.mean()/bm.std()*252**0.5,3)}")
print(f"sharp ratio, Long - Short strategy: {round(r.mean()/r.std()*252**0.5,3)}")

sharp ratio, benchmark: 0.638
sharp ratio, Long - Short strategy: 1.434


#### Score on evaluation data

In [21]:
evalstocks = pd.read_csv('/Users/mcardonasanchez/Desktop/Online Courses/The-Data-Science-Course-2021-All-Resources/Other/data/stock_prices.csv')
evalstocks.Date = pd.to_datetime(evalstocks.Date)
evaltargets = pd.pivot(evalstocks, index='Date', values='Target', columns='SecuritiesCode')

evaldividends = pd.pivot(evalstocks, index='Date', values='ExpectedDividend', columns='SecuritiesCode')
dvs = evaldividends.fillna(0)

roll = 2
train = pd.concat([targets, evaltargets]).rolling(roll).mean().shift(2).loc[evaltargets.index]

subdvds = dvs.loc[train.index]
train[subdvds!=0] = train[subdvds.shift(-1)!=0] +0.5

X = np.argsort(np.argsort(train))
y = evaltargets.loc[X.index]

#Benchmark, averaging all securities for a given day
bm = y.mean(axis=1)

#Return of our strategy
r = calc_return(X,y)

fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x = bm.index,
        y = bm.cumsum().values,
        name = "Benchmark",
        marker = {"color":"black"}
    )
)


fig.add_trace(
    go.Scatter(
        x = r.index,
        y = r.cumsum().values,
        name = "Long Short Strategy",
        marker = {"color":"green"}
    )
)

fig.update_layout(template="presentation", title = "Long-Short strategy cumulative return vs Benchmark")

print(f"sharp ratio, benchmark: {round(bm.mean()/bm.std()*252**0.5,3)}")
print(f"sharp ratio, Long - Short strategy: {round(r.mean()/r.std()*252**0.5,3)}")

fig.show()

sharp ratio, benchmark: 0.638
sharp ratio, Long - Short strategy: 1.405
