In [1]:
import numpy as np
import pandas as pd
import cufflinks as cf
# Put cufflinks/plotly into offline mode; the `.iplot()` calls further down rely on it.
cf.go_offline()
import sys
import os

# User-defined helper functions: append the current working directory to the
# import path so that the local `utility` package can be resolved.
# NOTE(review): this assumes the notebook is started from the directory that
# contains `utility/` — confirm.
sys.path.append(os.getcwd())
from utility.utility import compute_weights, compute_beta, compute_summary

In [2]:
# Read the long-format table (one row per ticker/date) and order it by
# ticker first, then by date, both ascending.
raw_data = pd.read_csv(r'data.csv')
raw_data = raw_data.sort_values(by=['ticker', 'date'], ascending=True)

# Show the column dtypes, then preview the first rows.
print(raw_data.dtypes)
raw_data.head()

ticker     object
date       object
last      float64
volume      int64
dtype: object


Unnamed: 0,ticker,date,last,volume
0,1332 JT,2013-01-04,169.0987,1464100
1,1332 JT,2013-01-07,166.3266,1783500
2,1332 JT,2013-01-08,166.3266,1759800
3,1332 JT,2013-01-09,165.4026,767800
4,1332 JT,2013-01-10,167.2507,1503100


In [3]:
## check whether any (ticker, date) pair occurs more than once
# Every repeat after the first occurrence of a pair is flagged True.
is_repeated_row = raw_data.duplicated(subset=['ticker', 'date'], keep='first')
print('There are {} duplicated data.'.format(is_repeated_row.sum()))

There are 0 duplicated data.


In [4]:
## transform data into wide panel format (rows: date, columns: ticker)
# Index the long table once and pivot each field out of it.
long_panel = raw_data.set_index(['ticker', 'date'])
price = long_panel['last'].unstack('ticker')
volume = long_panel['volume'].unstack('ticker')

# Daily log returns. Runs of up to 5 consecutive missing prices are
# forward-filled before differencing, exactly as `pct_change(limit=5)` did.
# The fill is done explicitly because the `fill_method`/`limit` keywords of
# `pct_change` are deprecated since pandas 2.1.
ret = np.log(1 + price.ffill(limit=5).pct_change(fill_method=None))

# 21-row rolling mean of price * volume; at least 10 observations are
# required in the window, otherwise the value is NaN.
adv = (price * volume).rolling(window=21, min_periods=10).mean()

In [23]:
## construct universe: non-null close price and adv > 5M USD (assuming 122 Yen = 1 USD)
# Liquidity floor expressed in the same units as `adv`: 5e6 USD converted at 122.
min_adv_threshold = 5e6 * 122
has_price = price.notnull()
is_liquid = adv > min_adv_threshold
universe = has_price & is_liquid

In [24]:
# beta of the stocks
# NOTE(review): compute_beta is a project helper imported from utility.utility;
# judging by the displayed output it returns a date x ticker frame of betas
# with NaN for some tickers — confirm the benchmark and estimation window
# against its definition.
beta = compute_beta(ret, universe)
# Preview the most recent rows as a sanity check.
beta.tail()

ticker,1332 JT,1333 JT,1334 JT,1605 JT,1721 JT,1801 JT,1802 JT,1803 JT,1808 JT,1812 JT,...,9503 JT,9531 JT,9532 JT,9602 JT,9613 JT,9681 JT,9735 JT,9766 JT,9983 JT,9984 JT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-03-15,1.010226,0.771635,,1.206411,0.685761,0.920795,1.031198,0.991042,0.933728,0.977613,...,0.705662,0.44455,0.46419,0.615555,0.993759,,0.606908,1.031077,0.863996,1.008591
2021-03-16,1.008427,0.770848,,1.206634,0.6844,0.9211,1.031207,0.991837,0.935217,0.977828,...,0.70614,0.445712,0.465095,0.613816,0.994271,,0.606908,1.03134,0.863416,1.008855
2021-03-17,1.006201,0.768698,,1.20951,0.684969,0.920366,1.03043,0.992077,0.938196,0.976504,...,0.708009,0.44612,0.464446,0.612198,0.99842,,0.605787,1.032345,0.862462,1.013885
2021-03-18,1.005117,0.767918,,1.208405,0.686675,0.921769,1.031637,0.992444,0.942085,0.977225,...,0.708003,0.445024,0.46436,0.613058,0.996861,,0.605578,1.02845,0.862796,1.008964
2021-03-19,1.004974,0.767944,,1.207817,0.686424,0.92245,1.03318,0.993789,0.944866,0.97798,...,0.707719,0.444282,0.463374,0.612052,0.994768,,0.604161,1.026329,0.858414,1.006682


In [25]:
## a simple momentum strategy (dollar-neutral):
strategy_name = 'momentum'
signal_lags = [0, 1, 2, 3, 5, 10, 21]

# Score: mean log return over a 231-row window (252 - 21) that needs at least
# 100 observations, shifted forward by 21 rows so the most recent 21 rows are
# excluded from the score used on any given date.
trailing_mean_ret = ret.rolling(window=231, min_periods=100).mean()
momentum_score = trailing_mean_ret.shift(21)

# Turn scores into portfolio weights within the universe, then evaluate the
# strategy for each signal lag (both are project helpers).
momentum_weights = compute_weights(momentum_score, universe)
summary, pnls = compute_summary(momentum_weights, ret, lags=signal_lags)

display(summary)

# Cumulative PnL per lag, ignoring rows where every lag is NaN.
plot_title = 'strategy {}: PNL plot with different signal lags'.format(strategy_name)
cumulative_pnl = pnls.dropna(how='all').cumsum()
cumulative_pnl.iplot(title=plot_title)

Unnamed: 0,0,1,2,3,5,10,21
sharpe,-0.434332,-0.460058,-0.446986,-0.441585,-0.426044,-0.443476,-0.441595
turnover,8.519546,8.517634,8.51748,8.51722,8.51609,8.514139,8.516815


In [26]:
## a beta-neutral momentum strategy :
strategy_name = 'momentum_beta_neutral'
signal_lags = [0, 1, 2, 3, 5, 10, 21]

# Same momentum score as the plain strategy: 231-row (252 - 21) mean return
# with at least 100 observations, lagged by 21 rows.
momentum_score = ret.rolling(window=231, min_periods=100).mean().shift(21)
momentum_weights = compute_weights(momentum_score, universe)

# simplified beta neutral (no longer dollar neutral)
# Benchmark: equal weight 1/N on every universe member of the day, NaN for
# non-members; a day with an empty universe divides by NaN instead of zero.
universe_size = universe.sum(axis=1).replace(0., np.nan)
equal_weights = universe.astype(float).divide(universe_size, axis=0)
benchmark_weights = equal_weights.mask(~universe)

# Per-date portfolio beta (NaN when no weight*beta product is available),
# then subtract that many units of the benchmark from the weights in place.
# NOTE(review): this removes the full beta only if the benchmark's own beta is
# 1 — confirm against how compute_beta defines its market.
portfolio_beta = (momentum_weights * beta).sum(axis=1, min_count=1)
momentum_weights -= benchmark_weights.multiply(portfolio_beta, axis=0)

summary, pnls = compute_summary(momentum_weights, ret, lags=signal_lags)

display(summary)

# Cumulative PnL per lag, ignoring rows where every lag is NaN.
plot_title = 'strategy {}: PNL plot with different signal lags'.format(strategy_name)
cumulative_pnl = pnls.dropna(how='all').cumsum()
cumulative_pnl.iplot(title=plot_title)

Unnamed: 0,0,1,2,3,5,10,21
sharpe,-0.311373,-0.347373,-0.325173,-0.334288,-0.310762,-0.316575,-0.262789
turnover,8.673823,8.671588,8.671445,8.671254,8.670213,8.668346,8.671945


In [27]:
## a simple mean-reversion strategy:
# For each EWMA half-life: score = -(EWMA of returns) / (21-row rolling std of
# returns), converted to weights, beta-hedged with `benchmark_weights`, and
# evaluated across signal lags.
# Requires `benchmark_weights` and `beta` from the earlier cells.
strategy_name = 'mean_reversion'

# The volatility scaler does not depend on the half-life, so it is computed
# once here instead of on every loop iteration.
# NOTE(review): a window with zero standard deviation yields +/-inf (or NaN)
# scores below — confirm compute_weights tolerates that.
rolling_vol = ret.rolling(window = 21, min_periods = 10).std()

for halflife in [3, 5, 10, 21]:
    # Negated so that recent losers get positive scores and recent winners negative.
    mr_score = -ret.ewm(halflife = halflife).mean() / rolling_vol
    mr_weights = compute_weights(mr_score, universe)
    # beta neutral: subtract the benchmark scaled by the per-date portfolio beta
    mr_weights -= benchmark_weights.multiply((mr_weights * beta).sum(axis=1, min_count=1), axis=0)
    summary, pnls = compute_summary(mr_weights, ret, lags=[0, 1, 2, 3, 5, 10, 21])
    display(summary)
    # Cumulative PnL per lag, ignoring rows where every lag is NaN.
    pnls.dropna(how='all').cumsum().iplot(title='strategy {}-{}: PNL plot with different lags'.format(
        strategy_name, halflife))

Unnamed: 0,0,1,2,3,5,10,21
sharpe,0.331835,0.552883,0.28983,0.119004,0.268704,-0.091378,0.668196
turnover,55.693471,55.692473,55.68852,55.68731,55.685167,55.691157,55.68939


Unnamed: 0,0,1,2,3,5,10,21
sharpe,0.38053,0.54113,0.312288,0.227753,0.306599,0.077574,0.570361
turnover,43.760101,43.760454,43.758478,43.758048,43.761688,43.769391,43.779115


Unnamed: 0,0,1,2,3,5,10,21
sharpe,0.436444,0.55446,0.390921,0.291414,0.371118,0.226137,0.410892
turnover,31.475215,31.4767,31.476931,31.478096,31.484184,31.491999,31.497175


Unnamed: 0,0,1,2,3,5,10,21
sharpe,0.365365,0.457633,0.345932,0.248162,0.30301,0.273958,0.290122
turnover,22.111909,22.113086,22.114676,22.115943,22.120309,22.123498,22.113989
