In [1]:
# --- standard library ---
import os
import sys

# --- third-party ---
import numpy as np
import pandas as pd
import cufflinks as cf

# Put cufflinks in offline mode before any .iplot() call is made.
cf.go_offline()

# --- user defined functions ---
# The working directory must be on the import path before the local
# `utility` package is imported.
sys.path.append(os.getcwd())
from utility.utility import compute_weights, compute_beta, compute_summary

In [2]:
# Load the long-format table (one row per ticker/date) and order it by ticker,
# then by date, both ascending.
raw_data = (
    pd.read_csv(r'data.csv')
    .sort_values(by=['ticker', 'date'], ascending=True)
)

# Column types as loaded; `date` is not parsed here, so it stays a string column.
print(raw_data.dtypes)
raw_data.head()

ticker     object
date       object
last      float64
volume      int64
dtype: object


Unnamed: 0,ticker,date,last,volume
0,1332 JT,2013-01-04,169.0987,1464100
1,1332 JT,2013-01-07,166.3266,1783500
2,1332 JT,2013-01-08,166.3266,1759800
3,1332 JT,2013-01-09,165.4026,767800
4,1332 JT,2013-01-10,167.2507,1503100


In [3]:
## sanity check: count rows whose (ticker, date) pair already appeared earlier
key_columns = ['ticker', 'date']
is_repeat = raw_data.duplicated(subset=key_columns, keep='first')
n_duplicates = is_repeat.sum()
print('There are {} duplicated data.'.format(n_duplicates))

There are 0 duplicated data.


In [4]:
## transform data into wide panel format (rows: date, columns: ticker)
# Index once and reuse it for both fields instead of re-indexing raw_data twice.
indexed = raw_data.set_index(['ticker', 'date'])
price = indexed['last'].unstack('ticker')
volume = indexed['volume'].unstack('ticker')

## daily log returns
# Missing prices are bridged by carrying the last available price forward for
# at most 5 consecutive rows, for the return calculation only (`price` itself
# keeps its NaNs). The fill is done explicitly because the implicit padding and
# the `limit` keyword of `pct_change` are deprecated in pandas >= 2.1.
price_filled = price.ffill(limit=5)
# log1p(x) == log(1 + x), but is more accurate for small x.
ret = np.log1p(price_filled.pct_change(fill_method=None))

## average daily traded value: 21-row rolling mean of price * volume,
## requiring at least 10 observations in the window
adv = (price * volume).rolling(window=21, min_periods=10).mean()

In [5]:
## construct universe: non-null close price and adv > 1M USD (assuming 122 Yen = 1 USD)
jpy_per_usd = 122      # assumed FX rate used to express the USD threshold in Yen
min_adv_usd = 1e6      # liquidity floor in USD
min_adv_jpy = min_adv_usd * jpy_per_usd

has_price = price.notnull()
is_liquid = adv > min_adv_jpy
universe = has_price & is_liquid

In [12]:
# Beta of the stocks: computed by the project helper from the log returns and
# the universe mask. Preview the last five rows.
beta = compute_beta(ret, universe)
beta.tail(5)

ticker,1332 JT,1333 JT,1334 JT,1605 JT,1721 JT,1801 JT,1802 JT,1803 JT,1808 JT,1812 JT,...,9503 JT,9531 JT,9532 JT,9602 JT,9613 JT,9681 JT,9735 JT,9766 JT,9983 JT,9984 JT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-03-15,1.010226,0.771635,,1.206411,0.685761,0.920795,1.031198,0.991042,0.933728,0.977613,...,0.705662,0.44455,0.46419,0.615555,0.993759,,0.606908,1.031077,0.863996,1.008591
2021-03-16,1.008427,0.770848,,1.206634,0.6844,0.9211,1.031207,0.991837,0.935217,0.977828,...,0.70614,0.445712,0.465095,0.613816,0.994271,,0.606908,1.03134,0.863416,1.008855
2021-03-17,1.006201,0.768698,,1.20951,0.684969,0.920366,1.03043,0.992077,0.938196,0.976504,...,0.708009,0.44612,0.464446,0.612198,0.99842,,0.605787,1.032345,0.862462,1.013885
2021-03-18,1.005117,0.767918,,1.208405,0.686675,0.921769,1.031637,0.992444,0.942085,0.977225,...,0.708003,0.445024,0.46436,0.613058,0.996861,,0.605578,1.02845,0.862796,1.008964
2021-03-19,1.004974,0.767944,,1.207817,0.686424,0.92245,1.03318,0.993789,0.944866,0.97798,...,0.707719,0.444282,0.463374,0.612052,0.994768,,0.604161,1.026329,0.858414,1.006682


In [20]:
## a simple momentum strategy (dollar-neutral):
strategy_name = 'momentum'

# Signal: mean daily log return over a 120-row window (at least 60
# observations), then shifted forward by 21 rows.
trailing_mean_ret = ret.rolling(window=120, min_periods=60).mean()
momentum_score = trailing_mean_ret.shift(21)
momentum_weights = compute_weights(momentum_score, universe)

# Evaluate the same weights at several signal lags.
signal_lags = [0, 1, 2, 3, 5, 10, 21]
summary, pnls = compute_summary(momentum_weights, ret, lags=signal_lags)

display(summary)

# Cumulative PNL per lag, skipping rows where every lag is missing.
plot_title = 'strategy {}: PNL plot with different signal lags'.format(strategy_name)
cumulative_pnl = pnls.dropna(how='all').cumsum()
cumulative_pnl.iplot(title=plot_title)

Unnamed: 0,0,1,2,3,5,10,21
sharpe,-0.178134,-0.14728,-0.1753,-0.182773,-0.159646,-0.022839,-0.018493
turnover,11.545686,11.545933,11.546457,11.546778,11.547318,11.541963,11.537802


In [21]:
## a beta-neutral momentum strategy :

strategy_name = 'momentum_beta_neutral'

# Signal: mean daily log return over a 100-row window (at least 60
# observations), then shifted forward by 21 rows.
# NOTE(review): the dollar-neutral momentum cell uses window=120; confirm the
# different window here is intentional, since it affects the comparison.
trailing_mean_ret = ret.rolling(window=100, min_periods=60).mean()
momentum_score = trailing_mean_ret.shift(21)
momentum_weights = compute_weights(momentum_score, universe)

# simplified beta neutral (no longer dollar neutral)
# Benchmark: equal weight on every in-universe name and NaN elsewhere. Dates
# with an empty universe get a NaN divisor instead of a division by zero.
universe_size = universe.sum(axis=1).replace(0., np.nan)
benchmark_weights = universe.astype(float).divide(universe_size, axis=0).mask(~universe)

# Per-date sum of weight * beta (NaN when no product is available on that
# date); that multiple of the benchmark is subtracted from the weights.
portfolio_beta = (momentum_weights * beta).sum(axis=1, min_count=1)
momentum_weights -= benchmark_weights.multiply(portfolio_beta, axis=0)

# Evaluate the hedged weights at several signal lags.
signal_lags = [0, 1, 2, 3, 5, 10, 21]
summary, pnls = compute_summary(momentum_weights, ret, lags=signal_lags)

display(summary)

# Cumulative PNL per lag, skipping rows where every lag is missing.
plot_title = 'strategy {}: PNL plot with different signal lags'.format(strategy_name)
cumulative_pnl = pnls.dropna(how='all').cumsum()
cumulative_pnl.iplot(title=plot_title)

Unnamed: 0,0,1,2,3,5,10,21
sharpe,0.161919,0.178874,0.177408,0.201535,0.283014,0.226958,0.109535
turnover,12.618318,12.616352,12.615901,12.61604,12.614599,12.610303,12.617384


In [22]:
## a simple mean-reversion strategy:
# Score = minus the exponentially weighted mean return, scaled by recent
# volatility, evaluated for several EWM halflives. `benchmark_weights` and
# `beta` are taken from earlier cells.
strategy_name = 'mean_reversion'

# Volatility scaler: 21-row rolling std of returns (at least 10 observations).
# It does not depend on the halflife, so it is computed once outside the loop.
ret_vol = ret.rolling(window=21, min_periods=10).std()
# A zero std (no return variation inside the window) would turn the score into
# +/-inf; map it to NaN so the score is missing instead.
# NOTE(review): assumes compute_weights treats NaN scores as missing - confirm.
ret_vol = ret_vol.replace(0., np.nan)

signal_lags = [0, 1, 2, 3, 5, 10, 21]

for halflife in [3, 5, 10, 21]:
    mr_score = -(ret.ewm(halflife=halflife).mean()) / ret_vol
    mr_weights = compute_weights(mr_score, universe)

    # beta neutral: subtract the benchmark scaled by the per-date sum of
    # weight * beta (NaN when no product is available on that date)
    portfolio_beta = (mr_weights * beta).sum(axis=1, min_count=1)
    mr_weights -= benchmark_weights.multiply(portfolio_beta, axis=0)

    summary, pnls = compute_summary(mr_weights, ret, lags=signal_lags)
    display(summary)

    # Cumulative PNL per lag, skipping rows where every lag is missing.
    plot_title = 'strategy {}-{}: PNL plot with different lags'.format(
        strategy_name, halflife)
    pnls.dropna(how='all').cumsum().iplot(title=plot_title)

Unnamed: 0,0,1,2,3,5,10,21
sharpe,0.270594,0.452195,0.217543,0.004623,0.151406,-0.090122,0.629318
turnover,55.778018,55.77701,55.773433,55.772323,55.771114,55.777742,55.776739


Unnamed: 0,0,1,2,3,5,10,21
sharpe,0.304216,0.434199,0.229928,0.113059,0.210204,0.083137,0.537632
turnover,43.820542,43.820819,43.819134,43.818663,43.822757,43.830917,43.841432


Unnamed: 0,0,1,2,3,5,10,21
sharpe,0.348394,0.445439,0.301171,0.193485,0.300907,0.238409,0.411149
turnover,31.498947,31.500247,31.50057,31.501764,31.507961,31.515879,31.52151


Unnamed: 0,0,1,2,3,5,10,21
sharpe,0.288486,0.367423,0.27434,0.173529,0.255134,0.279654,0.300637
turnover,22.109825,22.110974,22.112552,22.11372,22.118035,22.121164,22.112275
