In [1]:
# --- standard library ---
import os
import sys

# --- third-party ---
import numpy as np
import pandas as pd
import cufflinks as cf

# Switch cufflinks to offline plotting; the .iplot() calls further down rely on it.
cf.go_offline()

# --- user defined functions ---
# The working directory must be on sys.path BEFORE the import below,
# otherwise the local `utility` package cannot be resolved.
sys.path.append(os.getcwd())
from utility.utility import compute_weights, compute_beta, compute_summary

In [2]:
# Load the long-format records (one row per ticker/date) and order them by
# ticker first, then by date; ascending order is the sort_values default.
raw_data = pd.read_csv('data.csv').sort_values(by=['ticker', 'date'])

# Column dtypes as plain text, followed by a rich preview of the first rows.
print(raw_data.dtypes)
raw_data.head()

ticker     object
date       object
last      float64
volume      int64
dtype: object


Unnamed: 0,ticker,date,last,volume
0,1332 JT,2013-01-04,169.0987,1464100
1,1332 JT,2013-01-07,166.3266,1783500
2,1332 JT,2013-01-08,166.3266,1759800
3,1332 JT,2013-01-09,165.4026,767800
4,1332 JT,2013-01-10,167.2507,1503100


In [3]:
## check whether there is duplicated data
# A row counts as a duplicate when an earlier row already carries the same
# (ticker, date) pair; the wide-format unstack in the next cell requires
# these pairs to be unique.
print('There are {} duplicated data.'.format(
    raw_data.duplicated(subset=['ticker', 'date']).sum()
))

There are 0 duplicated data.


In [4]:
## transform data into wide panel format (rows: date, columns: ticker)
# Index the long table once and reuse it for both the price and volume panels.
long_panel = raw_data.set_index(['ticker', 'date'])
price = long_panel['last'].unstack('ticker')
volume = long_panel['volume'].unstack('ticker')

# Daily log returns.
# The original `price.pct_change(limit=5)` depended on pct_change's implicit
# forward-fill (`fill_method='pad'` capped by `limit`). Those keywords are
# deprecated since pandas 2.1 and rejected by later releases, so the fill is
# done explicitly here: gaps of up to 5 consecutive missing prices are carried
# forward, longer gaps stay NaN.
price_filled = price.ffill(limit=5)
# log(p_t / p_{t-1}) is the same quantity as log(1 + pct_change) without the
# subtract-one / add-one round trip.
ret = np.log(price_filled / price_filled.shift(1))

# Average daily traded value over the trailing 21 rows (at least 10 observations
# required). Uses the unfilled prices, so rows without a price contribute NaN.
adv = (price * volume).rolling(window=21, min_periods=10).mean()

In [5]:
## construct universe: non-null close price and adv > 1M USD (assuming 122 Yen = 1 USD)
# Boolean date x ticker mask; comparisons against NaN adv evaluate to False,
# so names without enough adv history are excluded as well.
universe = price.notna() & adv.gt(122 * 1e6)

In [7]:
## a simple momentum strategy (dollar-neutral):
strategy_name = 'momentum'

# Signal: mean log return over a trailing 100-row window (at least 60
# observations required), then pushed forward by 21 rows so the most recent
# 21 rows of returns are not part of the score.
momentum_score = (
    ret.rolling(100, min_periods=60)
       .mean()
       .shift(21)
)
# Turn scores into portfolio weights restricted to the tradable universe
# (weighting scheme lives in utility.utility.compute_weights).
momentum_weights = compute_weights(momentum_score, universe)

# beta neutral -- currently DISABLED for this strategy.
# NOTE(review): these commented-out lines are the only place in the notebook
# where `beta` and `benchmark_weights` are defined; any later cell that uses
# them needs its own definitions to survive Restart & Run All.
# beta = compute_beta(ret, universe)
# benchmark_weights = universe.astype(float).divide(universe.sum(axis=1).replace(0., np.nan), axis=0).mask(~universe)
# momentum_weights -= benchmark_weights.multiply( (momentum_weights * beta).sum(axis=1, min_count=1), axis=0 )

# Performance table (one column per signal lag) plus the matching PnL series.
summary, pnls = compute_summary(
    momentum_weights,
    ret,
    lags=[0, 1, 2, 3, 5, 10, 21],
)
display(summary)

Unnamed: 0,0,1,2,3,5,10,21
sharpe,-0.109516,-0.097854,-0.1311,-0.090927,0.081089,0.186001,0.075805
turnover,12.65228,12.650667,12.650431,12.650589,12.649373,12.645767,12.652948


In [8]:
# Cumulative PnL per signal lag; rows where every lag is NaN are dropped first
# so the curves start at the first date with any PnL.
(
    pnls.dropna(how='all')
        .cumsum()
        .iplot(title='strategy {}: PNL plot with different signal lags'.format(strategy_name))
)

In [10]:
## a simple mean-reversion strategy (beta-neutral):
strategy_name = 'mean_reversion'

# `beta` and `benchmark_weights` are required by the hedge inside the loop.
# They were previously only "defined" in commented-out code of the momentum
# cell, so this cell raised NameError on a fresh kernel. Define them here so
# the cell is self-contained.
# NOTE(review): compute_beta's exact definition lives in utility.utility --
# the call signature is taken from the disabled momentum code; confirm.
beta = compute_beta(ret, universe)
# Equal-weighted benchmark over the universe: 1/N for members, NaN elsewhere.
# Rows with an empty universe divide by NaN instead of 0.
benchmark_weights = (
    universe.astype(float)
    .divide(universe.sum(axis=1).replace(0., np.nan), axis=0)
    .mask(~universe)
)

# Trailing 21-row return volatility (at least 10 observations). It does not
# depend on the half-life, so compute it once outside the loop.
# NOTE(review): a name with zero volatility over the window yields an infinite
# or NaN score; confirm compute_weights tolerates that.
ret_vol = ret.rolling(window=21, min_periods=10).std()

for halflife in [3, 5, 10, 21]:
    # Score: negative exponentially-weighted mean return, scaled by volatility.
    mr_score = -ret.ewm(halflife=halflife).mean() / ret_vol
    mr_weights = compute_weights(mr_score, universe)

    # beta neutral: subtract the benchmark scaled by the portfolio's beta
    # exposure on each date (min_count=1 keeps all-NaN rows as NaN, not 0).
    portfolio_beta = (mr_weights * beta).sum(axis=1, min_count=1)
    mr_weights -= benchmark_weights.multiply(portfolio_beta, axis=0)

    summary, pnls = compute_summary(mr_weights, ret, lags=[0, 1, 2, 3, 5, 10, 21])
    display(summary)
    pnls.dropna(how='all').cumsum().iplot(
        title='strategy {}-{}: PNL plot with different lags'.format(strategy_name, halflife)
    )

Unnamed: 0,0,1,2,3,5,10,21
sharpe,0.270594,0.452195,0.217543,0.004623,0.151406,-0.090122,0.629318
turnover,55.778018,55.77701,55.773433,55.772323,55.771114,55.777742,55.776739


Unnamed: 0,0,1,2,3,5,10,21
sharpe,0.304216,0.434199,0.229928,0.113059,0.210204,0.083137,0.537632
turnover,43.820542,43.820819,43.819134,43.818663,43.822757,43.830917,43.841432


Unnamed: 0,0,1,2,3,5,10,21
sharpe,0.348394,0.445439,0.301171,0.193485,0.300907,0.238409,0.411149
turnover,31.498947,31.500247,31.50057,31.501764,31.507961,31.515879,31.52151


Unnamed: 0,0,1,2,3,5,10,21
sharpe,0.288486,0.367423,0.27434,0.173529,0.255134,0.279654,0.300637
turnover,22.109825,22.110974,22.112552,22.11372,22.118035,22.121164,22.112275
