In [32]:
import os

import cufflinks as cf
import numpy as np
import pandas as pd

cf.go_offline()

In [4]:
## load the raw price/volume records.
## The CSV location can be overridden with the YINTERCEPT_DATA_PATH environment
## variable; the fallback is the original author's local path.
DATA_PATH = os.environ.get(
    'YINTERCEPT_DATA_PATH',
    r'C:\Users\ruico\OneDrive - HKUST Connect\Desktop\YInterceptTest\data.csv',
)
raw_data = pd.read_csv(DATA_PATH)
raw_data.head()

Unnamed: 0,ticker,date,last,volume
0,1332 JT,2013-01-04,169.0987,1464100
1,1332 JT,2013-01-07,166.3266,1783500
2,1332 JT,2013-01-08,166.3266,1759800
3,1332 JT,2013-01-09,165.4026,767800
4,1332 JT,2013-01-10,167.2507,1503100


In [14]:
## check whether any (ticker, date) pair appears more than once
is_repeat = raw_data.duplicated(subset=['ticker', 'date'], keep='first')
n_duplicates = is_repeat.sum()
print('There are {} duplicated data.'.format(n_duplicates))

There are 0 duplicated data.


In [15]:
## transform data into panel format (rows: date, columns: ticker)
panel = raw_data.set_index(['ticker', 'date'])
price = panel['last'].unstack('ticker')
volume = panel['volume'].unstack('ticker')
## daily returns: forward-fill price gaps of up to 5 rows explicitly, because the
## implicit `limit` / `fill_method` keywords of pct_change are deprecated in recent pandas
ret = price.ffill(limit = 5).pct_change(fill_method = None)
## 21-row rolling mean of price * volume (at least 10 observations required)
adv = (price * volume).rolling(window = 21, min_periods = 10).mean()

In [26]:
## construct universe: non-null close price and adv > 5M USD (assuming 122 Yen = 1 USD)
MIN_ADV_USD = 5e6
JPY_PER_USD = 122
has_price = price.notnull()
is_liquid = adv > MIN_ADV_USD * JPY_PER_USD
universe = has_price & is_liquid

In [79]:
def compute_beta(ret, univ, window=252, method = 'equal', mkt_cap = None):
    """Estimate rolling OLS betas of every instrument against a benchmark return.

    For each pair of labels in ``univ.index`` that are ``window`` positions
    apart, instrument returns are regressed on ``[1, benchmark]`` over that
    (label-inclusive) span, and the slope is stored at the span's end date.

    Parameters
    ----------
    ret : pandas.DataFrame
        Returns, one row per date and one column per instrument.
    univ : pandas.DataFrame
        Only its index is used, to step the rolling window.
        NOTE(review): the benchmark is not restricted to universe members.
    window : int, default 252
        Distance (in index positions) between the start and end of each span.
    method : {'equal', 'mktcap'}, default 'equal'
        'equal' uses the row-wise mean of ``ret`` as the benchmark.
        'mktcap' weights returns by the 21-row rolling mean of ``mkt_cap``
        (at least 10 observations), normalised per row and lagged by one row.
    mkt_cap : pandas.DataFrame, optional
        Required when ``method == 'mktcap'``.

    Returns
    -------
    pandas.DataFrame
        Same index/columns as ``ret``; NaN where no beta was estimated.

    Raises
    ------
    ValueError
        If ``window`` is not positive, ``method`` is unknown, or ``mkt_cap``
        is missing for the 'mktcap' method.
    """
    if window < 1:
        raise ValueError('window must be a positive integer, got {!r}'.format(window))

    if method == 'equal':
        benchmark_pnl = ret.mean(axis=1)
    elif method == 'mktcap':
        if mkt_cap is None:
            raise ValueError("mkt_cap must be provided when method='mktcap'")
        benchmark_weights = mkt_cap.rolling(window= 21, min_periods = 10).mean()
        benchmark_weights = benchmark_weights.divide(benchmark_weights.sum(axis=1, min_count=1), axis=0)
        # lag the weights by one row so each return is weighted with the previous row's weights
        benchmark_pnl = (benchmark_weights.shift() * ret).sum(axis=1, min_count=1)
    else:
        raise ValueError("unknown method {!r}; expected 'equal' or 'mktcap'".format(method))

    beta = pd.DataFrame(np.nan, index = ret.index, columns = ret.columns)
    for start_date, end_date in zip(univ.index[:-window], univ.index[window:]):
        # keep instruments with more than half a window of non-null returns in the span
        valid_instr = ret.loc[start_date:end_date, :].count(axis=0) > (window//2)
        # regress only on dates where the benchmark return is available
        period_benchmark_ret = benchmark_pnl.loc[start_date:end_date].dropna()
        valid_days = period_benchmark_ret.index
        # design matrix: intercept column followed by the benchmark return
        X = np.vstack([np.ones(len(period_benchmark_ret)), period_benchmark_ret.values]).T
        # remaining missing instrument returns are treated as zero
        Y = ret.reindex(valid_days).loc[:, valid_instr].fillna(0).values
        # OLS via the pseudo-inverse of the normal equations; row 1 holds the slopes
        beta.loc[end_date, valid_instr] = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(Y)[1, :]
    return beta
        
    
def compute_weights(score, univ, method = 'UniformRank'):
    """Turn a score panel into long/short weights restricted to the universe.

    Parameters
    ----------
    score : pandas.DataFrame
        Scores, one row per date and one column per instrument.
    univ : pandas.DataFrame
        Boolean membership mask; scores are reindexed to its shape and
        entries outside the universe become NaN.
    method : {'UniformRank'}, default 'UniformRank'
        'UniformRank' uses the row-wise percentile rank minus 0.5.
        NOTE(review): percentile ranks lie in (0, 1], so rank - 0.5 is not
        symmetric around zero; confirm this is intended.

    Returns
    -------
    pandas.DataFrame
        Weights where, per row, positive entries sum to 1 and negative
        entries sum to -1; zero entries stay 0 and masked entries stay NaN.

    Raises
    ------
    ValueError
        If ``method`` is not a supported weighting scheme.
    """
    df_score = score.reindex_like(univ).mask(~univ)
    if method == 'UniformRank':
        weights = df_score.rank(axis=1, pct=True) - 0.5
    else:
        raise ValueError("unknown method {!r}; expected 'UniformRank'".format(method))
    ## Rescale long leg and short leg to be sum of 1
    is_long = weights > 0
    is_short = weights < 0
    long_total = weights.where(is_long).sum(axis=1, min_count=1)
    short_total = weights.where(is_short).sum(axis=1, min_count=1)
    scaled_long = weights.divide(long_total, axis=0)
    # short_total is negative, so the quotient is positive and must be negated
    scaled_short = -weights.divide(short_total, axis=0)
    return weights.mask(is_long, scaled_long).mask(is_short, scaled_short)

def compute_summary(weights, ret, lags = [0]):
    """Summarise a weight panel's performance under several extra execution lags.

    For every entry of ``lags`` the weights are shifted by that many rows, then
    by one more row before being multiplied with ``ret`` to get the daily PnL.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        A summary indexed by lag with columns 'sharpe' (mean / std of daily
        PnL scaled by sqrt(252)) and 'turnover' (total absolute weight change
        as a percentage of total absolute weight), and the daily PnL series
        with one column per lag.
    """
    stats = pd.DataFrame(index = lags)
    pnl_by_lag = {}
    for delay in lags:
        lagged = weights.shift(delay)
        held = lagged.shift()
        # a row with no usable weight/return pair yields NaN rather than 0
        daily_pnl = (held * ret).sum(axis=1, min_count=1)
        traded = (lagged - held).abs().sum(axis=1)
        gross = lagged.abs().sum(axis=1)
        stats.loc[delay, 'sharpe'] = daily_pnl.mean() * np.sqrt(252) / daily_pnl.std()
        stats.loc[delay, 'turnover'] = traded.sum() / gross.sum()* 100
        pnl_by_lag[delay] = daily_pnl
    return stats, pd.concat(pnl_by_lag, axis=1)

In [71]:
## compute beta of every stock against the equal-weighted benchmark return
beta = compute_beta(ret, universe)

## equal benchmark weights: 1/N for each universe member, NaN for non-members
## (an empty universe gives a NaN divisor instead of a division by zero)
universe_size = universe.sum(axis=1).replace(0., np.nan)
benchmark_weights = universe.astype(float).divide(universe_size, axis=0).mask(~universe)

In [70]:
## show the benchmark weights, skipping dates on which every weight is missing
benchmark_weights.dropna(axis=0, how='all')

ticker,1332 JT,1333 JT,1334 JT,1605 JT,1721 JT,1801 JT,1802 JT,1803 JT,1808 JT,1812 JT,...,9503 JT,9531 JT,9532 JT,9602 JT,9613 JT,9681 JT,9735 JT,9766 JT,9983 JT,9984 JT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-18,0.000000,0.0,0.0,0.004878,0.004878,0.004878,0.004878,0.004878,0.000000,0.004878,...,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878
2013-01-21,0.000000,0.0,0.0,0.004878,0.004878,0.004878,0.004878,0.004878,0.000000,0.004878,...,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878
2013-01-22,0.000000,0.0,0.0,0.004878,0.004878,0.004878,0.004878,0.004878,0.000000,0.004878,...,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878
2013-01-23,0.000000,0.0,0.0,0.004878,0.004878,0.004878,0.004878,0.004878,0.000000,0.004878,...,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878
2013-01-24,0.000000,0.0,0.0,0.004878,0.004878,0.004878,0.004878,0.004878,0.000000,0.004878,...,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878,0.004878
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-15,0.004525,0.0,0.0,0.004525,0.004525,0.004525,0.004525,0.004525,0.004525,0.004525,...,0.004525,0.004525,0.004525,0.004525,0.004525,0.000000,0.004525,0.004525,0.004525,0.004525
2021-03-16,0.004525,0.0,0.0,0.004525,0.004525,0.004525,0.004525,0.004525,0.004525,0.004525,...,0.004525,0.004525,0.004525,0.004525,0.004525,0.000000,0.004525,0.004525,0.004525,0.004525
2021-03-17,0.004545,0.0,0.0,0.004545,0.004545,0.004545,0.004545,0.004545,0.004545,0.004545,...,0.004545,0.004545,0.004545,0.004545,0.004545,0.000000,0.004545,0.004545,0.004545,0.004545
2021-03-18,0.004545,0.0,0.0,0.004545,0.004545,0.004545,0.004545,0.004545,0.004545,0.004545,...,0.004545,0.004545,0.004545,0.004545,0.004545,0.000000,0.004545,0.004545,0.004545,0.004545


In [82]:
## a simple momentum strategy:
## score = rolling mean return over 241 rows (at least 100 observations), lagged by 21 rows
strategy_name = 'momentum'
lookback = 252 - 11
momentum_score = ret.rolling(window = lookback, min_periods = 100).mean().shift(21)
momentum_weights = compute_weights(momentum_score, universe)
# beta neutral: subtract the benchmark scaled by the portfolio's beta exposure
portfolio_beta = (momentum_weights * beta).sum(axis=1, min_count=1)
momentum_weights = momentum_weights - benchmark_weights.multiply(portfolio_beta, axis=0)
summary, pnls = compute_summary(momentum_weights, ret, lags=[0, 1, 2, 3, 5, 10, 21])
display(summary)
plot_title = 'strategy {}: PNL plot with different lags'.format(strategy_name)
pnls.dropna(how='all').cumsum().iplot(title=plot_title)

Unnamed: 0,sharpe,turnover
0,-0.377895,8.516377
1,-0.358685,8.515942
2,-0.323828,8.516143
3,-0.320515,8.516359
5,-0.318966,8.514995
10,-0.294378,8.512922
21,-0.358416,8.517257


In [87]:
## a simple mean-reversion strategy:
## score = minus the exponentially weighted mean return, scaled by 21-row rolling volatility
strategy_name = 'mean_reversion'
lag_grid = [0, 1, 2, 3, 5, 10, 21]
# the volatility scaler does not depend on the half-life, so compute it once
ret_vol = ret.rolling(window = 21, min_periods = 10).std()
for halflife in [3, 5, 10, 21]:
    mr_score = -ret.ewm(halflife = halflife).mean() / ret_vol
    mr_weights = compute_weights(mr_score, universe)
    # beta neutral: subtract the benchmark scaled by the portfolio's beta exposure
    portfolio_beta = (mr_weights * beta).sum(axis=1, min_count=1)
    mr_weights = mr_weights - benchmark_weights.multiply(portfolio_beta, axis=0)
    summary, pnls = compute_summary(mr_weights, ret, lags=lag_grid)
    display(summary)
    plot_title = 'strategy {}-{}: PNL plot with different lags'.format(strategy_name, halflife)
    pnls.dropna(how='all').cumsum().iplot(title=plot_title)

Unnamed: 0,sharpe,turnover
0,0.347114,55.702697
1,0.572136,55.701668
2,0.310715,55.697309
3,0.13587,55.696248
5,0.269286,55.693611
10,-0.100091,55.699244
21,0.654828,55.698737


Unnamed: 0,sharpe,turnover
0,0.401532,43.790835
1,0.568435,43.79107
2,0.338197,43.788686
3,0.243827,43.788248
5,0.310046,43.791396
10,0.073719,43.798474
21,0.554339,43.808974


Unnamed: 0,sharpe,turnover
0,0.456371,31.528006
1,0.57795,31.529314
2,0.408499,31.529299
3,0.305207,31.530582
5,0.376455,31.536242
10,0.224224,31.543179
21,0.404606,31.548225


Unnamed: 0,sharpe,turnover
0,0.383882,22.163742
1,0.473601,22.164601
2,0.353733,22.166115
3,0.25566,22.167284
5,0.3083,22.171608
10,0.27123,22.174063
21,0.296149,22.165676
