In [2]:
import pandas as pd
import numpy as np
import scipy.stats as stats
from tqdm import tqdm
from dateutil.relativedelta import relativedelta

In [1]:
from xquant.backtest.backtest import run_backtest
from xquant.portfolio import Portfolio
from xquant.util import *

In [3]:
import plotly.express as px

In [4]:
industry_index = pd.read_csv('data\\WIND_II_industry_index.csv', index_col=['Date'], parse_dates=['Date'])
total_returns = pd.read_csv('data\\quarterly_total_returns_II.csv', index_col=['Date'], parse_dates=['Date'])
earnings = pd.read_csv('data\\quarterly_earnings_II.csv', index_col=['Date'], parse_dates=['Date'])
benchmark = pd.read_csv('data\\csi_300.csv', index_col=['date'], parse_dates=['date'])

In [5]:
START = pd.Timestamp('20100401')
END = pd.Timestamp('20191231')
SECTORS = list(industry_index.columns)

## Momentum Strategy

At each rebalance date, build zero-cost portfolios that longs industries with strong momentum (U) financed by shorting industries with weak momentum (D).

$H_{0}:r_{U}-r_{D}=0$

In [6]:
def get_momentum(industry, date, look_back) -> float:
    '''calculates the momentum of an industry'''

    # skip one month to avoid factors like bid-ask spread
    end_date = date - relativedelta(months=1)
    start_date = end_date - relativedelta(months=look_back)

    try:
        # calculate return
        period = industry_index[industry].loc[start_date:end_date]
        start_price = period.head(1)[0]
        end_price = period.tail(1)[0]
    except IndexError:
        print(date, start_date, end_date)
        print(period)
        print('\n')

    momentum = (end_price/start_price) - 1

    return momentum

In [7]:
def get_mom_rank(industry, date, look_back) -> float:
    '''calculates the numeric rank of an industry's momentum among peers'''
    mom_list = [get_momentum(sector, date, look_back) for sector in SECTORS]
    mom_dict = dict(zip(SECTORS, mom_list))

    series = pd.Series(mom_dict).dropna()
    df = pd.DataFrame(series, columns=['momentum'])
    df.sort_values(by=['momentum'], ascending=False, inplace=True)
    df['rank'] = np.arange(1,len(df)+1)
    
    rank = df.at[industry, 'rank']
    return rank

In [8]:
def get_cap_portfolio(prices, include, funds) -> Portfolio:

    total_cap = prices[include].sum()
    weights = [prices[industry]/total_cap for industry in include]

    budgets = [funds*weight for weight in weights] # budget available for each industry
    
    shares = np.divide(budgets, prices[include].values)
    shares_dict = dict(zip(include, shares))

    portfolio = Portfolio(long=shares_dict, short={}, cash=0)
    
    return portfolio

In [9]:
def stock_selection_mom(funds, date, look_back) -> Portfolio:

    long_include, short_include = [],[]
        
    for industry in SECTORS:
        mom_rank = get_mom_rank(industry, date, look_back)
        if mom_rank <= 8:
            long_include.append(industry)
        elif mom_rank >= 17:
            short_include.append(industry)

    prices = industry_index.loc[closest_trading_day(date, industry_index.index, 'bfill')]    

    # equal weighted portfolio    
    long_shares = [funds/len(long_include)/prices[i] for i in long_include]
    long_stocks = dict(zip(long_include, long_shares))
    long_portfolio = Portfolio(long=long_stocks, short={}, cash=0)
    short_shares = [funds/len(short_include)/prices[j] for j in short_include]
    short_stocks = dict(zip(short_include, short_shares))
    short_portfolio = Portfolio(long=short_stocks, short={}, cash=0)

    # cap weighted portfolio
    # long_portfolio = get_cap_portfolio(prices, long_include, funds)
    # short_portfolio = get_cap_portfolio(prices, short_include, funds)

    return long_portfolio, short_portfolio

In [10]:
def calculate_return_mom(date, look_back, holding_period):
    # at any given time, the number of portfolios this strategy holds is equal to holding_period
    sub_longs, sub_shorts = [], []

    for i in range(1, holding_period+1):
        look_back_end = date - relativedelta(months=i)
        look_back_start = look_back_end - relativedelta(months=look_back)

        sub_long, sub_short = stock_selection_mom(100, look_back_start, look_back)
        sub_longs.append(sub_long)
        sub_shorts.append(sub_short)
    
    long_total_val, short_total_val = 0, 0
    for l, s  in zip(sub_longs, sub_shorts):
        long_total_val += l.get_net_liquidation(date, industry_index)
        short_total_val += s.get_net_liquidation(date, industry_index)
    
    long_return = long_total_val / (100 * holding_period) - 1
    short_return = short_total_val / (100 * holding_period) - 1

    long_short_excess = long_return - short_return
    return long_short_excess

In [11]:
chart_r = pd.DataFrame(columns=['H=3','H=6','H=9','H=12','H=18','H=24'], index=['L=3','L=6','L=9','L=12','L=18','L=24'])
chart_r.index.rename('Look-back', inplace=True)
chart_t = chart_r.copy()

In [13]:
for col in tqdm(chart_r.columns):
    for idx in chart_r.index:

        l_b, h_p = int(idx[2:]), int(col[2:])
        long_short_excess = []
        for month in pd.date_range(start='20050101', end='20210501', freq='M'):
            month = (month+relativedelta(days=1))
            long_short_excess.append(calculate_return_mom(date=month, look_back=l_b, holding_period=h_p))

        cumulative = np.prod([i+1 for i in long_short_excess])
        n_days = (pd.Timestamp('20210501') - pd.Timestamp('20050101')).days
        annualized = (cumulative + 1) ** (365/n_days) - 1
        t_stat = stats.ttest_1samp(long_short_excess, 0).statistic

        chart_r.at[idx, col] = annualized
        chart_t.at[idx, col] = t_stat

100%|██████████| 6/6 [4:15:30<00:00, 2555.10s/it]


In [14]:
chart_r

Unnamed: 0_level_0,H=3,H=6,H=9,H=12,H=18,H=24
Look-back,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
L=3,0.0291701,0.118877,0.184633,0.201845,0.269463,0.231232
L=6,0.221675,0.272376,0.26557,0.313467,0.333065,0.170749
L=9,0.244962,0.290557,0.34656,0.395805,0.299183,0.0829073
L=12,0.107223,0.139182,0.107429,0.0377047,0.000287748,3.90332e-07
L=18,-3.71141e-08,1.38667e-09,-9.69119e-11,-3.07927e-11,-1.66533e-15,0.0
L=24,4.44089e-15,0.0,0.0,0.0,0.0,0.0


In [15]:
chart_t

Unnamed: 0_level_0,H=3,H=6,H=9,H=12,H=18,H=24
Look-back,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
L=3,0.142555,2.28432,3.50044,3.98821,4.51476,4.20773
L=6,2.70019,3.09205,2.99074,3.19325,3.33353,2.37683
L=9,2.61958,2.8257,3.04318,3.17821,2.89137,1.42666
L=12,1.73993,1.91335,1.68623,0.95534,-1.08095,-2.97202
L=18,-1.1796,-2.19327,-3.03636,-3.5365,-4.57326,-5.71907
L=24,-4.11578,-4.61826,-4.95325,-5.25819,-6.34736,-7.56473


In [16]:
chart_r.to_csv('D:\\Downloads\\return.csv')
chart_t.to_csv('D:\\Downloads\\t_stat.csv')