In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
data_folder = '/Users/chengxinxiangye/Desktop/Quant Asset/'  

# used to build size deciles
exclude =  (1, 2)

# only include 3 exchanges mentioned in the paper
possible_exchcd = (1, 2, 3)

# include common stocks, exclude REITe etc.
possible_shrcd = (10, 11)

In [3]:
# load data from the folder
mcrsp_raw = pd.read_pickle(data_folder + 'mcrsp_raw.pkl')

# make sure company permno is in integer format
mcrsp_raw['permno'] = mcrsp_raw['permno'].astype(int)

# only use data within timeframe required
mcrsp = mcrsp_raw[(mcrsp_raw['date'].dt.year>=1990) & (mcrsp_raw['date'].dt.year<=2021)].copy()  

In [4]:
# only include stocks as required
mcrsp= mcrsp[mcrsp['exchcd'].isin(possible_exchcd)]
mcrsp= mcrsp[mcrsp['shrcd'].isin(possible_shrcd)]

# drop return data with no value
mcrsp = mcrsp[mcrsp['ret'].notna()].copy()

# calculate market equity for size deciles
mcrsp['me'] = mcrsp['prc'].abs() * mcrsp['shrout'] * 1e-3

# sort dataframe with date and permno for further calculation
mcrsp = mcrsp.sort_values(by=['date','permno']).reset_index(drop=True).copy()

In [5]:
# store for iteration of each day 
t_list = mcrsp['date'].unique()

# store the quantiles for each day
size_quantiles = []

for i in t_list:
    # check that on month i exclude stocks in NASDAQ
    month = mcrsp[(mcrsp['date'] == i) & (mcrsp['exchcd'].isin(exclude))]
    
    # 10 quantiles for each month
    split = pd.qcut(month['me'], q=10, retbins=True)[1]
    size_quantiles.append({'date': i, 'q10': split[0],
                            'q20': split[1],
                            'q30': split[2],
                            'q40': split[3],
                            'q50': split[4],
                            'q60': split[5],
                            'q70': split[6],
                            'q80': split[7],
                            'q90': split[8]})

mcrsp = mcrsp.merge(pd.DataFrame(size_quantiles), on='date', how='left')

In [6]:
# first assume all stocks are in size bin 10
mcrsp['size'] = 10

# check from smallest size to largest
mcrsp.loc[mcrsp['me'] <= mcrsp['q10'], 'size'] = 1
mcrsp.loc[(mcrsp['me'] <= mcrsp['q20']) & (mcrsp['me'] > mcrsp['q10']), 'size'] = 2
mcrsp.loc[(mcrsp['me'] <= mcrsp['q30']) & (mcrsp['me'] > mcrsp['q20']), 'size'] = 3
mcrsp.loc[(mcrsp['me'] <= mcrsp['q40']) & (mcrsp['me'] > mcrsp['q30']), 'size'] = 4
mcrsp.loc[(mcrsp['me'] <= mcrsp['q50']) & (mcrsp['me'] > mcrsp['q40']), 'size'] = 5
mcrsp.loc[(mcrsp['me'] <= mcrsp['q60']) & (mcrsp['me'] > mcrsp['q50']), 'size'] = 6
mcrsp.loc[(mcrsp['me'] <= mcrsp['q70']) & (mcrsp['me'] > mcrsp['q60']), 'size'] = 7
mcrsp.loc[(mcrsp['me'] <= mcrsp['q80']) & (mcrsp['me'] > mcrsp['q70']), 'size'] = 8
mcrsp.loc[(mcrsp['me'] <= mcrsp['q90']) & (mcrsp['me'] > mcrsp['q80']), 'size'] = 9

# drop column not using in future calculations
mcrsp = mcrsp.drop(columns=['q10', 'q20', 'q30', 'q40', 'q50', 'q60', 'q70', 'q80', 'q90'])

In [7]:
# calculate cumulative returns for entire time period
mcrsp['ret_1'] = mcrsp['ret'] + 1
mcrsp['cumret'] = mcrsp.groupby('permno').ret_1.cumprod()

# sort dataframe for further calculation
mcrsp = mcrsp.sort_values(by=['permno','date']).reset_index(drop=True).copy() 

# construct period variable to record how many times return has been cumulated
mcrsp['period'] = 1
mcrsp['period'] = mcrsp.groupby('permno').period.cumsum()

# calculate return to build momentum deciles
mcrsp['ret_rank'] = np.NaN
mcrsp = mcrsp.sort_values(by=['permno','date']).reset_index(drop=True).copy() 

# for dates return cuulated less than or equal to 6 days directly assign return by minus 1 
for i in range(len(mcrsp['period'])):
    if mcrsp['period'][i] <= 6:
        mcrsp['ret_rank'][i] = mcrsp['cumret'][i] - 1

# for other days that have more than 6 times cumulated
# assign the return that has been devided by the cuulative return for 6 periods before and minus 1
mcrsp.loc[mcrsp['period'] > 6, 'ret_rank'] = mcrsp.loc[mcrsp['period'] > 6, 'cumret'] / mcrsp.groupby('permno')['cumret'].shift(6) - 1

# only keep data with rank
mcrsp = mcrsp[mcrsp['ret_rank'].notna()].copy()


In [8]:
# store the cut quantiles for each day
cut_quantiles = []

# store quantile cuts and merge to dataframe on date
for i in t_list:
    month_1 = mcrsp[mcrsp['date'] == i]
    cut_quantiles.append({'date': i, 'q30': month_1['ret_rank'].quantile(0.3), 'q70': month_1['ret_rank'].quantile(0.7)})

mcrsp = mcrsp.merge(pd.DataFrame(cut_quantiles), on='date', how='left')
mcrsp = mcrsp[(mcrsp['q30'].notna()) & (mcrsp['q70'].notna())].copy()

# assume all stocks are in second decile
mcrsp['mom'] = 2
mcrsp.loc[mcrsp['ret_rank'] <= mcrsp['q30'], 'mom'] = 1
mcrsp.loc[mcrsp['ret_rank'] >= mcrsp['q70'], 'mom'] = 3

# shift the signal by 1 month avoid using current data
mcrsp['mom'] = mcrsp['mom'].shift(1)
mcrsp = mcrsp[mcrsp['mom'].notna()].copy()

In [9]:
# calculate the return of holding 6 periods
mcrsp['holding_6'] = mcrsp.groupby('permno')['ret_rank'].shift(-5)/6
mcrsp = (mcrsp[mcrsp['holding_6'].notna()]).copy()

In [10]:
# generate the table as it is shown in the lecture note
result = mcrsp[['mom','holding_6','size']].groupby(['mom','size'])\
                        .agg({'holding_6': 'mean'}).reset_index().pivot(index='mom', columns='size', values='holding_6')
result['allstock'] = mcrsp[['mom','holding_6']].groupby(['mom']).agg({'holding_6': 'mean'}).reset_index().iloc[:,1].values

# Long short portfolio results
result.loc['strategy'] = result.iloc[2,:] - result.iloc[0,:]
result

size,1,2,3,4,5,6,7,8,9,10,allstock
mom,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1.0,0.071999,0.01459,0.007715,0.008187,0.007937,0.009384,0.009697,0.010923,0.010196,0.009047,0.010322
2.0,0.059595,0.013691,0.011784,0.012493,0.01172,0.012072,0.011739,0.011668,0.011706,0.010934,0.012054
3.0,0.013731,0.013692,0.015575,0.016732,0.015462,0.013596,0.013238,0.013026,0.01394,0.013347,0.014588
strategy,-0.058267,-0.000898,0.007859,0.008545,0.007525,0.004212,0.003541,0.002104,0.003744,0.0043,0.004266
