In [1]:
import pandas as pd
import numpy as np
import wrds
import statsmodels.api as sm

# Download CRSP stock information
conn = wrds.Connection()
crsp = conn.raw_sql("""
                      select a.permno, 
                      a.permco,
                      a.date, 
                      b.shrcd, 
                      b.exchcd, 
                      a.ret, 
                      a.vol, 
                      a.shrout, 
                      a.prc
                      from crsp.msf as a
                      left join crsp.msenames as b
                      on a.permno=b.permno
                      and b.namedt<=a.date
                      and a.date<=b.nameendt
                      where a.date between '01/01/1965' and '12/31/1989'
                      and b.exchcd between 1 and 3
                      and b.shrcd between 10 and 11
                      """)

Enter your WRDS username [rory]:rorysrose
Enter your password:········
WRDS recommends setting up a .pgpass file.
Create .pgpass file now [y/n]?: y
Created .pgpass file successfully.
You can create this file yourself at any time with the create_pgpass_file() function.
Loading library list...
Done


In [2]:
df=crsp
df['date']=pd.to_datetime(df['date'], format='%Y-%m-%d')
df['prc']=df['prc'].apply(lambda x: abs(x))
df['year-month']=df['date'].apply(lambda x:str(x.year)+'-'+str(x.month))
df.sort_values(by=['permno', 'date'], ascending=True, inplace=True)
df=df.reset_index(drop=True)
df['logret'] = np.log(1+df['ret'])

In [3]:
lagged=6 # J-month lagged returns
holdingperiod=6 # K-month holding returns
skipmonth=1 # skip one month between J-month lagged returns and K-month holding returns

df['momentum_{}_logreturn'.format(lagged)] =\
np.exp(df.groupby('permno')['logret'].rolling(lagged).sum().reset_index(drop=True))-1
df['holding_{}_skip_{}_logreturn'.format(holdingperiod, skipmonth)] =\
np.exp(df.groupby('permno')['logret'].rolling(
    holdingperiod).sum().reset_index(drop=True).shift(
    -holdingperiod-skipmonth))-1
df_=df.dropna().reset_index(drop=True)

In [4]:
# remove some extreme values
percentile=0.01
bottom_extreme_value=df_['momentum_6_logreturn'].quantile(percentile)
top_extreme_value=df_['momentum_6_logreturn'].quantile(1-percentile)
df_=\
df_[(df_['momentum_6_logreturn'] >= bottom_extreme_value)&\
    (df_['momentum_6_logreturn'] <= top_extreme_value)].reset_index(drop=True)

num_percentile=10
df_['momentum_{}_logreturn_rank_{}_{}'.format(lagged,\
                                              1,\
                                              num_percentile)]=\
df_.groupby('date')['momentum_{}_logreturn'.format(lagged)].transform(
    lambda x: pd.qcut(x.rank(),
                      num_percentile,
                      labels=False))+1
_df_=\
df_.groupby(['date',
             'momentum_{}_logreturn_rank_{}_{}'.format(lagged, 1, num_percentile)]
           )['holding_{}_skip_{}_logreturn'.format(holdingperiod, skipmonth)].mean().reset_index(drop=False)

In [7]:
list_portfolio_rank=[1,2,3,4,5,6,7,8,9,10,"10-1"]
for i in list_portfolio_rank[0:]:
    
    if i=="10-1":
        
        data_low=_df_[_df_['momentum_{}_logreturn_rank_{}_{}'.format(lagged,
                                                                     1,
                                                                     num_percentile)
                          ]==1].reset_index(drop=True)
        data_high=_df_[_df_['momentum_{}_logreturn_rank_{}_{}'.format(lagged,
                                                                      1,
                                                                      num_percentile)
                           ]==10].reset_index(drop=True)
        
        result_return=\
        data_high['holding_{}_skip_{}_logreturn'.format(holdingperiod, skipmonth)]-\
        data_low['holding_{}_skip_{}_logreturn'.format(holdingperiod, skipmonth)]
            
        reg_mean=\
        sm.OLS(result_return/holdingperiod,
               [1 for i in range(result_return.shape[0])]).fit(cov_type='HAC',
                                                               cov_kwds={'maxlags':lagged},
                                                               use_t=True).params.iloc[0]

        reg_t_stat=\
        sm.OLS(result_return/holdingperiod,
               [1 for i in range(result_return.shape[0])]).fit(cov_type='HAC',
                                                               cov_kwds={'maxlags':lagged},
                                                               use_t=True).tvalues.iloc[0]
        
        print("portfolio: {}, mean: {}, t stat: {}".format(i, reg_mean, reg_t_stat))
        
    else:
        
        data=_df_[_df_['momentum_{}_logreturn_rank_{}_{}'.format(lagged,
                                                                 1,
                                                                 num_percentile)
                      ]==i].reset_index(drop=True)

        result_return=\
        data['holding_{}_skip_{}_logreturn'.format(holdingperiod,
                                                   skipmonth)]

        reg_mean=\
        sm.OLS(result_return/holdingperiod,
               [1 for i in range(result_return.shape[0])]).fit(cov_type='HAC',
                                                               cov_kwds={'maxlags':lagged},
                                                               use_t=True).params.iloc[0]

        reg_t_stat=\
        sm.OLS(result_return/holdingperiod,
               [1 for i in range(result_return.shape[0])]).fit(cov_type='HAC',
                                                               cov_kwds={'maxlags':lagged},
                                                               use_t=True).tvalues.iloc[0]
        
        print("portfolio: {}, mean: {}, t stat: {}".format(i, reg_mean, reg_t_stat))
        
print("")
print("Comparing to Jegadeesh and Titman (JF, 1993): ")
print("")
print("Jegadeesh and Titman (1993) with J=6 and K=6 in Panel B of Table I: Sell = {} (mean) with {} (t stat)".format(0.0068, 1.35))
print("My results with portfolio: {} (Sell) is {} (mean) with {} (t stat)".format(1, 0.0052, 1.16))
print("")
print("Jegadeesh and Titman (1993) with J=6 and K=6 in Panel B of Table I: Buy = {} (mean) with {} (t stat)".format(0.0178, 4.41))
print("My results with portfolio: {} (Buy) is {} (mean) with {} (t stat)".format(1, 0.0171, 4.10))
print("")
print("Jegadeesh and Titman (1993) with J=6 and K=6 in Panel B of Table I: Buy and Sell = {} (mean) with {} (t stat)".format(0.0114, 3.37))
print("My results with portfolio: {} (Buy and Sell) is {} (mean) with {} (t stat)".format("10-1", 0.0118, 5.21))

portfolio: 1, mean: 0.005280307415913469, t stat: 1.1554147222895323
portfolio: 2, mean: 0.009018936095079151, t stat: 2.1774365367564523
portfolio: 3, mean: 0.010980965833952451, t stat: 2.855101649891131
portfolio: 4, mean: 0.012537399069458048, t stat: 3.5109670748942934
portfolio: 5, mean: 0.012107383123772774, t stat: 3.5027529440177267
portfolio: 6, mean: 0.013437730183290324, t stat: 4.104036607978746
portfolio: 7, mean: 0.01381333620323369, t stat: 4.310613561963477
portfolio: 8, mean: 0.014547443275394815, t stat: 4.398689845645834
portfolio: 9, mean: 0.015244790705183822, t stat: 4.219732520903058
portfolio: 10, mean: 0.01715545863671897, t stat: 4.102252934959423
portfolio: 10-1, mean: 0.011875151220805506, t stat: 5.21109295485174

Comparing to Jegadeesh and Titman (JF, 1993): 

Jegadeesh and Titman (1993) with J=6 and K=6 in Panel B of Table I: Sell = 0.0068 (mean) with 1.35 (t stat)
My results with portfolio: 1 (Sell) is 0.0052 (mean) with 1.16 (t stat)

Jegadeesh and Tit