### Extend from '01/01/1965' to '12/29/2023'

In [8]:
import pandas as pd
import numpy as np
import wrds
import statsmodels.api as sm
import getFamaFrenchFactors as gff

# Download CRSP stock information
conn = wrds.Connection()
crsp = conn.raw_sql("""
                      select a.permno, 
                      a.permco,
                      a.date, 
                      b.shrcd, 
                      b.exchcd, 
                      a.ret, 
                      a.vol, 
                      a.shrout, 
                      a.prc
                      from crsp.msf as a
                      left join crsp.msenames as b
                      on a.permno=b.permno
                      and b.namedt<=a.date
                      and a.date<=b.nameendt
                      where a.date between '01/01/1965' and '12/29/2023'
                      and b.exchcd between 1 and 3
                      and b.shrcd between 10 and 11
                      """)

# Download Fama and French 3 Factors
ff3 = gff.famaFrench3Factor(frequency='m')
ff3['date']=pd.to_datetime(ff3['date_ff_factors'], format='%Y-%m-%d')
ff3['year-month']=ff3['date'].apply(lambda x:str(x.year)+'-'+str(x.month))

Enter your WRDS username [rory]:rorysrose
Enter your password:········
WRDS recommends setting up a .pgpass file.
Create .pgpass file now [y/n]?: y
Created .pgpass file successfully.
You can create this file yourself at any time with the create_pgpass_file() function.
Loading library list...
Done


In [9]:
df=crsp
df['date']=pd.to_datetime(df['date'], format='%Y-%m-%d')
df['prc']=df['prc'].apply(lambda x: abs(x))
df['year-month']=df['date'].apply(lambda x:str(x.year)+'-'+str(x.month))
df.sort_values(by=['permno', 'date'], ascending=True, inplace=True)
df=df.reset_index(drop=True)
df['logret'] = np.log(1+df['ret'])

In [10]:
lagged=6 # J-month lagged returns
holdingperiod=6 # K-month holding returns
skipmonth=1

df['momentum_{}_logreturn'.format(lagged)] =\
np.exp(df.groupby('permno')['logret'].rolling(lagged).sum().reset_index(drop=True))-1
df['holding_{}_skip_{}_logreturn'.format(holdingperiod, skipmonth)] =\
np.exp(df.groupby('permno')['logret'].rolling(
    holdingperiod).sum().reset_index(drop=True).shift(
    -holdingperiod-skipmonth))-1
df_=df.dropna().reset_index(drop=True)

In [11]:
# remove some extreme values
percentile=0.01
bottom_extreme_value=df_['momentum_6_logreturn'].quantile(percentile)
top_extreme_value=df_['momentum_6_logreturn'].quantile(1-percentile)
df_=\
df_[(df_['momentum_6_logreturn'] >= bottom_extreme_value)&\
    (df_['momentum_6_logreturn'] <= top_extreme_value)].reset_index(drop=True)

num_percentile=10
df_['momentum_{}_logreturn_rank_{}_{}'.format(lagged,\
                                              1,\
                                              num_percentile)]=\
df_.groupby('date')['momentum_{}_logreturn'.format(lagged)].transform(
    lambda x: pd.qcut(x.rank(),
                      num_percentile,
                      labels=False))+1

In [12]:
_df_=\
df_.groupby(['date',
             'momentum_{}_logreturn_rank_{}_{}'.format(lagged, 1, num_percentile)]
           )['holding_{}_skip_{}_logreturn'.format(holdingperiod, skipmonth)].mean().reset_index(drop=False)

In [15]:
_df_

Unnamed: 0,date,momentum_6_logreturn_rank_1_10,holding_6_skip_1_logreturn
0,1965-06-30,1,0.364855
1,1965-06-30,2,0.290408
2,1965-06-30,3,0.275600
3,1965-06-30,4,0.261634
4,1965-06-30,5,0.216780
...,...,...,...
7025,2023-12-29,6,-0.008247
7026,2023-12-29,7,-0.002059
7027,2023-12-29,8,0.001576
7028,2023-12-29,9,0.006630


### Extend Q1 a to the most recent data ('01/01/1965' to '2/29/2024').

In [7]:
list_portfolio_rank=[1,2,3,4,5,6,7,8,9,10,"10-1"]
for i in list_portfolio_rank[0:]:
    
    if i=="10-1":
        
        data_low=_df_[_df_['momentum_{}_logreturn_rank_{}_{}'.format(lagged,
                                                                     1,
                                                                     num_percentile)
                          ]==1].reset_index(drop=True)
        data_high=_df_[_df_['momentum_{}_logreturn_rank_{}_{}'.format(lagged,
                                                                      1,
                                                                      num_percentile)
                           ]==10].reset_index(drop=True)
        
        result_return=\
        data_high['holding_{}_skip_{}_logreturn'.format(holdingperiod, skipmonth)]-\
        data_low['holding_{}_skip_{}_logreturn'.format(holdingperiod, skipmonth)]
            
        reg_mean=\
        sm.OLS(result_return/holdingperiod,
               [1 for i in range(result_return.shape[0])]).fit(cov_type='HAC',
                                                               cov_kwds={'maxlags':lagged},
                                                               use_t=True).params.iloc[0]

        reg_t_stat=\
        sm.OLS(result_return/holdingperiod,
               [1 for i in range(result_return.shape[0])]).fit(cov_type='HAC',
                                                               cov_kwds={'maxlags':lagged},
                                                               use_t=True).tvalues.iloc[0]
        
        print("portfolio: {}, mean: {}, t stat: {}".format(i, reg_mean, reg_t_stat))
        
    else:
        
        data=_df_[_df_['momentum_{}_logreturn_rank_{}_{}'.format(lagged,
                                                                 1,
                                                                 num_percentile)
                      ]==i].reset_index(drop=True)

        result_return=\
        data['holding_{}_skip_{}_logreturn'.format(holdingperiod, skipmonth)]

        reg_mean=\
        sm.OLS(result_return/holdingperiod,
               [1 for i in range(result_return.shape[0])]).fit(cov_type='HAC',
                                                               cov_kwds={'maxlags':lagged},
                                                               use_t=True).params.iloc[0]

        reg_t_stat=\
        sm.OLS(result_return/holdingperiod,
               [1 for i in range(result_return.shape[0])]).fit(cov_type='HAC',
                                                               cov_kwds={'maxlags':lagged},
                                                               use_t=True).tvalues.iloc[0]
        
        print("portfolio: {}, mean: {}, t stat: {}".format(i, reg_mean, reg_t_stat))
        
print("")
print("The results still hold although means decrease a little bit.")

portfolio: 1, mean: 0.006260296200799513, t stat: 1.861330613713324
portfolio: 2, mean: 0.007927318729602987, t stat: 2.940434403010609
portfolio: 3, mean: 0.009704437027607931, t stat: 4.157250852766279
portfolio: 4, mean: 0.011189090620285827, t stat: 5.1765532109615116
portfolio: 5, mean: 0.011506218474050603, t stat: 5.760647791224003
portfolio: 6, mean: 0.011853585452971835, t stat: 6.194696670160636
portfolio: 7, mean: 0.012361356038984063, t stat: 6.530216871973713
portfolio: 8, mean: 0.013001653988327717, t stat: 6.737390034804002
portfolio: 9, mean: 0.014023789252845737, t stat: 6.557791679340448
portfolio: 10, mean: 0.015598188728516146, t stat: 5.932331760379424
portfolio: 10-1, mean: 0.009337892527716633, t stat: 5.381634216219918

The results still hold.


### Merge with Fama French 3 Factors

In [27]:
_df_=\
df_.groupby(['date',
             'momentum_{}_logreturn_rank_{}_{}'.format(lagged, 1, num_percentile)]
           )['holding_{}_skip_{}_logreturn'.format(holdingperiod, skipmonth)].mean().reset_index(drop=False)
_df_['holding_{}_skip_{}_logreturn'.format(holdingperiod, skipmonth)]=\
_df_['holding_{}_skip_{}_logreturn'.format(holdingperiod, skipmonth)]/6 # monthly return

_df_portfolio=\
_df_.pivot(index='date',
           columns='momentum_{}_logreturn_rank_{}_{}'.format(lagged, 1, num_percentile),
           values='holding_{}_skip_{}_logreturn'.format(holdingperiod, skipmonth)).reset_index(drop=False)
_df_portfolio['year-month']=\
_df_portfolio['date'].apply(lambda x: str(x.year)+'-'+str(x.month))

table = pd.merge(_df_portfolio,
                 ff3.drop(columns = ['date', 'date_ff_factors']),
                 how='left', on=['year-month'])
table['10-1']=table[10]-table[1]

### CAPM

In [30]:
list_portfolio_rank=[1,2,3,4,5,6,7,8,9,10,"10-1"]

for i in list_portfolio_rank[0:]:
    
    table['avg_return_{}'.format(i)] = table[i] - table["RF"]
    X = table[["Mkt-RF"]].values
    X = sm.add_constant(X)
    y = table['avg_return_{}'.format(i)].values
    
    reg_mean=\
    sm.OLS(y,X).fit(cov_type='HAC',
                    cov_kwds={'maxlags': lagged},
                    use_t=True).params[0]

    reg_t_stat=\
    sm.OLS(y,X).fit(cov_type='HAC',
                    cov_kwds={'maxlags': lagged},
                    use_t=True).tvalues[0]
    
    print("portfolio: {}, alpha: {}, t stat: {}".format(i, reg_mean, reg_t_stat))

print("")
print("The results remain valid, despite a slight decrease in means.")

portfolio: 1, alpha: 0.002784888890821373, t stat: 0.8108563866620047
portfolio: 2, alpha: 0.004419000042484615, t stat: 1.5978072842545064
portfolio: 3, alpha: 0.006168118953962684, t stat: 2.5822162149364587
portfolio: 4, alpha: 0.0076494412460688145, t stat: 3.4600817873015144
portfolio: 5, alpha: 0.007887533561775175, t stat: 3.8846301916164947
portfolio: 6, alpha: 0.008245981413033381, t stat: 4.2530942220532495
portfolio: 7, alpha: 0.008752959214989298, t stat: 4.57645980429064
portfolio: 8, alpha: 0.009353110542539018, t stat: 4.8062775215795215
portfolio: 9, alpha: 0.010414084075443617, t stat: 4.822283430745048
portfolio: 10, alpha: 0.011932435654748016, t stat: 4.516764653387988
portfolio: 10-1, alpha: 0.005469366071958881, t stat: 3.130572188753375

The results remain valid, despite a slight decrease in means.


### Fama-French three-factor alpha

In [31]:
list_portfolio_rank=[1,2,3,4,5,6,7,8,9,10,"10-1"]

for i in list_portfolio_rank[0:]:
    
    table['avg_return_{}'.format(i)] = table[i] - table["RF"]
    X = table[["Mkt-RF", "SMB", "HML"]].values
    X = sm.add_constant(X)
    y = table['avg_return_{}'.format(i)].values
    
    reg_mean=\
    sm.OLS(y,X).fit(cov_type='HAC',
                    cov_kwds={'maxlags': lagged},
                    use_t=True).params[0]

    reg_t_stat=\
    sm.OLS(y,X).fit(cov_type='HAC',
                    cov_kwds={'maxlags': lagged},
                    use_t=True).tvalues[0]
    
    print("portfolio: {}, alpha: {}, t stat: {}".format(i, reg_mean, reg_t_stat))

print("")
print("The results remain valid, despite a slight decrease in means.")

portfolio: 1, alpha: 0.003532866014252679, t stat: 1.0137807877803233
portfolio: 2, alpha: 0.0050453190495915986, t stat: 1.7930656890797347
portfolio: 3, alpha: 0.0065685385012084286, t stat: 2.6952742903434452
portfolio: 4, alpha: 0.00805575789284619, t stat: 3.5651831042339466
portfolio: 5, alpha: 0.008141340464652308, t stat: 3.9463326650566777
portfolio: 6, alpha: 0.008473980913785987, t stat: 4.305085120408617
portfolio: 7, alpha: 0.008940806489359283, t stat: 4.61155582046597
portfolio: 8, alpha: 0.009512445606051762, t stat: 4.847198757389187
portfolio: 9, alpha: 0.01060098708996266, t stat: 4.886264130150616
portfolio: 10, alpha: 0.01211566740215471, t stat: 4.537334823063074
portfolio: 10-1, alpha: 0.00492099346840327, t stat: 2.7537237216500867

The results remain valid, despite a slight decrease in means.
