In [45]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import warnings
warnings.filterwarnings("ignore")
np.set_printoptions(suppress=True)

def get_ret_year(mf_rets, st, ed):
    mf_rets_year = mf_rets[(mf_rets['date']<=ed) & (mf_rets['date']>=st)]
    date_ls = sorted(list(set(mf_rets_year['date'])))
    mf_rets_dicts = mf_rets_year.to_dict('records')
    rets_year_dict = {}
    for dict_i in mf_rets_dicts:
        crsp_fundno = dict_i['crsp_fundno']
        date = dict_i['date']
        ret = dict_i['mret']
        if crsp_fundno not in rets_year_dict:
            rets_year_dict[crsp_fundno] = {}
        rets_year_dict[crsp_fundno][date] = ret
    
    fundno_rets = {}
    NAN = float('nan')
    for fund_i in rets_year_dict:
        dict_i = rets_year_dict[fund_i]
        rest_ls = [NAN for i in range(len(date_ls))]
        for dete_i in dict_i:
            ind = date_ls.index(dete_i)
            rest_ls[ind] = dict_i[dete_i]
        fundno_rets[fund_i] = rest_ls
    fundno_rets_year1 = pd.DataFrame.from_dict(fundno_rets)
    return fundno_rets_year1


def get_qu_ls(quts, fund_retavg):
    ls = [[], [], [], [], [], [], [], [], [], []]
    for id_i in fund_retavg:
        avg = fund_retavg[id_i]
        for i in range(10):
            if (avg>=quts[i]) & (avg<quts[i+1]):
                ls[i].append(int(id_i))
    return ls
        
def get_group_ls(fundno_rets_year1):
    fundno_rets_year1 = fundno_rets_year1.dropna(axis=1, how='any')
    fund_retavg = fundno_rets_year1.mean().to_dict()
    ret_avgls = [fund_retavg[i] for i in fund_retavg]
    # from low to high:
    quts = list(np.percentile(ret_avgls, [(i+1)*10 for i in range(9)]))
    quts.insert(0, -8888)
    quts.append(8888)
    portfolios_fundids = get_qu_ls(quts, fund_retavg)
    return portfolios_fundids

def get_returns(ret_i):
    ret_array = ret_i.to_numpy()
    rest1 = ret_array[0, :]
    inval_indx0 = np.isnan(rest1)
    val_indx0 = 1- np.isnan(rest1)
    num_vals = np.sum(val_indx0)  
    ws = np.array([1/num_vals for i in range(rest1.shape[0])])
    ws[inval_indx0] = 0
    rets = []
    for i in range(12):
        rest1 = ret_array[i, :]
        rest11 = ret_array[i, :]
        rest11[np.isnan(rest11)] = 0
        ret_f1 = np.matrix(rest11)*np.matrix(ws).T
        ret_f1 = ret_f1[0, 0]
        rets.append(ret_f1)
        if i <= 10:
            rest2 = ret_array[i+1, :]
            indx_inval  = np.isnan(rest2)
            indx_val = 1 - indx_inval
            asets_vals = (np.array(rest11)+1)*np.array(ws)
            asets_vals = asets_vals + np.sum(asets_vals[indx_val])/np.sum(indx_val)
            asets_vals[indx_inval] = 0
            ws = asets_vals/np.sum(asets_vals)
            ws = ws.tolist()
    return rets


def get_pf_rets(pf_ids, fundno_rets_year2):
    ret_ls = []
    for pf_i in pf_ids:
        ret_i = fundno_rets_year2[pf_i]
        rets_pf_i = get_returns(ret_i)
        ret_ls.append(rets_pf_i)
    rets = np.array(ret_ls).T
    return rets


def get_all_rets(mf_rets):
    months  = []
    cols = ['p{}'.format(i+1) for i in range(10)]
    NANs = [float('nan') for i in range(12)]
    for i in tqdm(range(55)):
        st = int(str(1964+i)+'01')
        ed = int(str(1964+i)+'12')
        fundno_rets_year1 = get_ret_year(mf_rets, st, ed)
        pf_group_ls = get_group_ls(fundno_rets_year1)
        st1 = int(str(1964+i+1)+'01')
        ed1 = int(str(1964+i+1)+'12')
        fundno_rets_year2 = get_ret_year(mf_rets, st1, ed1)
        new_cols = set(fundno_rets_year1.columns) - set(fundno_rets_year2.columns)
        for col_i in new_cols:
            fundno_rets_year2[col_i] = NANs
        rets_i = get_pf_rets(pf_group_ls, fundno_rets_year2)
        if i==0:
            rets = rets_i
        else:
            rets = np.vstack((rets, rets_i))
        mon_i = [int(str(1964+i+1)+str(j+1).zfill(2)) for j in range(12)]
        months += mon_i
    df_pf_rets = pd.DataFrame(rets, columns=cols)
    df_pf_rets['date'] = months
    return df_pf_rets

# 1 process data

In [88]:
mf_rets = pd.read_csv('./original_data/MFrets_1962to2012.csv')
mf_rets = mf_rets.dropna(axis=0, how='any')
years = list(mf_rets['year'])
months = list(mf_rets['month'].astype(str))
dates = []
for i in range(len(years)):
    date_i = str(years[i]) + str(months[i]).zfill(2)
    dates.append(date_i)
mf_rets['date'] = dates
mf_rets['mret'] = mf_rets['mret']*100
mf_rets = mf_rets.drop(['year', 'month'], axis=1)
mf_rets.to_csv('./MF_rets.csv')

In [61]:
mom = pd.read_csv('./original_data/Mom.csv')
df3 = pd.read_csv('./original_data/FF3.csv')
df3 = df3[df3['Unnamed: 0']>=192701]
df3 = df3.reset_index()
df3['Mom'] = mom['Mom']
dates1 = sorted(list(set(dates)))
df3['date'] = df3['Unnamed: 0']
df3 = df3.drop(['Unnamed: 0', 'index'], axis=1)
df3.to_csv('FF4.csv')

# 2 regression:

$$
\begin{gathered}
r_{i t}=\alpha_{i T}+\beta_{i T} \mathrm{VWRF}_{t}+e_{i t} \quad t=1,2, \cdots, T \\
r_{i t}=\alpha_{i T}+b_{i T} \mathrm{RMRF}_{t}+s_{i T} \mathrm{SMB}_{t}+h_{i T} \mathrm{HML}_{t}+e_{i t} \quad t=1,2, \cdots, T \\
r_{i t}=\alpha_{i T}+b_{i T} \mathrm{RMRF}_{t}+s_{i T} \mathrm{SMB}_{t}+h_{i T} \mathrm{HML}_{t}+p_{i T} \mathrm{PR} 1 \mathrm{YR}_{t}+e_{i t} \\
t=1,2, \cdots, T
\end{gathered}
$$

## 2.1 variable meaning:

- $r_{i t}$ is the return on a portfolio in excess of the one-month T-bill return;
- VWRF is the excess return on the CRSP value-weighted portfolio of all NYSE;
- RMRF is the excess return on a value-weighted aggregate market proxy;
- SMB, HML, PR1YR are returns on valueweighted, zero-investment, factor-mimicking portfolios for size, book-to-market equity, and one-year momentum in stock returns.


## 2.2 portfolio  construction:

On January 1 of each year, I form ten equal-weighted portfolios of mutual funds, using reported returns.  I hold the portfolios for one year, then reform them. This yields a time series of monthly returns on each decile portfolio from 1963 to 1993. Funds that disappear during the course of the year are included in the equal-weighted average until they disappear, then the portfolio weights are readjusted appropriately.

Mutual funds are sorted on January 1 each year from 1963 to 1993 into decile portfolios based on their previous calendar year's return. The portfolios are equally weighted monthly so the weights are readjusted whenever a fund disappears. Funds with the highest past one-year return comprise decile 1 and funds with the lowest comprise decile 10. Deciles 1 and 10 are further subdivided into thirds on the same measure. VWRF is the excess return on the CRSP value-weight market proxy.

RMRF, SMB, and HML are Fama and French's (1993) market proxy and factor-mimicking portfolios for size and book-to-market equity. PR1YR is a factor-mimicking portfolio for one-year return momentum. Alpha is the intercept of the Model. The t-statistics are in parentheses.

In [2]:
ff4_df = pd.read_csv('./FF4.csv')
ff4_df = ff4_df.drop('Unnamed: 0', axis=1)
mf_rets = pd.read_csv('./MF_rets.csv')
mf_rets = mf_rets.drop('Unnamed: 0', axis=1)

In [36]:
df_pf_rets = get_all_rets(mf_rets)

100%|███████████████████████████████████████████| 55/55 [00:32<00:00,  1.71it/s]


In [38]:
df_pf_rets = df_pf_rets.dropna(axis=0, how='any')

In [39]:
df_pf_rets = df_pf_rets[df_pf_rets['date']<201901]

In [40]:
df_pf_rets['ls'] = df_pf_rets['p10'] - df_pf_rets['p1']
ff4_df1 = ff4_df[(ff4_df['date']>=196501) & (ff4_df['date']<=201812)]
ff4_df1 = ff4_df1.drop('date',axis=1)
ff4_df1 = ff4_df1.reset_index()

In [41]:
df_pf_rets[ff4_df1.columns[1:]] = ff4_df1[ff4_df1.columns[1:]]
df_pf_rets1 = df_pf_rets.copy()
cls = ['p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8', 'p9', 'p10', 'ls']
for cls_i in cls:
    df_pf_rets1[cls_i] = df_pf_rets1[cls_i] - df_pf_rets1['RF']
df_pf_rets1['MK'] = df_pf_rets1['Mkt-RF']
df_pf_rets2 = df_pf_rets1[df_pf_rets1['date']<=199312]
df_pf_rets3 = df_pf_rets1[(df_pf_rets1['date']>199312) & (df_pf_rets1['date']<=201812)]

In [66]:
df_pf_rets1.head(10)

Unnamed: 0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,date,ls,Mkt-RF,SMB,HML,RF,Mom,MK
0,5.447729,4.463886,4.15865,5.497686,5.861933,4.229543,3.667814,3.084767,4.634757,4.874929,196501,-0.8528,3.54,2.7,0.12,0.28,-1.44,3.54
1,1.711232,1.557359,0.198353,2.833095,2.091935,2.249812,1.656438,2.874664,2.042772,1.928702,196502,-0.08253,0.44,3.53,0.11,0.3,0.32,0.44
2,-0.128695,-0.868995,-1.321821,-1.135703,-0.931022,-1.724669,-1.059863,-1.360703,-0.959594,0.15738,196503,-0.073925,-1.34,1.88,1.03,0.36,0.15,-1.34
3,3.002791,4.323376,3.102605,6.14903,-6.057863,15.127356,3.184667,-241.872439,4.464557,3.633018,196504,0.320227,3.11,1.15,0.66,0.31,2.63,3.11
4,-2.45539,-2.380424,-3.206664,-1.419176,-1.383699,-1.042943,-0.651861,-0.388768,-1.394899,-0.968262,196505,1.177128,-0.77,0.13,-1.61,0.31,0.6,-0.77
5,-8.38685,0.80066,-6.933109,-11.063352,-6.53851,-7.348622,-6.650678,-13.73232,-8.330364,-12.470488,196506,-4.433638,-5.51,-4.35,0.59,0.35,-3.13,-5.51
6,1.367461,7.381185,1.905741,5.053661,1.92548,2.070418,2.343347,5.858602,3.578242,1.545043,196507,-0.132418,1.43,0.87,2.14,0.31,4.12,1.43
7,7.299883,5.919211,3.999344,6.192048,4.620389,4.765741,3.711014,9.115251,4.542616,5.285605,196508,-2.344278,2.73,2.83,-1.02,0.33,2.58,2.73
8,3.94361,6.186589,4.926426,4.472367,3.762275,3.217879,2.662542,6.692929,3.502343,3.764578,196509,-0.489032,2.86,0.64,-0.12,0.31,3.3,2.86
9,5.636084,5.657979,3.700259,5.994869,4.07919,4.274574,3.148745,8.785883,4.196062,3.40018,196510,-2.545904,2.6,2.53,1.57,0.31,3.46,2.6


In [42]:
df_pf_rets1.to_csv('pf_rets1.csv')
df_pf_rets2.to_csv('pf_rets2.csv')
df_pf_rets3.to_csv('pf_rets3.csv')

In [102]:
cls = ['p10', 'p9', 'p8', 'p7', 'p6', 'p5', 'p4', 'p3', 'p2', 'p1', 'ls']

In [108]:
i = 0
for i in range(11):
    pi = cls[i]
    mean = round(df_pf_rets2[pi].mean(), 2)
    std = round(df_pf_rets2[pi].std(), 2)

    Y = df_pf_rets2[pi]
    X = df_pf_rets2['MK']
    X = sm.add_constant(X)
    mod_ff = sm.OLS(Y, X)
    ols_ff = mod_ff.fit()

    tvs = round(ols_ff.tvalues, 2)
    t1 = round(tvs[0], 2)
    t2 = round(tvs[1], 2)
    coefs = ols_ff.params
    coef1 = round(coefs[0], 2)
    coef2 = round(coefs[1], 2)

    AdjR2 = round(ols_ff.rsquared_adj, 2)

    X1 = df_pf_rets2[['MK', 'SMB', 'HML', 'Mom']]
    X1 = sm.add_constant(X1)
    mod_ff1 = sm.OLS(Y, X1)
    ols_ff1 = mod_ff1.fit()
    tvs1 = ols_ff1.tvalues
    t3 = round(tvs1[0], 2)
    t4 = round(tvs1[1], 2)
    t5 = round(tvs1[2], 2)
    t6 = round(tvs1[3], 2)
    t7 = round(tvs1[4], 2)

    coefs1 = ols_ff1.params
    coef11 = round(coefs1[0], 2)
    coef12 = round(coefs1[1], 2)
    coef13 = round(coefs1[2], 2)
    coef14 = round(coefs1[3], 2)
    coef15 = round(coefs1[4], 2)

    AdjR21 = round(ols_ff1.rsquared_adj, 2)
    line1 = '{}&{}&{}&{}&{}&{}&{}&{}&{}&{}&{}&{}\\\\'.format(i+1, mean, std, coef1, coef2, AdjR2, coef11, coef12, coef13, coef14, coef15, AdjR21)
    line2 = '&&&({})&({})&&({})&({})&({})&({})&({})&\\\\'.format(t1, t2, t3, t4, t5, t6, t7)
    print(line1)
    print(line2)

1&0.99&36.28&0.59&1.04&0.01&0.65&0.62&0.99&-0.89&0.21&0.01\\
&&&(0.3)&(2.46)&&(0.31)&(1.26)&(1.38)&(-1.11)&(0.37)&\\
2&0.63&11.15&0.15&1.22&0.25&0.07&1.13&0.18&-0.22&0.18&0.25\\
&&&(0.29)&(10.76)&&(0.12)&(8.64)&(0.95)&(-1.04)&(1.21)&\\
3&-0.25&14.29&-0.65&1.02&0.1&-0.64&0.81&0.51&-0.42&0.11&0.11\\
&&&(-0.89)&(6.4)&&(-0.82)&(4.43)&(1.9)&(-1.39)&(0.51)&\\
4&0.73&8.23&0.33&1.03&0.32&0.12&0.85&0.46&-0.28&0.3&0.36\\
&&&(0.91)&(12.89)&&(0.31)&(9.59)&(3.52)&(-1.9)&(2.89)&\\
5&0.16&8.16&-0.27&1.1&0.38&-0.36&0.97&0.41&-0.13&0.08&0.39\\
&&&(-0.78)&(14.5)&&(-1.0)&(11.31)&(3.2)&(-0.91)&(0.83)&\\
6&3.4&36.23&2.97&1.12&0.02&3.8&0.71&0.55&-1.33&-0.3&0.02\\
&&&(1.53)&(2.65)&&(1.84)&(1.47)&(0.77)&(-1.65)&(-0.53)&\\
7&-0.06&10.42&-0.46&1.05&0.21&-0.27&0.92&0.4&-0.17&-0.23&0.22\\
&&&(-0.93)&(9.6)&&(-0.51)&(7.39)&(2.21)&(-0.81)&(-1.56)&\\
8&0.12&6.98&-0.33&1.17&0.58&-0.28&1.03&0.38&-0.24&-0.02&0.61\\
&&&(-1.37)&(22.12)&&(-1.1)&(17.45)&(4.4)&(-2.45)&(-0.27)&\\
9&0.28&6.62&-0.16&1.13&0.61&0.02&1.0&0.33&-0.2

In [109]:
i = 0
for i in range(11):
    pi = cls[i]
    mean = round(df_pf_rets3[pi].mean(), 2)
    std = round(df_pf_rets3[pi].std(), 2)

    Y = df_pf_rets3[pi]
    X = df_pf_rets3['MK']
    X = sm.add_constant(X)
    mod_ff = sm.OLS(Y, X)
    ols_ff = mod_ff.fit()

    tvs = round(ols_ff.tvalues, 2)
    t1 = round(tvs[0], 2)
    t2 = round(tvs[1], 2)
    coefs = ols_ff.params
    coef1 = round(coefs[0], 2)
    coef2 = round(coefs[1], 2)

    AdjR2 = round(ols_ff.rsquared_adj, 2)

    X1 = df_pf_rets3[['MK', 'SMB', 'HML', 'Mom']]
    X1 = sm.add_constant(X1)
    mod_ff1 = sm.OLS(Y, X1)
    ols_ff1 = mod_ff1.fit()
    tvs1 = ols_ff1.tvalues
    t3 = round(tvs1[0], 2)
    t4 = round(tvs1[1], 2)
    t5 = round(tvs1[2], 2)
    t6 = round(tvs1[3], 2)
    t7 = round(tvs1[4], 2)

    coefs1 = ols_ff1.params
    coef11 = round(coefs1[0], 2)
    coef12 = round(coefs1[1], 2)
    coef13 = round(coefs1[2], 2)
    coef14 = round(coefs1[3], 2)
    coef15 = round(coefs1[4], 2)

    AdjR21 = round(ols_ff1.rsquared_adj, 2)
    line1 = '{}&{}&{}&{}&{}&{}&{}&{}&{}&{}&{}&{}\\\\'.format(i+1, mean, std, coef1, coef2, AdjR2, coef11, coef12, coef13, coef14, coef15, AdjR21)
    line2 = '&&&({})&({})&&({})&({})&({})&({})&({})&\\\\'.format(t1, t2, t3, t4, t5, t6, t7)
    print(line1)
    print(line2)


1&9.34&119.45&7.38&3.13&0.01&5.39&3.71&2.22&3.13&2.11&0.01\\
&&&(1.06)&(1.95)&&(0.77)&(2.14)&(1.0)&(1.28)&(1.41)&\\
2&3.21&37.15&2.73&0.78&0.0&1.89&1.2&-0.11&1.23&0.9&0.01\\
&&&(1.26)&(1.55)&&(0.87)&(2.22)&(-0.16)&(1.62)&(1.94)&\\
3&0.65&10.44&-0.04&1.11&0.2&-0.18&1.1&0.4&0.31&0.12&0.22\\
&&&(-0.08)&(8.83)&&(-0.34)&(8.18)&(2.33)&(1.62)&(1.0)&\\
4&0.71&8.88&-0.11&1.3&0.39&-0.09&1.26&0.16&0.04&-0.05&0.39\\
&&&(-0.26)&(13.97)&&(-0.21)&(12.47)&(1.22)&(0.27)&(-0.53)&\\
5&0.41&11.49&-0.3&1.13&0.18&-0.35&1.09&0.42&0.04&0.07&0.18\\
&&&(-0.49)&(8.07)&&(-0.57)&(7.17)&(2.14)&(0.21)&(0.56)&\\
6&-3.03&67.29&-4.21&1.88&0.01&-4.73&1.89&1.4&2.27&0.04&0.01\\
&&&(-1.08)&(2.09)&&(-1.19)&(1.93)&(1.11)&(1.65)&(0.05)&\\
7&2.26&23.4&2.11&0.24&-0.0&1.49&0.42&0.67&1.65&0.41&0.03\\
&&&(1.55)&(0.77)&&(1.1)&(1.26)&(1.55)&(3.46)&(1.41)&\\
8&0.79&19.14&-0.03&1.32&0.09&-0.01&1.28&0.15&-0.16&0.01&0.08\\
&&&(-0.03)&(5.37)&&(-0.01)&(4.78)&(0.45)&(-0.41)&(0.05)&\\
9&0.74&20.13&-0.1&1.34&0.08&-0.28&1.37&0.31&0.57&0.09&0.

In [110]:
i = 0
for i in range(11):
    pi = cls[i]
    mean = round(df_pf_rets1[pi].mean(), 2)
    std = round(df_pf_rets1[pi].std(), 2)

    Y = df_pf_rets1[pi]
    X = df_pf_rets1['MK']
    X = sm.add_constant(X)
    mod_ff = sm.OLS(Y, X)
    ols_ff = mod_ff.fit()

    tvs = round(ols_ff.tvalues, 2)
    t1 = round(tvs[0], 2)
    t2 = round(tvs[1], 2)
    coefs = ols_ff.params
    coef1 = round(coefs[0], 2)
    coef2 = round(coefs[1], 2)

    AdjR2 = round(ols_ff.rsquared_adj, 2)

    X1 = df_pf_rets1[['MK', 'SMB', 'HML', 'Mom']]
    X1 = sm.add_constant(X1)
    mod_ff1 = sm.OLS(Y, X1)
    ols_ff1 = mod_ff1.fit()
    tvs1 = ols_ff1.tvalues
    t3 = round(tvs1[0], 2)
    t4 = round(tvs1[1], 2)
    t5 = round(tvs1[2], 2)
    t6 = round(tvs1[3], 2)
    t7 = round(tvs1[4], 2)

    coefs1 = ols_ff1.params
    coef11 = round(coefs1[0], 2)
    coef12 = round(coefs1[1], 2)
    coef13 = round(coefs1[2], 2)
    coef14 = round(coefs1[3], 2)
    coef15 = round(coefs1[4], 2)

    AdjR21 = round(ols_ff1.rsquared_adj, 2)
    line1 = '{}&{}&{}&{}&{}&{}&{}&{}&{}&{}&{}&{}\\\\'.format(i+1, mean, std, coef1, coef2, AdjR2, coef11, coef12, coef13, coef14, coef15, AdjR21)
    line2 = '&&&({})&({})&&({})&({})&({})&({})&({})&\\\\'.format(t1, t2, t3, t4, t5, t6, t7)
    print(line1)
    print(line2)


1&4.86&85.54&3.88&1.97&0.01&2.47&2.0&1.4&1.18&1.07&0.01\\
&&&(1.15)&(2.61)&&(0.71)&(2.44)&(1.23)&(0.92)&(1.3)&\\
2&1.82&26.58&1.31&1.04&0.03&0.67&1.2&-0.03&0.54&0.59&0.03\\
&&&(1.26)&(4.47)&&(0.62)&(4.77)&(-0.08)&(1.38)&(2.34)&\\
3&0.16&12.65&-0.36&1.06&0.14&-0.46&0.98&0.39&-0.02&0.08&0.14\\
&&&(-0.78)&(10.17)&&(-0.96)&(8.7)&(2.5)&(-0.1)&(0.69)&\\
4&0.72&8.53&0.15&1.14&0.35&0.11&1.09&0.24&-0.06&0.05&0.36\\
&&&(0.55)&(18.85)&&(0.41)&(16.53)&(2.61)&(-0.63)&(0.69)&\\
5&0.28&9.83&-0.28&1.11&0.25&-0.36&1.03&0.4&-0.03&0.07&0.26\\
&&&(-0.83)&(14.79)&&(-1.04)&(12.7)&(3.5)&(-0.26)&(0.81)&\\
6&0.42&52.98&-0.29&1.43&0.01&-0.55&1.35&0.82&0.68&-0.14&0.01\\
&&&(-0.14)&(3.07)&&(-0.26)&(2.66)&(1.16)&(0.87)&(-0.27)&\\
7&1.02&17.68&0.67&0.71&0.03&0.19&0.75&0.48&0.74&0.19&0.04\\
&&&(0.97)&(4.58)&&(0.26)&(4.5)&(2.06)&(2.85)&(1.13)&\\
8&0.43&13.98&-0.18&1.24&0.15&-0.12&1.15&0.25&-0.18&-0.03&0.15\\
&&&(-0.36)&(10.86)&&(-0.23)&(9.29)&(1.44)&(-0.91)&(-0.24)&\\
9&0.5&14.52&-0.11&1.22&0.14&-0.2&1.19&0.27&0.2&-0