In [1]:
import os
import pandas as pd
import numpy as np
import wrds
from random import *
from dateutil.relativedelta import *
from pandas.tseries.offsets import *
from scipy import stats
import matplotlib.pyplot as plt
import pandas_datareader
import datetime
import warnings
from scipy.optimize import fmin
warnings.filterwarnings('ignore', category=FutureWarning)
from scipy.stats import skew

In [2]:
data_folder = '/Users/chengxinxiangye/Desktop/Quant Asset/' 
id_wrds = 'yahan123456'  
possible_exchcd = (1, 2, 3)
possible_shrcd = (10,11)

In [None]:
# Establish a connection to WRDS
conn = wrds.Connection(wrds_username='wrds_id')

# Load CRSP returns and change variables format
crsp_raw = conn.raw_sql("""
    select a.permno, a.date, b.shrcd, b.exchcd, 
    a.ret, a.prc, a.shrout
    from crspq.msf as a
    left join crsp.msenames as b
    on a.permno=b.permno
    and b.namedt<=a.date
    and a.date<=b.nameendt
    where a.date between '01/01/1964' and '12/31/1997'
""")

# Sort data
crsp_raw = crsp_raw.sort_values(['permno', 'date']).reset_index(drop=True).copy()

# Set date column to datetime
crsp_raw['date'] = pd.to_datetime(crsp_raw['date'], format='%Y-%m-%d', errors='ignore') + MonthEnd(0)

# Store data to data folder
crsp_raw.to_pickle(data_folder + 'crsp_raw.pkl')

WRDS recommends setting up a .pgpass file.
You can create this file yourself at any time with the create_pgpass_file() function.
Loading library list...
Done


In [None]:
dlret_raw = conn.raw_sql("""
    select permno, dlret, dlstdt, dlstcd
    from crspq.msedelist
""")

# Set date column to datetime
dlret_raw['date'] = pd.to_datetime(dlret_raw['dlstdt'])+ MonthEnd(0)

# Store data to data folder
dlret_raw.to_pickle(data_folder + 'dlret_raw.pkl')

#Load Data from folder
dcrsp_raw = pd.read_pickle(data_folder + 'crsp_raw.pkl')

# Makesure data is in numeric type
dcrsp_raw['permno'] = dcrsp_raw['permno'].astype(int)

# Load CRSP Deslisting returns
dlret_raw = pd.read_pickle(data_folder + 'dlret_raw.pkl')

# Makesure data is in numeric type
dlret_raw['permno'] = dlret_raw['permno'].astype(int)
dlret_raw['permno']
# Merge for input
CRSP_Stocks = dcrsp_raw.merge(dlret_raw[['dlret','date','permno']], how='left', on=['permno','date']).copy()

In [5]:
#Load Data from folder
dcrsp_raw = pd.read_pickle(data_folder + 'crsp_raw.pkl')

# Makesure data is in numeric type
dcrsp_raw['permno'] = dcrsp_raw['permno'].astype(int)

# Load CRSP Deslisting returns
dlret_raw = pd.read_pickle(data_folder + 'dlret_raw.pkl')

# Makesure data is in numeric type
dlret_raw['permno'] = dlret_raw['permno'].astype(int)
dlret_raw['permno']
# Merge for input
CRSP_Stocks = dcrsp_raw.merge(dlret_raw[['dlret','date','permno']], how='left', on=['permno','date']).copy()

In [None]:
def PS3_Q1(dcrsp):
    # Filter data by SHRCD, EXCHCD
    dcrsp=dcrsp[dcrsp['exchcd'].isin(possible_exchcd)]
    dcrsp=dcrsp[dcrsp['shrcd'].isin(possible_shrcd)]

    # Adjust delisting return
    # 1. If both delisting and holding period return are not NA, calculate return using both
    dcrsp['ret'] = np.where(dcrsp['ret'].notna() & dcrsp['dlret'].notna(), 
                        (1+dcrsp['ret'])*(1+dcrsp['dlret'])-1, dcrsp['ret'])
    # 2. If holding period return is NA and delisting return is not NA, set return to be delisting return
    dcrsp['ret'] = np.where(dcrsp['ret'].isna()  & dcrsp['dlret'].notna(), dcrsp['dlret'], dcrsp['ret'])

    # Calculate Market Equity = Price($) x shares outstanding(thousands) / thousands (in million)
    dcrsp['me'] = dcrsp['prc'].abs() * dcrsp['shrout'] * 1e-6
    
    dcrsp = dcrsp[['date','permno','ret','me','exchcd','prc']].sort_values(by=['date','permno']).reset_index(drop=True).copy()   

    # Check condition of permno used in the paper
    # check whether price(t-13) is missing
    dcrsp['p13_missing'] = dcrsp[['prc','permno']].groupby('permno').shift(13).isna()
    # check whether return(t-2) is missing
    dcrsp['ret2_missing'] = dcrsp[['ret','permno']].groupby('permno').shift(2).isna()
    # check whether market equity(t-2) is missing
    dcrsp['me1_missing'] = dcrsp[['me','permno']].groupby('permno').shift(1).isna()
    # create a dataframe with permno that are not missing any one mentioned above
    keep = dcrsp[~(dcrsp['p13_missing'] | dcrsp['ret2_missing'] | dcrsp['me1_missing'])]
    # extract the permno 
    permno = keep['permno'].unique()
    # update our data with permno meet requirements
    dcrsp = dcrsp[dcrsp['permno'].isin(permno)].copy()

    # Calculate lagged market equity
    dcrsp.loc[:, 'Lme'] = dcrsp.groupby('permno')['me'].shift(1)

    dcrsp = dcrsp[dcrsp['Lme'] > 0].reset_index(drop=True).copy()

    # Sort date by date
    dcrsp = dcrsp.sort_values(['date']).reset_index(drop=True).copy()

    dcrsp.rename(columns={'Lme': 'lag_Mkt_Cap','ret': "Ret",'permno':'PERMNO','exchcd':'EXCHCD'}, inplace=True)

    # Code from Momentum file to calculate the ranking return
    dcrsp['mltp'] = dcrsp.Ret + 1
    dcrsp['cumret'] = dcrsp.groupby('PERMNO').mltp.cumprod()
    # count how many times I cumulated
    dcrsp['ct'] = 1
    dcrsp['ct'] = dcrsp.groupby('PERMNO').ct.cumsum()
    
    # divide by cumulative product
    dcrsp['Ranking_Ret'] = np.where(dcrsp['ct'] == 11, dcrsp.cumret-1, dcrsp.cumret/dcrsp.groupby('PERMNO').cumret.shift(11)-1)
    dcrsp.drop(['mltp','cumret','ct'], axis = 1,inplace=True)
    dcrsp = dcrsp[dcrsp['Ranking_Ret'].notna()].copy() 

    # Make sure data date is what we want
    dcrsp = dcrsp[(dcrsp['date'].dt.year>1926) & (dcrsp['date'].dt.year<2024)].copy()  

    # Creat Year Variable for output
    dcrsp['Year'] = dcrsp['date'].dt.year

    # Creat Month Variable for output
    dcrsp['Month'] = dcrsp['date'].dt.month
    dcrsp = dcrsp[['Year','Month','PERMNO','EXCHCD','lag_Mkt_Cap',"Ret",'Ranking_Ret']].copy()
    return dcrsp

In [12]:
CRSP_Stocks_Momentum = PS3_Q1(CRSP_Stocks)
CRSP_Stocks_Momentum.head(5)

Unnamed: 0,Year,Month,PERMNO,EXCHCD,lag_Mkt_Cap,Ret,Ranking_Ret
6040,1927,1,14162,1.0,0.007009,-0.003175,-0.186483
6042,1927,1,14189,1.0,0.01725,0.03913,0.186715
6043,1927,1,14197,1.0,0.010111,0.038251,0.348966
6044,1927,1,14218,1.0,0.3374,0.018672,0.357422
6045,1927,1,14226,1.0,0.005024,0.019108,-0.076804


In [13]:
# Function to calculte breakpoints given in Momentum file
# Version 3: Ben Chen modified to allow different breakpoints:
def bins3(df,K,s,d,use_cutoff_flag = False):
    df = df.copy()
    # Defining the momentum percentiles
    df['brkpts_flag'] = True
    if use_cutoff_flag:
        df['brkpts_flag'] = (df['cutoff_flag'] == 1)
    #what makes the stock into different buckets
    def diff_brkpts(x,K):
        brkpts_flag = x['brkpts_flag']
        x = x[s]
        loc_nyse = x.notna() & brkpts_flag  
        if np.sum(loc_nyse) > 0:
            breakpoints = pd.qcut(x[loc_nyse], K, retbins=True, labels=False)[1]
            breakpoints[0] = -np.inf
            breakpoints[K] = np.inf
            y = pd.cut(x, bins=breakpoints, labels=False) + 1
        else:
            y = x + np.nan
        return y

    df['bin'] = df.groupby(d).apply(lambda x:diff_brkpts(x,K)).reset_index()[s]
    df['bin'] = df['bin'].astype('float')
    df.drop('brkpts_flag',axis=1,inplace=True)
    return df

In [14]:
def PS3_Q2(Q3):
    K = 2 # number of bins to use
    Signal_lag = 2 # number of weeks to lag your signal. For momentun, we lag about (~2 month) becuase of short-term reversal.
    Q3['signal'] = Q3['lag_Mkt_Cap']\
#                                  .groupby('PERMNO')['Ranking_Ret'].shift(Signal_lag)
    # Only keep signal not na
    Q3 = Q3[Q3['signal'].notna()].copy()
    # Create Flag indicating which permnos-week to use when defining breakpoints
        # Here we use only NYSE firms to define breakpoints in KRF Method
    Q3['cutoff_flag'] = np.where(Q3['EXCHCD'] == 1,1,0)

    # Droping missing signals
    Q3 = Q3[Q3['signal'].notna() & Q3['lag_Mkt_Cap'].notna()]
    Q3 = Q3.merge(pd.DataFrame(Q3.groupby(['Year','Month'])['signal'].count()).rename(columns={'signal':"ct"}),on=['Year','Month'],how='left')

    # Create bins for KRF method and DM method
    Q3 = bins3(Q3,K,'signal',['Year','Month'],True).rename(columns={'bin':'KRF_decile'})
    Q3 = bins3(Q3,K,'signal',['Year','Month']).rename(columns={'bin':'DM_decile'})
    
    # Changes bins into integers
    Q3[['KRF_decile','DM_decile']] = Q3[['KRF_decile','DM_decile']].astype(int)
    
    # Output only what we want
    Q3 = Q3[['Year','Month','PERMNO','lag_Mkt_Cap',"Ret",'DM_decile','KRF_decile','EXCHCD']].copy()
    return Q3

In [15]:
CRSP_Stocks_Momentum_decile = PS3_Q2(CRSP_Stocks_Momentum)
CRSP_Stocks_Momentum_decile.head(5)

  df['bin'] = df.groupby(d).apply(lambda x:diff_brkpts(x,K)).reset_index()[s]
  df['bin'] = df.groupby(d).apply(lambda x:diff_brkpts(x,K)).reset_index()[s]


Unnamed: 0,Year,Month,PERMNO,lag_Mkt_Cap,Ret,DM_decile,KRF_decile,EXCHCD
0,1927,1,14162,0.007009,-0.003175,1,1,1.0
1,1927,1,14189,0.01725,0.03913,2,2,1.0
2,1927,1,14197,0.010111,0.038251,1,1,1.0
3,1927,1,14218,0.3374,0.018672,2,2,1.0
4,1927,1,14226,0.005024,0.019108,1,1,1.0


In [16]:
# Prepare for input
FF3 = pandas_datareader.famafrench.FamaFrenchReader('F-F_Research_Data_Factors', start='1926', end='2024')
FF3 = FF3.read()[0] / 100  # Monthly data
FF3.columns = ['MktRF', 'SMB', 'HML', 'Rf']
FF3 = FF3.reset_index().rename(columns={"Date": "date"}).copy()

# Set date column to datetime format
FF3['date'] = pd.DataFrame(FF3[['date']].values.astype('datetime64[ns]')) + MonthEnd(0)

In [17]:
def PS3_Q3(Q4,FF):
    # Create the month and year variables for merge with our dataset from question 2
    FF['Year'] = FF['date'].dt.year   
    FF['Month'] = FF['date'].dt.month
    
    # Sort data by year month and DM bins preparing for calculating the value weighted return in each bins
    Q4 = Q4[['Year','Month','PERMNO','KRF_decile','DM_decile','lag_Mkt_Cap','EXCHCD','Ret']]\
                    .sort_values(['Year','Month','DM_decile']).reset_index(drop=True).copy()
    Q4 = Q4[Q4['lag_Mkt_Cap'] > 0].reset_index(drop=True).copy()
    
    # Calculate the total laged market equity in each bins
    Q4 = Q4.merge(Q4[['Year','Month','DM_decile','lag_Mkt_Cap']].groupby(['Year','Month','DM_decile']).sum()\
                            .reset_index().rename(columns={"lag_Mkt_Cap":"LmeTotal_DM"}),on=['Year','Month','DM_decile'],
                            how='left')
    
    # vw_return_DM is how much you would get
    # vw_share_DM means how much to hold in each bins for DM
    Q4['vw_return_DM'] = Q4['Ret'] * Q4['lag_Mkt_Cap'] / Q4['LmeTotal_DM']

    Q4 = Q4.sort_values(['DM_decile','Year','Month','lag_Mkt_Cap']).reset_index(drop=True).copy()

    # Sort data by year month and KRF bins preparing for calculating the value weighted return in each bins
    Q4 = Q4[['Year','Month','PERMNO','KRF_decile','DM_decile','lag_Mkt_Cap','EXCHCD','Ret','LmeTotal_DM','vw_return_DM']]\
                    .sort_values(['Year','Month','KRF_decile']).reset_index(drop=True).copy()
    Q4 = Q4[Q4['lag_Mkt_Cap'] > 0].reset_index(drop=True).copy()
    
    # Calculate the total laged market equity in each bins
    Q4 = Q4.merge(Q4[['Year','Month','KRF_decile','lag_Mkt_Cap']].groupby(['Year','Month','KRF_decile']).sum()\
                            .reset_index().rename(columns={"lag_Mkt_Cap":"LmeTotal_KRF"}),on=['Year','Month','KRF_decile'],
                            how='left')
    
    # vw_return_DM is how much you would get
    # vw_share_DM means how much to hold in each bins for DM
    Q4['vw_return_KRF'] =  Q4['Ret'] * Q4['lag_Mkt_Cap'] / Q4['LmeTotal_KRF']
    Q4 = Q4.sort_values(['KRF_decile','Year','Month','lag_Mkt_Cap']).reset_index(drop=True).copy()

    # Calculate the return for each bins
    # Make copies
    Q5 = Q4.copy()
    Q6 = Q4.copy()
    
    # sum the value weighted return of firms within same bins for DM and KRF methods
    Q5 = Q5[['Year','Month', 'DM_decile','vw_return_DM','vw_return_KRF','KRF_decile']].groupby(['Year','Month','DM_decile'])\
                            .agg({'vw_return_DM': 'sum'}).reset_index()\
                            .rename(columns={'vw_return_DM': 'DM_Ret',"DM_decile":"Decile"})
    Q6 = Q6[['Year','Month', 'DM_decile','vw_return_DM','vw_return_KRF','KRF_decile']].groupby(['Year','Month', 'KRF_decile'])\
                            .agg({'vw_return_KRF': 'sum'}).reset_index()\
                            .rename(columns={'vw_return_KRF': 'KRF_Ret',"KRF_decile":"Decile"})
    
    # Merge DM returns and KRF returns to one dataframe and prepare for output
    Q6 = Q5.merge(Q6, on=['Year', 'Month','Decile'], how='outer')
    Q6 = Q6.merge(FF[['Year','Month','Rf']],on=['Year','Month'],how='outer').sort_values(['Year','Month','Decile']).copy().dropna()
    return Q6

In [18]:
CRSP_Stocks_Momentum_returns = PS3_Q3(CRSP_Stocks_Momentum_decile,FF3)

In [19]:
CRSP_Stocks_Momentum_returns.head(5)

Unnamed: 0,Year,Month,Decile,DM_Ret,KRF_Ret,Rf
6,1927,1,1.0,0.020942,0.020942,0.0025
7,1927,1,2.0,0.000555,0.000555,0.0025
8,1927,2,1.0,0.062194,0.062194,0.0026
9,1927,2,2.0,0.044594,0.044594,0.0026
10,1927,3,1.0,-0.030893,-0.030893,0.003


In [20]:
column_names = ['date', 'Decile', 'Ret', 'Me', 'NF']
DM_returns = pd.read_csv('m_m_pt_tot.txt', delim_whitespace=True, names=column_names)
DM_returns['date'] = pd.to_datetime(DM_returns['date'], format='%Y%m%d', errors='ignore') 

In [23]:
def PS3_Q4(Q7,compare):
    # Create year and month variables for merge
    compare['Year'] = compare['date'].dt.year   
    compare['Month'] = compare['date'].dt.month

    # Merge the return obtained by other with out replication
    Q7 = Q7.merge(compare[['Year','Month','Decile','Ret']],on=['Year','Month','Decile'],how='left').copy()
    
    # Extract the 1st decile and 10th decile to create WML portfolio
    D1 = Q7[Q7['Decile'] == 1][['Year','Month','Decile','DM_Ret','Ret','Rf']].sort_values(['Year','Month'])
    D10 = Q7[Q7['Decile'] == 10][['Year', 'Month','Decile','DM_Ret','Ret','Rf']].sort_values(['Year','Month'])
    WML = D1.merge(D10[['Year','Month','Decile','DM_Ret','Ret','Rf']],on=['Year','Month'],how='left').copy()
    
    # WML portoflio return is winner minus loser becuase we long the winners and short the losers
    WML['DM_Ret'] = WML['DM_Ret_y'] - WML['DM_Ret_x']
    
    # Similarly apply to the author's return
    WML['Ret'] = WML['Ret_y'] - WML['Ret_x']
    
    # create a decile label for WML portfolio
    WML['Decile'] = 'WML'
    
    # Keep the riskfree return
    WML['Rf'] = WML['Rf_x']
    WML = WML[['Year', 'Month','Decile','DM_Ret','Ret','Rf']].sort_values(['Year','Month'])

    # Concat our WML portfolio for DM method with the returns we obtained for each decile using two methods
    Q7 = pd.concat([Q7, WML]).copy()
    Q7 = Q7.sort_values(['Year','Month','Decile']).reset_index().drop(['index'], axis = 1)
    
    # Calcualte the log return for calcuation of skewness
    Q7['DM_log'] = np.log(1+Q7['DM_Ret'])
    Q7 = Q7.sort_values(['Year','Month']).copy()
    
    # Calculate excess returns for calculation of statistics
    Q7['DM_excess'] = Q7['DM_Ret'] - Q7['Rf']
    Q7['CP_excess'] = Q7['Ret'] - Q7['Rf']
    
    # Create a df to store our statistics
    rowname = ['r-rf', 'sigma','Sharpe Ratio', 'Skewness','Correlation']
    colname = Q7['Decile'].unique()
    df = pd.DataFrame(np.nan, index=rowname, columns=colname)

    # Calculate the statistics
    excess_ret = round(Q7.groupby('Decile')[['DM_excess']].mean()*1200,2)
    sigma = round(Q7.groupby('Decile')[['DM_excess']].std()*np.sqrt(12)*100,2)
    sr = round(excess_ret/sigma,2)
    sk = round(Q7.groupby('Decile')[['DM_log']].skew(axis = 0, skipna = True),2)
#    correlation = Q7.groupby('Decile')[['DM_excess', 'CP_excess']].corr().reset_index()
#    correlation = round(correlation.loc[np.arange(1,22,2),'DM_excess'].reset_index(),4)
#    correlation.index = Q7['Decile'].unique()

    df.loc['r-rf'] = excess_ret.iloc[:,0]
    df.loc['sigma'] = sigma.iloc[:,0]
    df.loc['Sharpe Ratio'] = sr.iloc[:,0]
    df.loc['Skewness'] = sk.iloc[:,0]
#    df.loc['Correlation'] = correlation.iloc[:,1]
    
    return df

In [24]:
PS3_Q4(CRSP_Stocks_Momentum_returns,DM_returns)

Unnamed: 0,1.0,2.0,WML
r-rf,10.71,8.13,
sigma,27.54,18.34,
Sharpe Ratio,0.39,0.44,
Skewness,0.2,-0.55,
Correlation,,,


In [25]:
KRF_returns = pandas_datareader.famafrench.FamaFrenchReader('10_Portfolios_Prior_12_2', start='1926', end='2024')
KRF_returns = KRF_returns.read()[0] / 100  # Monthly data
KRF_returns = KRF_returns.reset_index().rename(columns={"Date": "date"}).copy()
KRF_returns['date'] = pd.DataFrame(KRF_returns[['date']].values.astype('datetime64[ns]')) + MonthEnd(0)
KRF_returns.columns = ['date',1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
KRF_returns = KRF_returns.melt(id_vars=['date'],var_name='Decile', value_name='Ret').copy()

In [26]:
compare = KRF_returns.copy()
Q7 = CRSP_Stocks_Momentum_returns.copy()
# Create year and month variables for merge
compare['Year'] = compare['date'].dt.year   
compare['Month'] = compare['date'].dt.month

# Merge the return obtained by other with out replication
Q7 = Q7.merge(compare[['Year','Month','Decile','Ret']],on=['Year','Month','Decile'],how='left').copy()

# Extract the 1st decile and 10th decile to create WML portfolio
D1 = Q7[Q7['Decile'] == 1][['Year','Month','Decile','DM_Ret','Ret','Rf']].sort_values(['Year','Month'])
D10 = Q7[Q7['Decile'] == 10][['Year', 'Month','Decile','DM_Ret','Ret','Rf']].sort_values(['Year','Month'])
WML = D1.merge(D10[['Year','Month','Decile','DM_Ret','Ret','Rf']],on=['Year','Month'],how='left').copy()

# WML portoflio return is winner minus loser becuase we long the winners and short the losers
WML['DM_Ret'] = WML['DM_Ret_y'] - WML['DM_Ret_x']

# Similarly apply to the author's return
WML['Ret'] = WML['Ret_y'] - WML['Ret_x']

# create a decile label for WML portfolio
WML['Decile'] = 'WML'

# Keep the riskfree return
WML['Rf'] = WML['Rf_x']
WML = WML[['Year', 'Month','Decile','DM_Ret','Ret','Rf']].sort_values(['Year','Month'])

# Concat our WML portfolio for DM method with the returns we obtained for each decile using two methods
Q7 = pd.concat([Q7, WML]).copy()
Q7 = Q7.sort_values(['Year','Month','Decile']).reset_index().drop(['index'], axis = 1)

# Calcualte the log return for calcuation of skewness
Q7['DM_log'] = np.log(1+Q7['DM_Ret'])
Q7 = Q7.sort_values(['Year','Month']).copy()

# Calculate excess returns for calculation of statistics
Q7['DM_excess'] = Q7['DM_Ret'] - Q7['Rf']
Q7['CP_excess'] = Q7['Ret'] - Q7['Rf']
start = np.where((Q7['Year'] == 1927) & (Q7['Month'] == 3))[0][0]
end = np.where((Q7['Year'] == 2004) & (Q7['Month'] == 12))[0][0]
Q7 = Q7[start:(end+1)].copy().sort_values(['Year','Month'])

In [29]:
Q7[(Q7['Month'] == 1) & (Q7['Decile']==1)]['DM_excess'].mean()*100

6.676248317278654

In [None]:

# Create a df to store our statistics 
rowname = ['r-rf', 'sigma','Sharpe Ratio', 'Skewness','Correlation']
colname = Q7['Decile'].unique()
df = pd.DataFrame(np.nan, index=rowname, columns=colname)

# Calculate the statistics
excess_ret = round(Q7.groupby('Decile')[['KRF_excess']].mean()*1200,2)
sigma = round(Q7.groupby('Decile')[['KRF_excess']].std()*np.sqrt(12)*100,2)
sr = round(excess_ret/sigma,2)
sk = round(Q7.groupby('Decile')[['KRF_log']].skew(axis = 0, skipna = True),2)
correlation = Q7.groupby('Decile')[['KRF_excess', 'CP_excess']].corr().reset_index()
correlation = round(correlation.loc[np.arange(1,22,2),'KRF_excess'].reset_index(),4)
correlation.index = Q7['Decile'].unique()

In [24]:
def PS3_Q5(Q7,compare):
    # Create year and month variables for merge
    compare['Year'] = compare['date'].dt.year   
    compare['Month'] = compare['date'].dt.month

    # Merge the return obtained by other with out replication
    Q7 = Q7.merge(compare[['Year','Month','Decile','Ret']],on=['Year','Month','Decile'],how='left').copy()
    
    # Extract the 1st decile and 10th decile to create WML portfolio
    D1 = Q7[Q7['Decile'] == 1][['Year','Month','Decile','KRF_Ret','Ret','Rf']].sort_values(['Year','Month'])
    D10 = Q7[Q7['Decile'] == 10][['Year', 'Month','Decile','KRF_Ret','Ret','Rf']].sort_values(['Year','Month'])
    
    # WML portoflio return is winner minus loser becuase we long the winners and short the losers
    WML = D1.merge(D10[['Year','Month','Decile','KRF_Ret','Ret','Rf']],on=['Year','Month'],how='left').copy()
    
    # Similarly apply to the author's return
    WML['KRF_Ret'] = WML['KRF_Ret_y'] - WML['KRF_Ret_x']
    
    # Keep the riskfree return
    WML['Ret'] = WML['Ret_y'] - WML['Ret_x']
    
    # create a decile label for WML portfolio
    WML['Decile'] = 'WML'
    
    # Keep the riskfree return
    WML['Rf'] = WML['Rf_x']
    
    WML = WML[['Year', 'Month','Decile','KRF_Ret','Ret','Rf']].sort_values(['Year','Month'])
    
    # Concat our WML portfolio for DM method with the returns we obtained for each decile using two methods
    Q7 = pd.concat([Q7, WML]).copy()
    Q7 = Q7.sort_values(['Year','Month','Decile']).reset_index().drop(['index'], axis = 1)
    
    # Calcualte the log return for calcuation of skewness
    Q7['KRF_log'] = np.log(1+Q7['KRF_Ret'])
    Q7 = Q7.sort_values(['Year','Month']).copy()
    
    # Calculate excess returns for calculation of statistics
    Q7['KRF_excess'] = Q7['KRF_Ret'] - Q7['Rf']
    Q7['CP_excess'] = Q7['Ret'] - Q7['Rf']
    
    # Create a df to store our statistics 
    rowname = ['r-rf', 'sigma','Sharpe Ratio', 'Skewness','Correlation']
    colname = Q7['Decile'].unique()
    df = pd.DataFrame(np.nan, index=rowname, columns=colname)

    # Calculate the statistics
    excess_ret = round(Q7.groupby('Decile')[['KRF_excess']].mean()*1200,2)
    sigma = round(Q7.groupby('Decile')[['KRF_excess']].std()*np.sqrt(12)*100,2)
    sr = round(excess_ret/sigma,2)
    sk = round(Q7.groupby('Decile')[['KRF_log']].skew(axis = 0, skipna = True),2)
    correlation = Q7.groupby('Decile')[['KRF_excess', 'CP_excess']].corr().reset_index()
    correlation = round(correlation.loc[np.arange(1,22,2),'KRF_excess'].reset_index(),4)
    correlation.index = Q7['Decile'].unique()

    df.loc['r-rf'] = excess_ret.iloc[:,0]
    df.loc['sigma'] = sigma.iloc[:,0]
    df.loc['Sharpe Ratio'] = sr.iloc[:,0]
    df.loc['Skewness'] = sk.iloc[:,0]
    df.loc['Correlation'] = correlation.iloc[:,1]
    return df

In [25]:
PS3_Q5(CRSP_Stocks_Momentum_returns,KRF_returns)

Unnamed: 0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,WML
r-rf,1.42,5.65,6.27,7.45,7.63,7.93,8.94,10.08,10.82,14.34,9.71
sigma,34.25,28.18,24.14,21.84,20.33,19.9,18.86,18.37,19.29,22.34,27.65
Sharpe Ratio,0.04,0.2,0.26,0.34,0.38,0.4,0.47,0.55,0.56,0.64,0.35
Skewness,0.14,-0.15,-0.08,0.05,-0.15,-0.33,-0.61,-0.51,-0.76,-0.91,-6.28
Correlation,0.9979,0.997,0.9976,0.9969,0.9966,0.9968,0.9968,0.9979,0.9981,0.9975,0.9951


## Question 7 
### Calculations for past 10 years

In [26]:
past_10 = CRSP_Stocks_Momentum_returns[CRSP_Stocks_Momentum_returns.Year > 2013].copy()
past_10_DM = DM_returns[DM_returns['date'].dt.year > 2013].copy()
PS3_Q4(past_10,past_10_DM)

Unnamed: 0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,WML
r-rf,2.13,8.21,8.95,12.47,12.88,8.28,11.78,9.59,9.27,14.57,11.28
sigma,40.9,30.55,24.58,20.89,17.91,16.37,15.4,15.27,14.85,20.66,34.16
Sharpe Ratio,0.05,0.27,0.36,0.6,0.72,0.51,0.76,0.63,0.62,0.71,0.33
Skewness,0.16,-0.51,-0.37,-0.62,-0.6,-0.69,-0.34,-0.26,-0.44,-0.17,-1.35
Correlation,0.9887,0.984,0.9869,0.9918,0.9923,0.9955,0.9965,0.9961,0.9851,0.9922,0.9864


In [27]:
past_10_KRF = KRF_returns[KRF_returns['date'].dt.year > 2013].copy()
PS3_Q5(past_10,past_10_KRF)

Unnamed: 0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,WML
r-rf,8.72,11.22,12.8,12.04,10.9,11.14,10.65,8.81,9.37,11.99,2.1
sigma,35.28,26.37,21.27,18.04,16.63,16.16,15.59,15.18,14.87,19.36,28.91
Sharpe Ratio,0.25,0.43,0.6,0.67,0.66,0.69,0.68,0.58,0.63,0.62,0.07
Skewness,0.28,-0.71,-0.38,-0.67,-0.48,-0.6,-0.09,-0.3,-0.5,-0.29,-1.28
Correlation,0.9956,0.9935,0.9928,0.9964,0.9962,0.9923,0.9913,0.9946,0.9958,0.9965,0.992
