In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas.tseries.offsets import *
import wrds
from scipy import stats
import pandas_datareader
import datetime
os.chdir('/Users/yiyujie/Desktop/program/Quantitative Asset Management')

In [2]:
conn = wrds.Connection(wrds_username='aspyyj612')

Loading library list...
Done


In [3]:
import warnings
warnings.filterwarnings('ignore')

## Q1
Prepare data for analysis. Combine necessary CRSP and Compustat datasets needed to define
size and book-to-market decile portfolios as defined in Fama and French (1992)1, as well as the
HML and SMB factors as defined in Fama and French (1993)2. Detail which datasets you use,
how you merged them, how you calculated the portfolios, and any differences between the building
of the decile portfolios and the factors. Output should be between January 1973 and December
2023.

In [4]:
crsp_cstat=conn.raw_sql("""
                  select gvkey, lpermno as permno, lpermco as permco, linktype, linkprim, liid,
                  linkdt, linkenddt
                  from crspq.ccmxpf_linktable
                  where substr(linktype,1,1)='L'
                  and (linkprim ='C' or linkprim='P')
                  """)

In [5]:
Pension = conn.raw_sql("""
                        select gvkey, datadate, prba
                        from comp.aco_pnfnda
                        where indfmt='INDL'
                        and datafmt='STD'
                        and popsrc='D'
                        and consol='C'
                        """)

In [6]:
crsp_raw = conn.raw_sql("""
                      select a.permno, a.permco, a.date, b.exchcd, b.shrcd, b.siccd, b.naics,
                      a.ret, a.retx, a.shrout, a.prc
                      from crspq.msf as a
                      left join crspq.msenames as b
                      on a.permno=b.permno
                      and b.namedt<=a.date
                      and a.date<=b.nameendt
                      where b.shrcd in (10,11)
                      and b.exchcd in (1,2,3)
                      """)

In [7]:
cstat = conn.raw_sql("""
                    select a.gvkey, a.datadate, a.at, a.pstkl, a.txditc, a.fyear, a.ceq, a.lt, 
                    a.mib, a.itcb, a.txdb, a.pstkrv, a.seq, a.pstk, b.sic, b.year1, b.naics
                    from comp.funda as a
                    left join comp.names as b
                    on a.gvkey = b.gvkey
                    where indfmt='INDL'
                    and datafmt='STD'
                    and popsrc='D'
                    and consol='C'
                    """)

In [8]:
dlret_raw = conn.raw_sql('''select permno, dlret, dlstdt, dlstcd 
                            from crspq.msedelist
                         ''')

In [9]:
crsp_cstat.to_pickle("crsp_cstat.pkl")
crsp_raw.to_pickle("crsp_raw.pkl")
cstat.to_pickle("cstat.pkl")
dlret_raw.to_pickle("dlret_raw.pkl")
Pension.to_pickle("Pension.pkl")

In [4]:
crsp_cstat = pd.read_pickle("crsp_cstat.pkl")
crsp_raw = pd.read_pickle("crsp_raw.pkl")
cstat = pd.read_pickle("cstat.pkl")
dlret_raw = pd.read_pickle("dlret_raw.pkl")
Pension = pd.read_pickle("Pension.pkl")

In [5]:
crsp_raw = crsp_raw.sort_values(['permno','date']).reset_index(drop=True).copy()
crsp_raw[['permno','permco']] = crsp_raw[['permno','permco']].astype(int)
crsp_raw['date'] = pd.to_datetime(crsp_raw['date'], format='%Y-%m-%d', errors='ignore')

dlret_raw = dlret_raw.sort_values(['permno','dlstdt']).reset_index(drop=True).copy()
dlret_raw.permno = dlret_raw.permno.astype(int)
dlret_raw['dlstdt'] = pd.to_datetime(dlret_raw['dlstdt'])
dlret_raw['date'] = dlret_raw['dlstdt'] + MonthEnd(0)

crsp_stocks = crsp_raw.merge(dlret_raw, how='outer', on=['date','permno'])
crsp_stocks

Unnamed: 0,permno,permco,date,exchcd,shrcd,siccd,naics,ret,retx,shrout,prc,dlret,dlstdt,dlstcd
0,10006,22156.0,1925-12-31,1.0,10.0,3740.0,,,,600.0,109.00,,NaT,
1,10022,22158.0,1925-12-31,1.0,10.0,3420.0,,,,200.0,56.00,,NaT,
2,10030,22160.0,1925-12-31,1.0,10.0,3310.0,,,,156.0,150.00,,NaT,
3,10057,20020.0,1925-12-31,1.0,11.0,3540.0,,,,500.0,12.25,,NaT,
4,10073,22162.0,1925-12-31,1.0,10.0,3520.0,,,,138.0,17.50,,NaT,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3808179,93426,,2024-03-31,,,,,,,,,,2024-03-28,100.0
3808180,93427,,2024-03-31,,,,,,,,,,2024-03-28,100.0
3808181,93429,,2024-03-31,,,,,,,,,,2024-03-28,100.0
3808182,93434,,2024-03-31,,,,,,,,,,2024-03-28,100.0


In [6]:
crsp_stocks = crsp_stocks[(crsp_stocks['shrcd'].isin([10, 11])) & 
            (crsp_stocks['exchcd'].isin([1, 2, 3]))]

# calculate Market value
crsp_stocks['ME'] = crsp_stocks['prc'].abs() * crsp_stocks['shrout'] * 1e-6
# ** **
crsp_stocks = crsp_stocks[crsp_stocks['ME'].notna()]
crsp_stocks['lag_mkt_cap'] = crsp_stocks[["permno", "ME"]].groupby("permno").shift(1)
crsp_stocks['lag_mkt_cap_permco'] = crsp_stocks.groupby(["date", "permco"])['lag_mkt_cap'].transform('sum')

# calculate RET
crsp_stocks['dlret'] = pd.to_numeric(crsp_stocks['dlret'], errors='coerce')
crsp_stocks['ret'] = pd.to_numeric(crsp_stocks['ret'], errors='coerce')
crsp_stocks = crsp_stocks.loc[crsp_stocks['dlret'].notna()|crsp_stocks['ret'].notna()].reset_index(drop=True)
crsp_stocks['ret'] = np.where(crsp_stocks['ret'].notna() & crsp_stocks['dlret'].notna(), 
                        (1 + crsp_stocks['ret']) * (1 + crsp_stocks['dlret']) - 1, crsp_stocks['ret'])
crsp_stocks['ret'] = np.where(crsp_stocks['ret'].isna()  & crsp_stocks['dlret'].notna(), crsp_stocks['dlret'], crsp_stocks['ret'])

In [7]:
# Step 1: merge all links
crsp_linktable = pd.merge(crsp_cstat, crsp_stocks, on = ['permno', 'permco'], how='inner')

# Step 2: restrict to valid links
crsp_linktable['date'] = pd.to_datetime(crsp_linktable['date'])
crsp_linktable['linkdt'] = pd.to_datetime(crsp_linktable['linkdt'])
# crsp_linktable['linkenddt'] = pd.to_datetime(crsp_linktable['linkenddt'].fillna('2028-01-01'))
crsp_linktable = crsp_linktable[(crsp_linktable['linkdt'].isna() | (crsp_linktable['date'] >= crsp_linktable['linkdt'])) & (crsp_linktable['linkenddt'].isna() | (crsp_linktable['date'] <= crsp_linktable['linkenddt']))].copy()
crsp_linktable = crsp_linktable.sort_values(by=['permno', 'date']).reset_index(drop=True).copy()

# Step 3: if LC not LC linktype, only keep LC (LC stands for "Link research complete. Standard connection between databases.")
crsp_linktable['keep'] = True
crsp_linktable['flag'] = np.where(crsp_linktable['linktype'] == 'LC', 1, 0)  # Link research complete. Standard connection between databases.
crsp_linktable = crsp_linktable.merge(crsp_linktable[['permno', 'date', 'flag']].groupby(['permno', 'date']).sum().reset_index().rename(columns={'flag': 'ct_flag'}),on=['permno', 'date'], how='left')
crsp_linktable.loc[crsp_linktable[['permno', 'date']].duplicated(keep=False) & (crsp_linktable['ct_flag'] >= 1) & (crsp_linktable['flag'] == 0), 'keep'] = False
crsp_linktable = crsp_linktable[crsp_linktable['keep']].copy()
crsp_linktable.drop(['keep', 'flag', 'ct_flag'], axis=1, inplace=True)

# Step 4: if P and not P linkprim, only keep P
crsp_linktable['keep'] = True
crsp_linktable['flag'] = np.where((crsp_linktable['linkprim'] == 'P'), 1, 0)  # Primary Link Marker: "P" indicates a primary link marker, as identified by Compustat in monthly security data.
crsp_linktable = crsp_linktable.merge(crsp_linktable[['permno', 'date', 'flag']].groupby(['permno', 'date']).sum().reset_index().rename(columns={'flag': 'ct_flag'}),on=['permno', 'date'], how='left')
crsp_linktable.loc[crsp_linktable[['permno', 'date']].duplicated(keep=False) & (crsp_linktable['ct_flag'] >= 1) & (crsp_linktable['flag'] == 0), 'keep'] = False
crsp_linktable = crsp_linktable[crsp_linktable['keep']].copy()

crsp_linktable = crsp_linktable[['permno','gvkey','date','exchcd','lag_mkt_cap','lag_mkt_cap_permco','ret','retx']]
crsp_linktable['Year'] = crsp_linktable['date'].dt.year
crsp_linktable['Month'] = crsp_linktable['date'].dt.month
# crsp_linktable = crsp_linktable[(crsp_linktable['Year'] <= 2023)&(crsp_linktable['Year'] >= 1973)]

crsp_linktable

Unnamed: 0,permno,gvkey,date,exchcd,lag_mkt_cap,lag_mkt_cap_permco,ret,retx,Year,Month
0,10000.0,013007,1986-02-28,3.0,0.016100,0.016100,-0.257143,-0.257143,1986,2
1,10000.0,013007,1986-03-31,3.0,0.011960,0.011960,0.365385,0.365385,1986,3
2,10000.0,013007,1986-04-30,3.0,0.016330,0.016330,-0.098592,-0.098592,1986,4
3,10000.0,013007,1986-05-30,3.0,0.015172,0.015172,-0.222656,-0.222656,1986,5
4,10000.0,013007,1986-06-30,3.0,0.011794,0.011794,-0.005025,-0.005025,1986,6
...,...,...,...,...,...,...,...,...,...,...
3218364,93436.0,184996,2023-11-30,3.0,638.454494,638.454494,0.195379,0.195379,2023,11
3218365,93436.0,184996,2023-12-29,3.0,763.195354,763.195354,0.034988,0.034988,2023,12
3218366,93436.0,184996,2024-01-31,3.0,791.408800,791.408800,-0.246257,-0.246257,2024,1
3218367,93436.0,184996,2024-02-29,3.0,596.479287,596.479287,0.077901,0.077901,2024,2


In [8]:
cstat = pd.merge(cstat, Pension, on = ['gvkey','datadate'], how = 'left')

cstat['SHE'] = cstat['seq']  # Using the shareholder equity seq directly from Compustat

# If data on shareholders' equity is missing, use the option
cstat['SHE'] = cstat['SHE'].fillna(cstat['ceq'] + cstat['pstk'])
cstat['SHE'] = cstat['SHE'].fillna(cstat['at'] - cstat['lt'] - cstat['mib'])
cstat['SHE'] = cstat['SHE'].fillna(cstat['at'] - cstat['lt'])

# Treatment of deferred taxes and investment tax credits (DT)
cstat['DT'] = cstat['txditc'].fillna(cstat['itcb'] + cstat['txdb'])
cstat['DT'] = cstat['DT'].fillna(cstat['itcb'])
cstat['DT'] = cstat['DT'].fillna(cstat['txdb'])

# Carrying value of preferred shares (PS)
cstat['PS'] = cstat['pstkrv'].fillna(cstat['pstkl'])
cstat['PS'] = cstat['PS'].fillna(cstat['pstk'])

# calculate Book Equity (BE)
cstat['BE'] = cstat['SHE']
cstat.loc[(-cstat['PS']+cstat['DT']-cstat['prba']).notna(), 'BE'] = cstat.loc[(-cstat['PS']+cstat['DT']-cstat['prba']).notna()].apply(lambda x : x['BE'] - x['PS'] + x['DT'] - x['prba'], axis = 1)

cstat_BE = cstat[['gvkey','datadate','fyear','BE']].copy()

In [9]:
ME_Breakpoints = pd.read_csv('ME_Breakpoints.CSV').iloc[:-2]
ME_Breakpoints['YearMonth'] = pd.to_numeric(ME_Breakpoints['YearMonth'])
ME_Breakpoints[(ME_Breakpoints['YearMonth'] >= 197300)&(ME_Breakpoints['YearMonth'] <= 202400)]

Unnamed: 0,YearMonth,Unnamed: 1,1,2,3,4,5,6,7,8,...,11,12,13,14,15,16,17,18,19,20
565,197301,1390.0,18.83,26.25,33.51,41.84,53.65,64.01,75.68,91.95,...,167.34,206.34,257.96,345.43,438.75,559.63,704.05,1023.59,1806.89,50592.47
566,197302,1392.0,17.43,24.40,30.15,37.85,48.01,58.01,69.48,84.82,...,150.85,187.33,248.52,306.12,410.46,514.23,676.83,981.07,1703.23,50127.79
567,197303,1393.0,17.28,23.39,29.70,36.43,45.91,55.93,67.89,82.51,...,148.95,181.86,240.40,305.53,402.80,498.17,654.46,952.22,1726.88,50127.79
568,197304,1394.0,15.24,21.63,27.51,34.41,42.24,51.76,61.76,76.34,...,138.80,172.33,225.18,283.79,386.16,465.06,628.99,894.60,1676.31,47397.77
569,197305,1397.0,14.06,19.59,25.45,31.67,38.39,48.22,55.68,68.68,...,124.83,162.54,208.92,263.42,357.52,448.38,609.10,874.87,1651.87,45706.11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1172,202308,1216.0,149.84,322.23,519.14,773.35,1094.94,1417.86,1912.07,2545.21,...,4584.57,5491.96,7179.51,9012.20,12895.21,17603.72,27563.52,41937.56,78124.53,785533.69
1173,202309,1208.0,153.71,324.88,518.55,755.53,1028.56,1327.22,1782.11,2401.13,...,4363.38,5224.27,6738.59,8348.83,12194.62,16543.36,24318.26,39274.17,76042.62,763816.12
1174,202310,1210.0,145.91,287.31,475.22,697.18,929.51,1252.82,1709.86,2257.50,...,4066.03,4856.66,6270.45,7895.12,11065.85,16422.71,22902.81,38243.13,73393.81,744232.88
1175,202311,1205.0,147.33,313.69,510.70,759.55,1004.27,1355.46,1919.10,2427.24,...,4441.26,5491.83,6920.91,8628.68,12083.80,17390.11,25669.91,41813.44,78390.29,784797.69


In [10]:
BE_ME_Breakpoints = pd.read_csv('BE-ME_Breakpoints.CSV').iloc[:-2]
BE_ME_Breakpoints.Year = pd.to_numeric(BE_ME_Breakpoints.Year)
BE_ME_Breakpoints[(BE_ME_Breakpoints.Year >= 1973)&(BE_ME_Breakpoints.Year <= 2023)]#['1']

Unnamed: 0,Year,Month,Unnamed: 2,1,2,3,4,5,6,7,...,11,12,13,14,15,16,17,18,19,20
47,1973,9.0,1356.0,0.165,0.233,0.31,0.381,0.455,0.508,0.57,...,0.809,0.859,0.907,0.99,1.078,1.171,1.308,1.506,1.967,44.705
48,1974,8.0,1365.0,0.266,0.389,0.525,0.651,0.76,0.88,0.987,...,1.34,1.429,1.549,1.68,1.832,2.029,2.294,2.707,3.448,22.386
49,1975,6.0,1374.0,0.439,0.644,0.871,1.078,1.247,1.4,1.527,...,2.018,2.16,2.33,2.538,2.821,3.182,3.606,4.237,5.454,60.511
50,1976,8.0,1369.0,0.366,0.506,0.661,0.765,0.872,0.995,1.114,...,1.471,1.539,1.654,1.792,1.914,2.13,2.396,2.873,3.572,46.769
51,1977,5.0,1399.0,0.342,0.461,0.574,0.647,0.737,0.807,0.885,...,1.138,1.213,1.28,1.356,1.432,1.566,1.754,2.0,2.475,23.801
52,1978,5.0,1394.0,0.408,0.512,0.621,0.703,0.785,0.86,0.93,...,1.173,1.241,1.315,1.401,1.491,1.602,1.778,2.036,2.543,8.411
53,1979,8.0,1383.0,0.4,0.533,0.636,0.738,0.817,0.901,0.999,...,1.292,1.356,1.429,1.533,1.628,1.729,1.86,2.105,2.535,7.811
54,1980,8.0,1371.0,0.352,0.454,0.527,0.623,0.697,0.781,0.859,...,1.163,1.263,1.358,1.458,1.578,1.687,1.816,2.034,2.388,6.779
55,1981,7.0,1364.0,0.278,0.372,0.451,0.535,0.608,0.695,0.769,...,1.117,1.209,1.327,1.413,1.551,1.686,1.833,2.06,2.413,6.86
56,1982,8.0,1348.0,0.363,0.453,0.534,0.598,0.685,0.765,0.84,...,1.191,1.304,1.39,1.5,1.616,1.725,1.866,2.062,2.551,5.511


In [11]:
cstat_BE['fyear'] += 1

In [12]:
data = pd.merge(crsp_linktable, cstat_BE, left_on = ['gvkey','Year'], right_on = ['gvkey','fyear'], how = 'left')
data = data.sort_values(['permno','gvkey','date']).reset_index(drop=True).copy()
data['lag_mkt_cap_permco'] = data.groupby(['permno','gvkey'])['lag_mkt_cap_permco'].shift(5) # we use ME of Dec of year t-1
# ** **
data = data[data['BE'].notna()]
data = data[data['lag_mkt_cap_permco'].notna()]

data['BtM'] = data['BE'] / data['lag_mkt_cap_permco']
#data['Month'] = data['date'].dt.month
data['port_year'] = data['Year']
data.loc[data.Month <= 6,'port_year'] -= 1 # we reconstruct portfolio at the end of June
data = data[(data['date'] >= '1972') & (data['date'] <= '2024')]
data

Unnamed: 0,permno,gvkey,date,exchcd,lag_mkt_cap,lag_mkt_cap_permco,ret,retx,Year,Month,datadate,fyear,BE,BtM,port_year
5,10000.0,013007,1986-07-31,3.0,0.011735,0.016100,-0.080808,-0.080808,1986,7,1985-10-31,1986.0,-0.343,-21.304348,1986
6,10000.0,013007,1986-08-29,3.0,0.010786,0.011960,-0.615385,-0.615385,1986,8,1985-10-31,1986.0,-0.343,-28.678930,1986
7,10000.0,013007,1986-09-30,3.0,0.004149,0.016330,-0.057143,-0.057143,1986,9,1985-10-31,1986.0,-0.343,-21.004287,1986
8,10000.0,013007,1986-10-31,3.0,0.003912,0.015172,-0.242424,-0.242424,1986,10,1985-10-31,1986.0,-0.343,-22.607435,1986
9,10000.0,013007,1986-11-28,3.0,0.003002,0.011794,0.060000,0.060000,1986,11,1985-10-31,1986.0,-0.343,-29.082884,1986
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3218321,93436.0,184996,2023-08-31,3.0,848.821184,650.887660,-0.034962,-0.034962,2023,8,2022-12-31,2023.0,44704.000,68.681591,2023
3218322,93436.0,184996,2023-09-29,3.0,819.144340,657.505914,-0.030456,-0.030456,2023,9,2022-12-31,2023.0,44704.000,67.990263,2023
3218323,93436.0,184996,2023-10-31,3.0,795.449380,520.781202,-0.197346,-0.197346,2023,10,2022-12-31,2023.0,44704.000,85.840272,2023
3218324,93436.0,184996,2023-11-30,3.0,638.454494,646.356919,0.195379,0.195379,2023,11,2022-12-31,2023.0,44704.000,69.163025,2023


// data = data[(data['BE'].notna())&(data['BtM'].notna())]

In [13]:
june_data = data[(data['Month'] == 6) & (data['exchcd'] == 1)]
# Sort by ME, BE/ME and assign deciles
def compute_deciles(group):
    market_cap_breakpoints = group['lag_mkt_cap'].quantile([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]).tolist()
    beme_breakpoints = group['BtM'].quantile([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]).tolist()
    # print(market_cap_breakpoints[0]*1e3, market_cap_breakpoints[8]*1e3)
    group['size_decile'] = pd.cut(group['lag_mkt_cap'], bins=[0] + market_cap_breakpoints + [float('inf')], labels=range(1, 11))
    group['beme_decile'] = pd.cut(group['BtM'], bins=[-float('inf')] + beme_breakpoints + [float('inf')], labels=range(1, 11))
    return group

# 应用每年的断点
june_data = june_data.groupby(june_data['date'].dt.year).apply(compute_deciles)
june_data['port_year'] += 1

data = data.merge(june_data[['permno', 'port_year', 'size_decile', 'beme_decile']], on=['permno', 'port_year'], how='left')
data

Unnamed: 0,permno,gvkey,date,exchcd,lag_mkt_cap,lag_mkt_cap_permco,ret,retx,Year,Month,datadate,fyear,BE,BtM,port_year,size_decile,beme_decile
0,10000.0,013007,1986-07-31,3.0,0.011735,0.016100,-0.080808,-0.080808,1986,7,1985-10-31,1986.0,-0.343,-21.304348,1986,,
1,10000.0,013007,1986-08-29,3.0,0.010786,0.011960,-0.615385,-0.615385,1986,8,1985-10-31,1986.0,-0.343,-28.678930,1986,,
2,10000.0,013007,1986-09-30,3.0,0.004149,0.016330,-0.057143,-0.057143,1986,9,1985-10-31,1986.0,-0.343,-21.004287,1986,,
3,10000.0,013007,1986-10-31,3.0,0.003912,0.015172,-0.242424,-0.242424,1986,10,1985-10-31,1986.0,-0.343,-22.607435,1986,,
4,10000.0,013007,1986-11-28,3.0,0.003002,0.011794,0.060000,0.060000,1986,11,1985-10-31,1986.0,-0.343,-29.082884,1986,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2764056,93436.0,184996,2023-08-31,3.0,848.821184,650.887660,-0.034962,-0.034962,2023,8,2022-12-31,2023.0,44704.000,68.681591,2023,,
2764057,93436.0,184996,2023-09-29,3.0,819.144340,657.505914,-0.030456,-0.030456,2023,9,2022-12-31,2023.0,44704.000,67.990263,2023,,
2764058,93436.0,184996,2023-10-31,3.0,795.449380,520.781202,-0.197346,-0.197346,2023,10,2022-12-31,2023.0,44704.000,85.840272,2023,,
2764059,93436.0,184996,2023-11-30,3.0,638.454494,646.356919,0.195379,0.195379,2023,11,2022-12-31,2023.0,44704.000,69.163025,2023,,


In [14]:
#june_data = data[data['date'].dt.month == 6]
# Sort by ME, BE/ME and assign deciles
def compute_factors(group):
    market_cap_breakpoints = group['lag_mkt_cap'].quantile(0.5).tolist()
    beme_breakpoints = group['BtM'].quantile([0.3, 0.7]).tolist()
    group['size_decile'] = pd.cut(group['lag_mkt_cap'], bins=[0] + [market_cap_breakpoints] + [float('inf')], labels=['Small','Big'])
    group['beme_decile'] = pd.cut(group['BtM'], bins=[0] + beme_breakpoints + [float('inf')], labels=['Low','Medium','High'])
    group['decile'] = group['size_decile'].astype('str') + '_' + group['beme_decile'].astype('str')
    return group

# 应用每年的断点
june_data = june_data.groupby(june_data['date'].dt.year).apply(compute_factors)

data = data.merge(june_data[['permno', 'port_year', 'decile']], on=['permno', 'port_year'], how='left')
data

Unnamed: 0,permno,gvkey,date,exchcd,lag_mkt_cap,lag_mkt_cap_permco,ret,retx,Year,Month,datadate,fyear,BE,BtM,port_year,size_decile,beme_decile,decile
0,10000.0,013007,1986-07-31,3.0,0.011735,0.016100,-0.080808,-0.080808,1986,7,1985-10-31,1986.0,-0.343,-21.304348,1986,,,
1,10000.0,013007,1986-08-29,3.0,0.010786,0.011960,-0.615385,-0.615385,1986,8,1985-10-31,1986.0,-0.343,-28.678930,1986,,,
2,10000.0,013007,1986-09-30,3.0,0.004149,0.016330,-0.057143,-0.057143,1986,9,1985-10-31,1986.0,-0.343,-21.004287,1986,,,
3,10000.0,013007,1986-10-31,3.0,0.003912,0.015172,-0.242424,-0.242424,1986,10,1985-10-31,1986.0,-0.343,-22.607435,1986,,,
4,10000.0,013007,1986-11-28,3.0,0.003002,0.011794,0.060000,0.060000,1986,11,1985-10-31,1986.0,-0.343,-29.082884,1986,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2764056,93436.0,184996,2023-08-31,3.0,848.821184,650.887660,-0.034962,-0.034962,2023,8,2022-12-31,2023.0,44704.000,68.681591,2023,,,
2764057,93436.0,184996,2023-09-29,3.0,819.144340,657.505914,-0.030456,-0.030456,2023,9,2022-12-31,2023.0,44704.000,67.990263,2023,,,
2764058,93436.0,184996,2023-10-31,3.0,795.449380,520.781202,-0.197346,-0.197346,2023,10,2022-12-31,2023.0,44704.000,85.840272,2023,,,
2764059,93436.0,184996,2023-11-30,3.0,638.454494,646.356919,0.195379,0.195379,2023,11,2022-12-31,2023.0,44704.000,69.163025,2023,,,


In [15]:
# 计算每个组合每月的价值加权回报
def weighted_return(group):
    # 确保计算加权回报之前，group非空
    if not group.empty:
        return (group['ret'] * group['lag_mkt_cap']).sum() / group['lag_mkt_cap'].sum()
    else:
        return None

In [16]:
size = data.groupby(['Year', 'Month', 'size_decile']).apply(weighted_return).reset_index(name='Size_Ret').rename(columns = {'size_decile':'port'})
btm = data.groupby(['Year', 'Month', 'beme_decile']).apply(weighted_return).reset_index(name='BtM_Ret').rename(columns = {'beme_decile':'port'})
results = pd.merge(size, btm, on = ['Year', 'Month', 'port'])
results = results[(results.Year >= 1973) & (results.Year <= 2023)]

# 应用加权回报计算
weighted_returns = data.groupby(['Year', 'Month', 'decile']).apply(weighted_return).reset_index(name='weighted_return')

# 重新将加权回报合并到原始DataFrame
factors = data.merge(weighted_returns, on=['Year', 'Month', 'decile'])

# 计算SMB和HML因子
def factor_returns(group):
    # 计算SMB
    smb = (group[group['decile'].isin(['Small_High','Small_Medium','Small_Low'])]['weighted_return'].mean() - 
           group[group['decile'].isin(['Big_High','Big_Medium','Big_Low'])]['weighted_return'].mean())
    # 计算HML
    hml = (group[group['decile'].isin(['Small_High','Big_High'])]['weighted_return'].mean() - 
           group[group['decile'].isin(['Small_Low','Big_Low'])]['weighted_return'].mean())
    return pd.Series({'SMB': smb, 'HML': hml})

# 按年月分组计算SMB和HML
factors = factors.groupby(['Year', 'Month']).apply(factor_returns).reset_index()
factors = factors[(factors.Year >= 1973) & (factors.Year <= 2023)].reset_index(drop=True)
results = pd.merge(results, factors, on = ['Year','Month'])
results

Unnamed: 0,Year,Month,port,Size_Ret,BtM_Ret,SMB,HML
0,1973,1,1,-0.028980,-0.006438,-0.028095,-0.002324
1,1973,1,2,-0.029299,-0.061764,-0.028095,-0.002324
2,1973,1,3,-0.049119,-0.052442,-0.028095,-0.002324
3,1973,1,4,-0.067755,-0.065624,-0.028095,-0.002324
4,1973,1,5,-0.053580,-0.002782,-0.028095,-0.002324
...,...,...,...,...,...,...,...
6115,2023,12,6,0.105329,0.036930,0.063225,0.050498
6116,2023,12,7,0.084715,0.058699,0.063225,0.050498
6117,2023,12,8,0.088657,0.065975,0.063225,0.050498
6118,2023,12,9,0.059157,0.105723,0.063225,0.050498


In [17]:
factors['decile'].value_counts()

KeyError: 'decile'

In [18]:
factors = results

## Q2
For each size decile and the long-short portfolio, report the annualized average excess returns,
annualized volatility, Sharpe Ratio, and skewness. Report these statistics for both the data from
French’s website (original) and our replication. Also report the correlation between the portfolios
that you have constructed (the 10 portfolios and the long-short portfolio) and those from French’s
website.

In [19]:
ME_data = pandas_datareader.famafrench.FamaFrenchReader('Portfolios_Formed_on_ME',start='1900', end=str(datetime.datetime.now().year+1))
ME_data = ME_data.read()[0][['Lo 10', 'Dec 2', 'Dec 3', 'Dec 4', 'Dec 5', 'Dec 6', 'Dec 7', 'Dec 8', 'Dec 9', 'Hi 10']] / 100
ME_data.columns = range(1,11)
ME_data = ME_data['1973':'2023']
ME_data

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1973-01,-0.0283,-0.0505,-0.0510,-0.0725,-0.0617,-0.0623,-0.0648,-0.0503,-0.0678,-0.0056
1973-02,-0.0806,-0.0882,-0.0841,-0.0760,-0.0808,-0.0749,-0.0596,-0.0591,-0.0366,-0.0329
1973-03,-0.0242,-0.0292,-0.0236,-0.0235,-0.0297,-0.0277,-0.0240,-0.0284,-0.0132,0.0028
1973-04,-0.0722,-0.0766,-0.0697,-0.0782,-0.0577,-0.0640,-0.0743,-0.0576,-0.0517,-0.0384
1973-05,-0.0936,-0.0909,-0.0856,-0.0822,-0.0685,-0.0613,-0.0606,-0.0466,-0.0234,-0.0068
...,...,...,...,...,...,...,...,...,...,...
2023-08,-0.0801,-0.0827,-0.0592,-0.0566,-0.0579,-0.0489,-0.0235,-0.0300,-0.0443,-0.0102
2023-09,-0.0618,-0.0684,-0.0638,-0.0623,-0.0646,-0.0555,-0.0577,-0.0494,-0.0500,-0.0459
2023-10,-0.0767,-0.0830,-0.0701,-0.0694,-0.0627,-0.0546,-0.0606,-0.0611,-0.0425,-0.0163
2023-11,0.0588,0.0860,0.0887,0.0943,0.1063,0.0979,0.0937,0.0954,0.0958,0.0920


In [20]:
data2 = pandas_datareader.famafrench.FamaFrenchReader('F-F_Research_Data_Factors',start='1900', end=str(datetime.datetime.now().year+1))
french = data2.read()[0] / 100 # Monthly data
french = french['1973':'2023']
ME_data = ME_data.merge(french[['RF']], left_index = True, right_index = True)

In [21]:
french = french.reset_index()
french['Year'] = french['Date'].dt.year
french['Month'] = french['Date'].dt.month
factors = factors.merge(french[['Year','Month','RF']], on = ['Year','Month'])

In [22]:
# 计算年化回报、波动性、夏普比率（基于无风险利率假设为0），以及偏度
factors['excess_Ret'] = factors['Size_Ret'] - factors['RF']
annual_ret = factors.groupby('port')['excess_Ret'].mean() * 12 * 100  # 假设月度回报，所以乘以12得到年化回报
volatility = factors.groupby('port')['Size_Ret'].std() * np.sqrt(12) * 100  # 年化标准差
sharpe_ratio = annual_ret / volatility  # 假设无风险利率为0
skewness = factors.groupby('port')['Size_Ret'].apply(pd.Series.skew)

# 合并统计数据
stats = pd.DataFrame({
    'ret': annual_ret,
    'vol': volatility,
    'SR': sharpe_ratio,
    'skewness': skewness
}).T

stats.loc['ret','1-10'] = stats.loc['ret',1] - stats.loc['ret',10]
stats.loc['vol','1-10'] = (factors[factors.port==1]['Size_Ret'].reset_index(drop=True) - factors[factors.port==10]['Size_Ret'].reset_index(drop=True)).std() * np.sqrt(12) * 100
stats.loc['SR','1-10'] = stats.loc['ret','1-10'] / stats.loc['vol','1-10']
stats.loc['skewness','1-10'] = (factors[factors.port==1]['Size_Ret'].reset_index(drop=True) - factors[factors.port==10]['Size_Ret'].reset_index(drop=True)).skew()

# 计算Ken French数据的年化回报和其他统计量
fama_volatility = ME_data.iloc[:,:-1].std() * np.sqrt(12) * 100
fama_skewness = ME_data.iloc[:,:-1].apply(pd.Series.skew)
fama_stats = pd.DataFrame({
    'skewness': fama_skewness,
    'vol': fama_volatility}).T

for i in range(10):
    fama_stats.loc['corr w/ original', i+1] = factors[factors['port'] == i+1]['Size_Ret'].reset_index(drop=True).corr(ME_data[i+1].reset_index(drop=True))
    ME_data.iloc[:,i] -= ME_data.iloc[:,-1]
fama_annual_ret = ME_data.iloc[:,:-1].mean() * 12 * 100
fama_sharpe_ratio = fama_annual_ret / fama_volatility


# 合并Ken French的统计数据
fama_stats.loc['ret'] = fama_annual_ret
fama_stats.loc['SR'] = fama_sharpe_ratio
fama_stats.loc['ret','1-10'] = fama_stats.loc['ret',1] - fama_stats.loc['ret',10]
fama_stats.loc['vol','1-10'] = (ME_data[1] - ME_data[10]).std() * np.sqrt(12) * 100
fama_stats.loc['SR','1-10'] = fama_stats.loc['ret','1-10'] / fama_stats.loc['vol','1-10']
fama_stats.loc['skewness','1-10'] = (ME_data[1] - ME_data[10]).skew()
fama_stats.loc['corr w/ original','1-10'] = ((ME_data[1] - ME_data[10]).reset_index(drop=True)).corr(factors[factors.port==1]['Size_Ret'].reset_index(drop=True) - factors[factors.port==10]['Size_Ret'].reset_index(drop=True))

fama_stats = fama_stats.iloc[[3,1,4,0,2]]

In [23]:
stats

port,1,2,3,4,5,6,7,8,9,10,1-10
ret,11.74708,11.10606,10.488843,9.881812,10.137833,9.21904,8.928289,8.668584,7.958796,5.758759,5.988321
vol,24.722416,24.816133,21.721068,20.815789,19.818513,18.418038,18.346632,17.502481,16.061442,14.863013,18.59703
SR,0.475159,0.447534,0.482888,0.474727,0.511534,0.500544,0.486645,0.495277,0.495522,0.387456,0.322004
skewness,-0.094188,0.452534,-0.529754,-0.620072,-0.523084,-0.544801,-0.50197,-0.541425,-0.482182,-0.313789,0.649594


In [24]:
fama_stats

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,1-10
ret,8.288431,8.894314,9.357255,8.714118,9.242745,8.543137,8.890392,8.539608,8.02,6.67,1.618431
vol,21.727134,22.57111,21.35558,20.70151,20.208252,18.80242,18.739802,17.983771,16.638002,15.372801,16.266463
SR,0.381478,0.394057,0.438164,0.420941,0.457375,0.454364,0.474412,0.474851,0.482029,0.433883,0.099495
skewness,-0.148741,-0.238932,-0.471417,-0.506797,-0.443268,-0.518309,-0.472055,-0.480349,-0.440567,-0.333696,0.799695
corr w/ original,0.910841,0.899675,0.953108,0.956032,0.957644,0.964606,0.962108,0.971666,0.977444,0.976675,0.851911


## Q3
For each book-to-market decile and the long-short portfolio, report the annualized average
excess returns, annualized volatility, Sharpe Ratio, and skewness. Report these statistics for both
the data from French’s website (original) and our replication. Also report the correlation between
the portfolios that you have constructed (the 10 portfolios and the long-short portfolio) and those
from French’s website.

In [25]:
BM_data = pandas_datareader.famafrench.FamaFrenchReader('Portfolios_Formed_on_BE-ME',start='1900', end=str(datetime.datetime.now().year+1))
BM_data = BM_data.read()[0][['Lo 10', 'Dec 2', 'Dec 3', 'Dec 4', 'Dec 5', 'Dec 6', 'Dec 7', 'Dec 8', 'Dec 9', 'Hi 10']] / 100
BM_data.columns = range(1,11)
BM_data = BM_data['1973':'2023']
BM_data

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1973-01,-0.0121,-0.0568,-0.0517,-0.0451,-0.0589,-0.0037,-0.0015,-0.0045,-0.0397,-0.0461
1973-02,-0.0240,-0.0544,-0.0435,-0.0482,-0.0551,-0.0489,-0.0575,-0.0553,-0.0412,-0.0388
1973-03,-0.0133,-0.0040,-0.0223,-0.0119,-0.0173,0.0186,0.0098,0.0105,-0.0071,0.0238
1973-04,-0.0651,-0.0621,-0.0518,-0.0385,-0.0464,-0.0121,-0.0241,-0.0087,-0.0315,-0.0443
1973-05,0.0010,-0.0293,-0.0392,-0.0201,-0.0232,-0.0160,-0.0459,-0.0266,-0.0561,-0.0683
...,...,...,...,...,...,...,...,...,...,...
2023-08,-0.0090,-0.0129,-0.0099,-0.0270,-0.0230,-0.0402,-0.0344,-0.0400,-0.0596,-0.0690
2023-09,-0.0589,-0.0500,-0.0446,-0.0446,-0.0095,-0.0466,-0.0287,-0.0407,-0.0275,-0.0576
2023-10,-0.0142,-0.0129,-0.0498,-0.0572,-0.0369,-0.0282,-0.0341,-0.0396,-0.0439,-0.0723
2023-11,0.1108,0.0758,0.0757,0.0714,0.0666,0.0876,0.0888,0.0890,0.1358,0.1282


In [26]:
data2 = pandas_datareader.famafrench.FamaFrenchReader('F-F_Research_Data_Factors',start='1900', end=str(datetime.datetime.now().year+1))
french = data2.read()[0] / 100 # Monthly data
french = french['1973':'2023']
BM_data = BM_data.merge(french[['RF']], left_index = True, right_index = True)

In [27]:
# Calculate annualized returns, volatility, Sharpe ratio (based on the risk-free rate assumption of zero), and skewness
factors['excess_Ret'] = factors['BtM_Ret'] - factors['RF']
annual_ret = factors.groupby('port')['excess_Ret'].mean() * 12 * 100
volatility = factors.groupby('port')['BtM_Ret'].std() * np.sqrt(12) * 100
sharpe_ratio = annual_ret / volatility
skewness = factors.groupby('port')['BtM_Ret'].apply(pd.Series.skew)

# 合并统计数据
stats = pd.DataFrame({
    'ret': annual_ret,
    'vol': volatility,
    'SR': sharpe_ratio,
    'skewness': skewness
}).T

stats.loc['ret','10-1'] = stats.loc['ret',10] - stats.loc['ret',1]
stats.loc['vol','10-1'] = (factors[factors.port==10]['BtM_Ret'].reset_index(drop=True) - factors[factors.port==1]['BtM_Ret'].reset_index(drop=True)).std() * np.sqrt(12) * 100
stats.loc['SR','10-1'] = stats.loc['ret','10-1'] / stats.loc['vol','10-1']
stats.loc['skewness','10-1'] = (factors[factors.port==10]['BtM_Ret'].reset_index(drop=True) - factors[factors.port==1]['BtM_Ret'].reset_index(drop=True)).skew()


# 计算Ken French数据的年化回报和其他统计量
fama_volatility = BM_data.iloc[:,:-1].std() * np.sqrt(12) * 100
fama_skewness = BM_data.iloc[:,:-1].apply(pd.Series.skew)
fama_stats = pd.DataFrame({
    'skewness': fama_skewness,
    'vol': fama_volatility}).T

for i in range(10):
    fama_stats.loc['corr w/ original', i+1] = factors[factors['port'] == i+1]['BtM_Ret'].reset_index(drop=True).corr(BM_data[i+1].reset_index(drop=True))
    BM_data.iloc[:,i] -= BM_data.iloc[:,-1]
fama_annual_ret = BM_data.iloc[:,:-1].mean() * 12 * 100
fama_sharpe_ratio = fama_annual_ret / fama_volatility


# 合并Ken French的统计数据
fama_stats.loc['ret'] = fama_annual_ret
fama_stats.loc['SR'] = fama_sharpe_ratio
fama_stats.loc['ret','10-1'] = fama_stats.loc['ret',10] - fama_stats.loc['ret',1]
fama_stats.loc['vol','10-1'] = (BM_data[10] - BM_data[1]).std() * np.sqrt(12) * 100
fama_stats.loc['SR','10-1'] = fama_stats.loc['ret','10-1'] / fama_stats.loc['vol','10-1']
fama_stats.loc['skewness','10-1'] = (BM_data[10] - BM_data[1]).skew()
fama_stats.loc['corr w/ original','10-1'] = ((BM_data[10] - BM_data[1]).reset_index(drop=True)).corr(factors[factors.port==10]['BtM_Ret'].reset_index(drop=True) - factors[factors.port==1]['BtM_Ret'].reset_index(drop=True))

fama_stats = fama_stats.iloc[[3,1,4,0,2]]

In [28]:
stats

port,1,2,3,4,5,6,7,8,9,10,10-1
ret,5.979713,5.634289,8.028556,7.599029,7.506937,8.615034,7.160076,8.328208,9.396409,11.069629,5.089917
vol,16.718186,15.719399,15.83442,16.289413,16.324139,16.889097,16.24887,17.074515,18.265406,22.177368,17.280542
SR,0.357677,0.358429,0.507032,0.466501,0.459867,0.510094,0.440651,0.487757,0.514437,0.499141,0.294546
skewness,-0.096797,-0.407093,-0.370326,-0.579521,-0.189199,-0.331016,-0.406331,-0.623239,-0.375271,-0.182665,0.61373


In [29]:
fama_stats

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,10-1
ret,6.272549,8.013529,7.933922,8.051765,7.636863,8.75549,7.122549,8.726078,10.750588,11.080588,4.808039
vol,18.229525,16.646583,16.338495,16.542076,15.731457,16.326922,16.629107,17.407296,18.386122,22.212019,17.55601
SR,0.344087,0.481392,0.485597,0.486745,0.485452,0.536261,0.428318,0.501289,0.584712,0.498856,0.273869
skewness,-0.183379,-0.422987,-0.504264,-0.476558,-0.48125,-0.440235,-0.456691,-0.692754,-0.434154,-0.478599,0.048685
corr w/ original,0.946819,0.960312,0.94689,0.948986,0.941475,0.941682,0.943082,0.943836,0.93643,0.962848,0.899557


## Q4
Has the value and size anomaly worked in the past few years? Show some empirical evidence.

## Q5
For both HML and SMB portfolios, report the annualized average excess returns, annualized
volatility, Sharpe Ratio, and skewness. Report these statistics for both the data from Frence's website (original) and our replication. Also, report correlations between the replicated factors
and the factor from French’s website. Have the factors been consistent across time? Show some
empirical evidence.

In [36]:
# Here is an alternative way using wrds package (FF3 factors only...)
FF3 = conn.get_table(library='ff', table='factors_monthly')
FF3 = FF3[['date','mktrf','smb','hml','rf']]
FF3['mkt'] = FF3['mktrf'] + FF3['rf']
FF3['date']=FF3['date']+MonthEnd(0)
FF3['date'] = pd.to_datetime(FF3['date'])
FF3 = FF3[(FF3['date']>='1973')&(FF3['date']<='2024')].reset_index(drop=True)
FF3.iloc[:,1:] = FF3.iloc[:,1:].apply(pd.to_numeric, errors='coerce')
FF3

Unnamed: 0,date,mktrf,smb,hml,rf,mkt
0,1973-01-31,-0.0329,-0.0349,0.0268,0.0044,-0.0285
1,1973-02-28,-0.0485,-0.0387,0.016,0.0041,-0.0444
2,1973-03-31,-0.013,-0.0282,0.0262,0.0046,-0.0084
3,1973-04-30,-0.0568,-0.0385,0.0541,0.0052,-0.0516
4,1973-05-31,-0.0294,-0.063,0.0041,0.0051,-0.0243
...,...,...,...,...,...,...
607,2023-08-31,-0.0239,-0.0316,-0.0106,0.0045,-0.0194
608,2023-09-30,-0.0524,-0.0251,0.0152,0.0043,-0.0481
609,2023-10-31,-0.0319,-0.0388,0.0018,0.0047,-0.0272
610,2023-11-30,0.0884,-0.0002,0.0164,0.0044,0.0928


In [37]:
factors1 = factors[factors.port == 1]

In [45]:
columns = ['HML', 'SMB']
rows = ['ret', 'vol', 'SR', 'skewness', 'corr w/ original']
stats = pd.DataFrame(0, index=rows[:-1], columns=columns)
stats.loc['ret','HML'] = (factors1['HML']).mean() * 12 * 100
stats.loc['vol','HML'] = (factors1['HML']).std() * np.sqrt(12) * 100
stats.loc['SR','HML'] = stats.loc['ret','HML'] / stats.loc['vol','HML']
stats.loc['skewness','HML'] = (factors1['HML']).skew()

stats.loc['ret','SMB'] = (factors1['SMB']).mean() * 12 * 100
stats.loc['vol','SMB'] = (factors1['SMB']).std() * np.sqrt(12) * 100
stats.loc['SR','SMB'] = stats.loc['ret','SMB'] / stats.loc['vol','SMB']
stats.loc['skewness','SMB'] = (factors1['SMB']).skew()
stats

Unnamed: 0,HML,SMB
ret,3.832388,3.539639
vol,10.832576,10.406058
SR,0.353784,0.340152
skewness,0.23033,0.179717


In [44]:
fama_stats = pd.DataFrame(0, index=rows, columns=columns)
fama_stats.loc['ret','HML'] = (FF3['hml']).mean() * 12 * 100
fama_stats.loc['vol','HML'] = (FF3['hml']).std() * np.sqrt(12) * 100
fama_stats.loc['SR','HML'] = fama_stats.loc['ret','HML'] / fama_stats.loc['vol','HML']
fama_stats.loc['skewness','HML'] = (FF3['hml']).skew()
fama_stats.loc['corr w/ original','HML'] = FF3['hml'].reset_index(drop=True).corr(factors1['HML'].reset_index(drop=True))

fama_stats.loc['ret','SMB'] = (FF3['smb']).mean() * 12 * 100
fama_stats.loc['vol','SMB'] = (FF3['smb']).std() * np.sqrt(12) * 100
fama_stats.loc['SR','SMB'] = fama_stats.loc['ret','SMB'] / fama_stats.loc['vol','SMB']
fama_stats.loc['skewness','SMB'] = (FF3['smb']).skew()
fama_stats.loc['corr w/ original','SMB'] = FF3['smb'].reset_index(drop=True).corr(factors1['SMB'].reset_index(drop=True))
fama_stats

Unnamed: 0,HML,SMB
ret,3.581569,1.731961
vol,10.833103,10.602694
SR,0.330613,0.163351
skewness,0.06781,0.456362
corr w/ original,0.76241,0.861105


In [43]:
FF3['smb'].reset_index(drop=True).corr(factors1['SMB'].reset_index(drop=True))

0.8611053007681093

In [46]:
stats

Unnamed: 0,HML,SMB
ret,3.832388,3.539639
vol,10.832576,10.406058
SR,0.353784,0.340152
skewness,0.23033,0.179717


In [47]:
fama_stats

Unnamed: 0,HML,SMB
ret,3.581569,1.731961
vol,10.833103,10.602694
SR,0.330613,0.163351
skewness,0.06781,0.456362
corr w/ original,0.76241,0.861105


## Q6
Compare and contrast using the characteristic portfolios (Fama and French 1992) and the factor portfolios (Fama and French 1993).