## CFM 301 DA4
### Q1
#### Jeongseop Yi (Patrick), j22yi

In [14]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels.api as sm

In [15]:
# Load the raw panel from the SAS data set.
q1_df = pd.read_sas('ca.sas7bdat')

# Derive calendar fields and a sortable 'yyyymm' string key from the date.
q1_df['date'] = pd.to_datetime(q1_df['date'])
q1_df['year'] = q1_df['date'].dt.year
q1_df['month'] = q1_df['date'].dt.month
month_str = q1_df['month'].astype(str).str.zfill(2)
q1_df['yyyymm'] = q1_df['year'].astype(str) + month_str

# Keep only observations from 2000-01 through 2021-11 (both ends inclusive).
in_sample = q1_df['yyyymm'].between('200001', '202111')
q1_df = q1_df[in_sample]

#### a)

In [16]:
def get_monthly_returns_quintile(df, col, excel_path="../DA4_data.xlsx"):
    """Bucket rows into monthly quintiles of ``col`` and average ``ret_t1`` per bucket.

    Two columns are added to ``df`` in place:

    * ``<col>_q``     -- quintile label 0..4 from ``pd.qcut`` within each
      ``yyyymm`` group.
    * ``<col>_q_ret`` -- mean of ``ret_t1`` over the rows sharing the same
      ``yyyymm`` and quintile label.

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose ``yyyymm`` (as a column or an index level), ``col`` and
        ``ret_t1``.
    col : str
        Name of the sorting variable.
    excel_path : str or None, optional
        Workbook that receives the quintile table on sheet
        ``'q1_<col>_ret'``. Pass ``None`` to skip the export.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The mutated ``df`` and a frame with one row per ``yyyymm`` and the
        columns ``'1'`` .. ``'5'`` holding the quintile mean returns.
    """
    quintile_col = col + '_q'
    quintile_ret_col = col + '_q_ret'

    df[quintile_col] = df.groupby('yyyymm')[col].transform(
        lambda x: pd.qcut(x, 5, labels=False))
    df[quintile_ret_col] = df.groupby(['yyyymm', quintile_col])['ret_t1'].transform('mean')

    # create data frame with monthly returns for each quintile
    df_q_ret = pd.DataFrame(columns=['1', '2', '3', '4', '5'])
    for i in range(1, 6):
        # qcut labels are 0-based while the output columns are 1-based
        in_quintile = df[df[quintile_col] == i - 1]
        df_q_ret[str(i)] = in_quintile.groupby('yyyymm')[quintile_ret_col].mean()
    df_q_ret = df_q_ret.reset_index()

    if excel_path is not None:
        import os

        # Append mode needs an existing workbook, and `if_sheet_exists` is only
        # accepted in append mode, so fall back to a fresh workbook otherwise.
        if os.path.exists(excel_path):
            writer_kwargs = {'mode': 'a', 'if_sheet_exists': 'replace'}
        else:
            writer_kwargs = {'mode': 'w'}
        with pd.ExcelWriter(excel_path, engine="openpyxl", **writer_kwargs) as writer:
            df_q_ret.to_excel(writer, sheet_name='q1_' + col + '_ret', index=False)
    return df, df_q_ret

In [17]:
# Sorting variables; one quintile-return table is produced per entry, in this order.
factors = [
    'lnSize_winsorized',
    'bk2mkt_winsorized',
    'ep1_winsorized',
    'beta_winsorized',
    'ivol_winsorized',
    'mom_winsorized',
]

# The helper is called with 'yyyymm' as the index and the index is restored afterwards.
q1_df = q1_df.set_index('yyyymm')
factors_quinlst = []
for factor in factors:
    q1_df, quintile_returns = get_monthly_returns_quintile(q1_df, factor)
    factors_quinlst.append(quintile_returns)
q1_df = q1_df.reset_index()

#### b)

In [18]:
# (long quintile, short quintile) for each table in factors_quinlst, in the
# same order as `factors`:
#   lnSize q1 - q5, bk2mkt q5 - q1, ep1 q1 - q5,
#   beta   q1 - q5, ivol   q1 - q5, mom q5 - q1
port_legs = [
    ('1', '5'),  # lnSize
    ('5', '1'),  # bk2mkt
    ('1', '5'),  # ep1
    ('1', '5'),  # beta
    ('1', '5'),  # ivol
    ('5', '1'),  # mom
]
for quintile_table, (long_leg, short_leg) in zip(factors_quinlst, port_legs):
    quintile_table['port'] = quintile_table[long_leg] - quintile_table[short_leg]

In [19]:
# Collect the long-short ('port') return of every factor into one frame keyed by yyyymm.
# list.append() returns None, so the column list is built by concatenation instead.
factors_q5_q1 = pd.DataFrame(columns=['yyyymm'] + factors)
factors_q5_q1['yyyymm'] = factors_quinlst[0]['yyyymm']
for factor, quintile_table in zip(factors, factors_quinlst):
    factors_q5_q1[factor] = quintile_table['port']

In [20]:
# Load the factor data set and normalise its column names to lower case.
ff4_df = pd.read_sas('ff4.sas7bdat')
ff4_df.columns = [name.lower() for name in ff4_df.columns]

# Build the same 'yyyymm' string key that the portfolio frame uses.
ff4_df['date'] = pd.to_datetime(ff4_df['dateff'])
ff4_df['year'] = ff4_df['date'].dt.year
ff4_df['month'] = ff4_df['date'].dt.month
ff4_month_str = ff4_df['month'].astype(str).str.zfill(2)
ff4_df['yyyymm'] = ff4_df['year'].astype(str) + ff4_month_str

# Restrict to 2000-01 .. 2021-11 (inclusive).
ff4_df = ff4_df[ff4_df['yyyymm'].between('200001', '202111')]

# Left join so every portfolio month is kept even without a factor row.
factors_q5_q1 = factors_q5_q1.merge(ff4_df, on='yyyymm', how='left')

In [21]:
# Summary statistics for each long-short portfolio: one column per factor while
# building, transposed for display so that each factor becomes a row.
summary_rows = ['overall return', 'excess return',
                'CAPM alpha', 'CAPM t-stat', 'FF4 alpha', 'FF4 t-stat', 'Sharpe']
# The index is named 'index' so the transposed table keeps the same header.
summary_df = pd.DataFrame(index=pd.Index(summary_rows, name='index'))

# The regressor matrices do not depend on the factor, so build them once.
mkt = sm.add_constant(factors_q5_q1['mktrf'])
ff4 = sm.add_constant(factors_q5_q1[['mktrf', 'smb', 'hml', 'umd']])

for factor in factors:
    # Portfolio return net of the 'rf' column; reused by both regressions and the Sharpe ratio.
    excess_ret = factors_q5_q1[factor] - factors_q5_q1['rf']

    # One-factor regression on 'mktrf'. add_constant prepends the intercept,
    # so it is the first coefficient; .iloc avoids the deprecated use of
    # integer keys as positions on a label-indexed Series.
    model = sm.OLS(excess_ret, mkt).fit()
    model_alpha = model.params.iloc[0]
    model_tstat = model.tvalues.iloc[0]

    # Four-factor regression on 'mktrf', 'smb', 'hml' and 'umd'.
    model_ff4 = sm.OLS(excess_ret, ff4).fit()
    model_ff4_alpha = model_ff4.params.iloc[0]
    model_ff4_tstat = model_ff4.tvalues.iloc[0]

    summary_df[factor] = [factors_q5_q1[factor].mean(),
                          # NOTE(review): this row subtracts both 'rf' and 'mktrf';
                          # confirm that is the intended definition of 'excess return'.
                          (excess_ret - factors_q5_q1['mktrf']).mean(),
                          model_alpha,
                          round(model_tstat, 2),
                          model_ff4_alpha,
                          round(model_ff4_tstat, 2),
                          # mean / std scaled by sqrt(12)
                          excess_ret.mean() / excess_ret.std() * np.sqrt(12)]

display(summary_df.round(4).T)


index,overall return,excess return,CAPM alpha,CAPM t-stat,FF4 alpha,FF4 t-stat,Sharpe
lnSize_winsorized,0.0266,0.0194,0.0249,5.91,0.0263,6.29,1.3
bk2mkt_winsorized,-0.0019,-0.0091,-0.003,-0.78,-0.0033,-0.85,-0.1775
ep1_winsorized,0.0079,0.0007,0.0052,1.11,0.0068,1.5,0.3055
beta_winsorized,-0.0044,-0.0116,-0.0052,-0.98,-0.0055,-1.03,-0.2285
ivol_winsorized,-0.0156,-0.0228,-0.0163,-3.05,-0.0169,-3.14,-0.6809
mom_winsorized,0.0041,-0.0031,0.0028,0.54,0.0037,0.71,0.1208


In [22]:
# Append the enriched panel to the workbook, replacing the 'q1' sheet if it already exists.
with pd.ExcelWriter(
    "../DA4_data.xlsx",
    engine="openpyxl",
    mode='a',
    if_sheet_exists='replace',
) as workbook:
    q1_df.to_excel(workbook, sheet_name="q1", index=False)