## CFM 301 DA4
### Q4
#### Jeongseop Yi (Patrick), j22yi

In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm

#### a)

In [4]:
# Load the stock-level panel and derive a 'yyyymm' string key (four-digit year
# followed by a zero-padded month, e.g. '200001') from its 'date' column.
q4_df = pd.read_sas('ca.sas7bdat')

q4_df['date'] = pd.to_datetime(q4_df['date'])
q4_df['year'] = q4_df['date'].dt.year
q4_df['month'] = q4_df['date'].dt.month
q4_df['yyyymm'] = q4_df['year'].astype(str) + q4_df['month'].astype(str).str.zfill(2)

# Keep 2000-01 through 2021-11, both ends inclusive. The keys are compared as
# strings; with a zero-padded month, lexicographic order matches calendar order.
# The explicit copy makes q4_df an independent frame, so columns added to it
# later do not trigger SettingWithCopyWarning on a filtered slice.
q4_df = q4_df[q4_df['yyyymm'].between('200001', '202111')].copy()

# Load the factor file, lower-case its column names, and build the same key
# from 'dateff' so the two tables can be joined on 'yyyymm'.
ff4_df = pd.read_sas('ff4.sas7bdat')
# A concrete list is used rather than a one-shot map() iterator.
ff4_df.columns = [name.lower() for name in ff4_df.columns]
ff4_df['dateff'] = pd.to_datetime(ff4_df['dateff'])
ff4_df['year'] = ff4_df['dateff'].dt.year
ff4_df['month'] = ff4_df['dateff'].dt.month
ff4_df['yyyymm'] = ff4_df['year'].astype(str) + ff4_df['month'].astype(str).str.zfill(2)
ff4_df = ff4_df[ff4_df['yyyymm'].between('200001', '202111')].copy()

In [5]:
def get_monthly_returns_quintile(df, col):
    """Sort rows into monthly quintiles of `col` and summarise each quintile.

    Within every 'yyyymm' group, rows are assigned a quintile label 0-4 using
    ``pd.qcut`` on `col`. Three columns are added to `df` in place:

    * ``<col>_q``     - the quintile label of the row within its month,
    * ``<col>_q_ret`` - mean of 'ret_t1' over the row's (month, quintile) group,
    * ``<col>_q_avg`` - mean of `col` over the same group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'yyyymm', 'ret_t1' and `col`.
    col : str
        Name of the sorting characteristic.

    Returns
    -------
    tuple
        ``(df, df_q_ret)``: the same (mutated) `df`, and a frame with one row
        per month holding 'yyyymm', the quintile returns in columns '1'..'5',
        and the quintile means of `col` in '1_beta'..'5_beta'. The '_beta'
        suffix is used whatever `col` is.
    """
    rank_col = col + '_q'
    ret_col = col + '_q_ret'
    avg_col = col + '_q_avg'

    # Quintile label (0 = lowest `col`, 4 = highest) within each month.
    df[rank_col] = df.groupby('yyyymm')[col].transform(
        lambda values: pd.qcut(values, 5, labels=False)
    )
    # Broadcast the (month, quintile) group means back onto every row.
    df[ret_col] = df.groupby(['yyyymm', rank_col])['ret_t1'].transform('mean')
    df[avg_col] = df.groupby(['yyyymm', rank_col])[col].transform('mean')

    # One row per month; columns '1'..'5' map to quintile labels 0..4.
    labels = [str(number) for number in range(1, 6)]
    df_q_ret = pd.DataFrame(columns=labels)
    for rank, label in enumerate(labels):
        monthly = df[df[rank_col] == rank].groupby('yyyymm')
        df_q_ret[label] = monthly[ret_col].mean()
        df_q_ret[label + '_beta'] = monthly[col].mean()

    # Turn the 'yyyymm' index into a regular column.
    df_q_ret = df_q_ret.reset_index()
    return df, df_q_ret

In [6]:
# Sorting characteristics; one monthly quintile table is produced for each.
factors = ['beta_winsorized', 'ivol_winsorized']
factors_quinlst = []
for factor in factors:
    # The helper adds the quintile columns to q4_df and returns the
    # month-by-quintile summary for this characteristic.
    q4_df, factor_ret = get_monthly_returns_quintile(q4_df, factor)
    # Attach the factor-file columns for the same month; the left join keeps
    # every month of the quintile table.
    factor_ret = factor_ret.merge(ff4_df, how='left', on='yyyymm')
    factors_quinlst.append(factor_ret)

In [7]:
# Quintile tables in the order they were built: [0] sorted on beta,
# [1] sorted on idiosyncratic volatility.
beta_q = factors_quinlst[0]
ivol_q = factors_quinlst[1]

# BAB: excess return of the lowest-beta quintile scaled by 1 / its average
# beta, minus the same quantity for the highest-beta quintile.
BAB_IVOL = pd.DataFrame({
    'yyyymm': beta_q['yyyymm'],
    'BAB': (beta_q['1'] - beta_q['rf']) / beta_q['1_beta']
           - (beta_q['5'] - beta_q['rf']) / beta_q['5_beta'],
})

# IVOL: lowest-volatility quintile return minus highest-volatility quintile return.
ivol_spread = pd.DataFrame({
    'yyyymm': ivol_q['yyyymm'],
    'IVOL': ivol_q['1'] - ivol_q['5'],
})

# Join the two legs on the month key instead of relying on both tables having
# identical row positions; a month missing from one table then yields NaN
# rather than a silently misaligned row.
BAB_IVOL = BAB_IVOL.merge(ivol_spread, on='yyyymm', how='left')

In [8]:
# Equal-weighted combination of the BAB and IVOL strategy returns.
BAB_IVOL['AVG'] = 0.5 * (BAB_IVOL['BAB'] + BAB_IVOL['IVOL'])
# Bring in the factor-file columns for each month (left join on 'yyyymm').
BAB_IVOL = pd.merge(BAB_IVOL, ff4_df, how='left', on='yyyymm')

In [9]:
factors = ['AVG']
# Summary table: one row per statistic, one column per strategy in `factors`.
# The row labels are set up front as the index; the previous
# `['index'].append(factors)` evaluated to None (list.append returns None), so
# the column list was never actually passed to the constructor.
summary_df = pd.DataFrame(index=pd.Index(
    ['overall return', 'excess return',
     'CAPM alpha', 'CAPM t-stat', 'FF4 alpha', 'FF4 t-stat', 'Sharpe'],
    name='index'))
for factor in factors:
    # Strategy return net of the risk-free rate: regression LHS and Sharpe input.
    excess = BAB_IVOL[factor] - BAB_IVOL['rf']

    # CAPM: regress on the market factor plus an intercept. The intercept is
    # read by its 'const' label (the name add_constant gives it) rather than by
    # position, because positional [] access on a labelled Series is deprecated.
    mkt = sm.add_constant(BAB_IVOL['mktrf'])
    model = sm.OLS(excess, mkt).fit()
    model_alpha = model.params['const']
    model_tstat = model.tvalues['const']

    # Four-factor model: market, size, value and momentum columns.
    ff4 = sm.add_constant(BAB_IVOL[['mktrf', 'smb', 'hml', 'umd']])
    model_ff4 = sm.OLS(excess, ff4).fit()
    model_ff4_alpha = model_ff4.params['const']
    model_ff4_tstat = model_ff4.tvalues['const']

    summary_df[factor] = [BAB_IVOL[factor].mean(),
                          # NOTE(review): the 'excess return' row is net of both
                          # rf and mktrf (i.e. relative to the market return);
                          # confirm this is the intended definition.
                          (excess - BAB_IVOL['mktrf']).mean(),
                          model_alpha,
                          model_tstat.round(2),
                          model_ff4_alpha,
                          model_ff4_tstat.round(2),
                          # Sharpe ratio scaled by sqrt(12) (monthly observations).
                          excess.mean() / excess.std() * np.sqrt(12)]

display(summary_df.round(4).T)

index,overall return,excess return,CAPM alpha,CAPM t-stat,FF4 alpha,FF4 t-stat,Sharpe
AVG,0.0057,-0.0015,0.0042,0.99,0.0044,1.02,0.2269


#### b)

In [10]:
# Strategy return after subtracting a flat 0.0005 from every monthly observation.
BAB_IVOL['AVG_fee'] = BAB_IVOL['AVG'] - 0.0005

factors = ['AVG_fee']
# Summary table: one row per statistic, one column per strategy in `factors`.
# The row labels are set up front as the index; the previous
# `['index'].append(factors)` evaluated to None (list.append returns None), so
# the column list was never actually passed to the constructor.
summary_df = pd.DataFrame(index=pd.Index(
    ['overall return', 'excess return',
     'CAPM alpha', 'CAPM t-stat', 'FF3 alpha', 'FF3 t-stat', 'Sharpe'],
    name='index'))
for factor in factors:
    # Strategy return net of the risk-free rate: regression LHS and Sharpe input.
    excess = BAB_IVOL[factor] - BAB_IVOL['rf']

    # CAPM: regress on the market factor plus an intercept. The intercept is
    # read by its 'const' label (the name add_constant gives it) rather than by
    # position, because positional [] access on a labelled Series is deprecated.
    mkt = sm.add_constant(BAB_IVOL['mktrf'])
    model = sm.OLS(excess, mkt).fit()
    model_alpha = model.params['const']
    model_tstat = model.tvalues['const']

    # Three-factor model: market, size and value columns (no momentum), so the
    # locals are named ff3 to match the 'FF3' row labels.
    ff3 = sm.add_constant(BAB_IVOL[['mktrf', 'smb', 'hml']])
    model_ff3 = sm.OLS(excess, ff3).fit()
    model_ff3_alpha = model_ff3.params['const']
    model_ff3_tstat = model_ff3.tvalues['const']

    summary_df[factor] = [BAB_IVOL[factor].mean(),
                          # NOTE(review): the 'excess return' row is net of both
                          # rf and mktrf (i.e. relative to the market return);
                          # confirm this is the intended definition.
                          (excess - BAB_IVOL['mktrf']).mean(),
                          model_alpha,
                          model_tstat.round(2),
                          model_ff3_alpha,
                          model_ff3_tstat.round(2),
                          # Sharpe ratio scaled by sqrt(12) (monthly observations).
                          excess.mean() / excess.std() * np.sqrt(12)]

display(summary_df.round(4).T)

index,overall return,excess return,CAPM alpha,CAPM t-stat,FF3 alpha,FF3 t-stat,Sharpe
AVG_fee,0.0052,-0.002,0.0037,0.87,0.0038,0.9,0.2014


In [11]:
# Write the stock panel (including the quintile columns added above) to sheet
# "q4" of the shared workbook. The workbook is opened in append mode and any
# existing sheet of that name is replaced.
writer = pd.ExcelWriter("../DA4_data.xlsx", mode='a', engine="openpyxl", if_sheet_exists='replace')
with writer:
    q4_df.to_excel(writer, sheet_name="q4", index=False)