## CFM 301 DA4
### Q2
#### Jeongseop Yi (Patrick), j22yi

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels.api as sm

In [2]:
# Inclusive sample window, expressed as zero-padded 'YYYYMM' strings so that
# plain string comparison orders months correctly.
SAMPLE_START = '200001'
SAMPLE_END = '202111'


def _add_yyyymm(frame, date_col):
    """Add 'year', 'month' and a 'YYYYMM' string key derived from ``date_col``.

    ``frame`` is modified in place and also returned. ``date_col`` is passed
    through ``pd.to_datetime`` first, so it may already be a datetime column.
    """
    frame[date_col] = pd.to_datetime(frame[date_col])
    frame['year'] = frame[date_col].dt.year
    frame['month'] = frame[date_col].dt.month
    frame['yyyymm'] = frame['year'].astype(str) + frame['month'].astype(str).str.zfill(2)
    return frame


q2_df = _add_yyyymm(pd.read_sas('ca.sas7bdat'), 'date')
# .copy() makes the filtered frame independent of the full one; later cells add
# columns to q2_df, which would otherwise trigger SettingWithCopyWarning.
q2_df = q2_df[(q2_df['yyyymm'] >= SAMPLE_START) & (q2_df['yyyymm'] <= SAMPLE_END)].copy()

ff4_df = pd.read_sas('ff4.sas7bdat')
# Lower-case the column names so later cells can refer to them as
# 'dateff', 'rf', 'mktrf', 'smb', 'hml' and 'umd'.
ff4_df.columns = ff4_df.columns.str.lower()
ff4_df = _add_yyyymm(ff4_df, 'dateff')
ff4_df = ff4_df[(ff4_df['yyyymm'] >= SAMPLE_START) & (ff4_df['yyyymm'] <= SAMPLE_END)].copy()

In [3]:
def get_monthly_returns_quintile(df, col):
    """Sort rows into within-month quintiles of ``col`` and average each bucket.

    Three columns are added to ``df`` in place:

    * ``<col>_q``     -- bucket label 0..4 from ``pd.qcut`` within each ``yyyymm``
    * ``<col>_q_ret`` -- mean ``ret_t1`` of the row's (month, bucket) cell
    * ``<col>_q_avg`` -- mean ``col`` of the row's (month, bucket) cell

    Returns:
        ``(df, table)``. ``df`` is the same object that was passed in.
        ``table`` has one row per month; columns ``'1'``..``'5'`` hold each
        bucket's mean return and ``'1_beta'``..``'5_beta'`` hold each bucket's
        mean of ``col`` (the ``_beta`` suffix is used whatever ``col`` is).
    """
    bucket_col = col + '_q'
    ret_col = col + '_q_ret'
    avg_col = col + '_q_avg'

    def _to_quintile(values):
        # labels=False yields integer bucket codes instead of interval labels.
        return pd.qcut(values, 5, labels=False)

    df[bucket_col] = df.groupby('yyyymm')[col].transform(_to_quintile)

    # Both cell averages are computed before either is written back to df.
    cells = df.groupby(['yyyymm', bucket_col])
    cell_mean_ret = cells['ret_t1'].transform('mean')
    cell_mean_col = cells[col].transform('mean')
    df[ret_col] = cell_mean_ret
    df[avg_col] = cell_mean_col

    # One row per month, one return column and one average column per bucket.
    df_q_ret = pd.DataFrame(columns=['1', '2', '3', '4', '5'])
    for number in range(1, 6):
        # Column names are 1-based while the qcut codes are 0-based.
        in_bucket = df[df[bucket_col] == number - 1]
        by_month = in_bucket.groupby('yyyymm')
        df_q_ret[str(number)] = by_month[ret_col].mean()
        df_q_ret[str(number) + '_beta'] = by_month[col].mean()
    df_q_ret.reset_index(inplace=True)
    return df, df_q_ret

In [4]:
# Characteristics to sort on. Each one yields a monthly quintile table, which
# is joined with the factor data on the 'yyyymm' key and collected in a list.
factors = ['beta_winsorized']
factors_quinlst = []
for factor in factors:
    # The helper also returns q2_df with the quintile columns added.
    q2_df, factor_ret = get_monthly_returns_quintile(q2_df, factor)
    # Left join: every month of the quintile table is kept.
    factor_ret = factor_ret.merge(ff4_df, how='left', on='yyyymm')
    factors_quinlst.append(factor_ret)


In [5]:
# Long-short portfolio return per month: quintile 1's return less rf, divided
# by quintile 1's average sorting value, minus the same quantity for quintile 5.
for df in factors_quinlst:
    low_leg = (df['1'] - df['rf']) / df['1_beta']
    high_leg = (df['5'] - df['rf']) / df['5_beta']
    df['port'] = low_leg - high_leg

In [6]:
# One row per month: the 'yyyymm' key plus one portfolio-return column per factor.
# The frame is built from the data directly; list.append returns None, so it
# cannot be used to compose a column list inline.
factors_q5_q1 = pd.DataFrame({'yyyymm': factors_quinlst[0]['yyyymm']})
for factor, quintile_table in zip(factors, factors_quinlst):
    # Assignment aligns on the row index shared with the first quintile table.
    factors_q5_q1[factor] = quintile_table['port']

# Attach the factor columns for the regressions in the next cell.
factors_q5_q1 = pd.merge(factors_q5_q1, ff4_df, on='yyyymm', how='left')

In [7]:
# Summary table: one row per statistic, one column per factor (transposed for display).
summary_df = pd.DataFrame(index=pd.Index(
    ['overall return', 'excess return',
     'CAPM alpha', 'CAPM t-stat', 'FF4 alpha', 'FF4 t-stat', 'Sharpe'],
    name='index'))

# The regressor matrices do not depend on the factor, so build them once.
# add_constant prepends the intercept column, so position 0 is the alpha.
mkt = sm.add_constant(factors_q5_q1['mktrf'])
ff4 = sm.add_constant(factors_q5_q1[['mktrf', 'smb', 'hml', 'umd']])

for factor in factors:
    # NOTE(review): the portfolio column is already built from quintile returns
    # net of rf; subtracting rf again here may double-count -- confirm intent.
    port_less_rf = factors_q5_q1[factor] - factors_q5_q1['rf']

    # .iloc[0] selects the intercept by position; plain [0] on a label-indexed
    # Series is deprecated in recent pandas.
    model = sm.OLS(port_less_rf, mkt).fit()
    model_alpha = model.params.iloc[0]
    model_tstat = model.tvalues.iloc[0]

    model_ff4 = sm.OLS(port_less_rf, ff4).fit()
    model_ff4_alpha = model_ff4.params.iloc[0]
    model_ff4_tstat = model_ff4.tvalues.iloc[0]

    summary_df[factor] = [factors_q5_q1[factor].mean(),
                          # NOTE(review): this row also subtracts mktrf, i.e. it is
                          # measured relative to the market -- confirm the label.
                          (factors_q5_q1[factor] - factors_q5_q1['rf'] -
                           factors_q5_q1['mktrf']).mean(),
                          model_alpha,
                          model_tstat,
                          model_ff4_alpha,
                          model_ff4_tstat,
                          # Mean over standard deviation, scaled by sqrt(12).
                          port_less_rf.mean() / port_less_rf.std() * np.sqrt(12)]

display(summary_df.T)

index,overall return,excess return,CAPM alpha,CAPM t-stat,FF4 alpha,FF4 t-stat,Sharpe
beta_winsorized,0.026965,0.019756,0.024629,3.548065,0.025634,3.664855,0.797607


In [8]:
# Append to the existing workbook; a "q2" sheet that is already there is replaced.
excel_options = dict(mode='a', engine="openpyxl", if_sheet_exists='replace')
with pd.ExcelWriter("../DA4_data.xlsx", **excel_options) as workbook:
    q2_df.to_excel(workbook, sheet_name="q2", index=False)