## CFM 301 DA4
### Q3
#### Jeongseop Yi (Patrick), j22yi

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels.api as sm

In [2]:
# Load the SAS dataset used for Q3.
q3_df = pd.read_sas('ca.sas7bdat')

# Derive calendar fields from the observation date.
q3_df['date'] = pd.to_datetime(q3_df['date'])
obs_date = q3_df['date'].dt
q3_df['year'] = obs_date.year
q3_df['month'] = obs_date.month

# 'YYYYMM' string key; the month is zero-padded so that plain string
# comparison orders the keys chronologically.
month_txt = q3_df['month'].astype(str).str.zfill(2)
q3_df['yyyymm'] = q3_df['year'].astype(str) + month_txt

# Keep only rows whose key lies in '200001'..'202111', both ends included.
in_sample = q3_df['yyyymm'].between('200001', '202111')
q3_df = q3_df[in_sample]

In [3]:
# Load the factor dataset and normalise its column names to lower case so
# they can be referenced uniformly below ('dateff', 'mktrf', 'rf', ...).
ff4_df = pd.read_sas('ff4.sas7bdat')
ff4_df.columns = [name.lower() for name in ff4_df.columns]

# Calendar fields derived from the factor date.
ff4_df['date'] = pd.to_datetime(ff4_df['dateff'])
ff_date = ff4_df['date'].dt
ff4_df['year'] = ff_date.year
ff4_df['month'] = ff_date.month

# Same zero-padded 'YYYYMM' key format as the key built for q3_df.
ff_month_txt = ff4_df['month'].astype(str).str.zfill(2)
ff4_df['yyyymm'] = ff4_df['year'].astype(str) + ff_month_txt

# Keep only rows whose key lies in '200001'..'202111', both ends included.
ff_in_sample = ff4_df['yyyymm'].between('200001', '202111')
ff4_df = ff4_df[ff_in_sample]

#### a)

In [4]:
def get_monthly_returns_quintile(df, col, n_quantiles=5, ret_col='ret_t1'):
    """Sort rows into within-month quantile buckets of ``col`` and average returns.

    Two columns are added to ``df`` in place:

    * ``<col>_q``     -- 0-based bucket label from ``pd.qcut`` applied
      separately to each ``'yyyymm'`` group.
    * ``<col>_q_ret`` -- mean of ``ret_col`` over all rows sharing the same
      month and bucket.

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose ``'yyyymm'`` (as a column or as the index name), ``col``
        and ``ret_col``.
    col : str
        Name of the sorting variable.
    n_quantiles : int, default 5
        Number of buckets per month (5 reproduces the original quintiles).
    ret_col : str, default ``'ret_t1'``
        Column whose mean is taken within each month/bucket.

    Returns
    -------
    (df, df_q_ret)
        ``df`` is the same object that was passed in, now carrying the two
        new columns.  ``df_q_ret`` has one row per month with a ``'yyyymm'``
        column followed by columns ``'1'`` .. ``str(n_quantiles)`` holding
        the bucket means (``'1'`` is the lowest bucket).
    """
    q_col = col + '_q'

    # Bucket labels are assigned month by month so each month is split
    # on its own distribution of the sorting variable.
    df[q_col] = df.groupby('yyyymm')[col].transform(
        lambda x: pd.qcut(x, n_quantiles, labels=False))
    df[col + '_q_ret'] = df.groupby(['yyyymm', q_col])[ret_col].transform('mean')

    # Single pass: mean return per (month, bucket), then buckets as columns.
    # Rows with a missing bucket label are dropped by groupby.
    df_q_ret = df.groupby(['yyyymm', q_col])[ret_col].mean().unstack(q_col)

    # Labels come back as floats when some rows had no bucket; normalise to
    # int, make sure every bucket has a column, then expose them 1-based as
    # strings ('1'..'n').
    df_q_ret.columns = df_q_ret.columns.astype(int)
    df_q_ret = df_q_ret.reindex(columns=range(n_quantiles))
    df_q_ret.columns = [str(k + 1) for k in df_q_ret.columns]

    df_q_ret = df_q_ret.reset_index()
    return df, df_q_ret

In [5]:
# Sorting variables to analyse; one table of quintile returns is built per entry.
factors = ['ivol_winsorized']
factors_quinlst = []

# 'yyyymm' serves as the index while the quintile helper runs and is turned
# back into an ordinary column afterwards.
q3_df = q3_df.set_index('yyyymm')
for factor in factors:
    q3_df, factor_ret = get_monthly_returns_quintile(q3_df, factor)
    factors_quinlst.append(factor_ret)
q3_df = q3_df.reset_index()

# Attach the factor data to every quintile-return table, matching on month.
# how='left' keeps every month of the quintile table.
factors_quinlst = [
    pd.merge(quintile_tbl, ff4_df, on='yyyymm', how='left')
    for quintile_tbl in factors_quinlst
]

In [6]:
# Row labels of the performance table (one column per quintile portfolio).
metric_names = ['overall return', 'excess return',
                'CAPM alpha', 'CAPM t-stat', 'FF4 alpha', 'FF4 t-stat', 'Sharpe']

for factor in factors_quinlst:
    # A fresh table per sorting variable, so that a second entry in
    # `factors` cannot overwrite the columns written for the first one.
    summary_df = pd.DataFrame(index=pd.Index(metric_names, name='index'))

    # The regressors do not depend on the quintile: build them once.
    mkt = sm.add_constant(factor['mktrf'])
    ff4 = sm.add_constant(factor[['mktrf', 'smb', 'hml', 'umd']])

    for i in range(1, 6):
        port_ret = factor[str(i)]
        port_excess = port_ret - factor['rf']  # return over the risk-free rate

        # One-factor regression of the portfolio's excess return on mktrf.
        # add_constant puts the intercept first, so position 0 is the alpha;
        # .iloc makes the positional access explicit (integer keys on a
        # label-indexed Series are deprecated in pandas).
        model = sm.OLS(port_excess, mkt).fit()
        model_alpha = model.params.iloc[0]
        model_tstat = model.tvalues.iloc[0]

        # Four-factor regression (mktrf, smb, hml, umd); intercept again first.
        model_ff4 = sm.OLS(port_excess, ff4).fit()
        model_ff4_alpha = model_ff4.params.iloc[0]
        model_ff4_tstat = model_ff4.tvalues.iloc[0]

        # NOTE(review): the 'excess return' row subtracts both rf and mktrf,
        # i.e. it is the return relative to the market -- confirm this is the
        # intended definition.
        summary_df[str(i)] = [port_ret.mean(),
                              (port_excess - factor['mktrf']).mean(),
                              model_alpha,
                              model_tstat.round(2),
                              model_ff4_alpha,
                              model_ff4_tstat.round(2),
                              # sqrt(12) scales the monthly ratio to annual
                              port_excess.mean() / port_excess.std() * np.sqrt(12)]

    display(summary_df.T)

index,overall return,excess return,CAPM alpha,CAPM t-stat,FF4 alpha,FF4 t-stat,Sharpe
1,0.013985,0.006776,0.012514,4.55,0.01282,4.62,0.999295
2,0.012085,0.004877,0.010495,3.19,0.010822,3.26,0.709194
3,0.0175,0.010291,0.015873,3.93,0.016148,3.99,0.867776
4,0.022115,0.014907,0.021057,4.5,0.021422,4.53,0.96237
5,0.029561,0.022352,0.027499,4.23,0.028457,4.35,0.939054


#### b)

In [7]:
def get_turnover(df, col):
    """Collect, per month, the distinct ``permno`` values in each quintile.

    ``df`` must already carry the 0-based bucket column ``<col>_q`` together
    with ``'yyyymm'`` and ``'permno'``.  The result is indexed by ``'yyyymm'``
    and has columns ``'1'`` .. ``'5'``; each cell holds the array returned by
    ``unique()`` for that month and bucket.  No turnover figure is computed
    here -- the caller derives it from these constituent lists.
    """
    bucket_col = col + '_q'
    labels = [str(k) for k in range(1, 6)]

    holdings = pd.DataFrame(columns=labels)
    for bucket, label in enumerate(labels):
        # buckets are stored 0-based, column labels are 1-based
        members = df[df[bucket_col] == bucket]
        holdings[label] = members.groupby('yyyymm')['permno'].unique()

    return holdings

In [8]:
# Monthly constituents of each ivol quintile, then month-over-month turnover.
permno_df = get_turnover(q3_df, 'ivol_winsorized')

for i in range(1, 6):
    label = str(i)
    current = permno_df[label]

    # Constituents of the same quintile one row (month) earlier.  The first
    # row has no predecessor, so shift() leaves it missing there.
    previous = current.shift(1)
    permno_df[label + '_t1'] = previous

    # Turnover = share of this month's names that were not in the quintile
    # last month.  The first month has nothing to compare against, so it is
    # NaN and drops out of later averages.  Computing the column in one go
    # avoids patching cells through `Series.values`, which is a read-only
    # array under pandas Copy-on-Write.
    permno_df[label + '_turnover'] = [
        np.nan if pos == 0 else len(set(now) - set(before)) / len(now)
        for pos, (now, before) in enumerate(zip(current, previous))
    ]

In [9]:
# Bring 'yyyymm' back as a column and take its first four characters as the year.
permno_df = permno_df.reset_index()
permno_df['year'] = permno_df['yyyymm'].str.slice(0, 4)

# Average monthly turnover of quintile 1 within each year, times 12.
annual_q1 = permno_df.groupby('year')['1_turnover'].mean().mul(12)
turnover_df = annual_q1.to_frame('turnover_q1')
display(turnover_df.round(4))

Unnamed: 0_level_0,turnover_q1
year,Unnamed: 1_level_1
2000,4.8182
2001,3.9423
2002,5.8681
2003,5.4286
2004,4.8571
2005,5.7857
2006,6.3571
2007,6.6333
2008,6.6667
2009,6.8667


In [10]:
# Write q3_df to the "q3" sheet of the workbook.  mode='a' opens the existing
# file for appending; if_sheet_exists='replace' overwrites a previous "q3" sheet.
excel_path = "../DA4_data.xlsx"
with pd.ExcelWriter(excel_path, engine="openpyxl", mode='a',
                    if_sheet_exists='replace') as workbook:
    q3_df.to_excel(workbook, sheet_name="q3", index=False)