# $$Empirical \text{ } Finance$$
## Portfolio Returns Assignment: 
### _Stock Split Annoucements_

Rohitashwa Chakraborty _(rc47878)_

In [48]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt

import statsmodels.api as sm
from patsy import dmatrices
import yfinance as yf
from datetime import datetime

from scipy.stats.mstats import gmean

import wrds
connection = wrds.Connection()

from tqdm.notebook import tqdm
tqdm.pandas()

%matplotlib inline
plt.style.use("bmh")

Loading library list...
Done


# Fetch Dataset

## Daily Stock data
- [CRSP Daily Stock - Securities (dsf)](https://wrds-www.wharton.upenn.edu/pages/get-data/center-research-security-prices-crsp/annual-update/stock-security-files/daily-stock-file/)

In [49]:
daily_returns = pd.read_csv('CRSP_Daily.csv', dtype={'date':'str'}, low_memory = False)#, nrows = 10000)
daily_returns.date = pd.DatetimeIndex(daily_returns.date)
daily_returns = daily_returns[~daily_returns.RET.isin(['B','C'])]
daily_returns = daily_returns[daily_returns.SHRCD.isin([10,11])]
daily_returns = daily_returns[~daily_returns.SICCD.isin(set(range(6000,7000)))]
daily_returns = daily_returns[daily_returns.CFACPR>0]
daily_returns = daily_returns[daily_returns.PRC>0]
daily_returns = daily_returns[daily_returns.SHROUT>0]
daily_returns

Unnamed: 0,PERMNO,date,NAMEENDT,SHRCD,EXCHCD,SICCD,NCUSIP,TICKER,COMNAM,SHRCLS,...,VOL,RET,BID,ASK,SHROUT,CFACPR,CFACSHR,OPENPRC,NUMTRD,RETX
0,10001,2000-03-01,,11.0,3.0,4920,29274A10,EWST,ENERGY WEST INC,,...,1721.0,0.007353,8.43750,8.56250,2450.0,1.5,1.5,8.43750,2.0,0.007353
1,10001,2000-04-01,,11.0,3.0,4920,29274A10,EWST,ENERGY WEST INC,,...,1080.0,-0.014599,8.43750,8.56250,2450.0,1.5,1.5,8.43750,2.0,-0.014599
2,10001,2000-05-01,,11.0,3.0,4920,29274A10,EWST,ENERGY WEST INC,,...,1711.0,0.014815,8.43750,8.56250,2450.0,1.5,1.5,8.43750,5.0,0.014815
3,10001,2000-06-01,,11.0,3.0,4920,29274A10,EWST,ENERGY WEST INC,,...,580.0,-0.007299,8.43750,8.56250,2450.0,1.5,1.5,8.50000,2.0,-0.007299
4,10001,2000-07-01,,11.0,3.0,4920,29274A10,EWST,ENERGY WEST INC,,...,1406.0,-0.007353,8.43750,8.56250,2450.0,1.5,1.5,8.56250,4.0,-0.007353
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37919647,93436,2020-12-24,,11.0,3.0,9999,88160R10,TSLA,TESLA INC,,...,22813214.0,0.024444,661.62000,661.78003,947901.0,1.0,1.0,642.98999,465870.0,0.024444
37919648,93436,2020-12-28,,11.0,3.0,9999,88160R10,TSLA,TESLA INC,,...,32232732.0,0.002901,663.59998,663.77002,947901.0,1.0,1.0,674.51001,631681.0,0.002901
37919649,93436,2020-12-29,,11.0,3.0,9999,88160R10,TSLA,TESLA INC,,...,22867542.0,0.003465,665.82001,665.97998,947901.0,1.0,1.0,661.00000,466884.0,0.003465
37919650,93436,2020-12-30,,11.0,3.0,9999,88160R10,TSLA,TESLA INC,,...,42766263.0,0.043229,694.64001,694.78003,947901.0,1.0,1.0,672.00000,767565.0,0.043229


## Fama French Daily Data

- [kenneth french library](https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html)

In [50]:
def convert_to_date(date):
    try:
        dt = pd.to_datetime(date[:4]+"/"+date[4:6]+"/"+date[6:]).date()
    except:
        print(date)
    return pd.to_datetime(date[:4]+"/"+date[4:6]+"/"+date[6:])#.date()

In [51]:
ff_returns = pd.read_csv('F-F_Research_Data_Factors_daily.csv', dtype={'Date':'str'}).rename({'Mkt-RF': 'MRP'}, axis = 1)
ff_returns.Date = ff_returns.Date.progress_apply(lambda x: convert_to_date(x))
ff_returns  = ff_returns[ff_returns.Date>=pd.to_datetime('1980-01-01')]
ff_returns  = ff_returns[ff_returns.Date<=pd.to_datetime('2020-12-31')]
ff_returns['MRP'] = ff_returns['MRP']/100
ff_returns['SMB'] = ff_returns['SMB']/100
ff_returns['HML'] = ff_returns['HML']/100
ff_returns['RF'] = ff_returns['RF']/100
ff_returns

  0%|          | 0/25105 [00:00<?, ?it/s]

Unnamed: 0,Date,MRP,SMB,HML,RF
14556,1980-01-02,-0.0205,0.0017,0.0108,0.00036
14557,1980-01-03,-0.0073,-0.0090,0.0033,0.00036
14558,1980-01-04,0.0132,0.0060,-0.0040,0.00036
14559,1980-01-07,0.0039,0.0023,0.0004,0.00036
14560,1980-01-08,0.0192,-0.0035,-0.0069,0.00036
...,...,...,...,...,...
24891,2020-12-24,0.0021,-0.0039,-0.0018,0.00000
24892,2020-12-28,0.0046,-0.0073,0.0034,0.00000
24893,2020-12-29,-0.0040,-0.0152,0.0023,0.00000
24894,2020-12-30,0.0027,0.0099,0.0006,0.00000


## Stock Splits Data

- [CRSP Daily Stock Event -Distribution (dsedist)](https://wrds-www.wharton.upenn.edu/pages/get-data/center-research-security-prices-crsp/annual-update/stock-events/distribution/)

In [52]:
def get_final_date(row: pd.Series):
    dt = None
    row = row.dropna()
    if 'DCLRDT' in row.index:
        dt = row.DCLRDT
        
    if 'RCRDDT' in row.index:
        dt = row.RCRDDT
        # dt = row.PAYDT
        
    if 'PAYDT' in row.index:
        dt = row.PAYDT
        # dt = row.RCRDDT

    if 'EXDT' in row.index:
        dt = row.EXDT
    return dt

In [53]:
stock_splits = pd.read_csv('StockSplits.csv')
stock_splits = stock_splits[stock_splits['DISTCD']==5523]
stock_splits = stock_splits[stock_splits['FACPR']>=0]
stock_splits = stock_splits[stock_splits['FACPR']==stock_splits['FACSHR']]
stock_splits = stock_splits[stock_splits['FACSHR']>=1]
stock_splits['DATE'] = stock_splits.progress_apply(lambda x: get_final_date(x), axis = 1)


stock_splits.DATE = stock_splits.DATE.astype(int)
stock_splits.DATE = stock_splits.DATE.astype(str)
stock_splits.DATE = pd.DatetimeIndex(stock_splits.DATE)
stock_splits = stock_splits[['DATE','PERMNO', 'DISTCD', 'FACPR', 'FACSHR']]
stock_splits

  0%|          | 0/9896 [00:00<?, ?it/s]

Unnamed: 0,DATE,PERMNO,DISTCD,FACPR,FACSHR
1,1994-06-27,10001,5523.0,1.0,1.0
9,1963-09-25,10006,5523.0,1.0,1.0
10,1965-09-21,10006,5523.0,1.0,1.0
14,1995-09-01,10009,5523.0,1.0,1.0
24,1990-01-23,10016,5523.0,2.0,2.0
...,...,...,...,...,...
59804,2019-08-23,93356,5523.0,1.0,1.0
59869,2011-06-28,93423,5523.0,1.0,1.0
59870,2013-06-27,93423,5523.0,1.0,1.0
59872,2013-08-29,93425,5523.0,1.0,1.0


In [54]:
print(type(stock_splits['DATE'].iloc[0]), type(daily_returns['date'].iloc[0]))

<class 'pandas._libs.tslibs.timestamps.Timestamp'> <class 'pandas._libs.tslibs.timestamps.Timestamp'>


# Part 1 

Comparing return on day of announcement and next day vs market.

In [55]:
# Same Day
d1 = pd.merge(stock_splits, daily_returns[['PERMNO','date', 'RET' ]], left_on= ['PERMNO','DATE'],right_on=['PERMNO','date'])
d1 = d1[['PERMNO', 'DISTCD', 'FACPR', 'FACSHR', 'DATE', 'RET']]
d1

Unnamed: 0,PERMNO,DISTCD,FACPR,FACSHR,DATE,RET
0,10026,5523.0,1.0,1.0,2006-01-06,0.001546
1,10078,5523.0,1.0,1.0,2000-12-06,0.004864
2,10104,5523.0,1.0,1.0,2000-01-19,0.026966
3,10104,5523.0,1.0,1.0,2000-10-13,0.130952
4,10107,5523.0,1.0,1.0,2003-02-18,0.033540
...,...,...,...,...,...,...
1236,93312,5523.0,1.0,1.0,2016-06-27,-0.037131
1237,93356,5523.0,1.0,1.0,2019-08-23,-0.032811
1238,93423,5523.0,1.0,1.0,2011-06-28,0.039550
1239,93423,5523.0,1.0,1.0,2013-06-27,-0.010321


In [56]:
# Next Day
d2 = stock_splits.copy()
d2['DATE'] = d2['DATE']+pd.DateOffset(1)
d2 = pd.merge(d2, daily_returns[['PERMNO','date', 'RET' ]], left_on= ['PERMNO','DATE'],right_on=['PERMNO','date'])
d2 = d2[['PERMNO', 'DISTCD', 'FACPR', 'FACSHR', 'DATE', 'RET']]
d2

Unnamed: 0,PERMNO,DISTCD,FACPR,FACSHR,DATE,RET
0,10032,5523.0,1.0,1.0,2000-09-02,0.004191
1,10078,5523.0,1.0,1.0,2000-12-07,0.035343
2,10104,5523.0,1.0,1.0,2000-01-20,0.037199
3,10107,5523.0,1.0,1.0,2003-02-19,-0.014022
4,10108,5523.0,1.0,1.0,2001-06-20,0.024435
...,...,...,...,...,...,...
1119,93038,5523.0,1.0,1.0,2010-02-09,-0.026152
1120,93312,5523.0,1.0,1.0,2016-06-28,0.031002
1121,93423,5523.0,1.0,1.0,2011-06-29,-0.005284
1122,93423,5523.0,1.0,1.0,2013-06-28,-0.009019


In [75]:
final = pd.concat([d1,d2])
final.dropna(inplace=True)
final = pd.merge(final, ff_returns[['Date','MRP']], left_on = ['DATE'],right_on = ['Date'])
final['RET'] = final['RET'].astype(float)
final['MRP'] = final['MRP'].astype(float)
final['excess_return'] = final['RET'].astype(float) - final['MRP'].astype(float)
final

Unnamed: 0,PERMNO,DISTCD,FACPR,FACSHR,DATE,RET,Date,MRP,excess_return
0,10026,5523.0,1.0,1.0,2006-01-06,0.001546,2006-01-06,0.0092,-0.007654
1,10078,5523.0,1.0,1.0,2000-12-06,0.004864,2000-12-06,-0.0178,0.022664
2,83386,5523.0,1.0,1.0,2000-12-06,-0.064169,2000-12-06,-0.0178,-0.046369
3,10104,5523.0,1.0,1.0,2000-01-19,0.026966,2000-01-19,0.0044,0.022566
4,46886,5523.0,1.0,1.0,2000-01-19,-0.015311,2000-01-19,0.0044,-0.019711
...,...,...,...,...,...,...,...,...,...
2277,92854,5523.0,1.0,1.0,2011-08-30,0.046348,2011-08-30,0.0032,0.043148
2278,92874,5523.0,1.0,1.0,2004-11-03,0.001937,2004-11-03,0.0117,-0.009763
2279,92942,5523.0,1.0,1.0,2009-11-18,0.060000,2009-11-18,-0.0013,0.061300
2280,93038,5523.0,1.0,1.0,2010-02-09,-0.026152,2010-02-09,0.0133,-0.039452


In [76]:
# Avg.return
print('Average daily return on stock split is ', round(final.RET.astype(float).mean() * 100, 2 ))
print('Average excess daily return over market on stock split is ', round(final.excess_return.mean()*100,2))
# final.plot(x='FACPR', y='RET', label='Returns vs FACPR', kind = 'scatter')
# final.plot(x='FACSHR', y='RET', label='Returns vs FACSHR', kind = 'scatter')

Average daily return on stock split is  0.36
Average excess daily return over market on stock split is  0.35


# Part 2

Building Portfolio

In [77]:

dates = pd.to_datetime(ff_returns[ff_returns.Date>'1980-01-01'].Date.values)
dates_minus_6months = dates + pd.DateOffset(-180)

stocks_to_consider={}

for date in range(len(dates)):
    temp = stock_splits[['PERMNO','DATE']][stock_splits.DATE <= dates[date]]
    temp = temp[temp.DATE >= dates_minus_6months[date]]
    stocks_to_consider[dates[date]] = temp.PERMNO.values

stock_split_returns = daily_returns[daily_returns.PERMNO.isin(stock_splits.PERMNO)]
stock_split_returns = stock_split_returns[stock_split_returns.date>='1980-01-01']
stock_split_returns

Unnamed: 0,PERMNO,date,NAMEENDT,SHRCD,EXCHCD,SICCD,NCUSIP,TICKER,COMNAM,SHRCLS,...,VOL,RET,BID,ASK,SHROUT,CFACPR,CFACSHR,OPENPRC,NUMTRD,RETX
0,10001,2000-03-01,,11.0,3.0,4920,29274A10,EWST,ENERGY WEST INC,,...,1721.0,0.007353,8.43750,8.56250,2450.0,1.5,1.5,8.43750,2.0,0.007353
1,10001,2000-04-01,,11.0,3.0,4920,29274A10,EWST,ENERGY WEST INC,,...,1080.0,-0.014599,8.43750,8.56250,2450.0,1.5,1.5,8.43750,2.0,-0.014599
2,10001,2000-05-01,,11.0,3.0,4920,29274A10,EWST,ENERGY WEST INC,,...,1711.0,0.014815,8.43750,8.56250,2450.0,1.5,1.5,8.43750,5.0,0.014815
3,10001,2000-06-01,,11.0,3.0,4920,29274A10,EWST,ENERGY WEST INC,,...,580.0,-0.007299,8.43750,8.56250,2450.0,1.5,1.5,8.50000,2.0,-0.007299
4,10001,2000-07-01,,11.0,3.0,4920,29274A10,EWST,ENERGY WEST INC,,...,1406.0,-0.007353,8.43750,8.56250,2450.0,1.5,1.5,8.56250,4.0,-0.007353
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37919647,93436,2020-12-24,,11.0,3.0,9999,88160R10,TSLA,TESLA INC,,...,22813214.0,0.024444,661.62000,661.78003,947901.0,1.0,1.0,642.98999,465870.0,0.024444
37919648,93436,2020-12-28,,11.0,3.0,9999,88160R10,TSLA,TESLA INC,,...,32232732.0,0.002901,663.59998,663.77002,947901.0,1.0,1.0,674.51001,631681.0,0.002901
37919649,93436,2020-12-29,,11.0,3.0,9999,88160R10,TSLA,TESLA INC,,...,22867542.0,0.003465,665.82001,665.97998,947901.0,1.0,1.0,661.00000,466884.0,0.003465
37919650,93436,2020-12-30,,11.0,3.0,9999,88160R10,TSLA,TESLA INC,,...,42766263.0,0.043229,694.64001,694.78003,947901.0,1.0,1.0,672.00000,767565.0,0.043229


In [78]:
stock_split_returns_v2 = stock_split_returns[['PERMNO', 'RET', 'date']].copy()
stock_split_returns_v2.RET = stock_split_returns_v2.RET.astype(float)
stock_split_returns_v2 = pd.pivot_table(data = stock_split_returns_v2, columns = 'PERMNO', values='RET', index = 'date').fillna(0.0)
stock_split_returns_v2

PERMNO,10001,10009,10016,10026,10032,10035,10056,10078,10085,10092,...,92942,92982,93038,93101,93105,93132,93312,93356,93423,93436
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-02,0.030769,0.000000,0.042279,0.000000,0.003360,-0.058333,0.000000,0.027844,0.006329,0.043919,...,0.0,0.0,0.0,0.000000,0.006289,0.000000,0.000000,0.000000,0.000000,0.000000
2000-01-03,0.000000,0.011086,-0.051948,-0.037288,-0.012175,-0.010870,0.012195,0.028215,-0.007143,0.013333,...,0.0,0.0,0.0,0.000000,-0.014706,0.000000,0.000000,0.000000,0.000000,0.000000
2000-01-05,0.019305,0.004869,0.006410,-0.007843,-0.038336,-0.021739,-0.006211,0.000680,0.200000,-0.018970,...,0.0,0.0,0.0,0.000000,0.061644,0.000000,0.000000,0.000000,0.000000,0.000000
2000-01-06,0.000000,0.000000,-0.015385,-0.029412,0.073353,0.214286,0.000000,0.079119,0.000000,0.033784,...,0.0,0.0,0.0,0.000000,0.020548,0.000000,0.000000,0.000000,0.000000,0.000000
2000-01-08,0.031746,-0.005703,-0.042208,-0.025210,-0.101939,0.000000,0.000000,-0.020154,0.000000,-0.003289,...,0.0,0.0,0.0,0.000000,-0.025641,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-24,0.000000,0.000000,0.000000,0.001727,0.002702,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,-0.012661,0.000000,-0.002995,0.002938,-0.003163,-0.011457,0.024444
2020-12-28,0.000000,0.000000,0.000000,-0.002618,0.006928,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,-0.004003,0.000000,-0.003338,0.009625,-0.018129,0.001486,0.002901
2020-12-29,0.000000,0.000000,0.000000,-0.013637,-0.015800,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,-0.002449,0.000000,-0.011789,-0.006770,-0.000462,-0.025816,0.003465
2020-12-30,0.000000,0.000000,0.000000,0.000584,0.005308,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,0.0,0.010451,0.000000,0.003321,0.009320,0.006742,0.044776,0.043229


## Equal Weighted

In [79]:
portfolio_return = pd.DataFrame(columns = ['eq_wght_return'], index = stock_split_returns_v2.index).fillna(0.0)

for i in portfolio_return.index:
    portfolio_return.loc[i,'eq_wght_return'] = sum(stock_split_returns_v2.loc[i])/len(stock_split_returns_v2.loc[i]>0)

portfolio_return.reset_index()

ff_market_reg = pd.merge(left = ff_returns, right = portfolio_return.reset_index(), left_on = ['Date'], right_on = ['date'] ).fillna(1.0)[['Date','MRP','eq_wght_return','RF','SMB','HML']]
ff_market_reg['eq_wght_return_premium'] = ff_market_reg['eq_wght_return'] - ff_market_reg['RF']
ff_market_reg.head()

Unnamed: 0,Date,MRP,eq_wght_return,RF,SMB,HML,eq_wght_return_premium
0,2000-01-03,-0.0071,0.01014,0.00021,0.0062,-0.0142,0.00993
1,2000-01-05,-0.0009,0.014646,0.00021,0.002,0.0016,0.014436
2,2000-01-06,-0.0073,0.018967,0.00021,-0.0044,0.0126,0.018757
3,2000-01-11,-0.0171,7e-05,0.00021,0.0025,0.0081,-0.00014
4,2000-01-12,-0.0069,0.016247,0.00021,-0.0049,0.0078,0.016037


### CAPM

In [80]:
y, X = dmatrices('eq_wght_return_premium ~ MRP', data=ff_market_reg, return_type='dataframe')
capm_reg = sm.OLS(y, X).fit()
capm_reg.summary()

0,1,2,3
Dep. Variable:,eq_wght_return_premium,R-squared:,0.436
Model:,OLS,Adj. R-squared:,0.436
Method:,Least Squares,F-statistic:,3612.0
Date:,"Thu, 02 Dec 2021",Prob (F-statistic):,0.0
Time:,16:18:01,Log-Likelihood:,17342.0
No. Observations:,4674,AIC:,-34680.0
Df Residuals:,4672,BIC:,-34670.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0002,8.66e-05,2.179,0.029,1.9e-05,0.000
MRP,0.4108,0.007,60.098,0.000,0.397,0.424

0,1,2,3
Omnibus:,761.591,Durbin-Watson:,1.917
Prob(Omnibus):,0.0,Jarque-Bera (JB):,14869.302
Skew:,0.026,Prob(JB):,0.0
Kurtosis:,11.738,Cond. No.,78.9


### Fama-French

In [81]:
y, X = dmatrices('eq_wght_return_premium ~ MRP + SMB + HML', data=ff_market_reg, return_type='dataframe')
ff_reg = sm.OLS(y, X).fit()
ff_reg.summary()

0,1,2,3
Dep. Variable:,eq_wght_return_premium,R-squared:,0.466
Model:,OLS,Adj. R-squared:,0.466
Method:,Least Squares,F-statistic:,1359.0
Date:,"Thu, 02 Dec 2021",Prob (F-statistic):,0.0
Time:,16:18:01,Log-Likelihood:,17470.0
No. Observations:,4674,AIC:,-34930.0
Df Residuals:,4670,BIC:,-34910.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0002,8.43e-05,1.942,0.052,-1.6e-06,0.000
MRP,0.3952,0.007,58.697,0.000,0.382,0.408
SMB,0.2092,0.014,15.247,0.000,0.182,0.236
HML,0.0849,0.012,7.375,0.000,0.062,0.107

0,1,2,3
Omnibus:,826.695,Durbin-Watson:,1.892
Prob(Omnibus):,0.0,Jarque-Bera (JB):,19362.499
Skew:,0.078,Prob(JB):,0.0
Kurtosis:,12.97,Cond. No.,166.0


In [82]:
final_output = pd.DataFrame()
final_output.loc['Capm Alpha','Equal Weighted'] =  capm_reg.params[0]
final_output.loc['Capm Alpha t-stat','Equal Weighted'] = capm_reg.tvalues.Intercept
final_output.loc['Sharpe Ratio','Equal Weighted'] =  y.mean()[0]/y.std()[0] * math.sqrt(252)
final_output.loc['3 Factor FF Alpha','Equal Weighted'] =  ff_reg.params[0]
final_output.loc['3 Factor Alpha t-stat','Equal Weighted'] = ff_reg.tvalues.Intercept
final_output

Unnamed: 0,Equal Weighted
Capm Alpha,0.000189
Capm Alpha t-stat,2.179356
Sharpe Ratio,0.625091
3 Factor FF Alpha,0.000164
3 Factor Alpha t-stat,1.941503


## Value Weighted

In [83]:
finding_mcap = daily_returns[['PERMNO', 'date', 'PRC', 'RET', 'SHROUT', 'CFACPR', 'CFACSHR']]
finding_mcap.sort_values(['PERMNO','date'],inplace=True)
finding_mcap['date'] = finding_mcap.groupby(['PERMNO'])['date'].shift(-1)
finding_mcap.dropna(inplace = True)
finding_mcap['mcap'] = finding_mcap['PRC']* finding_mcap['SHROUT']
finding_mcap= finding_mcap[['PERMNO', 'date', 'mcap']]
finding_mcap = finding_mcap[finding_mcap.PERMNO.isin(stock_splits.PERMNO)]
finding_mcap = pd.pivot_table(finding_mcap, values = 'mcap', columns = 'PERMNO', index = 'date').fillna(0.0)
finding_mcap = finding_mcap.mask(finding_mcap < 0, 0)
finding_mcap_v2 = finding_mcap.mask(stock_split_returns_v2 == 0, 0)
finding_mcap_v2.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  finding_mcap['date'] = finding_mcap.groupby(['PERMNO'])['date'].shift(-1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  finding_mcap['mcap'] = finding_mcap['PRC']* finding_mcap['SHROUT']


PERMNO,10001,10009,10016,10026,10032,10035,10056,10078,10085,10092,...,92942,92982,93038,93101,93105,93132,93312,93356,93423,93436
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-03,0.0,33739.875,316616.34375,172450.125,822922.9,138806.375,47190.0,126059300.0,461139.75,286235.44991,...,0.0,0.0,0.0,0.0,420000.0,0.0,0.0,0.0,0.0,0.0
2000-01-05,20212.5,32874.75,326109.25,160051.75,983869.7,111782.125,37661.25,152890800.0,403134.75,337924.5,...,0.0,0.0,0.0,0.0,351750.0,0.0,0.0,0.0,0.0,0.0
2000-01-06,0.0,0.0,332584.875,139308.125,1312374.0,57819.375,0.0,146153600.0,0.0,268287.25,...,0.0,0.0,0.0,0.0,402641.5625,0.0,0.0,0.0,0.0,0.0
2000-01-08,20328.0,74400.75,305046.0,127194.375,1636463.0,0.0,0.0,131359500.0,0.0,226918.125,...,0.0,0.0,0.0,0.0,387055.4375,0.0,0.0,0.0,0.0,0.0
2000-01-09,20109.375,0.0,312460.3125,124149.0,1859084.0,70998.125,35629.0,164990100.0,371416.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [84]:
value_weights_daywise = finding_mcap_v2.copy()
value_weights_daywise['Total_Mcap'] = finding_mcap_v2.sum(axis=1)

for i in finding_mcap_v2.columns.values:
    value_weights_daywise[i] = value_weights_daywise[i]/value_weights_daywise['Total_Mcap']

value_weights_daywise = value_weights_daywise[finding_mcap.columns.values]

In [85]:
vw_returns = value_weights_daywise*stock_split_returns_v2
portfolio_return['value_wt_return'] = vw_returns.sum(axis=1)


ff_market_reg = pd.merge(left = ff_returns, right = portfolio_return.reset_index(), left_on = ['Date'], right_on = ['date'] ).fillna(1.0)[['Date','MRP','value_wt_return','RF','SMB','HML']]
ff_market_reg['value_wt_return_premium'] = ff_market_reg['value_wt_return'] - ff_market_reg['RF']
ff_market_reg.head()

Unnamed: 0,Date,MRP,value_wt_return,RF,SMB,HML,value_wt_return_premium
0,2000-01-03,-0.0071,0.012363,0.00021,0.0062,-0.0142,0.012153
1,2000-01-05,-0.0009,0.01589,0.00021,0.002,0.0016,0.01568
2,2000-01-06,-0.0073,0.027671,0.00021,-0.0044,0.0126,0.027461
3,2000-01-11,-0.0171,-0.005054,0.00021,0.0025,0.0081,-0.005264
4,2000-01-12,-0.0069,0.00713,0.00021,-0.0049,0.0078,0.00692


### CAPM

In [86]:
y, X = dmatrices('value_wt_return ~ MRP', data=ff_market_reg, return_type='dataframe')
capm_reg = sm.OLS(y, X).fit()
capm_reg.summary()

0,1,2,3
Dep. Variable:,value_wt_return,R-squared:,0.49
Model:,OLS,Adj. R-squared:,0.49
Method:,Least Squares,F-statistic:,4487.0
Date:,"Thu, 02 Dec 2021",Prob (F-statistic):,0.0
Time:,16:18:19,Log-Likelihood:,15367.0
No. Observations:,4674,AIC:,-30730.0
Df Residuals:,4672,BIC:,-30720.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0002,0.000,1.477,0.140,-6.39e-05,0.000
MRP,0.6987,0.010,66.987,0.000,0.678,0.719

0,1,2,3
Omnibus:,1054.292,Durbin-Watson:,1.973
Prob(Omnibus):,0.0,Jarque-Bera (JB):,44607.589
Skew:,-0.215,Prob(JB):,0.0
Kurtosis:,18.128,Cond. No.,78.9


### Fama-French

In [87]:
y, X = dmatrices('value_wt_return ~ MRP + SMB + HML', data=ff_market_reg, return_type='dataframe')
ff_reg = sm.OLS(y, X).fit()
ff_reg.summary()

0,1,2,3
Dep. Variable:,value_wt_return,R-squared:,0.491
Model:,OLS,Adj. R-squared:,0.491
Method:,Least Squares,F-statistic:,1502.0
Date:,"Thu, 02 Dec 2021",Prob (F-statistic):,0.0
Time:,16:18:19,Log-Likelihood:,15373.0
No. Observations:,4674,AIC:,-30740.0
Df Residuals:,4670,BIC:,-30710.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0002,0.000,1.526,0.127,-5.74e-05,0.000
MRP,0.7013,0.011,66.483,0.000,0.681,0.722
SMB,-0.0668,0.021,-3.109,0.002,-0.109,-0.025
HML,0.0110,0.018,0.610,0.542,-0.024,0.046

0,1,2,3
Omnibus:,1056.43,Durbin-Watson:,1.976
Prob(Omnibus):,0.0,Jarque-Bera (JB):,44724.621
Skew:,-0.219,Prob(JB):,0.0
Kurtosis:,18.148,Cond. No.,166.0


In [88]:
final_output.loc['Capm Alpha','Value Weighted'] =  capm_reg.params[0]
final_output.loc['Capm Alpha t-stat','Value Weighted'] = capm_reg.tvalues.Intercept
final_output.loc['Sharpe Ratio','Value Weighted'] =  y.mean()[0]/y.std()[0] * math.sqrt(252)
final_output.loc['3 Factor FF Alpha','Value Weighted'] =  ff_reg.params[0]
final_output.loc['3 Factor Alpha t-stat','Value Weighted'] = ff_reg.tvalues.Intercept
final_output

Unnamed: 0,Equal Weighted,Value Weighted
Capm Alpha,0.000189,0.000195
Capm Alpha t-stat,2.179356,1.476785
Sharpe Ratio,0.625091,0.50461
3 Factor FF Alpha,0.000164,0.000202
3 Factor Alpha t-stat,1.941503,1.525669


# Part 3

Market Returns Removing Top 500 stocks by Market Cap (Daily Basis)

In [89]:
daily_returns_exc_500 =  daily_returns.copy()
daily_returns_exc_500.RET = daily_returns_exc_500.RET.astype(float)
daily_returns_exc_500['mcap'] = daily_returns_exc_500.SHROUT * daily_returns_exc_500.PRC
daily_returns_exc_500['rank'] = daily_returns_exc_500.groupby('date')['mcap'].rank('dense', ascending = False)
daily_returns_exc_500 = daily_returns_exc_500[daily_returns_exc_500['rank'] > 500]

## Equally Weighted Market returns

In [90]:
ff_returns['Mkt_exc500_ew'] = daily_returns_exc_500.groupby('date')['RET'].mean()
ff_returns['Mkt_exc500_ew_RF'] = ff_returns['Mkt_exc500_ew'] - ff_returns['RF']

ff_market_reg = pd.merge(left = ff_returns, right = portfolio_return.reset_index(), left_on = ['Date'], right_on = ['date'] ).fillna(1.0)[['Date','Mkt_exc500_ew_RF','eq_wght_return','RF','SMB','HML']]
ff_market_reg['Equal_weight_returns_minus_RF'] = ff_market_reg['eq_wght_return'] - ff_market_reg['RF']

y, X = dmatrices('Equal_weight_returns_minus_RF ~ Mkt_exc500_ew_RF', data=ff_market_reg, return_type='dataframe')
capm_reg = sm.OLS(y, X).fit()

y, X = dmatrices('Equal_weight_returns_minus_RF ~ Mkt_exc500_ew_RF + SMB + HML', data=ff_market_reg, return_type='dataframe')
ff_reg = sm.OLS(y, X).fit()

final_output.loc['Capm Alpha','Mid-Cap Equal Weighted'] =  capm_reg.params[0]
final_output.loc['Capm Alpha t-stat','Mid-Cap Equal Weighted'] = capm_reg.tvalues.Intercept
final_output.loc['Sharpe Ratio','Mid-Cap Equal Weighted'] =  y.mean()[0]/y.std()[0] * math.sqrt(252)
final_output.loc['3 Factor FF Alpha','Mid-Cap Equal Weighted'] =  ff_reg.params[0] 
final_output.loc['3 Factor Alpha t-stat','Mid-Cap Equal Weighted'] = ff_reg.tvalues.Intercept

## Value Weight Market Return (Change in total Market Cap)

In [91]:
daily_total_mcap = daily_returns_exc_500.groupby('date')['mcap'].sum()
ff_returns['Mkt_exc500_vw'] = daily_total_mcap.pct_change().fillna(1.0)
ff_returns['Mkt_exc500_vw_RF'] = ff_returns['Mkt_exc500_vw'] - ff_returns['RF']

ff_market_reg = pd.merge(left = ff_returns, right = portfolio_return.reset_index(), left_on = ['Date'], right_on = ['date'] ).fillna(1.0)[['Date','Mkt_exc500_vw_RF','value_wt_return','RF','SMB','HML']]
ff_market_reg['Value_weight_returns_minus_RF'] = ff_market_reg['value_wt_return'] - ff_market_reg['RF']

y, X = dmatrices('Value_weight_returns_minus_RF ~ Mkt_exc500_vw_RF', data=ff_market_reg, return_type='dataframe')
capm_reg = sm.OLS(y, X).fit()

y, X = dmatrices('Value_weight_returns_minus_RF ~ Mkt_exc500_vw_RF + SMB + HML', data=ff_market_reg, return_type='dataframe')
ff_reg = sm.OLS(y, X).fit()

final_output.loc['Capm Alpha','Mid-Cap Value Weighted'] =  capm_reg.params[0]
final_output.loc['Capm Alpha t-stat','Mid-Cap Value Weighted'] = capm_reg.tvalues.Intercept
final_output.loc['Sharpe Ratio','Mid-Cap Value Weighted'] =  y.mean()[0]/y.std()[0] * math.sqrt(252)
final_output.loc['3 Factor FF Alpha','Mid-Cap Value Weighted'] =  ff_reg.params[0] 
final_output.loc['3 Factor Alpha t-stat','Mid-Cap Value Weighted'] = ff_reg.tvalues.Intercept

In [92]:
final_output

Unnamed: 0,Equal Weighted,Value Weighted,Mid-Cap Equal Weighted,Mid-Cap Value Weighted
Capm Alpha,0.000189,0.000195,0.000155,0.00017
Capm Alpha t-stat,2.179356,1.476785,2.692076,1.840561
Sharpe Ratio,0.625091,0.50461,0.625091,0.427372
3 Factor FF Alpha,0.000164,0.000202,0.000133,0.00016
3 Factor Alpha t-stat,1.941503,1.525669,2.385351,1.735865


***
# END OF CODE
***