# $$Empirical \text{ } Finance$$
## Portfolio Returns Assignment: 
### _Stock Split Annoucements_

Rohitashwa Chakraborty _(rc47878)_

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import statsmodels.api as sm
from patsy import dmatrices
import yfinance as yf
from datetime import datetime

from scipy.stats.mstats import gmean

import wrds
connection = wrds.Connection()

from tqdm.notebook import tqdm
tqdm.pandas()

%matplotlib inline
plt.style.use("bmh")

Loading library list...
Done


## CRSP Dataset

- [CRSP Daily Stock - Securities (dsf)](https://wrds-www.wharton.upenn.edu/pages/get-data/center-research-security-prices-crsp/annual-update/stock-security-files/daily-stock-file/)
- [CRSP Daily Stock Events - Share Outstanding Information (dse)](https://wrds-www.wharton.upenn.edu/pages/get-data/center-research-security-prices-crsp/annual-update/stock-events/share-outstanding/)
- [CRSP Daily Stock Event -Distribution (dsedist)](https://wrds-www.wharton.upenn.edu/pages/get-data/center-research-security-prices-crsp/annual-update/stock-events/distribution/)

# Table Description

## Distributions Data

In [2]:
df_stock_splits = connection.raw_sql('''
                    SELECT *
                    FROM crsp.dsedist
                    WHERE facshr >= 1.0
                        AND facshr = facpr
                        AND dclrdt IS NOT NULL
                        AND permno IS NOT NULL
                    ORDER BY dclrdt
                ''',
                date_cols= ['dclrdt', 'exdt', 'rcrddt', 'paydt'])
df_stock_splits = df_stock_splits.drop_duplicates(subset= ['permno','dclrdt']).dropna(subset=['permno','dclrdt','facshr'])
df_stock_splits

Unnamed: 0,permno,distcd,divamt,facpr,facshr,dclrdt,exdt,rcrddt,paydt,acperm,accomp,permco,compno,issuno,hexcd,hsiccd,cusip
0,10735.0,5523.0,0.0,1.0,1.0,1962-05-15,1962-08-06,1962-07-16,1962-08-03,0.0,0.0,22220.0,0.0,0.0,1.0,2834.0,07699090
1,26294.0,5523.0,0.0,1.0,1.0,1962-05-15,1962-08-16,1962-07-31,1962-08-15,0.0,0.0,21109.0,0.0,0.0,1.0,3812.0,53802110
2,30322.0,5523.0,0.0,1.0,1.0,1962-06-11,1962-07-11,1962-06-25,1962-07-10,0.0,0.0,747.0,60000746.0,961.0,3.0,6552.0,55752420
3,27836.0,5523.0,0.0,1.0,1.0,1962-06-20,1962-09-17,1962-08-31,1962-09-14,0.0,0.0,5297.0,70005549.0,70005949.0,3.0,2335.0,09677920
4,20095.0,5523.0,0.0,1.0,1.0,1962-07-02,1962-10-02,1962-09-10,1962-10-01,0.0,0.0,22973.0,0.0,0.0,1.0,5311.0,62664310
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8556,89058.0,5523.0,0.0,1.0,1.0,2020-11-06,2020-12-07,2020-12-02,2020-12-04,0.0,0.0,37493.0,60020878.0,0.0,4.0,6726.0,46428748
8557,88228.0,5523.0,0.0,1.0,1.0,2020-11-06,2020-12-07,2020-12-02,2020-12-04,0.0,0.0,37493.0,60020878.0,0.0,4.0,6726.0,46428778
8558,88291.0,5523.0,0.0,3.0,3.0,2020-11-06,2020-12-07,2020-12-02,2020-12-04,0.0,0.0,37493.0,60020878.0,0.0,4.0,6726.0,46428758
8559,88292.0,5523.0,0.0,1.0,1.0,2020-11-06,2020-12-07,2020-12-02,2020-12-04,0.0,0.0,37493.0,60020878.0,0.0,4.0,6726.0,46428769


In [2]:
# df_stock_splits = df_stock_splits.set_index(['permno','dclrdt'])[['facshr']]
# df_stock_splits = df_stock_splits.dropna()
# df_stock_splits

## Merged Query

In [3]:
df_stock_daily = connection.raw_sql('''
                    SELECT dsf.permno, date, dclrdt, ret, shrout, facshr, prc, crsp.dsf.date - stock_split.dclrdt AS days_since_stock_split
                    FROM crsp.dsf
                    LEFT JOIN
                    (
                        SELECT permno, dclrdt, facshr
                        FROM crsp.dsedist
                        WHERE facshr >= 1.0
                            AND facpr = facshr
                            AND dclrdt IS NOT NULL
                            AND permno IS NOT NULL
                        ORDER BY dclrdt
                    ) stock_split ON stock_split.permno = crsp.dsf.permno 
                    WHERE crsp.dsf.date - stock_split.dclrdt <=  1
                        AND crsp.dsf.date - stock_split.dclrdt >=  0
                    ORDER BY date, permno
                ''',
                date_cols= ['date', 'rcrddt', 'trtsendt', 'shrenddt'])
df_stock_daily

Unnamed: 0,permno,date,dclrdt,ret,shrout,facshr,prc,days_since_stock_split
0,10735.0,1962-05-15,1962-05-15,0.047794,3212.0,1.0,71.250000,0
1,26294.0,1962-05-15,1962-05-15,0.006250,4368.0,1.0,120.750000,0
2,10735.0,1962-05-16,1962-05-15,0.001754,3212.0,1.0,71.375000,1
3,26294.0,1962-05-16,1962-05-15,-0.033126,4368.0,1.0,116.750000,1
4,27836.0,1962-06-20,1962-06-20,-0.029304,1511.0,1.0,33.125000,0
...,...,...,...,...,...,...,...,...
15532,88292.0,2020-11-06,2020-11-06,-0.003173,5450.0,1.0,157.100006,0
15533,88302.0,2020-11-06,2020-11-06,-0.000799,7650.0,1.0,175.039993,0
15534,89058.0,2020-11-06,2020-11-06,0.005881,75650.0,1.0,189.860001,0
15535,90526.0,2020-11-06,2020-11-06,-0.000194,12500.0,1.0,154.750000,0


In [4]:
df_stock_merged = df_stock_daily.set_index(['permno','dclrdt','date'])[['ret','shrout','prc','facshr']]
df_stock_merged

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ret,shrout,prc,facshr
permno,dclrdt,date,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
10735.0,1962-05-15,1962-05-15,0.047794,3212.0,71.250000,1.0
26294.0,1962-05-15,1962-05-15,0.006250,4368.0,120.750000,1.0
10735.0,1962-05-15,1962-05-16,0.001754,3212.0,71.375000,1.0
26294.0,1962-05-15,1962-05-16,-0.033126,4368.0,116.750000,1.0
27836.0,1962-06-20,1962-06-20,-0.029304,1511.0,33.125000,1.0
...,...,...,...,...,...,...
88292.0,2020-11-06,2020-11-06,-0.003173,5450.0,157.100006,1.0
88302.0,2020-11-06,2020-11-06,-0.000799,7650.0,175.039993,1.0
89058.0,2020-11-06,2020-11-06,0.005881,75650.0,189.860001,1.0
90526.0,2020-11-06,2020-11-06,-0.000194,12500.0,154.750000,1.0


In [5]:
df_split_returns = df_stock_merged['ret'].unstack().apply(lambda x: (gmean(1+x.dropna()) -1), axis = 1)#.mean()
df_split_returns.name = "avg_rets"
df_split_returns = df_split_returns.reset_index().sort_values(by = ['dclrdt','permno'])#.set_index(['dclrdt']).sort_index()
df_split_returns

  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,permno,dclrdt,avg_rets
198,10735.0,1962-05-15,0.024516
1965,26294.0,1962-05-15,-0.013635
2183,27836.0,1962-06-20,-0.009200
1307,20095.0,1962-07-02,0.058301
1210,19166.0,1962-07-12,0.004519
...,...,...,...
8000,88292.0,2020-11-06,-0.003173
8003,88302.0,2020-11-06,-0.000799
8106,89058.0,2020-11-06,0.005881
8269,90526.0,2020-11-06,-0.000194


In [6]:
# splits_date = dict(df_split_returns.set_index('dclrdt')['permno'])
# splits_date
# splits_date[pd.to_datetime('15-05-1962').date()]

## Daily Stocks Data

In [3]:
daily_stock_data = connection.raw_sql('''
                SELECT date, permno, ret
                FROM crsp.dsf
                WHERE date IS NOT NULL
                    AND permno IS NOT NULL
                    AND ret IS NOT NULL
                    AND date > '1962-11-01'
                ORDER BY date
            ''',
            date_cols= ['date'])[['permno','date','ret']]
daily_stock_data = daily_stock_data[daily_stock_data['date'].progress_apply(lambda x: x.year >= 1962)]
daily_stock_data

NameError: name 'connection' is not defined

In [4]:
daily_stock_data['year'] = daily_stock_data['date'].dt.year
daily_stock_data['month'] = daily_stock_data['date'].dt.month
daily_stock_data

NameError: name 'daily_stock_data' is not defined

In [5]:
daily_stock_data.set_index(['year','month','permno','date']).unstack()#.progress_apply(lambda rets: (gmean(1+rets) - 1))

NameError: name 'daily_stock_data' is not defined

In [1]:
daily_stock_data

NameError: name 'daily_stock_data' is not defined

In [36]:
# daily_stock_data = daily_stock_data.set_index(['date','permno']).sort_index().dropna()
# daily_stock_data

In [37]:
monthly_dates = pd.date_range(start='1962', end='2020', freq="M")

In [38]:
from dateutil.relativedelta import relativedelta

def get_stock_splits_in_past(curr_date, lookback = 6):
    threshold = curr_date + relativedelta(months= -lookback)
    return set(df_split_returns[(df_split_returns['dclrdt'] >= threshold) & (df_split_returns['dclrdt'] <= curr_date)]['permno'].unique())
    

In [39]:
df = dict()

In [40]:
for date in monthly_dates:
    permno = get_stock_splits_in_past(date, lookback = 6)
    if len(permno) == 0:
        continue
    print("Stocks that split upto 6 months before", date.date(), permno)
    mask1 = daily_stock_data['permno'].progress_apply(lambda x: (x in permno))
    mask2 = daily_stock_data['date'].progress_apply(lambda x: (x.year == date.year) & (x.month == date.month))
    temp_df = daily_stock_data[mask1 & mask2]
    # print(mask1.sum(), temp_df)
    temp_series = temp_df.groupby('permno').progress_apply(lambda row: (gmean(1+row['ret']) -1))
    # print(temp_series if temp_series.shape[0] else np.NaN)
    df[date] = temp_series if temp_series.shape[0] > 1 else np.NaN
    
df

Stocks that split upto 6 months before 1962-05-31 {26294.0, 10735.0}


  0%|          | 0/87795341 [00:00<?, ?it/s]

  0%|          | 0/87795341 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Stocks that split upto 6 months before 1962-06-30 {27836.0, 26294.0, 10735.0}


  0%|          | 0/87795341 [00:00<?, ?it/s]

  0%|          | 0/87795341 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

Stocks that split upto 6 months before 1962-07-31 {10735.0, 24213.0, 26294.0, 27836.0, 19166.0, 20095.0}


  0%|          | 0/87795341 [00:00<?, ?it/s]

  0%|          | 0/87795341 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

Stocks that split upto 6 months before 1962-08-31 {17961.0, 10735.0, 24213.0, 26294.0, 34649.0, 27836.0, 19166.0, 20095.0}


  0%|          | 0/87795341 [00:00<?, ?it/s]

  0%|          | 0/87795341 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Stocks that split upto 6 months before 1962-09-30 {17726.0, 29794.0, 17961.0, 10735.0, 24213.0, 26294.0, 34649.0, 11674.0, 27836.0, 19166.0, 20095.0}


  0%|          | 0/87795341 [00:00<?, ?it/s]

  0%|          | 0/87795341 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

Stocks that split upto 6 months before 1962-10-31 {17726.0, 29794.0, 17961.0, 24109.0, 10735.0, 24213.0, 26294.0, 30357.0, 32598.0, 34649.0, 11674.0, 27836.0, 19166.0, 20095.0}


  0%|          | 0/87795341 [00:00<?, ?it/s]

  0%|          | 0/87795341 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

Stocks that split upto 6 months before 1962-11-30 {17726.0, 29794.0, 17961.0, 12538.0, 24109.0, 24432.0, 24213.0, 30357.0, 32598.0, 34649.0, 11674.0, 27836.0, 19166.0, 20095.0}


  0%|          | 0/87795341 [00:00<?, ?it/s]

  0%|          | 0/87795341 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

Stocks that split upto 6 months before 1962-12-31 {17726.0, 29794.0, 17961.0, 12538.0, 24109.0, 35502.0, 24432.0, 24213.0, 30357.0, 32598.0, 24440.0, 34649.0, 11674.0, 27932.0, 19166.0, 20095.0}


  0%|          | 0/87795341 [00:00<?, ?it/s]

  0%|          | 0/87795341 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
df_join = pd.merge(df_stock_daily.reset_index(), 
                    df_stock_splits.reset_index(), 
                    how= 'left',
                    left_on = ['permno','date'],
                    right_on = ['permno','dclrdt'])[['permno','date','ret', 'prc','facshr','shrout']]
df_join = df_join.set_index(['permno','date'])
df_join

NameError: name 'df_stock_splits' is not defined