In [1]:
import pandas as pd
import statsmodels.api as sm
from linearmodels.panel import PanelOLS

### Cusips - Day

In [5]:
# Load fractions-cusips.csv as a panel. Index order matters here: the entity
# (CusipId) has to come first and the date (TrdExctnDt) second, not the reverse.
df = pd.read_csv(
    'source/fractions-cusips.csv',
    parse_dates=['TrdExctnDt'],
    index_col=['CusipId', 'TrdExctnDt'],
)

# Dependent variable, then the three LagDay* regressors with a constant column added.
pooled_y = df['InstitutionalFraction']
pooled_x = sm.add_constant(
    df[[
        'LagDayInstitutionalFraction',
        'LagDayRetailFraction',
        'LagDayUnFraction',
    ]]
)

# NOTE: despite the `pooled_` variable names, entity_effects=True is passed,
# so the specification includes entity effects.
pooled_olsr_model = PanelOLS(
    dependent=pooled_y,
    exog=pooled_x,
    entity_effects=True,
)

# Bare last expression so the notebook renders the fit results;
# the covariance estimator is clustered by entity.
pooled_olsr_model.fit(cov_type='clustered', cluster_entity=True)

0,1,2,3
Dep. Variable:,InstitutionalFraction,R-squared:,0.0024
Estimator:,PanelOLS,R-squared (Between):,0.0052
No. Observations:,7903,R-squared (Within):,0.0024
Date:,"Mon, Sep 18 2023",R-squared (Overall):,0.0032
Time:,22:18:39,Log-likelihood,-9233.4
Cov. Estimator:,Clustered,,
,,F-statistic:,5.4195
Entities:,1241,P-value,0.0010
Avg Obs:,6.3683,Distribution:,"F(3,6659)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.0105,0.0004,-23.779,0.0000,-0.0114,-0.0096
LagDayInstitutionalFraction,0.0100,0.0131,0.7581,0.4484,-0.0158,0.0357
LagDayRetailFraction,0.0464,0.0174,2.6764,0.0075,0.0124,0.0804
LagDayUnFraction,0.0281,0.0116,2.4286,0.0152,0.0054,0.0508


### Cusips - Week

In [6]:
# Load fractions-cusips.csv as a panel. Index order matters here: the entity
# (CusipId) has to come first and the date (TrdExctnDt) second, not the reverse.
df = pd.read_csv(
    'source/fractions-cusips.csv',
    parse_dates=['TrdExctnDt'],
    index_col=['CusipId', 'TrdExctnDt'],
)

# Dependent variable, then the three LagWeek* regressors with a constant column added.
pooled_y = df['InstitutionalFraction']
pooled_x = sm.add_constant(
    df[[
        'LagWeekInstitutionalFraction',
        'LagWeekRetailFraction',
        'LagWeekUnFraction',
    ]]
)

# NOTE: despite the `pooled_` variable names, entity_effects=True is passed,
# so the specification includes entity effects.
pooled_olsr_model = PanelOLS(
    dependent=pooled_y,
    exog=pooled_x,
    entity_effects=True,
)

# Bare last expression so the notebook renders the fit results;
# the covariance estimator is clustered by entity.
pooled_olsr_model.fit(cov_type='clustered', cluster_entity=True)

0,1,2,3
Dep. Variable:,InstitutionalFraction,R-squared:,0.0012
Estimator:,PanelOLS,R-squared (Between):,0.0026
No. Observations:,7903,R-squared (Within):,0.0012
Date:,"Mon, Sep 18 2023",R-squared (Overall):,0.0017
Time:,22:19:44,Log-likelihood,-9238.1
Cov. Estimator:,Clustered,,
,,F-statistic:,2.7669
Entities:,1241,P-value,0.0403
Avg Obs:,6.3683,Distribution:,"F(3,6659)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.0101,0.0006,-15.796,0.0000,-0.0113,-0.0088
LagWeekInstitutionalFraction,-0.0014,0.0128,-0.1107,0.9118,-0.0265,0.0237
LagWeekRetailFraction,0.0368,0.0157,2.3352,0.0196,0.0059,0.0676
LagWeekUnFraction,0.0172,0.0117,1.4747,0.1403,-0.0057,0.0400


### Cusips - Month

In [7]:
# Load fractions-cusips.csv as a panel. Index order matters here: the entity
# (CusipId) has to come first and the date (TrdExctnDt) second, not the reverse.
df = pd.read_csv(
    'source/fractions-cusips.csv',
    parse_dates=['TrdExctnDt'],
    index_col=['CusipId', 'TrdExctnDt'],
)

# Dependent variable, then the three LagMonth* regressors with a constant column added.
pooled_y = df['InstitutionalFraction']
pooled_x = sm.add_constant(
    df[[
        'LagMonthInstitutionalFraction',
        'LagMonthRetailFraction',
        'LagMonthUnFraction',
    ]]
)

# NOTE: despite the `pooled_` variable names, entity_effects=True is passed,
# so the specification includes entity effects.
pooled_olsr_model = PanelOLS(
    dependent=pooled_y,
    exog=pooled_x,
    entity_effects=True,
)

# Bare last expression so the notebook renders the fit results;
# the covariance estimator is clustered by entity.
pooled_olsr_model.fit(cov_type='clustered', cluster_entity=True)

0,1,2,3
Dep. Variable:,InstitutionalFraction,R-squared:,0.0001
Estimator:,PanelOLS,R-squared (Between):,0.0006
No. Observations:,7903,R-squared (Within):,0.0001
Date:,"Mon, Sep 18 2023",R-squared (Overall):,0.0002
Time:,22:19:56,Log-likelihood,-9242.6
Cov. Estimator:,Clustered,,
,,F-statistic:,0.2630
Entities:,1241,P-value,0.8521
Avg Obs:,6.3683,Distribution:,"F(3,6659)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.0115,0.0004,-30.308,0.0000,-0.0123,-0.0108
LagMonthInstitutionalFraction,0.0104,0.0128,0.8161,0.4145,-0.0147,0.0355
LagMonthRetailFraction,0.0048,0.0150,0.3212,0.7481,-0.0245,0.0342
LagMonthUnFraction,-0.0002,0.0115,-0.0152,0.9879,-0.0227,0.0224


### Issuers - Day

In [8]:
# Load fractions-issuers.csv as a panel. Index order matters here: the entity
# (IssuerId) has to come first and the date (TrdExctnDt) second, not the reverse.
df = pd.read_csv(
    'source/fractions-issuers.csv',
    parse_dates=['TrdExctnDt'],
    index_col=['IssuerId', 'TrdExctnDt'],
)

# Dependent variable, then the three LagDay* regressors with a constant column added.
pooled_y = df['InstitutionalFraction']
pooled_x = sm.add_constant(
    df[[
        'LagDayInstitutionalFraction',
        'LagDayRetailFraction',
        'LagDayUnFraction',
    ]]
)

# NOTE: despite the `pooled_` variable names, entity_effects=True is passed,
# so the specification includes entity effects.
pooled_olsr_model = PanelOLS(
    dependent=pooled_y,
    exog=pooled_x,
    entity_effects=True,
)

# Bare last expression so the notebook renders the fit results;
# the covariance estimator is clustered by entity.
pooled_olsr_model.fit(cov_type='clustered', cluster_entity=True)

0,1,2,3
Dep. Variable:,InstitutionalFraction,R-squared:,0.0011
Estimator:,PanelOLS,R-squared (Between):,0.0061
No. Observations:,136222,R-squared (Within):,0.0011
Date:,"Mon, Sep 18 2023",R-squared (Overall):,0.0018
Time:,22:20:56,Log-likelihood,-1.562e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,50.821
Entities:,847,P-value,0.0000
Avg Obs:,160.83,Distribution:,"F(3,135372)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0149,0.0002,65.048,0.0000,0.0144,0.0153
LagDayInstitutionalFraction,0.0245,0.0034,7.1142,0.0000,0.0178,0.0313
LagDayRetailFraction,0.0225,0.0044,5.0982,0.0000,0.0138,0.0311
LagDayUnFraction,0.0129,0.0033,3.9078,0.0001,0.0064,0.0193


### Issuers - Week

In [9]:
# Load fractions-issuers.csv as a panel. Index order matters here: the entity
# (IssuerId) has to come first and the date (TrdExctnDt) second, not the reverse.
df = pd.read_csv(
    'source/fractions-issuers.csv',
    parse_dates=['TrdExctnDt'],
    index_col=['IssuerId', 'TrdExctnDt'],
)

# Dependent variable, then the three LagWeek* regressors with a constant column added.
pooled_y = df['InstitutionalFraction']
pooled_x = sm.add_constant(
    df[[
        'LagWeekInstitutionalFraction',
        'LagWeekRetailFraction',
        'LagWeekUnFraction',
    ]]
)

# NOTE: despite the `pooled_` variable names, entity_effects=True is passed,
# so the specification includes entity effects.
pooled_olsr_model = PanelOLS(
    dependent=pooled_y,
    exog=pooled_x,
    entity_effects=True,
)

# Bare last expression so the notebook renders the fit results;
# the covariance estimator is clustered by entity.
pooled_olsr_model.fit(cov_type='clustered', cluster_entity=True)

0,1,2,3
Dep. Variable:,InstitutionalFraction,R-squared:,0.0004
Estimator:,PanelOLS,R-squared (Between):,-0.0004
No. Observations:,136222,R-squared (Within):,0.0004
Date:,"Mon, Sep 18 2023",R-squared (Overall):,0.0008
Time:,22:21:12,Log-likelihood,-1.563e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,17.298
Entities:,847,P-value,0.0000
Avg Obs:,160.83,Distribution:,"F(3,135372)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0160,0.0002,93.631,0.0000,0.0157,0.0163
LagWeekInstitutionalFraction,0.0138,0.0029,4.7674,0.0000,0.0082,0.0195
LagWeekRetailFraction,0.0145,0.0039,3.6882,0.0002,0.0068,0.0221
LagWeekUnFraction,0.0048,0.0028,1.6846,0.0921,-0.0008,0.0103


### Issuers - Month

In [10]:
# Load fractions-issuers.csv as a panel. Index order matters here: the entity
# (IssuerId) has to come first and the date (TrdExctnDt) second, not the reverse.
df = pd.read_csv(
    'source/fractions-issuers.csv',
    parse_dates=['TrdExctnDt'],
    index_col=['IssuerId', 'TrdExctnDt'],
)

# Dependent variable, then the three LagMonth* regressors with a constant column added.
pooled_y = df['InstitutionalFraction']
pooled_x = sm.add_constant(
    df[[
        'LagMonthInstitutionalFraction',
        'LagMonthRetailFraction',
        'LagMonthUnFraction',
    ]]
)

# NOTE: despite the `pooled_` variable names, entity_effects=True is passed,
# so the specification includes entity effects.
pooled_olsr_model = PanelOLS(
    dependent=pooled_y,
    exog=pooled_x,
    entity_effects=True,
)

# Bare last expression so the notebook renders the fit results;
# the covariance estimator is clustered by entity.
pooled_olsr_model.fit(cov_type='clustered', cluster_entity=True)

0,1,2,3
Dep. Variable:,InstitutionalFraction,R-squared:,6.948e-05
Estimator:,PanelOLS,R-squared (Between):,0.0004
No. Observations:,136222,R-squared (Within):,6.948e-05
Date:,"Mon, Sep 18 2023",R-squared (Overall):,0.0002
Time:,22:21:22,Log-likelihood,-1.563e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,3.1353
Entities:,847,P-value,0.0244
Avg Obs:,160.83,Distribution:,"F(3,135372)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0166,0.0002,75.238,0.0000,0.0162,0.0171
LagMonthInstitutionalFraction,0.0053,0.0031,1.6880,0.0914,-0.0009,0.0114
LagMonthRetailFraction,-0.0015,0.0039,-0.3824,0.7022,-0.0092,0.0062
LagMonthUnFraction,0.0064,0.0028,2.2934,0.0218,0.0009,0.0119
