In [2]:
import pandas as pd
import numpy as np
from linearmodels import PanelOLS,RandomEffects,FamaMacBeth
from scipy import stats
import statsmodels.api as sm

In [2]:
def x_day_return(series,x):
    """Return the percentage change of ``series`` over ``x`` periods.

    Parameters
    ----------
    series : object exposing ``pct_change`` (e.g. a pandas Series or DataFrame)
        The values to difference.
    x : int
        Horizon, forwarded to ``pct_change`` as ``periods``.

    Returns
    -------
    The result of ``series.pct_change(periods=x)``.
    """
    horizon_change = series.pct_change(periods=x)
    return horizon_change

### Import Data

In [17]:
# Relative folder that holds the input file.
data_directory = "data/"
# Name of the CSV that is read into `df`.
file = "ab_stock_cross_section.csv"

In [18]:
# Read the CSV, discard its first column, parse `date`, and index the rows by it.
df = (
    pd.read_csv(data_directory + file)
    .iloc[:, 1:]
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .set_index("date")
)

Keep only the data from 2002-01-01 onward, the date at which the regime changes.

In [22]:
# Keep the rows whose date index is on or after 2002-01-01.
df = df.loc[df.index >= "2002-01-01"]

### Daily Version Regression

In [25]:
# Wide tables: one row per date, one column per `unique_id`.
df_wide = df.pivot_table(index="date", columns="unique_id", values="a_price")
ratio_wide = df.pivot_table(index="date", columns="unique_id", values="ratio")

# Row-over-row percentage change of every price column, reshaped to long form.
# NOTE(review): how pct_change treats missing prices depends on the pandas
# version's default fill behaviour — confirm this is the intended return.
daily_return = df_wide.pct_change()
daily_return_long = daily_return.stack()

In [41]:
# Lag the ratio by one row so each date is paired with the previous row's ratio.
# The lagged table gets its own name: overwriting `ratio_wide` in place made this
# cell shift the data one more row every time it was re-run.
ratio_wide_lagged = ratio_wide.shift(1)
ratio_long = ratio_wide_lagged.stack()


In [66]:
# Place the two long series side by side on their shared index;
# `keys` supplies the column names.
merge_df = pd.concat(
    [daily_return_long, ratio_long],
    axis=1,
    keys=["ret", "ratio"],
)

In [None]:
# Put the index levels in (unique_id, date) order and drop rows where either
# column is missing. linearmodels reads the outer level as the entity and the
# inner level as time. Reordering the levels directly replaces the earlier
# round trip through a helper frame built from the index.
merge_df = merge_df.reorder_levels(["unique_id", "date"]).dropna()

In [71]:
# Column-name lists used to select the regressor and the dependent variable.
exog_var = ["ratio"]
depe_var = ["ret"]

In [84]:
# Dependent variable as a one-column frame.
depe = merge_df[depe_var]

# Regressors, with a constant column added by statsmodels.
exog = sm.add_constant(merge_df[exog_var])

In [86]:
# Panel regression of `ret` on a constant and `ratio` with entity and time effects.
mod = PanelOLS(depe,exog,entity_effects=True,time_effects=True)

In [87]:
# Fit with a clustered covariance, clustering on both entity and time.
result = mod.fit(cov_type="clustered",cluster_entity=True,cluster_time=True)

In [88]:
# Display the fitted results.
result

0,1,2,3
Dep. Variable:,ret,R-squared:,0.0004
Estimator:,PanelOLS,R-squared (Between):,-15.247
No. Observations:,348328,R-squared (Within):,0.0004
Date:,"Sun, Jan 10 2021",R-squared (Overall):,-0.0015
Time:,15:06:02,Log-likelihood,8.136e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,131.85
Entities:,89,P-value,0.0000
Avg Obs:,3913.8,Distribution:,"F(1,343873)"
Min Obs:,14.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0020,0.0003,7.8000,0.0000,0.0015,0.0025
ratio,-0.0002,3.075e-05,-6.2342,0.0000,-0.0003,-0.0001


In [89]:
# Re-fit the same model with the kernel covariance (shown as Driscoll-Kraay in
# the summary). The cluster_entity / cluster_time options configure
# cov_type="clustered" only and are not options of the kernel estimator,
# so they are not passed here.
mod.fit(cov_type="kernel")

0,1,2,3
Dep. Variable:,ret,R-squared:,0.0004
Estimator:,PanelOLS,R-squared (Between):,-15.247
No. Observations:,348328,R-squared (Within):,0.0004
Date:,"Sun, Jan 10 2021",R-squared (Overall):,-0.0015
Time:,15:06:31,Log-likelihood,8.136e+05
Cov. Estimator:,Driscoll-Kraay,,
,,F-statistic:,131.85
Entities:,89,P-value,0.0000
Avg Obs:,3913.8,Distribution:,"F(1,343873)"
Min Obs:,14.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0020,0.0002,9.3163,0.0000,0.0016,0.0025
ratio,-0.0002,2.568e-05,-7.4648,0.0000,-0.0002,-0.0001


In [92]:
# Pooled specification: neither entity nor time effects.
mod = PanelOLS(depe,exog,entity_effects=False,time_effects=False)
# Clustered covariance, clustering on time only.
mod.fit(cov_type="clustered",cluster_entity=False,cluster_time=True)

In [94]:
# Fama-MacBeth estimator on the same dependent variable and regressors (rebinds `mod`).
mod = FamaMacBeth(depe,exog)

In [95]:
# Fit with the kernel covariance, using a Bartlett kernel.
result = mod.fit(cov_type='kernel', kernel='bartlett')

In [96]:
# Display the Fama-MacBeth results.
result

0,1,2,3
Dep. Variable:,ret,R-squared:,3.976e-05
Estimator:,FamaMacBeth,R-squared (Between):,-0.2095
No. Observations:,348328,R-squared (Within):,5.616e-05
Date:,"Sun, Jan 10 2021",R-squared (Overall):,3.976e-05
Time:,15:07:28,Log-likelihood,7.231e+05
Cov. Estimator:,Fama-MacBeth Kernel Cov,,
,,F-statistic:,13.849
Entities:,89,P-value,0.0002
Avg Obs:,3913.8,Distribution:,"F(1,348326)"
Min Obs:,14.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0006,0.0003,1.8935,0.0583,-2.121e-05,0.0012
ratio,-1.706e-05,9.512e-06,-1.7930,0.0730,-3.57e-05,1.588e-06


### Weekly Version Regression

In [97]:
# Daily wide tables (date rows, `unique_id` columns) for prices and ratios.
df_wide = df.pivot_table(index="date", columns="unique_id", values="a_price")
ratio_wide = df.pivot_table(index="date", columns="unique_id", values="ratio")

In [105]:
# Ratio: take the last value in each "W" bin, then lag by one bin, long form.
ratio_wide_weekly = ratio_wide.resample("W").last().shift(1)
ratio_long_weekly = ratio_wide_weekly.stack()

# Price: last value in each "W" bin, bin-over-bin percentage change, long form.
df_wide_weekly = df_wide.resample("W").last()
weekly_ret_df = df_wide_weekly.pct_change()
weekly_ret_df_long = weekly_ret_df.stack()

In [121]:
# Place weekly returns and the lagged weekly ratio side by side on their shared
# index; `keys` supplies the column names.
merge_df = pd.concat(
    [weekly_ret_df_long, ratio_long_weekly],
    axis=1,
    keys=["ret", "ratio"],
)

# Put the index levels in (unique_id, date) order and drop rows where either
# column is missing. linearmodels reads the outer level as the entity and the
# inner level as time. Reordering the levels directly replaces the earlier
# round trip through a helper frame built from the index.
merge_df = merge_df.reorder_levels(["unique_id", "date"]).dropna()

In [123]:
# Column-name lists for the regressor and the dependent variable.
depe_var = ["ret"]
exog_var = ["ratio"]

# Dependent variable as a one-column frame.
depe = merge_df[depe_var]

# Regressors, with a constant column added by statsmodels.
exog = sm.add_constant(merge_df[exog_var])

In [125]:
# Panel regression of weekly `ret` on a constant and `ratio` with entity and time effects.
mod = PanelOLS(depe,exog,entity_effects=True,time_effects=True)

In [126]:
# Fit with a clustered covariance, clustering on both entity and time.
result = mod.fit(cov_type="clustered",cluster_entity=True,cluster_time=True)

In [127]:
# Display the fitted results.
result

0,1,2,3
Dep. Variable:,ret,R-squared:,0.0015
Estimator:,PanelOLS,R-squared (Between):,-7.2406
No. Observations:,73650,R-squared (Within):,0.0017
Date:,"Sun, Jan 10 2021",R-squared (Overall):,-0.0059
Time:,16:32:32,Log-likelihood,1.121e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,110.59
Entities:,89,P-value,0.0000
Avg Obs:,827.53,Distribution:,"F(1,72649)"
Min Obs:,14.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0092,0.0011,7.9893,0.0000,0.0069,0.0114
ratio,-0.0009,0.0001,-6.3254,0.0000,-0.0011,-0.0006


In [128]:
# Re-fit the same model with the kernel covariance (shown as Driscoll-Kraay in
# the summary). The cluster_entity / cluster_time options configure
# cov_type="clustered" only and are not options of the kernel estimator,
# so they are not passed here.
mod.fit(cov_type="kernel")

0,1,2,3
Dep. Variable:,ret,R-squared:,0.0015
Estimator:,PanelOLS,R-squared (Between):,-7.2406
No. Observations:,73650,R-squared (Within):,0.0017
Date:,"Sun, Jan 10 2021",R-squared (Overall):,-0.0059
Time:,16:33:15,Log-likelihood,1.121e+05
Cov. Estimator:,Driscoll-Kraay,,
,,F-statistic:,110.59
Entities:,89,P-value,0.0000
Avg Obs:,827.53,Distribution:,"F(1,72649)"
Min Obs:,14.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0092,0.0010,9.0659,0.0000,0.0072,0.0111
ratio,-0.0009,0.0001,-7.2249,0.0000,-0.0011,-0.0006


In [129]:
# Fama-MacBeth estimator on the weekly data (rebinds `mod`).
mod = FamaMacBeth(depe,exog)
# Fit with the kernel covariance, using a Bartlett kernel.
result = mod.fit(cov_type='kernel', kernel='bartlett')

In [131]:
# Display the Fama-MacBeth results.
result

0,1,2,3
Dep. Variable:,ret,R-squared:,0.0001
Estimator:,FamaMacBeth,R-squared (Between):,-0.0455
No. Observations:,73650,R-squared (Within):,0.0002
Date:,"Sun, Jan 10 2021",R-squared (Overall):,0.0001
Time:,16:33:56,Log-likelihood,9.369e+04
Cov. Estimator:,Fama-MacBeth Kernel Cov,,
,,F-statistic:,10.856
Entities:,89,P-value,0.0010
Avg Obs:,827.53,Distribution:,"F(1,73648)"
Min Obs:,14.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0026,0.0017,1.5292,0.1262,-0.0007,0.0059
ratio,-6.384e-05,4.256e-05,-1.4999,0.1336,-0.0001,1.958e-05


In [132]:
# Pooled specification: neither entity nor time effects.
mod = PanelOLS(depe,exog,entity_effects=False,time_effects=False)
# Clustered covariance, clustering on time only.
mod.fit(cov_type="clustered",cluster_entity=False,cluster_time=True)

0,1,2,3
Dep. Variable:,ret,R-squared:,0.0002
Estimator:,PanelOLS,R-squared (Between):,-0.1776
No. Observations:,73650,R-squared (Within):,0.0004
Date:,"Sun, Jan 10 2021",R-squared (Overall):,0.0002
Time:,16:35:21,Log-likelihood,9.369e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,16.150
Entities:,89,P-value,0.0001
Avg Obs:,827.53,Distribution:,"F(1,73648)"
Min Obs:,14.000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0030,0.0014,2.2006,0.0278,0.0003,0.0057
ratio,-0.0001,4.935e-05,-2.7787,0.0055,-0.0002,-4.04e-05


### Monthly Version Regression

In [None]:
# Daily wide tables (date rows, `unique_id` columns) for prices and ratios.
df_wide = df.pivot_table(index="date", columns="unique_id", values="a_price")
ratio_wide = df.pivot_table(index="date", columns="unique_id", values="ratio")

In [133]:
# NOTE(review): the "M" resample alias is deprecated in recent pandas releases
# in favour of "ME" — confirm against the pandas version in use.

# Ratio: take the last value in each "M" bin, then lag by one bin, long form.
ratio_wide_monthly = ratio_wide.resample("M").last().shift(1)
ratio_long_monthly = ratio_wide_monthly.stack()

# Price: last value in each "M" bin, bin-over-bin percentage change, long form.
df_wide_monthly = df_wide.resample("M").last()
monthly_ret_df = df_wide_monthly.pct_change()
monthly_ret_df_long = monthly_ret_df.stack()

In [136]:
# Place monthly returns and the lagged monthly ratio side by side on their
# shared index; `keys` supplies the column names.
merge_df = pd.concat(
    [monthly_ret_df_long, ratio_long_monthly],
    axis=1,
    keys=["ret", "ratio"],
)

# Put the index levels in (unique_id, date) order and drop rows where either
# column is missing. linearmodels reads the outer level as the entity and the
# inner level as time. Reordering the levels directly replaces the earlier
# round trip through a helper frame built from the index.
merge_df = merge_df.reorder_levels(["unique_id", "date"]).dropna()

In [138]:
# Column-name lists for the regressor and the dependent variable.
depe_var = ["ret"]
exog_var = ["ratio"]

# Dependent variable as a one-column frame.
depe = merge_df[depe_var]

# Regressors, with a constant column added by statsmodels.
exog = sm.add_constant(merge_df[exog_var])

In [139]:
# Panel regression of monthly `ret` on a constant and `ratio` with entity and time effects.
mod = PanelOLS(depe,exog,entity_effects=True,time_effects=True)

In [140]:
# Fit with a clustered covariance, clustering on both entity and time.
result = mod.fit(cov_type="clustered",cluster_entity=True,cluster_time=True)

In [141]:
# Display the fitted results.
result

0,1,2,3
Dep. Variable:,ret,R-squared:,0.0047
Estimator:,PanelOLS,R-squared (Between):,-2.9765
No. Observations:,17625,R-squared (Within):,0.0040
Date:,"Sun, Jan 10 2021",R-squared (Overall):,-0.0180
Time:,16:50:18,Log-likelihood,1.41e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,81.858
Entities:,89,P-value,0.0000
Avg Obs:,198.03,Distribution:,"F(1,17321)"
Min Obs:,4.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0354,0.0046,7.6563,0.0000,0.0264,0.0445
ratio,-0.0031,0.0006,-5.5969,0.0000,-0.0042,-0.0020


In [142]:
# Re-fit the same model with the kernel covariance (shown as Driscoll-Kraay in
# the summary). The cluster_entity / cluster_time options configure
# cov_type="clustered" only and are not options of the kernel estimator,
# so they are not passed here.
mod.fit(cov_type="kernel")

0,1,2,3
Dep. Variable:,ret,R-squared:,0.0047
Estimator:,PanelOLS,R-squared (Between):,-2.9765
No. Observations:,17625,R-squared (Within):,0.0040
Date:,"Sun, Jan 10 2021",R-squared (Overall):,-0.0180
Time:,16:50:52,Log-likelihood,1.41e+04
Cov. Estimator:,Driscoll-Kraay,,
,,F-statistic:,81.858
Entities:,89,P-value,0.0000
Avg Obs:,198.03,Distribution:,"F(1,17321)"
Min Obs:,4.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0354,0.0046,7.6297,0.0000,0.0263,0.0445
ratio,-0.0031,0.0005,-5.6731,0.0000,-0.0042,-0.0020


In [143]:
# Fama-MacBeth estimator on the monthly data (rebinds `mod`).
mod = FamaMacBeth(depe,exog)
# Fit with the kernel covariance, using a Bartlett kernel.
result = mod.fit(cov_type='kernel', kernel='bartlett')

In [144]:
# Display the Fama-MacBeth results.
result

0,1,2,3
Dep. Variable:,ret,R-squared:,0.0003
Estimator:,FamaMacBeth,R-squared (Between):,-0.0003
No. Observations:,17625,R-squared (Within):,0.0004
Date:,"Sun, Jan 10 2021",R-squared (Overall):,0.0003
Time:,16:57:42,Log-likelihood,9047.6
Cov. Estimator:,Fama-MacBeth Kernel Cov,,
,,F-statistic:,5.2483
Entities:,89,P-value,0.0220
Avg Obs:,198.03,Distribution:,"F(1,17623)"
Min Obs:,4.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0111,0.0081,1.3615,0.1734,-0.0049,0.0270
ratio,-0.0002,0.0002,-0.9838,0.3252,-0.0005,0.0002


In [145]:
# Pooled specification: neither entity nor time effects.
mod = PanelOLS(depe,exog,entity_effects=False,time_effects=False)
# Clustered covariance, clustering on time only.
mod.fit(cov_type="clustered",cluster_entity=False,cluster_time=True)

0,1,2,3
Dep. Variable:,ret,R-squared:,0.0005
Estimator:,PanelOLS,R-squared (Between):,-0.0392
No. Observations:,17625,R-squared (Within):,0.0010
Date:,"Sun, Jan 10 2021",R-squared (Overall):,0.0005
Time:,16:58:05,Log-likelihood,9049.2
Cov. Estimator:,Clustered,,
,,F-statistic:,8.4837
Entities:,89,P-value,0.0036
Avg Obs:,198.03,Distribution:,"F(1,17623)"
Min Obs:,4.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0127,0.0064,1.9824,0.0474,0.0001,0.0253
ratio,-0.0004,0.0002,-2.2146,0.0268,-0.0008,-4.977e-05


In [None]:
### Fama-MacBeth