### Import Library

In [1]:
import pandas as pd
import numpy as np
from linearmodels import PanelOLS,FamaMacBeth

In [2]:
import statsmodels.api as sm

### Import Data

In [3]:
# Folder and file name of the input CSV; the two strings are concatenated
# when the file is read.
data_directory, file = "data/", "yahoo_data_filter_0_1.csv"

In [4]:
# Read the CSV, convert the "date" column to datetimes and use it as the
# row index.
df = (
    pd.read_csv(data_directory + file)
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .set_index("date")
)

### Calculate the momentum signal

In [5]:
# Alias, not a copy: universe_df and df refer to the same DataFrame object.
universe_df = df

In [6]:
# Signal parameters: the look-back length handed to pct_change(periods=...)
# and the number of rows the signal is shifted by afterwards.
momentum_period, waiting_time = 5, 1

In [7]:
# Percentage change over `momentum_period` rows, lagged by `waiting_time`
# rows; every row that still contains a NaN is dropped.
complete_ret_sub_df = (
    universe_df
    .pct_change(periods=momentum_period)
    .shift(waiting_time)
    .dropna()
)

In [8]:
# Wide -> long: move the column labels into an inner index level so there is
# one signal value per (date, column) pair.
complete_ret_sub_df_long = complete_ret_sub_df.stack()

### Create the future return

In [9]:
# Horizon, in rows, of the return that is used as the regression target.
future_return_days = 1

In [10]:
# Percentage change over `future_return_days` rows, moved up by
# (future_return_days - 1) rows. With a horizon of 1 the shift is 0, so each
# row holds the change from the previous row to that row.
future_return_df = (
    universe_df
    .pct_change(future_return_days)
    .shift(1 - future_return_days)
)

In [11]:
# Wide -> long: one target value per (date, column) pair.
future_return_df_long = future_return_df.stack()

### Merge Return with Signal

In [12]:
# Wrap both long-format objects in DataFrames so that their single column
# can be given a name.
future_return_df_long, complete_ret_sub_df_long = (
    pd.DataFrame(future_return_df_long),
    pd.DataFrame(complete_ret_sub_df_long),
)

In [13]:
# Name the single column of each long frame: the dependent variable is
# "target", the regressor is "signal".
for _frame, _name in (
    (future_return_df_long, "target"),
    (complete_ret_sub_df_long, "signal"),
):
    _frame.columns = [_name]

In [14]:
# Index-on-index merge (pandas' default "inner" join): only index entries
# present in both frames are kept.
merge_df = pd.merge(
    future_return_df_long,
    complete_ret_sub_df_long,
    left_index=True,
    right_index=True,
)

In [15]:
# Turn the two index levels into ordinary "date" and "asset" columns placed
# in front of the data columns, on a fresh 0..n-1 row index.
index_df = merge_df.rename_axis(index=["date", "asset"]).reset_index()

In [16]:
# Rebind merge_df to the flattened frame (same object as index_df, no copy).
merge_df = index_df

In [17]:
# Index rows by (asset, date): the linearmodels panel estimators read the
# outer index level as the entity and the inner level as time.
merge_df = merge_df.set_index(["asset","date"])

### Panel Regression

In [18]:
# Dependent variable and regressors; statsmodels adds the intercept column
# to the regressor frame.
depe_var = ["target"]
exog_vars = ["signal"]
depe = merge_df[depe_var]
exog = sm.add_constant(merge_df[exog_vars])

In [19]:
# Panel regression with both entity and time effects switched on.
mod = PanelOLS(depe,exog,entity_effects=True,time_effects=True)

In [20]:
# Fit with a covariance clustered by entity and by time. The result is not
# assigned to a name, so it is only shown as the cell output.
mod.fit(cov_type="clustered",cluster_entity=True,cluster_time=True)

  return Series(np.sqrt(np.diag(self.cov)), self._var_names, name="std_error")


0,1,2,3
Dep. Variable:,target,R-squared:,0.0007
Estimator:,PanelOLS,R-squared (Between):,-0.0523
No. Observations:,103645,R-squared (Within):,0.0004
Date:,"Thu, Jan 14 2021",R-squared (Overall):,0.0004
Time:,21:00:33,Log-likelihood,2.83e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,73.082
Entities:,95,P-value,0.0000
Avg Obs:,1091.0,Distribution:,"F(1,102459)"
Min Obs:,1091.0,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0009,,,,,
signal,-0.0194,0.0045,-4.2820,0.0000,-0.0283,-0.0105


In [21]:
# Same two-way effects specification, rebuilt before fitting it with a
# different covariance estimator.
mod = PanelOLS(depe,exog,entity_effects=True,time_effects=True)

In [22]:
# Kernel (Driscoll-Kraay HAC) covariance with a Parzen kernel. The
# cluster_entity / cluster_time switches are options of cov_type="clustered"
# only; the kernel estimator takes `kernel` and `bandwidth`, so the cluster
# flags are not passed here.
mod.fit(cov_type="kernel", kernel="parzen")

0,1,2,3
Dep. Variable:,target,R-squared:,0.0007
Estimator:,PanelOLS,R-squared (Between):,-0.0523
No. Observations:,103645,R-squared (Within):,0.0004
Date:,"Thu, Jan 14 2021",R-squared (Overall):,0.0004
Time:,21:00:37,Log-likelihood,2.83e+04
Cov. Estimator:,Driscoll-Kraay,,
,,F-statistic:,73.082
Entities:,95,P-value,0.0000
Avg Obs:,1091.0,Distribution:,"F(1,102459)"
Min Obs:,1091.0,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0009,6.705e-07,1348.9,0.0000,0.0009,0.0009
signal,-0.0194,0.0044,-4.4256,0.0000,-0.0280,-0.0108


### Fama-MacBeth Regression

In [27]:
# Dependent variable and regressors for the Fama-MacBeth fit; statsmodels
# adds the intercept column to the regressor frame.
depe_var = ["target"]
exog_vars = ["signal"]
depe = merge_df[depe_var]
exog = sm.add_constant(merge_df[exog_vars])

In [28]:
# Fama-MacBeth estimator built from the dependent variable and regressors.
mod = FamaMacBeth(depe,exog)

In [34]:
# Kernel covariance using the Bartlett kernel; the fitted result is kept in
# `res`.
res = mod.fit(cov_type='kernel',kernel="bartlett")

In [35]:
# Display the fitted Fama-MacBeth results.
res

0,1,2,3
Dep. Variable:,target,R-squared:,-7.399e-05
Estimator:,FamaMacBeth,R-squared (Between):,-0.1069
No. Observations:,103645,R-squared (Within):,2.385e-05
Date:,"Sat, Jan 09 2021",R-squared (Overall):,-7.399e-05
Time:,16:26:11,Log-likelihood,2.486e+04
Cov. Estimator:,Fama-MacBeth Kernel Cov,,
,,F-statistic:,-7.6677
Entities:,95,P-value,1.0000
Avg Obs:,1091.0,Distribution:,"F(1,103643)"
Min Obs:,1091.0,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-4.135e-05,0.0016,-0.0266,0.9788,-0.0031,0.0030
signal,-0.0294,0.0091,-3.2302,0.0012,-0.0472,-0.0116


### Multiple Signal Panel Regression

#### Create Multiple Signals

In [38]:
waiting_time = 1


def build_momentum_signal(wide_df, momentum_period, waiting_time=1):
    """Build the lagged percentage-change signal for one look-back length.

    Parameters
    ----------
    wide_df : pandas.DataFrame
        Wide frame (one column per asset) the percentage change is taken on.
    momentum_period : int
        Number of rows passed to ``pct_change(periods=...)``.
    waiting_time : int, default 1
        Number of rows the result is shifted by.

    Returns
    -------
    pandas.DataFrame
        Wide frame of signal values; rows containing any NaN are dropped.
    """
    return wide_df.pct_change(periods=momentum_period).shift(waiting_time).dropna()


# One signal per look-back length, computed by the same helper instead of
# four copy-pasted cells.
signals_wide = {}
for momentum_period in (1, 3, 5, 7):
    signals_wide[momentum_period] = build_momentum_signal(
        universe_df, momentum_period, waiting_time
    )

# Keep the original variable names: the merge step below concatenates the
# four *_long objects.
complete_ret_sub_df_s1 = signals_wide[1]
complete_ret_sub_df_s3 = signals_wide[3]
complete_ret_sub_df_s5 = signals_wide[5]
complete_ret_sub_df_s7 = signals_wide[7]

complete_ret_sub_df_s1_long = complete_ret_sub_df_s1.stack()
complete_ret_sub_df_s3_long = complete_ret_sub_df_s3.stack()
complete_ret_sub_df_s5_long = complete_ret_sub_df_s5.stack()
complete_ret_sub_df_s7_long = complete_ret_sub_df_s7.stack()

#### Create Future Return

In [46]:
future_return_days = 1

# Target return in wide form (a horizon of 1 gives a shift of 0), then
# stacked to long form and wrapped in a one-column DataFrame.
future_return_df = (
    universe_df
    .pct_change(future_return_days)
    .shift(1 - future_return_days)
)
future_return_df_long = pd.DataFrame(future_return_df.stack())

#### Merge the dataframe

In [52]:
# Repeats the DataFrame wrap done at the end of the "Create Future Return"
# cell above; when the cells run top to bottom the input is already a
# DataFrame.
future_return_df_long = pd.DataFrame(future_return_df_long)

In [51]:
# Put the four signals side by side and name each column explicitly.
# The stacked Series carry no name, so without `keys` the columns would be
# labelled 0..3 and every later step would depend on column position alone
# (the displayed merge output further down already shows s1/s3/s5/s7 labels).
signal_df = pd.concat(
    [
        complete_ret_sub_df_s1_long,
        complete_ret_sub_df_s3_long,
        complete_ret_sub_df_s5_long,
        complete_ret_sub_df_s7_long,
    ],
    axis=1,
    keys=["s1", "s3", "s5", "s7"],
)

In [59]:
# Index-on-index merge (pandas' default "inner" join) of the target with the
# signal columns.
merge_df = pd.merge(
    future_return_df_long,
    signal_df,
    left_index=True,
    right_index=True,
)

In [63]:
# Positional rename: first column is the target, the rest are the signals.
merge_df.columns = ["target", "s1", "s3", "s5", "s7"]

# Turn the two index levels into ordinary "date" and "asset" columns placed
# in front of the data columns, on a fresh 0..n-1 row index.
index_df = merge_df.rename_axis(index=["date", "asset"]).reset_index()

In [64]:
# Index rows by (asset, date) and drop every row in which the target or any
# signal is NaN.
merge_df = index_df.set_index(["asset", "date"]).dropna()

#### Panel Regression

In [69]:
# Dependent variable and the four signal regressors; statsmodels adds the
# intercept column to the regressor frame.
depe_var = ["target"]
exog_vars = ["s1", "s3", "s5", "s7"]
depe = merge_df[depe_var]
exog = sm.add_constant(merge_df[exog_vars])

In [70]:
# Entity and time effects switched on, with the four signals as regressors.
mod = PanelOLS(depe,exog,entity_effects=True,time_effects=True)
# Covariance clustered by entity and by time; as the last expression, the
# fitted result is the cell's output.
mod.fit(cov_type="clustered",cluster_entity=True,cluster_time=True)

In [72]:
# Entity and time effects both switched off.
mod = PanelOLS(depe,exog,entity_effects=False,time_effects=False)

In [74]:
# Covariance clustered by entity and by time; the result is shown but not
# stored.
mod.fit(cov_type="clustered",cluster_entity=True,cluster_time=True)

0,1,2,3
Dep. Variable:,target,R-squared:,0.0023
Estimator:,PanelOLS,R-squared (Between):,-0.0755
No. Observations:,103455,R-squared (Within):,0.0023
Date:,"Thu, Jan 14 2021",R-squared (Overall):,0.0023
Time:,21:24:22,Log-likelihood,2.496e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,58.860
Entities:,95,P-value,0.0000
Avg Obs:,1089.0,Distribution:,"F(4,103450)"
Min Obs:,1089.0,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0008,0.0015,0.5454,0.5855,-0.0021,0.0036
s1,-0.0629,0.0170,-3.6974,0.0002,-0.0962,-0.0296
s3,-0.0317,0.0181,-1.7523,0.0797,-0.0672,0.0038
s5,0.0664,0.0392,1.6967,0.0898,-0.0103,0.1432
s7,-0.0289,0.0189,-1.5270,0.1268,-0.0660,0.0082


#### Multiple Fama-MacBeth Regression

In [75]:
# Fama-MacBeth estimator on the target and the four signal regressors.
mod = FamaMacBeth(depe,exog)

In [76]:
# Kernel covariance using the Bartlett kernel; the fitted result is kept in
# `res`.
res = mod.fit(cov_type='kernel',kernel="bartlett")

In [77]:
# Display the fitted Fama-MacBeth results.
res

0,1,2,3
Dep. Variable:,target,R-squared:,-0.0123
Estimator:,FamaMacBeth,R-squared (Between):,-0.6027
No. Observations:,103455,R-squared (Within):,-0.0118
Date:,"Thu, Jan 14 2021",R-squared (Overall):,-0.0123
Time:,21:25:37,Log-likelihood,2.421e+04
Cov. Estimator:,Fama-MacBeth Kernel Cov,,
,,F-statistic:,-314.32
Entities:,95,P-value,1.0000
Avg Obs:,1089.0,Distribution:,"F(4,103450)"
Min Obs:,1089.0,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.0021,0.0018,-1.1654,0.2439,-0.0057,0.0014
s1,-0.1584,0.0579,-2.7349,0.0062,-0.2719,-0.0449
s3,-0.0394,0.0136,-2.9003,0.0037,-0.0660,-0.0128
s5,0.0384,0.0307,1.2476,0.2122,-0.0219,0.0986
s7,-0.0090,0.0068,-1.3209,0.1865,-0.0224,0.0044


### Multiple Holding Days

In [78]:
future_return_days = 2

# Two-row percentage change moved up by one row (horizon - 1), so each row
# holds the change between the previous row and the next row. The result is
# stacked to long form and wrapped in a one-column DataFrame.
future_return_df = (
    universe_df
    .pct_change(future_return_days)
    .shift(1 - future_return_days)
)
future_return_df_long = pd.DataFrame(future_return_df.stack())

In [79]:
# Index-on-index merge (pandas' default "inner" join) of the two-day target
# with the signal columns.
merge_df = pd.merge(
    future_return_df_long,
    signal_df,
    left_index=True,
    right_index=True,
)

In [80]:
# Inspect the merged frame: the unnamed target column (0) followed by the
# signal columns.
merge_df

Unnamed: 0_level_0,Unnamed: 1_level_0,0,s1,s3,s5,s7
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,ADA-USD,0.481712,0.012453,,,
2018-01-02,ADX-USD,-0.080739,0.114517,,,
2018-01-02,AE-USD,0.175440,-0.015999,,,
2018-01-02,ANT-USD,0.532966,0.098899,,,
2018-01-02,ARDR-USD,0.215451,-0.065361,,,
...,...,...,...,...,...,...
2021-01-02,XVG-USD,0.417309,-0.022395,-0.015002,-0.065067,0.100306
2021-01-02,XZC-USD,-0.045698,-0.094004,-0.056278,-0.142921,-0.056563
2021-01-02,ZEC-USD,0.029607,-0.112465,-0.146453,-0.160452,-0.086150
2021-01-02,ZEN-USD,0.182516,0.018508,0.018827,-0.059865,0.050730


In [81]:
# Same inspection as the previous cell; merge_df is not modified in between.
merge_df

Unnamed: 0_level_0,Unnamed: 1_level_0,0,s1,s3,s5,s7
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,ADA-USD,0.481712,0.012453,,,
2018-01-02,ADX-USD,-0.080739,0.114517,,,
2018-01-02,AE-USD,0.175440,-0.015999,,,
2018-01-02,ANT-USD,0.532966,0.098899,,,
2018-01-02,ARDR-USD,0.215451,-0.065361,,,
...,...,...,...,...,...,...
2021-01-02,XVG-USD,0.417309,-0.022395,-0.015002,-0.065067,0.100306
2021-01-02,XZC-USD,-0.045698,-0.094004,-0.056278,-0.142921,-0.056563
2021-01-02,ZEC-USD,0.029607,-0.112465,-0.146453,-0.160452,-0.086150
2021-01-02,ZEN-USD,0.182516,0.018508,0.018827,-0.059865,0.050730


In [82]:
# Positional rename: first column is the target, the rest are the signals.
merge_df.columns = ["target", "s1", "s3", "s5", "s7"]

# Turn the two index levels into ordinary "date" and "asset" columns placed
# in front of the data columns, on a fresh 0..n-1 row index.
index_df = merge_df.rename_axis(index=["date", "asset"]).reset_index()

In [83]:
# Index rows by (asset, date) and drop every row in which the target or any
# signal is NaN.
merge_df = index_df.set_index(["asset", "date"]).dropna()

#### Panel Regression

In [84]:
# Dependent variable (two-day target) and the four signal regressors;
# statsmodels adds the intercept column to the regressor frame.
depe_var = ["target"]
exog_vars = ["s1", "s3", "s5", "s7"]
depe = merge_df[depe_var]
exog = sm.add_constant(merge_df[exog_vars])

In [85]:
# Entity and time effects switched on, with the four signals as regressors.
mod = PanelOLS(depe,exog,entity_effects=True,time_effects=True)
# Covariance clustered by entity and by time; as the last expression, the
# fitted result is the cell's output.
mod.fit(cov_type="clustered",cluster_entity=True,cluster_time=True)

  return Series(np.sqrt(np.diag(self.cov)), self._var_names, name="std_error")


0,1,2,3
Dep. Variable:,target,R-squared:,0.0018
Estimator:,PanelOLS,R-squared (Between):,-0.0708
No. Observations:,103360,R-squared (Within):,0.0014
Date:,"Thu, Jan 14 2021",R-squared (Overall):,0.0013
Time:,21:32:21,Log-likelihood,4.906e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,46.153
Entities:,95,P-value,0.0000
Avg Obs:,1088.0,Distribution:,"F(4,102174)"
Min Obs:,1088.0,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0001,,,,,
s1,-0.0325,0.0177,-1.8393,0.0659,-0.0672,0.0021
s3,0.0048,0.0406,0.1191,0.9052,-0.0748,0.0844
s5,-0.0119,0.0545,-0.2182,0.8272,-0.1188,0.0950
s7,0.0095,0.0362,0.2621,0.7933,-0.0615,0.0805


In [86]:
# Entity and time effects both switched off.
mod = PanelOLS(depe,exog,entity_effects=False,time_effects=False)
# Covariance clustered by time only (cluster_entity is False).
mod.fit(cov_type="clustered",cluster_entity=False,cluster_time=True)

0,1,2,3
Dep. Variable:,target,R-squared:,0.0016
Estimator:,PanelOLS,R-squared (Between):,-0.0573
No. Observations:,103360,R-squared (Within):,0.0017
Date:,"Thu, Jan 14 2021",R-squared (Overall):,0.0016
Time:,21:33:02,Log-likelihood,4.094e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,41.770
Entities:,95,P-value,0.0000
Avg Obs:,1088.0,Distribution:,"F(4,103355)"
Min Obs:,1088.0,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0001,0.0019,0.0707,0.9436,-0.0035,0.0038
s1,-0.0480,0.0192,-2.4978,0.0125,-0.0857,-0.0103
s3,-0.0077,0.0367,-0.2112,0.8328,-0.0796,0.0641
s5,0.0192,0.0484,0.3966,0.6916,-0.0757,0.1141
s7,0.0005,0.0321,0.0154,0.9877,-0.0624,0.0634


In [88]:
# Entity and time effects both switched off, as in the previous cell.
mod = PanelOLS(depe,exog,entity_effects=False,time_effects=False)
# Covariance clustered by both entity and time this time.
mod.fit(cov_type="clustered",cluster_entity=True,cluster_time=True)

0,1,2,3
Dep. Variable:,target,R-squared:,0.0016
Estimator:,PanelOLS,R-squared (Between):,-0.0573
No. Observations:,103360,R-squared (Within):,0.0017
Date:,"Thu, Jan 14 2021",R-squared (Overall):,0.0016
Time:,21:33:32,Log-likelihood,4.094e+04
Cov. Estimator:,Clustered,,
,,F-statistic:,41.770
Entities:,95,P-value,0.0000
Avg Obs:,1088.0,Distribution:,"F(4,103355)"
Min Obs:,1088.0,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,0.0001,0.0019,0.0706,0.9437,-0.0035,0.0038
s1,-0.0480,0.0195,-2.4661,0.0137,-0.0862,-0.0099
s3,-0.0077,0.0328,-0.2364,0.8132,-0.0720,0.0565
s5,0.0192,0.0405,0.4740,0.6355,-0.0602,0.0986
s7,0.0005,0.0281,0.0176,0.9860,-0.0546,0.0556


#### Fama-MacBeth

In [89]:
# Fama-MacBeth estimator on the two-day target and the four signal regressors.
mod = FamaMacBeth(depe,exog)

In [90]:
# Kernel covariance using the Bartlett kernel; the result is shown as the
# cell output and not stored.
mod.fit(cov_type='kernel',kernel="bartlett")

0,1,2,3
Dep. Variable:,target,R-squared:,-0.0110
Estimator:,FamaMacBeth,R-squared (Between):,-0.5999
No. Observations:,103360,R-squared (Within):,-0.0104
Date:,"Thu, Jan 14 2021",R-squared (Overall):,-0.0110
Time:,21:35:15,Log-likelihood,4.029e+04
Cov. Estimator:,Fama-MacBeth Kernel Cov,,
,,F-statistic:,-280.84
Entities:,95,P-value,1.0000
Avg Obs:,1088.0,Distribution:,"F(4,103355)"
Min Obs:,1088.0,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.0026,0.0026,-0.9795,0.3273,-0.0077,0.0026
s1,-0.0983,0.0220,-4.4670,0.0000,-0.1414,-0.0551
s3,-0.0280,0.0146,-1.9223,0.0546,-0.0566,0.0006
s5,-0.0076,0.0195,-0.3921,0.6950,-0.0458,0.0305
s7,0.0104,0.0115,0.9029,0.3666,-0.0122,0.0329
