# Panel Regression 
## Regression with data that is both cross-sectional and time-series

In [99]:
import os

import numpy as np
import pandas as pd
import statsmodels.api as sm

from linearmodels.panel import PanelOLS
from linearmodels.panel import RandomEffects
from linearmodels.panel import compare

# Location of the panel dataset. Set the PANEL_DATA_CSV environment variable
# to point at your own copy; when it is unset the original author's path is
# used, so existing behaviour is unchanged.
DATA_PATH = os.environ.get(
    'PANEL_DATA_CSV',
    '/Users/robertwrobel/Code/Python4Statistics/Notebooks/panel_data.csv',
)

# Load the raw data and preview the first rows.
df = pd.read_csv(DATA_PATH)
df.head(15)

Unnamed: 0,Company,Year,Marketing,RD,Revenue
0,Company A,2010,105.01,160.73,498.51
1,Company A,2011,106.93,166.82,536.58
2,Company A,2012,86.4,182.49,552.3
3,Company A,2013,104.65,181.61,566.84
4,Company A,2014,105.86,108.67,382.67
5,Company A,2015,85.71,121.87,391.84
6,Company A,2016,137.32,165.45,582.64
7,Company A,2017,109.48,165.41,521.05
8,Company A,2018,76.17,165.45,482.68
9,Company A,2019,113.13,265.58,761.72


In [101]:
# Index the frame by (Company, Year) so the panel estimators used below can
# identify the entity and time dimensions from the two-level index.
panel_index_cols = ['Company', 'Year']

# Guard: once the index has been set, Company/Year are no longer columns and
# a second set_index call would raise KeyError. Skipping it when the index is
# already in place makes this cell safe to re-run.
if list(df.index.names) != panel_index_cols:
    df = df.set_index(panel_index_cols)
df.head(15)

Unnamed: 0_level_0,Unnamed: 1_level_0,Marketing,RD,Revenue
Company,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Company A,2010,105.01,160.73,498.51
Company A,2011,106.93,166.82,536.58
Company A,2012,86.4,182.49,552.3
Company A,2013,104.65,181.61,566.84
Company A,2014,105.86,108.67,382.67
Company A,2015,85.71,121.87,391.84
Company A,2016,137.32,165.45,582.64
Company A,2017,109.48,165.41,521.05
Company A,2018,76.17,165.45,482.68
Company A,2019,113.13,265.58,761.72


In [103]:
# Dependent variable: the Revenue column.
y = df['Revenue']
# Regressors: the Marketing and RD columns plus an added constant (intercept) column.
X = sm.add_constant(df[['Marketing', 'RD']])

In [105]:
# Baseline: plain OLS of y on X, to compare against the panel estimators below.
model = sm.OLS(endog=y, exog=X)
ols_results = model.fit()
ols_results.summary()

0,1,2,3
Dep. Variable:,Revenue,R-squared:,0.984
Model:,OLS,Adj. R-squared:,0.984
Method:,Least Squares,F-statistic:,1465.0
Date:,"Wed, 29 May 2024",Prob (F-statistic):,4.540000000000001e-43
Time:,13:18:53,Log-Likelihood:,-185.18
No. Observations:,50,AIC:,376.4
Df Residuals:,47,BIC:,382.1
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.7485,10.935,0.068,0.946,-21.249,22.746
Marketing,1.3436,0.081,16.587,0.000,1.181,1.507
RD,2.2967,0.044,51.961,0.000,2.208,2.386

0,1,2,3
Omnibus:,4.372,Durbin-Watson:,2.244
Prob(Omnibus):,0.112,Jarque-Bera (JB):,3.27
Skew:,-0.495,Prob(JB):,0.195
Kurtosis:,3.767,Cond. No.,1430.0


In [107]:
# Fixed effects: differences between entities (entity_effects=True).
# F-test for poolability:
#   H0: individual effects are minimal, so fixed effects are not needed
#   HA: fixed effects are needed
#   If p-value < alpha, reject H0: fixed effects are appropriate.
model = PanelOLS(dependent=y, exog=X, entity_effects=True)
fe_results1 = model.fit()
fe_results1.summary

0,1,2,3
Dep. Variable:,Revenue,R-squared:,0.9848
Estimator:,PanelOLS,R-squared (Between):,0.9744
No. Observations:,50,R-squared (Within):,0.9848
Date:,"Wed, May 29 2024",R-squared (Overall):,0.9842
Time:,13:18:53,Log-likelihood,-182.55
Cov. Estimator:,Unadjusted,,
,,F-statistic:,1396.8
Entities:,5,P-value,0.0000
Avg Obs:,10.0000,Distribution:,"F(2,43)"
Min Obs:,10.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-2.2823,11.343,-0.2012,0.8415,-25.159,20.594
Marketing,1.3673,0.0838,16.319,0.0000,1.1983,1.5362
RD,2.3007,0.0451,50.962,0.0000,2.2097,2.3918


In [109]:
# Fixed effects: differences between time periods (time_effects=True).
model = PanelOLS(dependent=y, exog=X, time_effects=True)
fe_results2 = model.fit()
fe_results2.summary

0,1,2,3
Dep. Variable:,Revenue,R-squared:,0.9860
Estimator:,PanelOLS,R-squared (Between):,0.9756
No. Observations:,50,R-squared (Within):,0.9844
Date:,"Wed, May 29 2024",R-squared (Overall):,0.9839
Time:,13:18:53,Log-likelihood,-179.10
Cov. Estimator:,Unadjusted,,
,,F-statistic:,1334.7
Entities:,5,P-value,0.0000
Avg Obs:,10.0000,Distribution:,"F(2,38)"
Min Obs:,10.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-2.0933,11.329,-0.1848,0.8544,-25.027,20.841
Marketing,1.3096,0.0877,14.934,0.0000,1.1321,1.4871
RD,2.3375,0.0484,48.341,0.0000,2.2396,2.4354


In [111]:
# Two-way fixed effects: differences between both entities and time periods.
model = PanelOLS(
    dependent=y,
    exog=X,
    entity_effects=True,
    time_effects=True,
)
fe_results3 = model.fit()
fe_results3.summary

0,1,2,3
Dep. Variable:,Revenue,R-squared:,0.9868
Estimator:,PanelOLS,R-squared (Between):,0.9745
No. Observations:,50,R-squared (Within):,0.9845
Date:,"Wed, May 29 2024",R-squared (Overall):,0.9838
Time:,13:18:53,Log-likelihood,-175.71
Cov. Estimator:,Unadjusted,,
,,F-statistic:,1271.0
Entities:,5,P-value,0.0000
Avg Obs:,10.0000,Distribution:,"F(2,34)"
Min Obs:,10.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-5.6881,11.768,-0.4834,0.6319,-29.603,18.227
Marketing,1.3355,0.0910,14.676,0.0000,1.1506,1.5205
RD,2.3437,0.0495,47.394,0.0000,2.2432,2.4442


In [113]:
# Random-effects estimator fitted on the same y and X as the models above.
model = RandomEffects(dependent=y, exog=X)
re_results = model.fit()
re_results.summary

0,1,2,3
Dep. Variable:,Revenue,R-squared:,0.9845
Estimator:,RandomEffects,R-squared (Between):,0.9750
No. Observations:,50,R-squared (Within):,0.9848
Date:,"Wed, May 29 2024",R-squared (Overall):,0.9842
Time:,13:18:53,Log-likelihood,-183.99
Cov. Estimator:,Unadjusted,,
,,F-statistic:,1491.9
Entities:,5,P-value,0.0000
Avg Obs:,10.0000,Distribution:,"F(2,47)"
Min Obs:,10.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-0.6064,10.979,-0.0552,0.9562,-22.693,21.480
Marketing,1.3542,0.0806,16.795,0.0000,1.1920,1.5164
RD,2.2985,0.0437,52.540,0.0000,2.2105,2.3865


In [115]:
# Side-by-side table of the entity fixed-effects fit and the random-effects fit.
comparison = compare(
    {
        'Fixed Effects': fe_results1,
        'Random Effects': re_results,
    }
)
comparison

0,1,2
,Fixed Effects,Random Effects
Dep. Variable,Revenue,Revenue
Estimator,PanelOLS,RandomEffects
No. Observations,50,50
Cov. Est.,Unadjusted,Unadjusted
R-squared,0.9848,0.9845
R-Squared (Within),0.9848,0.9848
R-Squared (Between),0.9744,0.9750
R-Squared (Overall),0.9842,0.9842
F-statistic,1396.8,1491.9
