In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

np.random.seed(0)
n_periods = 100
n_cross_section = 10
n_obs = n_periods * n_cross_section
data = pd.DataFrame({
    'Time': np.repeat(range(n_periods), n_cross_section),
    'Cross_Section': np.tile(range(n_cross_section), n_periods),
    'X1': np.random.randn(n_obs),
    'X2': np.random.randn(n_obs),
    'X3': np.random.randn(n_obs),
    'Y': np.random.randn(n_obs),
})

data.sort_values(by=['Cross_Section', 'Time'], inplace=True)
print(data.shape)
data.head()

(1000, 6)


Unnamed: 0,Time,Cross_Section,X1,X2,X3,Y
0,0,0,1.764052,0.555963,-1.532921,1.593274
10,1,0,0.144044,-1.00033,0.371173,0.279196
20,2,0,-2.55299,1.015665,-1.913743,-2.810668
30,3,0,0.154947,-0.753704,0.743554,0.966306
40,4,0,-1.048553,1.669251,-1.216077,-0.067945


In [2]:
cross_data = []

for cross_section, df in data.groupby('Cross_Section'):
    x = sm.add_constant(df[['X1', 'X2', 'X3']])
    y = df['Y'] 
    model = sm.OLS(y, x)
    result = model.fit()
    cross_data.append(result.params)

cross_data = pd.DataFrame(cross_data)
print(cross_data.shape)
cross_data.head()

(10, 4)


Unnamed: 0,const,X1,X2,X3
0,-0.060058,0.203161,0.049822,0.105252
1,0.165471,-0.16363,0.053271,-0.10884
2,-0.128742,-0.031212,0.03847,-0.08062
3,-0.117634,0.093704,0.244994,0.083918
4,-0.086229,-0.070633,-0.09068,-0.037857


In [3]:
# The regressors in every regression are the same collection of i. 
# Only the dependent variable changes from one regression to the other.

time_data = []
x = cross_data

for time, df in data.groupby('Time'):
    y = df['Y'].values

    model = sm.OLS(y, x)
    result = model.fit()
    time_data.append(result.params)

time_data = pd.DataFrame(time_data)
print(time_data.shape)
time_data.head()

(100, 4)


Unnamed: 0,const,X1,X2,X3
0,4.091892,3.229014,2.053984,2.874924
1,0.969862,0.419557,-1.837971,1.003579
2,3.033017,-4.763284,-2.797892,0.882572
3,1.423884,5.716954,-3.946337,-3.697416
4,-0.321093,-4.794623,-0.898722,4.78878


In [4]:

print("Mean of betas", time_data.mean(axis=0), sep="\n")
print()
print("Standard deviation of betas", time_data.std(axis=0), sep="\n")

Mean of betas
const    1.038324
X1      -0.041370
X2      -0.040311
X3      -0.100653
dtype: float64

Standard deviation of betas
const    2.875527
X1       2.927050
X2       2.954771
X3       4.133308
dtype: float64
