In [1]:
import pandas as pd
import numpy as np
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant


In [2]:
# Synthetic panel setup: pin the global NumPy seed first so that every
# random draw made further down is reproducible.
np.random.seed(123)

# Panel dimensions
n_units, n_periods = 50, 10  # cross-sectional units (e.g., companies, countries) x time periods
n_obs = n_units * n_periods  # one observation per (unit, period) pair


In [9]:
# Panel index in unit-major order: each unit id is repeated once per period,
# while the period sequence 0..n_periods-1 is replayed once per unit.
units = np.repeat(np.arange(n_units), n_periods)
time = np.tile(np.arange(n_periods), n_units)


In [12]:
# Regressor: one standard-normal draw per observation
X = np.random.normal(loc=0, scale=1, size=n_obs)


In [14]:
# Fixed effects: draw one N(0, 1) shift per unit and one per period, then
# stretch each to observation length so it lines up with the unit-major rows.
# The unit draws are taken before the period draws.
unit_effects = np.random.normal(loc=0, scale=1, size=n_units).repeat(n_periods)
time_effects = np.tile(np.random.normal(loc=0, scale=1, size=n_periods), n_units)


[-1.71590149 -0.67837381  1.47440502 -0.52386001 -0.1359824  -0.80556199
 -0.93375149 -0.47026923 -0.66105597  0.9575018  -1.71590149 -0.67837381
  1.47440502 -0.52386001 -0.1359824  -0.80556199 -0.93375149 -0.47026923
 -0.66105597  0.9575018  -1.71590149 -0.67837381  1.47440502 -0.52386001
 -0.1359824  -0.80556199 -0.93375149 -0.47026923 -0.66105597  0.9575018
 -1.71590149 -0.67837381  1.47440502 -0.52386001 -0.1359824  -0.80556199
 -0.93375149 -0.47026923 -0.66105597  0.9575018  -1.71590149 -0.67837381
  1.47440502 -0.52386001 -0.1359824  -0.80556199 -0.93375149 -0.47026923
 -0.66105597  0.9575018  -1.71590149 -0.67837381  1.47440502 -0.52386001
 -0.1359824  -0.80556199 -0.93375149 -0.47026923 -0.66105597  0.9575018
 -1.71590149 -0.67837381  1.47440502 -0.52386001 -0.1359824  -0.80556199
 -0.93375149 -0.47026923 -0.66105597  0.9575018  -1.71590149 -0.67837381
  1.47440502 -0.52386001 -0.1359824  -0.80556199 -0.93375149 -0.47026923
 -0.66105597  0.9575018  -1.71590149 -0.67837381  1.4

In [15]:
# Outcome: linear in X with true slope `beta`, shifted by the unit and time
# effects, plus idiosyncratic N(0, 0.1) noise drawn per observation.
beta = 0.5
Y = beta * X + unit_effects + time_effects + np.random.normal(loc=0, scale=0.1, size=n_obs)


In [21]:
# Assemble the long-format panel: one row per (unit, time) observation.
# Dtypes are fixed in a single frame-level cast: ids as int, values as float.
df = pd.DataFrame({'unit': units, 'time': time, 'Y': Y, 'X': X}).astype(
    {'unit': int, 'time': int, 'Y': float, 'X': float}
)

In [23]:
# One-hot encode the unit and time identifiers as float indicator columns.
# drop_first=True omits the first level of each factor so the dummies are
# not collinear with the constant term added to the regression later.
unit_dummies, time_dummies = (
    pd.get_dummies(df[factor], prefix=factor, drop_first=True).astype(float)
    for factor in ('unit', 'time')
)
print(unit_dummies, time_dummies, sep='\n')

     unit_1  unit_2  unit_3  unit_4  unit_5  unit_6  unit_7  unit_8  unit_9  \
0       0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
1       0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
2       0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
3       0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
4       0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
..      ...     ...     ...     ...     ...     ...     ...     ...     ...   
495     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
496     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
497     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
498     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
499     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   

     unit_10  ...  unit_40  unit_41  unit_42  unit_

In [24]:
# Least-squares dummy-variable (LSDV) design matrix: an intercept, the
# regressor X, then the unit and time indicator columns side by side.
X_with_fe = add_constant(
    pd.concat([df[['X']].astype(float), unit_dummies, time_dummies], axis=1)
)

# Fit Y on the full design matrix by ordinary least squares
model = OLS(endog=df['Y'], exog=X_with_fe)
results = model.fit()

# Coefficient table only (second table of the statsmodels summary)
print(results.summary().tables[1])

# Compare the estimated slope on X with the value used to simulate Y
print("\nEstimated beta coefficient:", results.params['X'])
print("True beta coefficient:", beta)


                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.9804      0.034    -88.592      0.000      -3.047      -2.914
X              0.4937      0.005     99.135      0.000       0.484       0.503
unit_1         1.4222      0.044     32.559      0.000       1.336       1.508
unit_2         0.8289      0.044     18.968      0.000       0.743       0.915
unit_3         2.5514      0.044     58.443      0.000       2.466       2.637
unit_4        -0.5993      0.044    -13.733      0.000      -0.685      -0.514
unit_5         0.6546      0.044     14.948      0.000       0.569       0.741
unit_6         0.6558      0.044     15.021      0.000       0.570       0.742
unit_7         0.7332      0.044     16.781      0.000       0.647       0.819
unit_8         2.1977      0.044     50.252      0.000       2.112       2.284
unit_9        -1.0831      0.044    -24.818      0.0