In [1]:
import numpy as np
import statsmodels.formula.api as smf
import pandas as pd

In [2]:
cps85 = pd.read_csv("CPS85.txt")
cps85.head()

Unnamed: 0,logwage,educ,exper,fe,married,age,south,nonwh,hisp,union
0,2.1972,10,27,0,1,43,0,0,0,0
1,1.7047,12,20,0,1,38,0,0,0,0
2,1.335,12,4,1,0,22,0,0,0,0
3,2.3514,12,29,1,1,47,0,0,0,0
4,2.708,12,40,0,1,58,0,0,0,1


In [3]:
cps85['exper2'] = cps85['exper'] ** 2
print(cps85[['exper', 'exper2']].head())

   exper  exper2
0     27     729
1     20     400
2      4      16
3     29     841
4     40    1600


In [4]:
cps85['fe'] = cps85['fe'].astype('category')
cps85['married'] = cps85['married'].astype('category')

In [5]:
formula = 'logwage ~ educ + exper + exper2 + fe + married + fe:married'

In [6]:
model1 = smf.ols(formula=formula, data=cps85).fit()
print("\nRegression Model Summary:")
print(model1.summary())


Regression Model Summary:
                            OLS Regression Results                            
Dep. Variable:                logwage   R-squared:                       0.337
Model:                            OLS   Adj. R-squared:                  0.329
Method:                 Least Squares   F-statistic:                     44.37
Date:                Tue, 15 Oct 2024   Prob (F-statistic):           7.23e-44
Time:                        10:34:22   Log-Likelihood:                -291.66
No. Observations:                 531   AIC:                             597.3
Df Residuals:                     524   BIC:                             627.2
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
Inter

## OLS replication using MATH

In [7]:
N = len(cps85['logwage'])
intercept = np.ones((N, 1))

educ = cps85['educ'].values.reshape(N, 1)       
exper = cps85['exper'].values.reshape(N, 1)      
exper2 = cps85['exper2'].values.reshape(N, 1)

fe = cps85['fe'].astype('category').cat.codes.values.reshape(N, 1)
married = cps85['married'].astype('category').cat.codes.values.reshape(N, 1)
fe_married = fe * married

In [8]:
X = np.hstack((intercept, educ, exper, exper2, fe, married, fe_married))
y = cps85['logwage'].values.reshape(N, 1)
K = X.shape[1]

In [10]:
beta_hat = np.linalg.inv(X.T @ X) @ X.T @ y
u_hat = y - X @ beta_hat
sig2_hat = ( 1/(N-K) ) * np.sum(u_hat ** 2);
v_hat = (1 / (N - K)) * np.linalg.inv (1/N * X.T @ X)
v_hat_ols = 1/N * v_hat
se_OLS = np.sqrt(np.diag(v_hat_ols))
print(se_OLS)

[1.20821680e-02 8.07724694e-04 5.62838530e-04 1.21596008e-05
 6.59380253e-03 5.72754774e-03 8.10339079e-03]
