In [None]:
import polars as pl
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Question 3

# Read the two Excel sheets (file names suggest time-invariant and time-varying
# variables) and attach the second to the first by "id".
# validate="1:m" makes the join raise unless "id" is unique in the left frame.
df1 = pl.read_excel("data/timeinvar-1.xlsx")
df2 = pl.read_excel("data/timevar-1.xlsx")

# Reusable polars expressions for the education thresholds used below.
edu_col = pl.col("edu")
past_hs = edu_col > 12
past_college = edu_col > 16

# Derived regressors:
#   hs / col / grad : 0/1 indicators for edu <= 12, 12 < edu <= 16, edu > 16
#   tao1 / tao2     : edu itself on each side of the 16-year threshold, 0 elsewhere
#   educab          : edu x ability interaction
#   edu2            : edu squared
# NOTE(review): tao1/tao2 as built here give a piecewise-linear fit that is NOT
# continuous at edu = 16 (both pieces share one intercept). If a continuous
# linear spline is intended, the usual basis is edu and max(edu - 16, 0) — confirm
# against the assignment.
df = df1.join(df2, on="id", how="left", validate="1:m").with_columns(
    hs=pl.when(edu_col <= 12).then(1).otherwise(0),
    col=pl.when(past_hs & (edu_col <= 16)).then(1).otherwise(0),
    tao1=pl.when(edu_col <= 16).then(edu_col).otherwise(0),
    tao2=pl.when(past_college).then(edu_col).otherwise(0),
    grad=pl.when(past_college).then(1).otherwise(0),
    educab=edu_col * pl.col("ability"),
    edu2=edu_col**2,
)

# The regressions below pass a pandas frame to the statsmodels formula API.
data = df.to_pandas()

In [None]:
# Log-wage regression on years of education (edu) plus the control variables,
# estimated by OLS with statsmodels' default covariance.
formula = "lwage ~ edu + ability + exper + meduc + feduc + brokenhome + siblings"
model = smf.ols(formula, data).fit()
print(model.summary())

In [None]:
# Same controls, but education enters through the col and grad indicators;
# hs is left out of the formula, so edu <= 12 is the reference group.
formula = "lwage ~ col + grad + ability + exper + meduc + feduc + brokenhome + siblings"
model = smf.ols(formula, data).fit()
print(model.summary())

In [None]:
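# The marginal effect is 0.1747. Because col is a 0/1 indicator, this means that having a
# college education (relative to the omitted group, edu <= 12) is associated with roughly
# 17% higher wages, holding the other regressors fixed. (The log approximation is rough for a
# dummy; the exact change is exp(0.1747) - 1, about 19%.)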

In [None]:
# b
# Quadratic in education: adds edu2 (edu squared) to the linear specification.
formula = "lwage ~ edu + edu2 + ability + exper + meduc + feduc + brokenhome + siblings"
model = smf.ols(formula, data).fit()
print(model.summary())

In [None]:
# Raw scatter of log wage against years of education, saved as fig1.
# Axis labels are set so the saved image is readable on its own and matches
# the labelling used for the predicted-wage figure (fig3).
plt.scatter(data["edu"], data["lwage"])
plt.xlabel("Years of Education")
plt.ylabel("Log Wage")
plt.savefig("assets/fig1.png")

In [None]:
# c
# Adds the edu x ability interaction (educab); standard errors use the
# heteroskedasticity-robust HC1 covariance.
formula = "lwage ~ edu + educab + ability + exper + meduc + feduc + brokenhome + siblings"
model = smf.ols(formula, data).fit(cov_type="HC1")
print(model.summary())

In [None]:
# Distribution of years of education in the merged data.
plt.hist(data["edu"])
plt.xlabel("Years of Education")
# Trailing ';' keeps the notebook from echoing the return value as cell output.
plt.ylabel("Number of observations");

In [None]:
# Effect of one extra year of education on log wage in the interaction model
# fitted just above (lwage ~ edu + educab + ...):
#     d lwage / d edu = b_edu + b_educab * ability
# evaluated at a fixed ability level, with a delta-method standard error taken
# from the model's (HC1) coefficient covariance matrix.
coeffs = model.params[['edu', 'educab']].values
cov = model.cov_params().loc[['edu', 'educab'], ['edu', 'educab']].values

# Weights on (b_edu, b_educab): 1 and the ability value at which to evaluate.
# NOTE(review): 0.052374 is presumably the sample mean of ability — confirm,
# or compute it from the data instead of hard-coding it.
delta_x = np.array([1, 0.052374])

# Point estimate of the change in log wage and its standard error
# sqrt(w' V w) for the linear combination w'b.
delta_ln_wage = delta_x @ coeffs
std_error = np.sqrt(delta_x @ cov @ delta_x)

# 95% interval on the log scale using the normal critical value.
lower = delta_ln_wage - 1.96 * std_error
upper = delta_ln_wage + 1.96 * std_error

# Convert log changes into proportional wage changes: exp(d) - 1.
change = np.exp(delta_ln_wage) - 1
ci_lower = np.exp(lower) - 1
ci_upper = np.exp(upper) - 1

# The outcome here is the wage; the earlier "assaults" wording was a leftover
# from a different exercise.
print(f"Predicted change in wages: {change:.2f}")
print(f"95% CI: [{ci_lower:.2f}, {ci_upper:.2f}]")

In [None]:
# d
# INFO: Missing the second part of (d)
# Combines the quadratic term (edu2) and the edu x ability interaction (educab);
# HC1 robust standard errors.
formula = "lwage ~ edu + edu2 + educab + ability + exper + meduc + feduc + brokenhome + siblings"
model = smf.ols(formula, data).fit(cov_type="HC1")
print(model.summary())

In [None]:
# Split the sample at the mean of ability (strictly below vs. at-or-above) and
# compute fitted log wages for each group from the currently fitted `model`.
mean_ability = data['ability'].mean()

is_low_ability = data['ability'].lt(mean_ability)
is_high_ability = data['ability'].ge(mean_ability)
low_ability_data = data.loc[is_low_ability]
high_ability_data = data.loc[is_high_ability]

low_ability_lwage, high_ability_lwage = (
    model.predict(group) for group in (low_ability_data, high_ability_data)
)


In [None]:

# Predicted log wage against years of education, one colour per ability group
# (below vs. at-or-above mean ability), saved as fig3.

# Low-ability individuals
plt.scatter(low_ability_data['edu'], low_ability_lwage, label='Low Ability', color='blue')

# High-ability individuals
plt.scatter(high_ability_data['edu'], high_ability_lwage, label='High Ability', color='red')

plt.xlabel('Years of Education')
plt.ylabel('Predicted Log Wage')
# The label= arguments above are only drawn once legend() is called; without
# it the two colours are unidentified in the saved figure.
plt.legend()
plt.savefig("assets/fig3.png")


In [None]:
# Piecewise specification: tao1 carries edu for edu <= 16 and tao2 carries edu
# for edu > 16 (each 0 otherwise), so each range gets its own slope.
# NOTE(review): with a single intercept the two pieces need not meet at
# edu = 16 — confirm this is the intended specification.
formula = "lwage ~ tao1 + tao2 + ability + exper + meduc + feduc + brokenhome + siblings"
model = smf.ols(formula, data).fit()
print(model.summary())