In [2]:
import pandas as pd
import statsmodels.api as sm

# Step 1: Assemble the 20 observations into a DataFrame.
# Column roles, as used further down in this script:
#   Y - response variable of the regression
#   W - 0/1 indicator whose coefficient is reported as the treatment effect
#   X - numeric covariate included alongside W
# NOTE(review): the real-world meaning of Y and X (engagement vs. spending)
# is not stated in the code itself - confirm against the assignment text.
data = dict(
    Y=[137, 118, 124, 124, 120, 129, 122, 142, 128, 114,
       132, 130, 130, 112, 132, 117, 134, 132, 121, 128],
    W=[0, 1, 1, 1, 0, 1, 1, 0, 0, 1,
       1, 0, 0, 1, 0, 1, 0, 0, 1, 1],
    X=[19.8, 23.4, 27.7, 24.6, 21.5, 25.1, 22.4, 29.3, 20.8, 20.2,
       27.3, 24.5, 22.9, 18.4, 24.2, 21.0, 25.9, 23.2, 21.6, 22.8],
)
df = pd.DataFrame(data)

# Step 2: Split the frame into response and design matrix.
# sm.OLS does not add an intercept on its own, so add_constant supplies
# the constant column next to the W and X regressors.
y = df['Y']
X = sm.add_constant(df[['W', 'X']])

# Step 3: Estimate Y ~ const + W + X by ordinary least squares
model = sm.OLS(endog=y, exog=X).fit()

# Step 4: Report the results for part b)
# The coefficient on W is presented as the estimated treatment effect,
# together with its p-value from the fitted model.
tau_hat, p_value = model.params['W'], model.pvalues['W']

# Rounding is applied for display only; the significance check below
# uses the unrounded p-value.
print("Estimated Average Treatment Effect (τ̂):", round(tau_hat, 3))
print("p-value for τ̂:", round(p_value, 4))
print("\nInterpretation:")
# Compare against the 5% significance level; anything that is not
# strictly below 0.05 is reported as not significant.
print(
    "→ The treatment effect is statistically significant at the 5% level."
    if p_value < 0.05
    else "→ The treatment effect is NOT statistically significant at the 5% level."
)

# Step 5: Written answer for part c)
# Emit the heading (preceded by a blank line) and the three assumptions,
# one per output line, in a single print call.
print(
    "\nUnder the following assumptions, τ̂ can be interpreted causally:",
    "1. Unconfoundedness: No unmeasured confounders; treatment is as good as random given X.",
    "2. Linearity: The relationship between engagement, treatment, and spending is linear.",
    "3. SUTVA: No interference between corporations and only one version of treatment per unit.",
    sep="\n",
)



Estimated Average Treatment Effect (τ̂): -9.106
p-value for τ̂: 0.0004

Interpretation:
→ The treatment effect is statistically significant at the 5% level.

Under the following assumptions, τ̂ can be interpreted causally:
1. Unconfoundedness: No unmeasured confounders; treatment is as good as random given X.
2. Linearity: The relationship between engagement, treatment, and spending is linear.
3. SUTVA: No interference between corporations and only one version of treatment per unit.
