## Causal Modeling

In [2]:
from numpy.random import normal as rnorm
import numpy as np
import pandas as pd
import statsmodels.api as sm



### Confounding Demo

In [11]:
def get_coefs(n = 100, true = 1, rng = None):
    """Simulate one confounded dataset and estimate the effect of X on Y twice.

    U is drawn first, X is built as ``0.5 * U`` plus noise, and Y as
    ``true * X + U`` plus noise; every noise term is a standard-normal draw.

    Parameters
    ----------
    n : int
        Number of rows to simulate.
    true : float
        Coefficient on X used when generating Y.
    rng : optional
        Object exposing ``normal(size=...)``, e.g. a ``numpy.random.Generator``.
        When ``None`` (the default) the module-level ``rnorm`` sampler is
        used, so existing callers see exactly the original behaviour.

    Returns
    -------
    tuple
        ``(unadjusted, adjusted)``: the OLS coefficient on X from regressing
        Y on X alone, and from regressing Y on both X and U.
    """
    # Pick the sampler once so the three draws below share one source
    draw = rnorm if rng is None else rng.normal

    U = draw(size=n)                 # Unmeasured confounder
    X = 0.5 * U + draw(size=n)       # Treatment influenced by U
    Y = true * X + U + draw(size=n)  # Outcome influenced by X and U

    data = pd.DataFrame({'X': X, 'U': U, 'Y': Y})

    # Fit a linear regression model with and
    # without adjusting for the unmeasured confounder
    model = sm.OLS(data['Y'], sm.add_constant(data['X'])).fit()
    model2 = sm.OLS(data['Y'], sm.add_constant(data[['X', 'U']])).fit()
    return model.params['X'], model2.params['X']

def simulate_confounding(nreps = 100, n = 100, true=1, seed=None):
    """Average the two X-coefficient estimates over repeated simulations.

    Parameters
    ----------
    nreps : int
        Number of independent datasets to simulate.
    n : int
        Number of rows in each simulated dataset.
    true : float
        Coefficient on X used when generating Y.
    seed : optional
        Anything accepted by ``numpy.random.default_rng``. When given, all
        draws come from a dedicated generator built from it, so the result
        is repeatable and NumPy's global random state is left untouched.
        When ``None`` (the default) the draws come from the module-level
        ``rnorm`` sampler, as before.

    Returns
    -------
    pandas.DataFrame
        One row indexed ``'X'`` with columns ``true``, ``estimate_1`` (mean
        coefficient without adjusting for U) and ``estimate_2`` (mean
        coefficient adjusting for U), rounded to three decimals.
    """
    # A single generator is shared across replications so one seed
    # determines the whole simulation
    rng = None if seed is None else np.random.default_rng(seed)

    results = [get_coefs(n, true, rng=rng) for _ in range(nreps)]

    # Column-wise mean over replications: [unadjusted, adjusted]
    results = np.mean(results, axis=0)

    return pd.DataFrame({
        'true': true,
        'estimate_1': results[0],
        'estimate_2': results[1],
    }, index=['X']).round(3)
        
        
# Average both X-coefficient estimates over 500 simulated datasets of
# 1000 rows each, using the default true coefficient of 1
simulate_confounding(nreps=500, n=1000)

Unnamed: 0,true,estimate_1,estimate_2
X,1,1.4,1.0


## Guided Exploration

In [9]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Seed NumPy's global generator so every run draws the same sample
np.random.seed(42)

# Simulate a collider structure: x and y are drawn independently of each
# other, while z is built from both of them plus its own noise
n = 2500
x = np.random.normal(size=n)          # feature
y = np.random.normal(size=n)          # target, generated without reference to x
z = x + y + np.random.normal(size=n)  # collider: sum of x, y and fresh noise

data = pd.DataFrame(dict(x=x, y=y, z=z))

# Regress y on x alone, then on x together with the collider z
model_without_z, model_with_z = (
    LinearRegression().fit(data[features], data['y'])
    for features in (['x'], ['x', 'z'])
)

# Coefficient on x from each fit (x is the first feature in both models)
pd.DataFrame(
    [[fitted.coef_[0] for fitted in (model_without_z, model_with_z)]],
    index=['x'],
    columns=['estimate_1', 'estimate_2'],
).round(3)

Unnamed: 0,estimate_1,estimate_2
x,-0.011,-0.495
