# Estimate a causal effect between two variables.

Let us take another example. Say we want to estimate the causal effect between two variables, an independent variable and a dependent variable, and we have some idea about the possible values of the intercept alpha and the slope parameter beta.


We can randomly sample from a normal distribution to generate the error terms and the independent variable values, and then compute the dependent variable from the assumed model. Next we estimate the slope coefficient, beta_hat, by OLS, and repeat this process M = 10000 times. By the law of large numbers (LLN), the sample mean of these 10000 beta_hats converges to the expected value of beta_hat, which equals the true beta because the OLS estimator is unbiased.

In [1]:
import numpy as np
import statsmodels.api as sm
# Seed NumPy's global random state so repeated runs give the same draws.
np.random.seed(2021)

# Location and scale handed to np.random.normal, and the number of
# observations drawn per replication.
mu = 0
sigma = 1
n = 100

# Assumed population parameters: the intercept (stored as a length-n vector
# so it adds element-wise to the simulated data) and the slope.
alpha = np.full(n, 0.5)
beta = 1.5

def MC_estimation_slope(M):
    """Run ``M`` Monte Carlo replications of the simulated regression.

    Every replication draws fresh error terms and regressor values from
    ``np.random.normal(mu, sigma, n)``, builds the response from the
    module-level ``alpha`` and ``beta``, and fits an OLS model of the
    response on the regressor.

    Parameters
    ----------
    M : int
        Number of replications to run.

    Returns
    -------
    tuple
        ``(MC_samples, MC_beta_hats)`` where ``MC_samples`` is a dict mapping
        the replication index to its simulated response vector and
        ``MC_beta_hats`` is a flat array holding the fitted coefficient of
        each replication.
    """
    simulated_responses = {}
    fitted_params = []

    for rep in range(M):
        # Draw order (errors first, then regressor) is kept fixed so results
        # stay reproducible under a seeded global random state.
        noise = np.random.normal(mu, sigma, n)
        # Scaling by 9 makes the regressor's variance larger than the
        # variance of the error terms.
        regressor = 9 * np.random.normal(mu, sigma, n)

        # Population model using the assumed parameter values alpha/beta.
        response = alpha + beta * regressor + noise
        simulated_responses[rep] = response

        # NOTE: only the regressor column is passed as exog; no constant
        # column is added, so the fit has a single (slope) coefficient.
        fit = sm.OLS(
            response.reshape((-1, 1)),
            regressor.reshape((-1, 1)),
        ).fit()
        fitted_params.append(fit.params)

    return simulated_responses, np.array(fitted_params).flatten()
    
# Run 10000 replications and average the fitted slopes into one
# Monte Carlo estimate of beta.
MC_samples, MC_beta_hats = MC_estimation_slope(M=10000)
beta_hat_MC = MC_beta_hats.mean()

  import pandas.util.testing as tm


In [2]:
beta_hat_MC

1.5002180550943296