# Problem Set 2

Solutions to Computational Problems

## Instrumental Variables

--- 

## Problem 3

In [1]:
from functools import partial
from itertools import product

import numpy as np
import pandas as pd
from joblib import Parallel, delayed

### Functions

In [2]:
def _simulate_from_model(
    gamma: float,
    beta: float,
    n_samples: int,
    n_sim: int,
    rng: np.random.Generator,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Draw Monte Carlo samples from the linear IV data-generating process.

    The instrument z and the two error terms (e, v) are jointly normal. z has
    mean 1 and unit variance and is uncorrelated with both errors; e and v have
    mean 0, unit variance and covariance 0.8. The observables are built as
    ``x = gamma * z + v`` and ``y = beta * x + e``.

    Args:
        gamma (float): Coefficient on z in the equation for x (first stage).
        beta (float): Coefficient on x in the equation for y.
        n_samples (int): Number of observations per simulated data set.
        n_sim (int): Number of simulated data sets.
        rng (np.random.Generator): Random number generator used for the draws.

    Returns:
        - np.ndarray: Outcomes of shape (n_sim, n_samples).
        - np.ndarray: Regressors of shape (n_sim, n_samples).
        - np.ndarray: Instruments of shape (n_sim, n_samples).

    """
    # Component order of the joint normal vector: (z, e, v).
    location = np.array([1, 0, 0])
    covariance = np.array([[1, 0, 0], [0, 1, 0.8], [0, 0.8, 1]])

    # Shape (n_samples, n_sim, 3); the last axis indexes (z, e, v).
    draws = rng.multivariate_normal(
        mean=location,
        cov=covariance,
        size=(n_samples, n_sim),
    )

    # Reverse the axes to (3, n_sim, n_samples) so that unpacking along the
    # first axis yields one (n_sim, n_samples) array per component.
    instrument, outcome_error, regressor_error = draws.transpose(2, 1, 0)

    regressor = gamma * instrument + regressor_error
    outcome = beta * regressor + outcome_error

    return outcome, regressor, instrument

In [3]:
def _ols_1d(y: np.ndarray, x: np.ndarray) -> np.ndarray:
    """Estimate the slope of a no-intercept OLS regression of y on x, row by row.

    Each row is treated as one independent data set; the slope is the ratio
    sum(x * y) / sum(x ** 2) taken along the sample axis.

    Args:
        y (np.ndarray): Outcomes of shape (n_sim, n_samples).
        x (np.ndarray): Regressors of shape (n_sim, n_samples).

    Returns:
        np.ndarray: Coefficients of shape (n_sim,).

    """
    cross_moment = (x * y).sum(axis=1)
    second_moment = np.square(x).sum(axis=1)
    return cross_moment / second_moment

In [4]:
def _ols_coef_and_se(y: np.ndarray, x: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Estimate the no-intercept OLS slope and its standard error, row by row.

    Each row is treated as one independent data set. The standard error is
    sqrt(mean(residuals ** 2) / sum(x ** 2)), i.e. the residual variance is
    the plain mean of squared residuals (no degrees-of-freedom correction).

    Args:
        y (np.ndarray): Outcomes of shape (n_sim, n_samples).
        x (np.ndarray): Regressors of shape (n_sim, n_samples).

    Returns:
        - np.ndarray: Coefficients of shape (n_sim,).
        - np.ndarray: Standard errors of shape (n_sim,).

    """
    second_moment = np.square(x).sum(axis=1)
    slope = (x * y).sum(axis=1) / second_moment

    # Add a trailing axis so each row's slope broadcasts over its own samples.
    resid = y - slope[:, np.newaxis] * x
    resid_variance = np.square(resid).mean(axis=1)

    return slope, np.sqrt(resid_variance / second_moment)

In [5]:
def _iv_coef_and_se(
    y: np.ndarray,
    x: np.ndarray,
    z: np.ndarray,
) -> tuple[np.ndarray, np.ndarray]:
    """Estimate the simple IV slope and its standard error, row by row.

    Each row is treated as one independent data set. The slope is
    sum(z * y) / sum(z * x) and the variance estimate is
    sum(z ** 2) * mean(residuals ** 2) / sum(z * x) ** 2, where the residuals
    are y - x * slope.

    Args:
        y (np.ndarray): Outcomes of shape (n_sim, n_samples).
        x (np.ndarray): Regressors of shape (n_sim, n_samples).
        z (np.ndarray): Instruments of shape (n_sim, n_samples).

    Returns:
        - np.ndarray: Coefficients of shape (n_sim,).
        - np.ndarray: Standard errors of shape (n_sim,).

    """
    cross_zx = (z * x).sum(axis=1)
    slope = (z * y).sum(axis=1) / cross_zx

    # Add a trailing axis so each row's slope broadcasts over its own samples.
    resid = y - slope[:, np.newaxis] * x

    instrument_moment = np.square(z).sum(axis=1)
    resid_variance = np.square(resid).mean(axis=1)
    variance = np.power(cross_zx, -2) * instrument_moment * resid_variance

    return slope, np.sqrt(variance)

In [6]:
def _simulation(
    gamma: float,
    n_samples: int,
    n_sim: int,
    rng: np.random.Generator,
    beta: float = 1.0,
) -> pd.Series:
    """Perform a Monte Carlo simulation and average the results over replications.

    Data are simulated ``n_sim`` times, the OLS and IV estimates of ``beta`` and
    the first-stage estimate of ``gamma`` are computed for every replication,
    and the per-replication quantities are averaged.

    Args:
        gamma (float): Coefficient on z in the equation for x (first stage).
        n_samples (int): Number of samples per simulation.
        n_sim (int): Number of simulations.
        rng (np.random.Generator): Random number generator.
        beta (float): True coefficient on x in the equation for y. It is used
            both to simulate the data and as the target of the coverage check.
            Defaults to 1.

    Returns:
        pd.Series: Means across simulations, indexed by the statistic's name
            ("Coefficient - OLS", "Coefficient - IV", "Standard Error - IV",
            "Coverage - IV", "Gamma estimate", "Gamma se", "Prob. F > 10",
            "Cond. coverage"). "Cond. coverage" is the IV coverage among
            replications whose first-stage F statistic exceeds 10, and is NaN
            when no replication satisfies that condition.

    """
    # simulate data
    y, x, z = _simulate_from_model(
        gamma=gamma,
        beta=beta,
        n_samples=n_samples,
        n_sim=n_sim,
        rng=rng,
    )

    # estimate coefficients
    coef_ols = _ols_1d(y, x)

    coef_iv, coef_iv_se = _iv_coef_and_se(y, x, z)

    # first stage: regression of x on z
    gamma_estimate, gamma_se = _ols_coef_and_se(x, z)

    # compute coverage of the true beta by the 95% normal interval around the
    # iv estimate
    lower = coef_iv - 1.96 * coef_iv_se
    upper = coef_iv + 1.96 * coef_iv_se

    coverage = np.logical_and(lower < beta, upper > beta)

    # compute f-test (squared t-statistic of the first-stage coefficient)
    f_test_iv = (gamma_estimate / gamma_se) ** 2
    f_test_iv_larger_10 = f_test_iv > 10

    # compute conditional coverage; averaging an empty selection would emit a
    # RuntimeWarning, so report NaN explicitly when no replication has F > 10
    if f_test_iv_larger_10.any():
        _conditional_coverage = coverage[f_test_iv_larger_10].mean()
    else:
        _conditional_coverage = np.nan

    # collect results
    result = {
        "Coefficient - OLS": coef_ols,
        "Coefficient - IV": coef_iv,
        "Standard Error - IV": coef_iv_se,
        "Coverage - IV": coverage,
        "Gamma estimate": gamma_estimate,
        "Gamma se": gamma_se,
        "Prob. F > 10": f_test_iv_larger_10,
    }

    # take mean across simulations; this turns the frame into a Series
    result = pd.DataFrame(result).mean(axis=0)
    result["Cond. coverage"] = _conditional_coverage  # is already 'meaned'

    return result

### Computation

In [7]:
# Seeded generator so that the simulated draws are reproducible.
rng = np.random.default_rng(54321)

# Six equally spaced first-stage slopes from 0 to 5/25 = 0.2, endpoints included.
gamma_grid = np.linspace(0, 5 / 25, num=6)

# Fix the number of replications and the generator; `gamma` and `n_samples`
# remain free and are supplied once per grid point.
simulation = partial(_simulation, n_sim=10_000, rng=rng)

In [8]:
# Every combination of first-stage slope and sample size, as (gamma, n_samples).
parameter_grid = list(product(gamma_grid, (625, 2_500)))

# One task per grid point; joblib returns the results in submission order,
# i.e. in the order of `parameter_grid`.
# NOTE(review): every task receives the same `rng` object that is bound inside
# `simulation`. With a process-based joblib backend each worker would get a
# pickled copy in the same state, so all grid points may reuse the same random
# stream -- confirm this is intended (otherwise pass one spawned generator per
# task).
raw_result = Parallel(n_jobs=8)(
    delayed(simulation)(gamma=grid_gamma, n_samples=grid_n_samples)
    for grid_gamma, grid_n_samples in parameter_grid
)

In [9]:
# Stack the per-grid-point results as rows labelled by (gamma, No. samples),
# then transpose so that grid points are columns and statistics are rows.
result = pd.DataFrame(
    data=raw_result,
    index=pd.MultiIndex.from_tuples(
        parameter_grid,
        names=["gamma", "No. samples"],
    ),
).transpose()

In [10]:
# Show the summary table rounded to three decimal places.
result.round(3)

gamma,0.00,0.00,0.04,0.04,0.08,0.08,0.12,0.12,0.16,0.16,0.20,0.20
No. samples,625,2500,625,2500,625,2500,625,2500,625,2500,625,2500
Coefficient - OLS,1.8,1.8,1.798,1.797,1.79,1.79,1.778,1.777,1.761,1.761,1.741,1.741
Coefficient - IV,2.766,0.052,0.614,0.769,0.565,0.973,0.945,0.989,0.973,0.994,0.983,0.996
Standard Error - IV,35190.751,39321.618,724.55,17.968,262.636,0.192,0.285,0.122,0.193,0.09,0.149,0.072
Coverage - IV,0.87,0.866,0.903,0.927,0.928,0.948,0.94,0.954,0.946,0.952,0.95,0.951
Gamma estimate,0.0,0.0,0.04,0.04,0.08,0.08,0.12,0.12,0.16,0.16,0.2,0.2
Gamma se,0.028,0.014,0.028,0.014,0.028,0.014,0.028,0.014,0.028,0.014,0.028,0.014
Prob. F > 10,0.001,0.001,0.042,0.377,0.376,0.995,0.86,1.0,0.992,1.0,1.0,1.0
Cond. coverage,0.0,0.0,0.383,0.817,0.82,0.947,0.93,0.954,0.946,0.952,0.95,0.951
