# Problem Set 2

Solutions to Computational Problems

## Problem 2

In [1]:
from functools import partial
from itertools import product

import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from scipy.stats import norm

In [2]:
def simulate_from_model(
    n_sim: int,
    n_periods: int,
    alpha: float,
    rng: np.random.Generator,
):
    """Simulate from AR(1) model.

    Args:
        n_sim (int): Number of simulations.
        n_periods (int): Number of periods.
        alpha (float): AR(1) parameter.
        rng (np.random.Generator): Random number generator.

    Returns:
        np.ndarray: Simulated data.

    """
    # Draw error terms

    cov = np.identity(n_periods + 1)
    cov[0, 0] /= 1 - alpha**2

    error = rng.multivariate_normal(mean=np.zeros(n_periods + 1), cov=cov, size=n_sim).T

    # Initialize starting value

    y = np.empty_like(error)
    y[0, :] = error[0, :]

    # Simulate

    for t in range(1, n_periods + 1):
        y[t, :] = alpha * y[t - 1, :] + error[t, :]

    return y[1:, :]

In [9]:
def ols_coef(y: np.ndarray):
    """Computes the OLS estimator of the AR(1) coefficient.

    Args:
        y (np.ndarray): Time series data. Has shape (n_periods, n_sim).

    Returns:
        np.ndarray: OLS estimate of the AR(1) coefficient. Has shape (n_sim,).

    """
    y_lagged = y[:-1].T
    y = y[1:].T

    coefs = []
    for _y_lag, _y in zip(y_lagged, y, strict=True):
        coef, *_ = np.linalg.lstsq(_y_lag.reshape(-1, 1), _y, rcond=None)
        coefs.append(float(coef))

    return np.array(coefs)

In [10]:
def ols_se(coef: np.ndarray, n_periods: int):
    """Computes the standard error of the OLS estimator of the AR(1) coefficient.

    Args:
        coef (np.ndarray): The OLS estimate of the AR(1) coefficient. Has shape (n_sim,)
        n_periods (int): The number of periods in the time series.

    Returns:
        np.ndarray: The standard error of the OLS estimator of the AR(1) coefficient.
            Has shape (n_sim,).

    """
    return np.sqrt(np.abs(1 - coef**2) / n_periods)

In [11]:
def ols_confidence_interval(
    coef: np.ndarray,
    se: np.ndarray,
    significance_level: float,
):
    """Computes the confidence interval for the OLS coefficient.

    Args:
        coef (np.ndarray): The OLS estimate of the AR(1) coefficient. Has shape (n_sim,)
        se (np.ndarray): The standard error of the OLS estimator. Has shape (n_sim,)
        significance_level (float): The significance level.

    Returns:
        - np.ndarray: The lower bound of the confidence interval. Has shape (n_sim,).
        - np.ndarray: The upper bound of the confidence interval. Has shape (n_sim,).

    """
    lower = coef - norm.ppf(1 - significance_level / 2) * se
    upper = coef + norm.ppf(1 - significance_level / 2) * se
    return lower, upper

In [12]:
def _simulation(
    n_sim: int,
    n_periods: int,
    alpha: float,
    significance_level: float,
    rng: np.random.Generator,
):
    """Run a monte carlo simulation compute metrics.

    Args:
        n_sim (int): Number of simulations.
        n_periods (int): Number of periods.
        alpha (float): AR(1) parameter.
        significance_level (float): Significance level.
        rng (np.random.Generator): Random number generator.

    Returns:
        pd.DataFrame: Dataframe containing the averaged metrics.

    """
    y = simulate_from_model(
        n_sim=n_sim,
        n_periods=n_periods,
        alpha=alpha,
        rng=rng,
    )

    coef = ols_coef(y)

    se = ols_se(coef, n_periods=n_periods)

    lower, upper = ols_confidence_interval(
        coef,
        se=se,
        significance_level=significance_level,
    )

    mse = (coef - alpha) ** 2

    coverage = np.logical_and(lower <= alpha, alpha <= upper)

    result = {
        "Estimate": coef,
        "Standard Error": se,
        "MSE": mse,
        "Coverage": coverage,
    }

    result = pd.DataFrame(result).mean(axis=0)
    return result

## Computation

In [13]:
rng = np.random.default_rng(54321)

alpha_grid = [0.99, 0.95, 0.9, 0.8, 0.7]
n_periods_grid = [200, 800]

simulation = partial(_simulation, n_sim=10_000, rng=rng, significance_level=0.05)

In [15]:
parameter_grid = list(product(alpha_grid, n_periods_grid))

raw_result = Parallel(n_jobs=5)(
    delayed(simulation)(alpha=alpha, n_periods=n_periods)
    for alpha, n_periods in parameter_grid
)

In [65]:
result = pd.DataFrame(
    raw_result,
    index=pd.MultiIndex.from_tuples(parameter_grid, names=["alpha", "No. Periods"]),
).T

In [66]:
result.round(4)

alpha,0.99,0.99,0.95,0.95,0.90,0.90,0.80,0.80,0.70,0.70
No. Periods,200,800,200,800,200,800,200,800,200,800
Estimate,0.982,0.9876,0.941,0.9475,0.891,0.8976,0.7916,0.7978,0.6925,0.6981
Standard Error,0.0123,0.0054,0.0234,0.0112,0.0317,0.0155,0.0429,0.0213,0.0508,0.0253
MSE,0.0003,0.0,0.0007,0.0001,0.0012,0.0003,0.002,0.0005,0.0027,0.0007
Coverage,0.9063,0.9467,0.9505,0.9477,0.951,0.9501,0.9485,0.9491,0.9458,0.9494
