In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
from hidmed import *
from tqdm import tqdm
import pickle
import matplotlib.pyplot as plt

## Tune hyperparameters for each setup and sample size combination

In [None]:
def tune_parameters(xdim, zdim, wdim, mdim, udim, setup, n, folds=2, seed=0):
    """Fit a ProximalMultiplyRobust estimator once and return its tuned parameters.

    A LinearHidMedDGP is built from the given dimensions, ``setup`` and ``seed``.
    A dataset of ``n`` samples is then drawn from it with ``seed + 1`` and used
    to fit the estimator.

    Args:
        xdim, zdim, wdim, mdim, udim: Dimensions forwarded positionally to
            LinearHidMedDGP.
        setup: One of "a", "b" or "c"; forwarded to LinearHidMedDGP.
        n: Number of samples drawn for the tuning dataset.
        folds: Forwarded to the estimator's ``folds`` argument.
        seed: Seed for the data-generating process; the dataset uses ``seed + 1``.

    Returns:
        The ``params`` attribute of the estimator after fitting.

    Raises:
        AssertionError: If ``setup`` is not "a", "b" or "c".
    """
    valid_setups = ["a", "b", "c"]
    assert setup in valid_setups, "Invalid setup. Must be 'a', 'b', or 'c'."

    dgp = LinearHidMedDGP(xdim, zdim, wdim, mdim, udim, setup=setup, seed=seed)

    # The tuning estimator is always built with generalized_model=True,
    # whichever `setup` the data comes from.
    tuner_options = dict(generalized_model=True, folds=folds, num_runs=200, n_jobs=1)
    tuner = ProximalMultiplyRobust(**tuner_options)

    # Draw the tuning data with a seed offset from the one given to the DGP.
    tuning_data = dgp.sample_dataset(n, seed=seed + 1)
    tuner.fit(tuning_data)
    return tuner.params


# Shared experiment configuration read by the tuning and evaluation cells.
seed = 0  # passed as `seed` to LinearHidMedDGP and tune_parameters
setups = ["a", "b", "c"]  # the setup labels that tune_parameters accepts
sample_sizes = np.array([300, 1500, 3000, 6000])  # dataset sizes `n` to sweep over

In [None]:
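# One-off tuning run, left commented out. Uncomment to regenerate
# assets/tuned_parameters.pkl, which the evaluation cell loads.
# tuned_parameters = {}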
# for setup in setups:
#     for n in sample_sizes:
#         print(f"1d-case, n={n}, setup={setup}")
#         tuned_parameters[1, setup, n] = tune_parameters(
#             1, 1, 1, 1, 1, setup, n, folds=2, seed=seed
#         )
#         print("\n")

#         print(f"5d-case, n={n}, setup={setup}")
#         tuned_parameters[5, setup, n] = tune_parameters(
#             5, 2, 2, 2, 1, setup, n, folds=2, seed=seed
#         )
#         print("\n")

# !mkdir -p assets
# pickle.dump(tuned_parameters, open("assets/tuned_parameters.pkl", "wb"))

## Evaluate estimators

In [None]:
def evaluate_estimator(predictor, datagen, n, num_runs=100):
    """Fit ``predictor`` on repeatedly sampled datasets and record per-run metrics.

    Args:
        predictor: Object whose ``fit(dataset, reduce=False)`` returns the
            estimates for one dataset.
        datagen: Data-generating process providing ``true_psi()`` and
            ``sample_dataset(n, seed=...)``.
        n: Number of samples per dataset.
        num_runs: Number of independent datasets to draw and fit.

    Returns:
        A dict mapping each of "estimate", "bias", "mse", "anb", "covered" and
        "ci_width" to a float array of length ``num_runs``. "estimate" is the
        mean of the run's estimates and "bias" is that mean minus the true
        value; the remaining entries hold what ``calculate_mse``,
        ``absolute_normalized_bias``, ``is_covered`` and ``confidence_interval``
        return for the run (assumed to be scalars; TODO confirm for
        ``confidence_interval``).
    """
    true_psi = datagen.true_psi()

    metric_names = ("estimate", "bias", "mse", "anb", "covered", "ci_width")
    res = {metric: np.zeros(num_runs) for metric in metric_names}

    for run in tqdm(range(num_runs)):
        # Run `run` samples its dataset with seed run + 2.
        sample = datagen.sample_dataset(n, seed=run + 2)
        estimates = predictor.fit(sample, reduce=False)

        res["estimate"][run] = np.mean(estimates)
        # Bias is computed from the stored estimate, not the raw mean.
        res["bias"][run] = res["estimate"][run] - true_psi
        res["mse"][run] = calculate_mse(estimates, true_psi)
        res["anb"][run] = absolute_normalized_bias(estimates, true_psi)
        res["covered"][run] = is_covered(estimates, true_psi)
        res["ci_width"][run] = confidence_interval(estimates)

    return res

In [None]:
# Load the tuned hyperparameters, keyed by (dim, setup, n). The context manager
# closes the file handle as soon as loading finishes.
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
with open("assets/tuned_parameters.pkl", "rb") as params_file:
    params = pickle.load(params_file)

# Maps (dim, setup, n, estimator class name) -> metrics dict from evaluate_estimator.
results = {}

for setup in setups:
    for n in sample_sizes:
        # Run both the 1-dimensional and the 5-dimensional configuration.
        for dim in [1, 5]:
            if dim == 1:
                xdim, zdim, wdim, mdim, udim = 1, 1, 1, 1, 1
            else:
                xdim, zdim, wdim, mdim, udim = 5, 2, 2, 2, 1

            datagen = LinearHidMedDGP(
                xdim, zdim, wdim, mdim, udim, setup=setup, seed=seed
            )
            # Not used in this cell (evaluate_estimator calls true_psi() itself);
            # kept so the module-level name and the call are preserved.
            true_psi = datagen.true_psi()

            for estimator in [
                ProximalMultiplyRobust,
                ProximalInverseProbWeighting,
                ProximalOutcomeRegression,
            ]:
                print(
                    f"Running {estimator.__name__} for {dim}-dimensional case, n={n}, setup={setup}"
                )
                predictor = estimator(
                    generalized_model=(setup in ("b", "c")),
                    folds=2,
                    verbose=False,
                    # Keyed by `dim` (equal to xdim in both configurations) so the
                    # lookup matches the key used for `results`.
                    **params[dim, setup, n],
                )
                results[dim, setup, n, estimator.__name__] = evaluate_estimator(
                    predictor, datagen, n, num_runs=100
                )

# Write inside a context manager so the file is flushed and closed on exit.
with open("assets/results.pkl", "wb") as results_file:
    pickle.dump(results, results_file)

## Visualize results