In [1]:
import warnings
from dataclasses import replace

import numpy as np

from scripts.helpers import run_experiment
from scripts.propensity import get_propensity_scores
from variables.variables import *

# Install a blanket "ignore" filter: warnings issued after this line are suppressed.
# NOTE(review): this also hides warnings raised inside the imported project
# helpers — consider narrowing the filter by category or module.
warnings.filterwarnings("ignore")

# Hyperparameter Grid Search Explained

![Alt Text](https://media.giphy.com/media/v1.Y2lkPTc5MGI3NjExb2RsbzFrOHhzdzJ4Nm9pd2kzaTh1NjFpb2tpdDhlaWdsM3RoMHhocCZlcD12MV9naWZzX3NlYXJjaCZjdD1n/4ah0igooeeQYQNifeE/giphy.gif)

Here’s what’s happening in this notebook cell — made digestible:

1. **Setting up the grids**  
   - `QUANTILE_GRID`: the quantile thresholds to test, ranging from `0.0` to `0.075`  
   - `CLIPPING_GRID`: symmetric `(p, 100 - p)` low/high clipping pairs built from `CLIPPING_PERCENTS`, used to keep extreme values in check  

2. **Looping over exposures**  
   For each exposure (the variable/outcome we’re modeling):  
   - We start with a `BASE_CONFIG` and tweak it with the exposure-specific method and cutoff  
   - We compute propensity scores once per exposure (`get_propensity_scores` returns `df, kwargs, X, shap_values`) and reuse them for every grid point  

3. **Grid search for robustness**  
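   - We step through the combinations of quantiles and clipping values (quantiles in the outer loop)  
   - Each configuration is tested using `run_experiment`  
   - As soon as **one configuration returns `"PASS_STRICT"`**, the exposure is marked as robust (`passed_strict = True`) and the remaining combinations are skipped; if none passes, `passed_strict` is `False`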

**TL;DR:** We are stress-testing each exposure across multiple hyperparameter settings to identify robust configurations while keeping the process transparent and reproducible.  

This approach ensures that the findings are **stable**, **robust**, and scientifically reliable — all while exploring the parameter space efficiently.


In [None]:
# ================================================================
# Hyperparameter grids
# ================================================================

# Quantile thresholds swept by the grid search (passed as `q` in each config).
QUANTILE_GRID = np.array([0.0, 0.001, 0.01, 0.0125, 0.015, 0.0175, 0.025, 0.05, 0.075])

# Lower clipping bounds in percent; each one is paired with its mirror 100 - p.
CLIPPING_PERCENTS = np.array([2.5, 3.5, 5.0, 7.0, 7.5, 10.0, 12.5])

# Symmetric (low, high) clipping pairs. `.tolist()` converts the NumPy scalars to
# built-in floats so the tuples stored in each config are plain Python values,
# consistent with the `float(q)` cast applied to the quantile in the loop.
CLIPPING_GRID = [(p, 100.0 - p) for p in CLIPPING_PERCENTS.tolist()]

# ================================================================
# Loop
# ================================================================

for exposure, vals in EXPOSURES.items():
    print(f"\n{exposure}")

    # Exposure-specific copy of the base config: only `method` and `limit` change.
    base_cfg = replace(BASE_CONFIG, method=vals["method"], limit=vals["cutoff"])

    # Propensity scores are computed once per exposure and shared by every grid point.
    df, kwargs, X, shap_values = get_propensity_scores(
        exposure=exposure,
        config=vars(base_cfg),
        variables=variable_config,
        file=DATAFRAME_PATH,
    )

    # Lazily yield one config dict per (q, clip) pair, quantiles in the outer loop.
    grid_configs = (
        vars(replace(base_cfg, q=float(q), clip=clip))
        for q in QUANTILE_GRID
        for clip in CLIPPING_GRID
    )

    # `any` stops at the first configuration whose result equals "PASS_STRICT".
    # NOTE(review): `passed_strict` is not consumed in the visible part of this
    # cell — confirm it is reported or stored further down.
    passed_strict = any(
        "PASS_STRICT"
        == run_experiment(
            config=grid_config,
            variable_config=variable_config,
            df=df,
            kwargs=kwargs,
            X=X,
            shap_values=shap_values,
        )
        for grid_config in grid_configs
    )