In [None]:
import itertools
import json
from pathlib import Path

import pandas as pd

from src.data import load_data, filter_data
from src.features import build_features
from src.model import train_model, apply_threshold
from src.metrics import compute_metrics

In [None]:
# 1) Define parameter grid
# Each key is one analysis decision ("axis"); each value lists the options
# tried for it. Every combination of options is one universe.
PARAM_GRID = dict(
    # Passed to filter_data(exclude=...) in run_universe; None is one of the options.
    exclude_subgroups=[None, ['subgroup1'], ['subgroup2']],
    # Passed to build_features as the feature-set selector.
    feature_set=['base', 'extended'],
    # Passed to train_model as the model selector.
    model=['logistic', 'random_forest'],
    # Passed to apply_threshold(policy=...).
    threshold_policy=['fixed', 'optimized'],
)

In [None]:
# 2) Generate all universes
def generate_universes(grid):
    """Yield one dict per combination of options in ``grid``.

    Args:
        grid: Mapping from axis name to an iterable of options for that axis.

    Yields:
        dict: ``{axis_name: chosen_option}`` for every element of the
        Cartesian product of the axes. Axes appear in ``grid``'s iteration
        order, and the last axis varies fastest.

    An empty ``grid`` yields a single empty dict (the product of zero axes
    has exactly one element) rather than failing. A grid in which any axis
    has no options yields nothing.
    """
    axis_names = tuple(grid)
    # itertools.product() with no arguments yields one empty tuple, which
    # gives the empty-grid case for free; no zip-unpacking that would fail on
    # an empty mapping.
    for combo in itertools.product(*grid.values()):
        yield dict(zip(axis_names, combo))
        

In [None]:
# 3) Orchestrator

In [None]:
def run_universe(universe):
    """Run the full pipeline for a single universe and return its metrics.

    Args:
        universe: Mapping with the keys ``'exclude_subgroups'``,
            ``'feature_set'``, ``'model'`` and ``'threshold_policy'``.

    Returns:
        Whatever ``compute_metrics`` returns for the target and the
        thresholded predictions.

    NOTE(review): predictions are made on the same ``X`` the model was
    trained on, and no held-out split is visible in this function. Confirm
    whether a split happens inside the helpers or whether in-sample metrics
    are intended.
    """
    # Data: load everything, then drop whatever this universe excludes.
    frame = filter_data(load_data(), exclude=universe['exclude_subgroups'])

    # Features and fitted model for this universe's choices.
    features, target = build_features(frame, universe['feature_set'])
    fitted = train_model(features, target, universe['model'])

    # apply_threshold returns a pair; the second element (bound to
    # `threshold` previously) is not used by this function.
    predictions, _ = apply_threshold(
        fitted, features, policy=universe['threshold_policy']
    )

    return compute_metrics(target, predictions)


In [None]:
def main():
    """Run every universe in ``PARAM_GRID`` and save the combined results.

    For each universe, the result of ``run_universe`` is updated with the
    universe's own settings, so on a key clash the setting overwrites the
    metric. The rows are collected and written as one CSV to
    ``analyses/results.csv``. The output directory is created if it does not
    exist, so a long sweep cannot fail at the final write for that reason.

    Returns:
        None. Progress is printed, and the CSV is the only artefact.
    """
    # Enumerate the grid once: the total is loop-invariant, and rebuilding
    # the whole product on every iteration just to count it is wasted work.
    universes = list(generate_universes(PARAM_GRID))
    total = len(universes)

    all_results = []
    for idx, uni in enumerate(universes, 1):
        print(f"Running universe {idx}/{total}: {uni}")
        metrics = run_universe(uni)
        # Tag the metrics with the settings that produced them.
        metrics.update(uni)
        all_results.append(metrics)

    # Save aggregated
    out_path = Path('analyses/results.csv')
    out_path.parent.mkdir(parents=True, exist_ok=True)
    pd.DataFrame(all_results).to_csv(out_path, index=False)


In [None]:
# Entry-point guard: run the full sweep only when this code executes as the
# top-level module (`__name__ == '__main__'`), not when it is imported.
if __name__ == '__main__':
    main()
    