In [6]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd
from sklearn.model_selection import ParameterGrid
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import roc_auc_score
import warnings

warnings.simplefilter("ignore", category=UserWarning)


###############################################################################
# 1. GLOBAL PARAMETERS & SETTINGS
###############################################################################
# Cost model shared by simulate_policy and the DP defaults below.
FP_COST = 10    # Cost charged when a healthy patient (label 0) is treated
FN_COST = 50    # Cost charged when a sick patient (label 1) is never treated
D_COST  = 1     # Cost per time-step elapsed before a sick patient is treated
GAMMA   = 0.99  # Discount factor applied to the value of waiting in the DP
T_MAX   = 20    # Time horizon (discrete steps 0..T_MAX-1 for each patient)

# Columns fed to the classifiers; "time" itself is used as a feature.
FEATURE_COLS = ["time", "EIT", "NIRS", "EIS"]

###############################################################################
# 2. HELPER FUNCTIONS: SPLITTING & FILTERING
###############################################################################
def split_patients_kfold(df, n_splits=4, seed=0):
    """
    Partition the patients of df into (n_splits+1) disjoint groups.

    The unique patient IDs are shuffled with a RandomState seeded by `seed`
    and cut into contiguous, near-equal slices. The result maps the names
    "G1", ..., "G{n_splits+1}" to sets of patient IDs; callers treat the last
    group as the final holdout.
    """
    shuffled_ids = df['patient_id'].unique()
    np.random.RandomState(seed).shuffle(shuffled_ids)

    total = len(shuffled_ids)
    n_groups = n_splits + 1

    # Group k owns the slice [k*total/n_groups, (k+1)*total/n_groups), truncated to ints.
    return {
        f"G{k+1}": set(
            shuffled_ids[int(k * total / n_groups):int((k + 1) * total / n_groups)]
        )
        for k in range(n_groups)
    }

def filter_by_group(df, pid_set):
    """Return an independent copy of the rows of df whose patient_id is in pid_set."""
    in_group = df['patient_id'].isin(pid_set)
    return df.loc[in_group].copy()


###############################################################################
# 3. ML TRAINING & RISK-SCORE PREDICTIONS
###############################################################################
def train_and_predict_model(model_type, hyperparams, df_train, df_val, feature_cols=FEATURE_COLS):
    """
    Fit one classifier on df_train and score df_val with it.

    model_type selects the estimator ("catboost", "rf" or "gb"); hyperparams
    are forwarded to its constructor. The 'label' column of df_train is the
    target. Returns the predicted probability of class 1 for every row of
    df_val, in df_val's row order. Raises ValueError for any other model_type.
    """
    train_features = df_train[feature_cols]
    train_labels = df_train['label']

    if model_type == "catboost":
        classifier = CatBoostClassifier(**hyperparams, verbose=False)
    elif model_type == "rf":
        classifier = RandomForestClassifier(**hyperparams, random_state=42)
    elif model_type == "gb":
        classifier = GradientBoostingClassifier(**hyperparams, random_state=42)
    else:
        raise ValueError(f"Unknown model_type={model_type}")
    classifier.fit(train_features, train_labels)

    # Column 1 of predict_proba is the probability that label == 1.
    class_probabilities = classifier.predict_proba(df_val[feature_cols])
    return class_probabilities[:, 1]

def generate_risk_scores_via_cv(df_train_splits, i_val, model_list, param_grid_dict, feature_cols=FEATURE_COLS):
    """
    For validation split i_val, pick the best (model_type, hyperparams) by AUC.

    Every entry of df_train_splits other than i_val is concatenated into the
    training set, so the caller must pass only the splits that are allowed to
    be trained on (a hold-out set included in the dict WILL be trained on).

    Args:
      df_train_splits (dict): split name -> DataFrame with feature_cols and 'label'.
      i_val: key of the split used for validation.
      model_list: iterable of model types to try.
      param_grid_dict (dict): model type -> iterable of hyperparameter dicts.
      feature_cols: feature column names.

    Returns:
      best_val_scores (np.array): risk scores for df_train_splits[i_val]
      best_model_type, best_hparams, best_auc
      (None, None, None, -999 when no candidate was evaluated).
    """
    df_val = df_train_splits[i_val]
    y_val = df_val['label'].values

    # Training set = union of all splits except the validation one.
    df_train_full = pd.concat(
        [part for name, part in df_train_splits.items() if name != i_val],
        ignore_index=True,
    )

    best_model_type = None
    best_hparams = None
    best_auc = -999  # AUC lies in [0, 1], so any evaluated candidate beats this
    best_val_scores = None

    for m_type in model_list:
        for hyperparams in param_grid_dict[m_type]:
            scores_val = train_and_predict_model(
                m_type, hyperparams, df_train_full, df_val, feature_cols=feature_cols
            )
            auc_val = roc_auc_score(y_val, scores_val)
            # Strict '>' keeps the first candidate on ties.
            if auc_val > best_auc:
                best_auc = auc_val
                best_model_type = m_type
                best_hparams = hyperparams
                best_val_scores = scores_val

    return best_val_scores, best_model_type, best_hparams, best_auc


###############################################################################
# 4. SIMULATE POLICIES
###############################################################################
def simulate_policy(df, policy_func, fp_cost=None, fn_cost=None, d_cost=None):
    """
    Apply a treatment policy to every patient in df and aggregate the outcome.

    df has columns: patient_id, time, label (plus whatever policy_func reads,
    e.g. predicted_risk). Each patient's rows are sorted by time and passed to
    policy_func(patient_rows) -> treat_time (int) or None. The label of the
    patient's first row is taken as the patient's label.

    Cost per patient:
      - sick and never treated:  fn_cost
      - sick and treated:        d_cost * treat_time
      - healthy and treated:     fp_cost
      - healthy and untreated:   0

    fp_cost / fn_cost / d_cost default to the module-level FP_COST, FN_COST
    and D_COST when left as None.

    Returns: dict of {cost, precision, recall, avg_treatment_time}. Ratios
    with an empty denominator are reported as 0.0, and a df without any
    patient yields all zeros instead of raising.
    """
    fp_cost = FP_COST if fp_cost is None else fp_cost
    fn_cost = FN_COST if fn_cost is None else fn_cost
    d_cost = D_COST if d_cost is None else d_cost

    total_cost = 0
    n_sick = 0
    true_pos = 0
    false_pos = 0
    treat_times = []

    for _, rows in df.groupby('patient_id'):
        rows = rows.sort_values('time')
        is_sick = bool(rows['label'].iloc[0] == 1)
        n_sick += is_sick

        treat_time = policy_func(rows)
        if treat_time is None:
            # Never treated: only a sick patient costs anything.
            if is_sick:
                total_cost += fn_cost
            continue

        treat_times.append(treat_time)
        if is_sick:
            total_cost += d_cost * treat_time
            true_pos += 1
        else:
            total_cost += fp_cost
            false_pos += 1

    n_treated = true_pos + false_pos
    precision = true_pos / n_treated if n_treated > 0 else 0.0
    recall = true_pos / n_sick if n_sick > 0 else 0.0
    avg_tt = sum(treat_times) / len(treat_times) if treat_times else 0.0

    return {
        'cost': total_cost,
        'precision': precision,
        'recall': recall,
        'avg_treatment_time': avg_tt
    }

###############################################################################
# 5. BENCHMARK DECISION POLICIES
###############################################################################
def constant_threshold_search(df, thresholds=None):
    """
    Grid-search a single risk threshold that minimises simulated cost on df.

    A patient is treated at the first time-step whose predicted_risk is at
    least the threshold. Defaults to 10 thresholds evenly spaced on [0, 0.5].
    Returns (best_threshold, stats_of_best); the first threshold wins ties.
    """
    candidates = np.linspace(0,0.5,10) if thresholds is None else thresholds

    def first_crossing(cutoff):
        # Policy: earliest row (rows arrive sorted by time) at or above cutoff.
        def policy(rows):
            for t, risk in zip(rows['time'], rows['predicted_risk']):
                if risk >= cutoff:
                    return int(t)
            return None
        return policy

    winner_thr, winner_stats = None, None
    lowest_cost = float('inf')
    for cutoff in candidates:
        outcome = simulate_policy(df, first_crossing(cutoff))
        if outcome['cost'] < lowest_cost:
            lowest_cost = outcome['cost']
            winner_thr = cutoff
            winner_stats = outcome

    return winner_thr, winner_stats

def dynamic_threshold_random_search(df,
                                    time_steps=T_MAX,
                                    threshold_candidates=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
                                    n_samples=200,
                                    seed=0):
    """
    Random-search a per-time-step threshold vector that minimises cost on df.

    n_samples vectors of length time_steps are drawn (with replacement) from
    threshold_candidates using RandomState(seed). For each vector a patient is
    treated at the first time t < time_steps whose predicted_risk is at least
    the threshold for t; rows with t >= time_steps are ignored.

    Returns (best_threshold_vector, stats_of_best); the first sampled vector
    wins ties.
    """
    rng = np.random.RandomState(seed)
    best_vec = None
    best_cost = float('inf')
    best_stats = None

    for _ in range(n_samples):
        thr_vec = rng.choice(threshold_candidates, size=time_steps)

        # thr_vec is bound as a default so the closure cannot see a later sample.
        def policy_func(rows, thr_vec=thr_vec):
            for raw_t, risk in zip(rows['time'], rows['predicted_risk']):
                t = int(raw_t)
                if t < time_steps and risk >= thr_vec[t]:
                    return t
            return None

        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_vec = thr_vec.copy()
            best_stats = stats
    return best_vec, best_stats

def linear_threshold_search(df, A_candidates=None, B_candidates=None):
    """
    Grid-search a threshold that is linear in time: thr(t) = clip(A*t + B, 0, 1).

    A patient is treated at the first time-step whose predicted_risk reaches
    thr(t). Defaults: 5 slopes on [-0.05, 0.05] and 6 intercepts on [0, 0.5].
    Returns ((best_A, best_B), stats_of_best); the first pair wins ties.
    """
    slopes = np.linspace(-0.05, 0.05, 5) if A_candidates is None else A_candidates
    intercepts = np.linspace(0, 0.5, 6) if B_candidates is None else B_candidates

    def make_policy(slope, intercept):
        def policy(rows):
            for t, risk in zip(rows['time'], rows['predicted_risk']):
                cutoff = np.clip(slope*t + intercept, 0, 1)
                if risk >= cutoff:
                    return int(t)
            return None
        return policy

    chosen_A, chosen_B = None, None
    lowest_cost, chosen_stats = float('inf'), None

    for slope in slopes:
        for intercept in intercepts:
            outcome = simulate_policy(df, make_policy(slope, intercept))
            if outcome['cost'] < lowest_cost:
                lowest_cost = outcome['cost']
                chosen_stats = outcome
                chosen_A = slope
                chosen_B = intercept

    return (chosen_A,chosen_B), chosen_stats

def wait_till_end_search(df, thresholds=None):
    """
    Grid-search the threshold for a policy that only decides at the last step.

    For each patient only the row with the largest time is inspected; the
    patient is treated at that time if its predicted_risk is at least the
    threshold, otherwise never. Defaults to 21 thresholds on [0, 1].
    Returns (best_threshold, stats_of_best); the first threshold wins ties.
    """
    if thresholds is None:
        thresholds = np.linspace(0,1,21)
    best_thr, best_cost = None, float('inf')
    best_stats = None

    for thr in thresholds:
        def policy_func(rows, thr=thr):
            # Positional lookup: a label lookup (.loc[idxmax()]) returns several
            # rows when index labels repeat, which makes the comparison below fail.
            final_row = rows.iloc[int(rows['time'].argmax())]
            if final_row['predicted_risk'] >= thr:
                return int(final_row['time'])
            return None

        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr = thr
            best_stats = stats

    return best_thr, best_stats

###############################################################################
# 6. DATA-DRIVEN DP
###############################################################################
def assign_buckets(prob, n_buckets=5):
    """
    Map a probability to one of n_buckets equal-width bins on [0, 1].

    Bin b covers [edges[b], edges[b+1]). Anything that falls in no bin
    (prob >= 1, but also prob < 0) is assigned to the last bin.
    """
    edges = np.linspace(0,1,n_buckets+1)
    matching_bins = (b for b in range(n_buckets) if edges[b] <= prob < edges[b+1])
    return next(matching_bins, n_buckets - 1)

def estimate_transition_and_sick_probs(df_train, T=T_MAX, n_buckets=5):
    """
    Estimate bucket-transition and sickness probabilities from df_train.

    df_train needs the columns patient_id, time, risk_bucket and label.

    Returns:
      p_trans (T-1, n_buckets, n_buckets): p_trans[t, b, b'] is the empirical
        frequency of moving from bucket b at time t to bucket b' at time t+1,
        counted only between consecutive rows of one patient whose times
        differ by exactly 1. A (t, b) with no observed transition falls back
        to staying in b with probability 1.
      p_sick (T, n_buckets): p_sick[t, b] is the mean label of the rows seen
        at time t in bucket b, or 0.0 when that cell was never visited.
    """
    trans_shape = (T-1, n_buckets, n_buckets)
    trans_tally = np.zeros(trans_shape, dtype=float)
    visit_tally = np.zeros((T, n_buckets), dtype=float)
    sick_tally = np.zeros((T, n_buckets), dtype=float)

    ordered = df_train.sort_values(['patient_id','time'])
    for _, history in ordered.groupby('patient_id'):
        records = history.sort_values('time').to_dict('records')
        last_pos = len(records) - 1

        for pos, rec in enumerate(records):
            t_now = int(rec['time'])
            b_now = int(rec['risk_bucket'])

            if t_now < T:
                visit_tally[t_now, b_now] += 1
                sick_tally[t_now, b_now] += rec['label']

            if pos >= last_pos:
                continue
            following = records[pos+1]
            t_next = int(following['time'])
            b_next = int(following['risk_bucket'])
            # Only count gap-free steps that start inside the horizon.
            if (t_next == t_now+1) and (t_now < T-1):
                trans_tally[t_now, b_now, b_next] += 1

    # Row-normalise the transition counts; unseen (t, b) rows become self-loops.
    outgoing = trans_tally.sum(axis=2)
    observed = outgoing > 0
    p_trans = np.zeros(trans_shape, dtype=float)
    p_trans[observed] = trans_tally[observed] / outgoing[observed][:, None]
    t_missing, b_missing = np.nonzero(~observed)
    p_trans[t_missing, b_missing, b_missing] = 1.0  # fallback

    visited = visit_tally > 0
    p_sick = np.zeros((T, n_buckets), dtype=float)
    p_sick[visited] = sick_tally[visited] / visit_tally[visited]

    return p_trans, p_sick

def train_data_driven_dp(p_trans, p_sick,
                         FP=FP_COST, FN=FN_COST, D=D_COST, gamma=GAMMA, T=T_MAX):
    """
    Solve the finite-horizon treat/wait problem by backward induction.

    State is (time t, risk bucket b). Treating at (t, b) costs
    p_sick[t,b]*D*t + (1 - p_sick[t,b])*FP and stops the process; waiting
    costs gamma times the expected value of the next state under p_trans.

    Once the horizon is over no action is left, so the terminal value is the
    cost of leaving the patient untreated, p_sick[T-1,b]*FN. (Using
    min(treat, no-treat) as the terminal value made waiting at t = T-1 cheaper
    than treating by the factor gamma, so the policy never treated at the
    last step even though an untreated sick patient is charged FN.)

    Args:
      p_trans: array (T-1, n_buckets, n_buckets) of transition probabilities.
      p_sick:  array (T, n_buckets) of sickness probabilities.
      FP, FN, D, gamma, T: cost parameters, discount factor and horizon.

    Returns:
      V   (T+1, n_buckets): expected cost-to-go; V[T] is the terminal value.
      pi_ (T, n_buckets) int: 1 = treat, 0 = wait. Ties resolve to treat.
    """
    n_buckets = p_sick.shape[1]
    V = np.zeros((T+1, n_buckets))
    pi_ = np.zeros((T, n_buckets), dtype=int)

    # Terminal value: untreated at the end of the horizon.
    V[T] = p_sick[T-1] * FN

    for t in reversed(range(T)):
        cost_treat = p_sick[t] * (D * t) + (1 - p_sick[t]) * FP
        if t == T-1:
            # No transition row exists for the last step; the bucket is kept.
            expected_next = V[T]
        else:
            expected_next = p_trans[t] @ V[t+1]
        cost_wait = gamma * expected_next

        treat = cost_treat <= cost_wait
        pi_[t] = treat
        V[t] = np.where(treat, cost_treat, cost_wait)
    return V, pi_

def make_data_driven_dp_policy(V, pi_, T=T_MAX):
    """
    Wrap a DP decision table as a policy function for simulate_policy.

    The returned policy walks a patient's rows in order and returns the first
    time t < T at which pi_[t, risk_bucket] == 1, or None if that never
    happens. V is accepted for symmetry with train_data_driven_dp's return
    value but is not consulted.
    """
    def policy_func(rows):
        for raw_t, raw_bucket in zip(rows['time'], rows['risk_bucket']):
            step = int(raw_t)
            if step >= T:
                continue
            if pi_[step, int(raw_bucket)] == 1:  # treat
                return step
        return None
    return policy_func


###############################################################################
# 7. ALGORITHM 2 (Full CV) IMPLEMENTATION
###############################################################################
def _fit_classifier(model_type, hparams, X, y):
    """Build and fit the estimator for model_type ("catboost", "rf"; anything else is treated as "gb")."""
    if model_type == "catboost":
        model = CatBoostClassifier(**hparams, verbose=False)
    elif model_type == "rf":
        model = RandomForestClassifier(**hparams, random_state=42)
    else:
        model = GradientBoostingClassifier(**hparams, random_state=42)
    model.fit(X, y)
    return model


def _run_benchmark_searches(df_scored, dyn_seed=0):
    """Tune the four threshold benchmarks on df_scored; return their stats in table order."""
    _, stats_const = constant_threshold_search(df_scored)
    _, stats_dyn = dynamic_threshold_random_search(df_scored, seed=dyn_seed)
    _, stats_lin = linear_threshold_search(df_scored)
    _, stats_wte = wait_till_end_search(df_scored)
    return stats_const, stats_dyn, stats_lin, stats_wte


def _evaluate_dp_policy(model, df_train, df_eval):
    """Fit the bucketed DP on df_train (scored in-sample by model) and simulate it on df_eval."""
    df_train = df_train.copy()
    df_train["predicted_risk"] = model.predict_proba(df_train[FEATURE_COLS])[:, 1]
    df_train["risk_bucket"] = df_train["predicted_risk"].apply(assign_buckets)

    p_trans, p_sick = estimate_transition_and_sick_probs(df_train, T=T_MAX, n_buckets=5)
    V, pi_ = train_data_driven_dp(p_trans, p_sick,
                                  FP=FP_COST, FN=FN_COST, D=D_COST, gamma=GAMMA, T=T_MAX)

    # df_eval must already carry predicted_risk from the same model family.
    df_eval = df_eval.copy()
    df_eval["risk_bucket"] = df_eval["predicted_risk"].apply(assign_buckets)
    return simulate_policy(df_eval, make_data_driven_dp_policy(V, pi_, T=T_MAX))


def run_experiment_algorithm2(
    df_all,
    n_splits=4,
    seed=42,
    model_list=("catboost","rf","gb"),
    param_grid_dict=None):
    """
    Implement Algorithm 2 (Full Cross-Validation) with integrated ML + DP.

    Patients are split into G1..G_{n_splits} (cross-validation folds) and
    G_{n_splits+1} (hold-out). For each fold the best (model, hyperparams) is
    chosen by validation AUC, the four threshold benchmarks are tuned on the
    fold, and a DP policy learned on the remaining folds is simulated on it.
    The configuration of the fold with the lowest DP cost is then retrained
    on all CV folds and every method is evaluated on the hold-out group.

    The hold-out group is kept out of model selection: only the CV folds are
    handed to generate_risk_scores_via_cv, which trains on every split it is
    given except the validation one.

    Args:
      df_all: DataFrame with patient_id, time, label and FEATURE_COLS.
      n_splits: number of CV folds (at least 2).
      seed: seed for the patient split.
      model_list: model types to consider.
      param_grid_dict: model type -> list of hyperparameter dicts; a small
        default grid is used when None.

    Returns:
      final_table (pd.DataFrame): results on G_{n_splits+1}
      df_val_stats (pd.DataFrame): details per fold in cross-validation

    Raises:
      ValueError: if n_splits < 2 (a fold needs at least one training fold).
    """
    if n_splits < 2:
        raise ValueError(f"n_splits must be at least 2, got {n_splits}")

    if param_grid_dict is None:
        # Example small grids
        param_grid_dict = {
            "catboost": [
                {"iterations":50, "depth":3, "learning_rate":0.1},
                {"iterations":50, "depth":4, "learning_rate":0.05},
            ],
            "rf": [
                {"n_estimators":50, "max_depth":3},
                {"n_estimators":100, "max_depth":5},
            ],
            "gb": [
                {"n_estimators":50, "max_depth":3, "learning_rate":0.1},
                {"n_estimators":100,"max_depth":3, "learning_rate":0.05},
            ]
        }

    # 1) Split => G1..G_{n_splits} (CV folds), G_{n_splits+1} (hold-out)
    splits = split_patients_kfold(df_all, n_splits=n_splits, seed=seed)
    group_dfs = {name: filter_by_group(df_all, pids) for name, pids in splits.items()}

    cv_names = [f"G{i}" for i in range(1, n_splits+1)]
    cv_dfs = {name: group_dfs[name] for name in cv_names}
    df_test = group_dfs[f"G{n_splits+1}"].copy()

    # 2) Cross-validation on folds {G1..G_n}
    all_val_stats = []
    for i_val, val_name in enumerate(cv_names, start=1):
        # Model selection sees the CV folds only, never the hold-out group.
        val_scores, best_model_type, best_hparams, best_auc = generate_risk_scores_via_cv(
            df_train_splits=cv_dfs,
            i_val=val_name,
            model_list=model_list,
            param_grid_dict=param_grid_dict,
            feature_cols=FEATURE_COLS
        )
        df_val = cv_dfs[val_name].copy()
        df_val["predicted_risk"] = val_scores

        stats_const, stats_dyn, stats_lin, stats_wte = _run_benchmark_searches(
            df_val, dyn_seed=999 + i_val
        )

        # DP: refit the selected model on the training folds, learn the DP
        # from its in-sample risk scores, then apply the policy to this fold.
        df_train_fold = pd.concat(
            [cv_dfs[name] for name in cv_names if name != val_name],
            ignore_index=True,
        )
        fold_model = _fit_classifier(
            best_model_type, best_hparams,
            df_train_fold[FEATURE_COLS], df_train_fold['label'],
        )
        stats_dp = _evaluate_dp_policy(fold_model, df_train_fold, df_val)

        all_val_stats.append({
            "fold": i_val,
            "best_model_type": best_model_type,
            "best_hparams": best_hparams,
            "AUC_val": best_auc,

            "const_cost": stats_const["cost"],
            "dyn_cost":   stats_dyn["cost"],
            "lin_cost":   stats_lin["cost"],
            "wte_cost":   stats_wte["cost"],
            "dp_cost":    stats_dp["cost"],
        })

    df_val_stats = pd.DataFrame(all_val_stats)

    # 3) Final configuration = the one from the fold with the lowest DP cost.
    fold_rec = df_val_stats.loc[df_val_stats["dp_cost"].idxmin()]
    final_model_type = fold_rec["best_model_type"]
    final_hparams    = fold_rec["best_hparams"]

    # 4) Retrain on G1..G_n => evaluate on G_{n+1}
    df_train_all = pd.concat([cv_dfs[name] for name in cv_names], ignore_index=True)
    final_model = _fit_classifier(
        final_model_type, final_hparams,
        df_train_all[FEATURE_COLS], df_train_all["label"],
    )
    df_test["predicted_risk"] = final_model.predict_proba(df_test[FEATURE_COLS])[:,1]

    stats_const, stats_dyn, stats_lin, stats_wte = _run_benchmark_searches(df_test, dyn_seed=0)
    stats_dp = _evaluate_dp_policy(final_model, df_train_all, df_test)

    stats_by_method = [
        ("Constant Threshold", stats_const),
        ("Dynamic Threshold-R", stats_dyn),
        ("Linear Threshold", stats_lin),
        ("Wait Till End", stats_wte),
        ("Dynamic Threshold-DP (DataDriven)", stats_dp),
    ]
    final_table = pd.DataFrame({
        "Method": [name for name, _ in stats_by_method],
        "Precision (%)": [100*s['precision'] for _, s in stats_by_method],
        "Cost": [s['cost'] for _, s in stats_by_method],
        "Recall (%)": [100*s['recall'] for _, s in stats_by_method],
        "Treatment Time": [s['avg_treatment_time'] for _, s in stats_by_method],
    })

    return final_table, df_val_stats


###############################################################################
# 8. RUN-ONCE FUNCTION (with a given seed)
###############################################################################
def run_experiment_once(df_all, seed=42, n_splits=4):
    """
    Run one replication of Algorithm 2 for the given split seed.

    Uses the three built-in model types with the default hyperparameter grid.
    Returns (final test table with the 5 methods, per-fold CV details).
    """
    experiment_kwargs = dict(
        df_all=df_all,
        n_splits=n_splits,
        seed=seed,
        model_list=("catboost","rf","gb"),
        param_grid_dict=None,  # default small grid
    )
    test_table, cv_details = run_experiment_algorithm2(**experiment_kwargs)
    return test_table, cv_details

###############################################################################
# 9. MAIN: 30 Replications
###############################################################################
def main():
    """
    Run N_REPS replications of the experiment and print mean ± std per method.

    Reads "synthetic_patients_with_features.csv" from the working directory,
    runs run_experiment_once with seeds 1000, 1001, ... and prints one summary
    row per method (precision, cost, recall, average treatment time). The
    standard deviation is the population one (numpy's default, ddof=0).
    """
    # 1) Load the data once
    df_all = pd.read_csv("synthetic_patients_with_features.csv")
    # NOTE: rows with time >= T_MAX are not filtered out here; the DP policy
    # and the per-step (dynamic) threshold policy skip such rows themselves.

    N_REPS = 30
    methods = [
        "Constant Threshold",
        "Dynamic Threshold-R",
        "Linear Threshold",
        "Wait Till End",
        "Dynamic Threshold-DP (DataDriven)"
    ]
    # Summary key -> column of the per-run table it is read from.
    metric_columns = {
        "precision": "Precision (%)",
        "cost": "Cost",
        "recall": "Recall (%)",
        "time": "Treatment Time",
    }

    # 2) Data structure to hold results across runs
    results_over_runs = {
        m: {metric: [] for metric in metric_columns}
        for m in methods
    }

    # 3) Loop over the seeds
    for rep in range(N_REPS):
        seed_value = 1000 + rep  # or any scheme you like
        print(f"\n=== RUN {rep+1}/{N_REPS}, seed={seed_value} ===")

        final_table, _ = run_experiment_once(df_all, seed=seed_value, n_splits=4)

        for _, row in final_table.iterrows():
            per_method = results_over_runs[row["Method"]]
            for metric, column in metric_columns.items():
                per_method[metric].append(row[column])

    # 4) Print mean ± std dev across the runs.
    # The label column is sized to the longest method name (never below the
    # previous 28) so the numeric columns stay aligned.
    label_width = max(28, max(len(m) for m in methods))
    print(f"\n=== FINAL RESULTS (Mean ± Std Dev over {N_REPS} Replications) ===")
    headers = ("Precision(%)", "Cost", "Recall(%)", "Avg Time")
    print(f"{'Method':<{label_width}s} " + " ".join(f"{h:>18s}" for h in headers))

    for m in methods:
        cells = []
        for metric in metric_columns:
            values = np.array(results_over_runs[m][metric])
            cells.append(f"{values.mean():.2f} ± {values.std():.2f}")
        print(f"{m:<{label_width}s} " + " ".join(f"{c:>18s}" for c in cells))


# Run the replication study only when this file is executed as a script.
if __name__ == "__main__":
    main()


=== RUN 1/30, seed=1000 ===

=== RUN 2/30, seed=1001 ===

=== RUN 3/30, seed=1002 ===

=== RUN 4/30, seed=1003 ===

=== RUN 5/30, seed=1004 ===

=== RUN 6/30, seed=1005 ===

=== RUN 7/30, seed=1006 ===

=== RUN 8/30, seed=1007 ===

=== RUN 9/30, seed=1008 ===

=== RUN 10/30, seed=1009 ===

=== RUN 11/30, seed=1010 ===

=== RUN 12/30, seed=1011 ===

=== RUN 13/30, seed=1012 ===

=== RUN 14/30, seed=1013 ===

=== RUN 15/30, seed=1014 ===

=== RUN 16/30, seed=1015 ===

=== RUN 17/30, seed=1016 ===

=== RUN 18/30, seed=1017 ===

=== RUN 19/30, seed=1018 ===

=== RUN 20/30, seed=1019 ===

=== RUN 21/30, seed=1020 ===

=== RUN 22/30, seed=1021 ===

=== RUN 23/30, seed=1022 ===

=== RUN 24/30, seed=1023 ===

=== RUN 25/30, seed=1024 ===

=== RUN 26/30, seed=1025 ===

=== RUN 27/30, seed=1026 ===

=== RUN 28/30, seed=1027 ===

=== RUN 29/30, seed=1028 ===

=== RUN 30/30, seed=1029 ===

=== FINAL RESULTS (Mean ± Std Dev over 30 Replications) ===
Method                             Precision(%) 

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import ParameterGrid
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import roc_auc_score
import warnings

warnings.simplefilter("ignore", category=UserWarning)

###############################################################################
# GLOBAL PARAMETERS
###############################################################################
FP_COST = 10    # cost charged by simulate_policy when a label-0 patient is treated
FN_COST = 50    # cost charged by simulate_policy when a label-1 patient is never treated
D_COST  = 1     # cost per unit of treat_time for a treated label-1 patient
GAMMA   = 0.99  # presumably the DP discount factor; not used in the code visible in this cell
T_MAX   = 20    # default number of time steps for the per-step threshold search
FEATURE_COLS = ["time", "EIT", "NIRS", "EIS"]  # adapt to your CSV columns

###############################################################################
# DATA SPLITTING
###############################################################################
def split_patients_kfold(df, n_splits=4, seed=0):
    """
    Randomly partition the patient IDs of df into (n_splits+1) groups.

    IDs are shuffled with RandomState(seed) and sliced into contiguous,
    near-equal chunks returned as {"G1": set_of_ids, ..., "G{n_splits+1}": ...}.
    """
    patient_ids = df['patient_id'].unique()
    np.random.RandomState(seed).shuffle(patient_ids)

    n_patients = len(patient_ids)
    n_groups = n_splits + 1

    def chunk(k):
        # Boundaries are k*n/n_groups and (k+1)*n/n_groups, truncated to ints.
        lo = int(k * n_patients / n_groups)
        hi = int((k + 1) * n_patients / n_groups)
        return set(patient_ids[lo:hi])

    return {f"G{k+1}": chunk(k) for k in range(n_groups)}

def filter_by_group(df, pid_set):
    """Return a copy of the rows of df whose patient_id belongs to pid_set."""
    belongs = df['patient_id'].isin(pid_set)
    return df.loc[belongs].copy()

###############################################################################
# ML TRAINING & PREDICTIONS
###############################################################################
def train_model(model_type, hyperparams, X_train, y_train):
    """
    Fit and return a classifier of the requested type.

    model_type is one of "catboost", "rf" or "gb"; hyperparams are passed to
    the estimator's constructor together with random_state=42. Any other
    model_type raises ValueError before anything is built.
    """
    # Constructors are deferred so that only the requested estimator is built.
    factories = (
        ("catboost", lambda: CatBoostClassifier(**hyperparams, verbose=False, random_state=42)),
        ("rf", lambda: RandomForestClassifier(**hyperparams, random_state=42)),
        ("gb", lambda: GradientBoostingClassifier(**hyperparams, random_state=42)),
    )
    build = next((make for name, make in factories if model_type == name), None)
    if build is None:
        raise ValueError(f"Unknown model_type={model_type}")

    estimator = build()
    estimator.fit(X_train, y_train)
    return estimator

def predict_risk(model, X):
    """Score X with model and return the probability column for label=1."""
    class_probabilities = model.predict_proba(X)
    return class_probabilities[:, 1]

###############################################################################
# SIMULATE POLICY (COST, PRECISION, RECALL, etc.)
###############################################################################
def simulate_policy(df, policy_func, fp_cost=None, fn_cost=None, d_cost=None):
    """
    Simulate policy_func on every patient of df and summarise the outcome.

    Each patient's rows (grouped by patient_id, sorted by time) are passed to
    policy_func, which returns the treatment time or None for "never treat".
    The label of the patient's first row is the patient's label.

    Per-patient cost: fn_cost for an untreated sick patient,
    d_cost * treat_time for a treated sick patient, fp_cost for a treated
    healthy patient, 0 otherwise. The three costs default to the module-level
    FP_COST, FN_COST and D_COST when left as None.

    Returns a dict with 'cost' (sum over patients), 'precision', 'recall' and
    'avg_treatment_time' (mean treat_time over treated patients). Ratios with
    an empty denominator are 0.0; a df without patients yields all zeros
    instead of raising.
    """
    fp_cost = FP_COST if fp_cost is None else fp_cost
    fn_cost = FN_COST if fn_cost is None else fn_cost
    d_cost = D_COST if d_cost is None else d_cost

    total_cost = 0
    total_sick = 0
    tp_sum = 0
    fp_sum = 0
    treat_times = []

    for _, rows in df.groupby('patient_id'):
        rows = rows.sort_values('time')
        sick = bool(rows['label'].iloc[0] == 1)
        if sick:
            total_sick += 1

        treat_time = policy_func(rows)
        if treat_time is None:
            # never treated
            total_cost += fn_cost if sick else 0
            continue

        treat_times.append(treat_time)
        if sick:
            total_cost += d_cost * treat_time
            tp_sum += 1
        else:
            total_cost += fp_cost
            fp_sum += 1

    treated = tp_sum + fp_sum
    return {
        'cost': total_cost,
        'precision': tp_sum / treated if treated > 0 else 0.0,
        'recall': tp_sum / total_sick if total_sick > 0 else 0.0,
        'avg_treatment_time': sum(treat_times) / len(treat_times) if treat_times else 0.0
    }

###############################################################################
# BENCHMARK THRESHOLD POLICIES
###############################################################################
def constant_threshold_search(df, thresholds=None):
    """
    Pick the single risk threshold with the lowest simulated cost on df.

    A patient is treated at the first row whose predicted_risk is at least
    the threshold. Defaults to 21 thresholds evenly spaced on [0, 1].
    Returns (best_threshold, stats_of_best); the first threshold wins ties.
    """
    grid = np.linspace(0,1,21) if thresholds is None else thresholds

    def treat_at_first_crossing(cutoff):
        def policy(rows):
            for t, risk in zip(rows['time'], rows['predicted_risk']):
                if risk >= cutoff:
                    return int(t)
            return None
        return policy

    chosen_thr, chosen_stats = None, None
    min_cost = float('inf')
    for cutoff in grid:
        result = simulate_policy(df, treat_at_first_crossing(cutoff))
        if result['cost'] < min_cost:
            min_cost = result['cost']
            chosen_thr = cutoff
            chosen_stats = result
    return chosen_thr, chosen_stats

def dynamic_threshold_random_search(df,
                                    time_steps=T_MAX,
                                    threshold_candidates=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
                                    n_samples=200,
                                    seed=0):
    """
    Random search over per-time-step threshold vectors.

    Draws `n_samples` vectors of length `time_steps`, each entry sampled
    (with replacement) from `threshold_candidates`. A vector defines a policy
    that returns the first time t whose 'predicted_risk' is at or above
    thr_vec[t]; rows whose time falls outside 0..time_steps-1 never trigger
    treatment. Every policy is scored with simulate_policy(df, policy) and
    the vector with the strictly lowest 'cost' is kept.

    Parameters
    ----------
    df : DataFrame forwarded unchanged to simulate_policy.
    time_steps : length of each threshold vector.
    threshold_candidates : sequence of values each entry may take. The
        default is an immutable tuple so it cannot be shared-and-mutated
        across calls.
    n_samples : number of random vectors to evaluate.
    seed : seed for the private RandomState used for sampling.

    Returns
    -------
    (best_vector, best_stats); both stay None if no sample produced a cost
    below +inf.
    """
    rng = np.random.RandomState(seed)
    best_vec = None
    best_cost = float('inf')
    best_stats = None
    for _ in range(n_samples):
        thr_vec = rng.choice(threshold_candidates, size=time_steps)

        def policy_func(rows):
            for _, row in rows.iterrows():
                t = int(row['time'])
                # The lower bound keeps a negative time from silently
                # wrapping around to the end of thr_vec.
                if 0 <= t < time_steps and row['predicted_risk'] >= thr_vec[t]:
                    return t
            return None

        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_vec = thr_vec.copy()
            best_stats = stats
    return best_vec, best_stats

def linear_threshold_search(df, A_candidates=None, B_candidates=None):
    """
    Grid-search a threshold that is linear in time: clip(A*t + B, 0, 1).

    For each (A, B) pair the policy returns the time of the first row whose
    'predicted_risk' is at or above the clipped linear threshold evaluated at
    that row's 'time', or None if no row qualifies. Policies are scored with
    simulate_policy(df, policy); the pair with the strictly lowest 'cost'
    wins, with A varying in the outer loop and B in the inner loop.

    Parameters
    ----------
    df : DataFrame forwarded unchanged to simulate_policy.
    A_candidates : slopes to try; defaults to 5 values on [-0.05, 0.05].
    B_candidates : intercepts to try; defaults to 6 values on [0, 0.5].

    Returns
    -------
    ((best_A, best_B), best_stats); all None if no pair produced a cost
    below +inf.
    """
    slopes = np.linspace(-0.05, 0.05, 5) if A_candidates is None else A_candidates
    intercepts = np.linspace(0, 0.5, 6) if B_candidates is None else B_candidates

    chosen_pair = (None, None)
    chosen_stats = None
    lowest_cost = float('inf')
    for A, B in ((a, b) for a in slopes for b in intercepts):

        def policy_func(rows):
            times = rows['time'].to_numpy(dtype=float)
            risks = rows['predicted_risk'].to_numpy(dtype=float)
            cutoffs = np.clip(A * times + B, 0, 1)
            hit_positions = np.flatnonzero(risks >= cutoffs)
            if hit_positions.size == 0:
                return None
            return int(times[hit_positions[0]])

        outcome = simulate_policy(df, policy_func)
        if outcome['cost'] < lowest_cost:
            lowest_cost = outcome['cost']
            chosen_pair = (A, B)
            chosen_stats = outcome
    return chosen_pair, chosen_stats

def wait_till_end_search(df, thresholds=None):
    """
    Grid-search a threshold that is applied only to the latest observation.

    For each candidate the policy looks at the row with the largest 'time'
    and returns that time if the row's 'predicted_risk' is at or above the
    threshold, otherwise None. Policies are scored with
    simulate_policy(df, policy) and the threshold with the strictly lowest
    'cost' is returned.

    Parameters
    ----------
    df : DataFrame forwarded unchanged to simulate_policy.
    thresholds : iterable of candidates; defaults to 21 evenly spaced values
        on [0, 1].

    Returns
    -------
    (best_threshold, best_stats); both stay None if no candidate produced a
    cost below +inf.
    """
    if thresholds is None:
        thresholds = np.linspace(0, 1, 21)

    winner = {'thr': None, 'stats': None, 'cost': float('inf')}
    for cutoff in thresholds:

        def policy_func(rows):
            last_obs = rows.loc[rows['time'].idxmax()]
            return int(last_obs['time']) if last_obs['predicted_risk'] >= cutoff else None

        outcome = simulate_policy(df, policy_func)
        if outcome['cost'] < winner['cost']:
            winner = {'thr': cutoff, 'stats': outcome, 'cost': outcome['cost']}
    return winner['thr'], winner['stats']

###############################################################################
# DATA-DRIVEN DP
###############################################################################
def assign_buckets(prob, n_buckets=5):
    """
    Map a probability to the index of its equal-width bucket on [0, 1].

    Bucket b covers the half-open interval [edges[b], edges[b+1]) where
    edges = linspace(0, 1, n_buckets + 1). Out-of-range inputs are clamped to
    the nearest bucket: values >= 1 go to the last bucket and values < 0 go
    to bucket 0 (previously a negative value fell through to the *highest*
    bucket).

    Parameters
    ----------
    prob : scalar probability.
    n_buckets : number of equal-width buckets.

    Returns
    -------
    int in 0..n_buckets-1.
    """
    edges = np.linspace(0, 1, n_buckets + 1)
    # side="right" counts the edges <= prob, so subtracting one yields the
    # bucket whose left edge is the largest edge not exceeding prob.
    idx = int(np.searchsorted(edges, prob, side="right")) - 1
    return min(max(idx, 0), n_buckets - 1)

def estimate_transition_and_sick_probs(df_train, T=T_MAX, n_buckets=5):
    """
    Estimate bucket-transition and sickness probabilities by counting.

    Rows are sorted by ('patient_id', 'time') and walked patient by patient.
    Every row with time t < T adds one visit to (t, risk_bucket) and adds its
    'label' to the sick tally of that cell. A transition (t, b -> b_next) is
    counted only when the patient's next row is exactly one time step later
    and t < T-1.

    Parameters
    ----------
    df_train : DataFrame with columns 'patient_id', 'time', 'risk_bucket',
        'label'.
    T : number of time steps.
    n_buckets : number of risk buckets.

    Returns
    -------
    p_trans : array (T-1, n_buckets, n_buckets); row (t, b) is the empirical
        distribution of the next bucket, or a self-transition with
        probability 1 when (t, b) has no observed transitions.
    p_sick : array (T, n_buckets); fraction of visits to (t, b) with label 1,
        or 0 when the cell was never visited.
    """
    move_counts = np.zeros((T - 1, n_buckets, n_buckets), dtype=float)
    visit_counts = np.zeros((T, n_buckets), dtype=float)
    sick_visits = np.zeros((T, n_buckets), dtype=float)

    ordered = df_train.sort_values(['patient_id', 'time'])
    for _, patient_rows in ordered.groupby('patient_id'):
        records = patient_rows.to_dict('records')
        # Pair every observation with its successor (None after the last).
        successors = records[1:] + [None]
        for current, following in zip(records, successors):
            step = int(current['time'])
            bucket = int(current['risk_bucket'])
            sick = int(current['label'])
            if step < T:
                visit_counts[step, bucket] += 1
                sick_visits[step, bucket] += sick
            if following is None:
                continue
            next_step = int(following['time'])
            next_bucket = int(following['risk_bucket'])
            if next_step == step + 1 and step < T - 1:
                move_counts[step, bucket, next_bucket] += 1

    # Normalise transition counts; unseen (t, b) cells stay in place.
    outflow = move_counts.sum(axis=2)
    seen = outflow > 0
    p_trans = np.zeros((T - 1, n_buckets, n_buckets))
    p_trans[seen] = move_counts[seen] / outflow[seen][:, None]
    unseen_t, unseen_b = np.nonzero(~seen)
    p_trans[unseen_t, unseen_b, unseen_b] = 1.0

    # Sick fraction per cell; never-visited cells keep probability 0.
    p_sick = np.zeros((T, n_buckets))
    np.divide(sick_visits, visit_counts, out=p_sick, where=visit_counts > 0)
    return p_trans, p_sick

def train_data_driven_dp(p_trans, p_sick,
                         FP=FP_COST, FN=FN_COST, D=D_COST, gamma=GAMMA, T=T_MAX):
    """
    Solve the finite-horizon treat-or-wait problem by backward induction.

    State is (t, risk bucket). Treating at (t, b) costs
    p_sick[t, b] * D * t + (1 - p_sick[t, b]) * FP. Waiting costs gamma times
    the expected value of the next state under p_trans[t, b, :]. Ties are
    resolved in favour of treating.

    Terminal condition: no decision exists after step T-1, so a patient who
    is still waiting at t = T was never treated and incurs the expected
    false-negative cost p_sick[T-1, b] * FN. (The previous terminal value was
    min(treat, no-treat), which let "wait" at T-1 implicitly include the very
    treatment option it was being compared against; with gamma < 1 that made
    waiting at the last step never costlier than treating, so the policy
    never treated there.)

    Parameters
    ----------
    p_trans : array (T-1, n_buckets, n_buckets) of bucket transition
        probabilities.
    p_sick : array (T, n_buckets) of sickness probabilities.
    FP, FN, D : false-positive, false-negative and per-step delay costs.
    gamma : discount factor applied to the value of waiting.
    T : number of decision steps.

    Returns
    -------
    V : array (T+1, n_buckets) of optimal expected costs (row T is the
        terminal cost).
    pi_ : int array (T, n_buckets); 1 = treat, 0 = wait.
    """
    n_buckets = p_sick.shape[1]
    V = np.zeros((T + 1, n_buckets))
    pi_ = np.zeros((T, n_buckets), dtype=int)

    # Terminal stage: never treated => expected false-negative penalty.
    V[T] = p_sick[T - 1] * FN

    for t in reversed(range(T)):
        cost_treat = p_sick[t] * (D * t) + (1 - p_sick[t]) * FP
        if t == T - 1:
            # No transition is defined out of the last step; the bucket is
            # carried over unchanged into the terminal stage.
            expected_future = V[T]
        else:
            expected_future = p_trans[t] @ V[t + 1]
        cost_wait = gamma * expected_future

        treat_now = cost_treat <= cost_wait
        pi_[t] = treat_now
        V[t] = np.where(treat_now, cost_treat, cost_wait)
    return V, pi_

def make_data_driven_dp_policy(V, pi_, T=T_MAX):
    """
    Wrap a DP decision table as a policy function.

    The returned function scans the given rows in order and returns the
    first time t < T at which pi_[t, risk_bucket] equals 1, or None if no
    such row exists. Rows with time >= T are skipped. V is accepted for
    signature symmetry with train_data_driven_dp but is not used.
    """
    def policy_func(rows):
        for _, obs in rows.iterrows():
            step = int(obs['time'])
            if step >= T:
                continue
            if pi_[step, int(obs['risk_bucket'])] == 1:
                return step
        return None

    return policy_func

###############################################################################
# ALGORITHM 2: "Full Cross-Validation" (FIXED VERSION)
###############################################################################
def run_experiment_algorithm2_fixed(df_all, n_splits=4, seed=42):
    """
    Run one replication of the revised ("fixed") Algorithm 2.

    Steps:
      1) Split patients into G1..G_{n_splits} plus the holdout G_{n_splits+1}.
      2) For every (model_type, hyperparams) candidate, run n_splits-fold
         cross-validation over G1..G_{n_splits} and record the mean AUC
         (a fold whose validation labels contain a single class counts 0.5).
      3) Retrain the candidate with the highest mean AUC on all of
         G1..G_{n_splits}.
      4) Use that model's predictions on the training union to estimate
         transitions / sick probabilities and derive the DP policy; predict
         risk on the holdout.
      5) Run the four benchmark threshold searches on the holdout.
      6) Evaluate the DP policy on the holdout.

    Returns
    -------
    final_table : DataFrame with one row per method and the columns
        'Method', 'Precision (%)', 'Cost', 'Recall (%)', 'Treatment Time'.
    df_results_cv : DataFrame of per-candidate mean AUCs.
    """
    # 1) Patient-level split and the per-group frames
    pid_groups = split_patients_kfold(df_all, n_splits=n_splits, seed=seed)
    group_dfs = {
        name: filter_by_group(df_all, pids) for name, pids in pid_groups.items()
    }
    cv_names = [f"G{k}" for k in range(1, n_splits + 1)]
    holdout_name = f"G{n_splits+1}"

    # Candidate models and their hyperparameter settings (demonstration grid)
    param_grid_dict = {
        "catboost": [
            {"iterations": 50, "depth": 3, "learning_rate": 0.1},
            {"iterations": 50, "depth": 4, "learning_rate": 0.05},
        ],
        "rf": [
            {"n_estimators": 50, "max_depth": 3},
            {"n_estimators": 100, "max_depth": 5},
        ],
        "gb": [
            {"n_estimators": 50, "max_depth": 3, "learning_rate": 0.1},
            {"n_estimators": 100, "max_depth": 3, "learning_rate": 0.05},
        ],
    }
    candidate_list = [
        (mt, hp)
        for mt in ("catboost", "rf", "gb")
        for hp in param_grid_dict[mt]
    ]

    # 2) Full cross-validation: mean AUC per candidate
    results_cv = []
    for mtype, mparams in candidate_list:
        fold_aucs = []
        for val_name in cv_names:
            fold_train = pd.concat(
                [group_dfs[name] for name in cv_names if name != val_name],
                ignore_index=True,
            )
            model = train_model(
                mtype, mparams, fold_train[FEATURE_COLS], fold_train['label']
            )

            fold_val = group_dfs[val_name]
            val_probs = predict_risk(model, fold_val[FEATURE_COLS])
            if len(np.unique(fold_val['label'])) >= 2:
                fold_aucs.append(roc_auc_score(fold_val['label'], val_probs))
            else:
                # AUC is undefined with a single class; score it as chance.
                fold_aucs.append(0.5)
        results_cv.append({
            "model_type": mtype,
            "hyperparams": mparams,
            "avg_auc": np.mean(fold_aucs),
        })

    df_results_cv = pd.DataFrame(results_cv)
    best_idx = df_results_cv["avg_auc"].idxmax()
    best_model_type = df_results_cv.loc[best_idx, "model_type"]
    best_hparams = df_results_cv.loc[best_idx, "hyperparams"]
    best_avg_auc = df_results_cv.loc[best_idx, "avg_auc"]

    print(f"[INFO] Best ML: {best_model_type} {best_hparams}, avg AUC={best_avg_auc:.4f}")

    # 3) Retrain the winner on the union of all CV groups
    df_train_all = pd.concat(
        [group_dfs[name] for name in cv_names], ignore_index=True
    )
    final_model = train_model(
        best_model_type, best_hparams,
        df_train_all[FEATURE_COLS], df_train_all['label']
    )

    # 4A) Training-union predictions -> buckets -> transition model -> DP
    df_train_all = df_train_all.copy()
    df_train_all["predicted_risk"] = predict_risk(
        final_model, df_train_all[FEATURE_COLS]
    )
    df_train_all["risk_bucket"] = df_train_all["predicted_risk"].apply(assign_buckets)

    p_trans, p_sick = estimate_transition_and_sick_probs(
        df_train_all, T=T_MAX, n_buckets=5
    )
    V, pi_ = train_data_driven_dp(
        p_trans, p_sick,
        FP=FP_COST, FN=FN_COST, D=D_COST, gamma=GAMMA, T=T_MAX
    )
    dp_policy_func = make_data_driven_dp_policy(V, pi_, T=T_MAX)

    # 4B) Holdout predictions (private copy so the group frame is untouched)
    df_test = group_dfs[holdout_name].copy()
    df_test["predicted_risk"] = predict_risk(final_model, df_test[FEATURE_COLS])

    # 5) Benchmark threshold policies, each searched on the holdout
    _, stats_c = constant_threshold_search(df_test)
    _, stats_dyn = dynamic_threshold_random_search(df_test, seed=999)
    _, stats_lin = linear_threshold_search(df_test)
    _, stats_wte = wait_till_end_search(df_test)

    # 6) DP policy on the holdout
    df_test["risk_bucket"] = df_test["predicted_risk"].apply(assign_buckets)
    stats_dp = simulate_policy(df_test, dp_policy_func)

    # Assemble the per-method results table
    method_stats = [
        ("Constant Threshold", stats_c),
        ("Dynamic Threshold-R", stats_dyn),
        ("Linear Threshold", stats_lin),
        ("Wait Till End", stats_wte),
        ("Dynamic Threshold-DP", stats_dp),
    ]
    final_table = pd.DataFrame({
        "Method": [label for label, _ in method_stats],
        "Precision (%)": [100*s['precision'] for _, s in method_stats],
        "Cost": [s['cost'] for _, s in method_stats],
        "Recall (%)": [100*s['recall'] for _, s in method_stats],
        "Treatment Time": [s['avg_treatment_time'] for _, s in method_stats],
    })

    return final_table, df_results_cv

###############################################################################
# MAIN: MULTIPLE REPLICATIONS
###############################################################################
def main():
    """
    Run the experiment N_REPS times with different seeds and print a summary.

    Loads "synthetic_patients_with_features.csv", calls
    run_experiment_algorithm2_fixed once per replication (seed = 100 + rep),
    collects each method's precision, cost, recall and treatment time, and
    prints the mean ± standard deviation (NumPy default, ddof=0) per method.
    """
    df_all = pd.read_csv("synthetic_patients_with_features.csv")
    # If needed, filter to time < T_MAX:
    # df_all = df_all[df_all['time'] < T_MAX].copy()

    N_REPS = 30
    method_list = [
        "Constant Threshold",
        "Dynamic Threshold-R",
        "Linear Threshold",
        "Wait Till End",
        "Dynamic Threshold-DP"
    ]
    # Result-table column -> key under which its values are accumulated.
    # The order here is also the column order of the printed summary.
    column_to_key = {
        "Precision (%)": "precision",
        "Cost": "cost",
        "Recall (%)": "recall",
        "Treatment Time": "time",
    }

    # Collect stats for each replication
    all_results = {
        m: {key: [] for key in column_to_key.values()} for m in method_list
    }

    for rep in range(N_REPS):
        seed_val = 100 + rep
        print(f"\n=== RUN {rep+1}/{N_REPS}, seed={seed_val} ===")
        final_table, _ = run_experiment_algorithm2_fixed(
            df_all, n_splits=4, seed=seed_val
        )
        for _, row in final_table.iterrows():
            per_method = all_results[row["Method"]]
            for column, key in column_to_key.items():
                per_method[key].append(row[column])

    # Summarize
    summary_rows = []
    for m in method_list:
        summary = {"Method": m}
        for column, key in column_to_key.items():
            values = np.array(all_results[m][key])
            summary[column] = f"{values.mean():.2f} ± {values.std():.2f}"
        summary_rows.append(summary)

    df_summary = pd.DataFrame(summary_rows)
    # Report the actual replication count rather than a hard-coded number.
    print(f"\n=== FINAL RESULTS (Mean ± Std Dev over {N_REPS} Replications, Fixed Alg 2) ===")
    print(df_summary.to_string(index=False))

# Run the replication study only when executed as a script, not on import.
if __name__ == "__main__":
    main()


=== RUN 1/30, seed=100 ===
[INFO] Best ML: gb {'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05}, avg AUC=0.9287

=== RUN 2/30, seed=101 ===
[INFO] Best ML: gb {'n_estimators': 50, 'max_depth': 3, 'learning_rate': 0.1}, avg AUC=0.9238

=== RUN 3/30, seed=102 ===
[INFO] Best ML: gb {'n_estimators': 50, 'max_depth': 3, 'learning_rate': 0.1}, avg AUC=0.9270

=== RUN 4/30, seed=103 ===
[INFO] Best ML: catboost {'iterations': 50, 'depth': 3, 'learning_rate': 0.1}, avg AUC=0.9293

=== RUN 5/30, seed=104 ===
[INFO] Best ML: gb {'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05}, avg AUC=0.9247

=== RUN 6/30, seed=105 ===
[INFO] Best ML: gb {'n_estimators': 100, 'max_depth': 3, 'learning_rate': 0.05}, avg AUC=0.9276

=== RUN 7/30, seed=106 ===
[INFO] Best ML: catboost {'iterations': 50, 'depth': 3, 'learning_rate': 0.1}, avg AUC=0.9280

=== RUN 8/30, seed=107 ===
[INFO] Best ML: catboost {'iterations': 50, 'depth': 3, 'learning_rate': 0.1}, avg AUC=0.9225

=== RUN 9/30, see