In [8]:
#algorithm 0
import numpy as np
import pandas as pd
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

# Sklearn models, metrics, etc.
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import ParameterGrid
# CatBoost
from catboost import CatBoostClassifier

###############################################################################
# 1. GLOBAL PARAMETERS
###############################################################################
FP_COST = 10
FN_COST = 50
D_COST  = 1
T_MAX   = 21   # maximum discrete time steps (0..T_MAX-1)
GAMMA_CANDIDATES = [0.95, 0.99]  # Example DP discount factors to try

# For demonstration, we'll use a small hyperparameter grid for each ML model.
RF_PARAM_GRID = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5]
}
GB_PARAM_GRID = {
    'n_estimators': [50, 100],
    'learning_rate': [0.05, 0.1],
    'max_depth': [3, 5]
}
CATBOOST_PARAM_GRID = {
    'iterations': [50, 100],
    'learning_rate': [0.05, 0.1],
    'depth': [3, 5]
}

###############################################################################
# 2. HELPER FUNCTIONS
###############################################################################
def split_into_four_groups(df, seed=0):
    """
    Shuffle patient IDs and split ~evenly into four groups: G1, G2, G3, G4.
    Used for Algorithm 0 (Standard Validation).
    """
    rng = np.random.RandomState(seed)
    unique_pids = df['patient_id'].unique()
    rng.shuffle(unique_pids)
    
    n = len(unique_pids)
    i1 = int(0.25 * n)
    i2 = int(0.50 * n)
    i3 = int(0.75 * n)
    
    G1_pids = unique_pids[: i1]
    G2_pids = unique_pids[i1 : i2]
    G3_pids = unique_pids[i2 : i3]
    G4_pids = unique_pids[i3 : ]
    
    G1 = df[df['patient_id'].isin(G1_pids)].copy()
    G2 = df[df['patient_id'].isin(G2_pids)].copy()
    G3 = df[df['patient_id'].isin(G3_pids)].copy()
    G4 = df[df['patient_id'].isin(G4_pids)].copy()
    
    return G1, G2, G3, G4

def filter_by_group(df, pid_set):
    return df[df['patient_id'].isin(pid_set)].copy()

def compute_auc_score(y_true, y_prob):
    """Compute AUC safely. If only one class, return 0.5."""
    if len(np.unique(y_true)) < 2:
        return 0.5
    return roc_auc_score(y_true, y_prob)

def train_and_select_best_model(X_train, y_train, X_val, y_val):
    """
    Trains multiple models (RandomForest, GB, CatBoost)
    over small hyperparam grids, picks best by AUC.
    
    Returns: (best_model, best_auc, best_model_name)
    """
    best_auc = -1.0
    best_model = None
    best_name  = None
    
    # 1) RandomForest
    for params in ParameterGrid(RF_PARAM_GRID):
        rf = RandomForestClassifier(random_state=0, **params)
        rf.fit(X_train, y_train)
        val_prob = rf.predict_proba(X_val)[:,1]
        auc_val  = compute_auc_score(y_val, val_prob)
        if auc_val > best_auc:
            best_auc   = auc_val
            best_model = rf
            best_name  = f"RandomForest_{params}"
    
    # 2) GradientBoosting
    for params in ParameterGrid(GB_PARAM_GRID):
        gb = GradientBoostingClassifier(random_state=0, **params)
        gb.fit(X_train, y_train)
        val_prob = gb.predict_proba(X_val)[:,1]
        auc_val  = compute_auc_score(y_val, val_prob)
        if auc_val > best_auc:
            best_auc   = auc_val
            best_model = gb
            best_name  = f"GradientBoosting_{params}"
    
    # 3) CatBoost
    for params in ParameterGrid(CATBOOST_PARAM_GRID):
        cb = CatBoostClassifier(verbose=0, random_state=0, **params)
        cb.fit(X_train, y_train)
        val_prob = cb.predict_proba(X_val)[:,1]
        auc_val  = compute_auc_score(y_val, val_prob)
        if auc_val > best_auc:
            best_auc   = auc_val
            best_model = cb
            best_name  = f"CatBoost_{params}"
    
    return best_model, best_auc, best_name

###############################################################################
# 3. SIMULATE POLICY (Unconstrained)
###############################################################################
def simulate_policy(df, policy_func):
    """
    df must contain:
      - patient_id
      - time
      - risk_score
      - label (0 or 1)
    policy_func(patient_rows) -> treat_time (int) or None
    
    Return dict of cost, precision, recall, avg_treatment_time
    """
    results = []
    
    for pid, grp in df.groupby('patient_id'):
        grp = grp.sort_values('time')
        label = grp['label'].iloc[0]
        
        treat_time = policy_func(grp)
        
        if treat_time is None:
            # never treated
            if label == 1:
                cost = FN_COST
                tp   = 0
            else:
                cost = 0
                tp   = 0
            fp = 0
            treat_flag = 0
            ttime = None
        else:
            treat_flag = 1
            if label == 1:
                # cost = D * treat_time
                cost = D_COST * treat_time
                tp   = 1
                fp   = 0
            else:
                cost = FP_COST
                tp   = 0
                fp   = 1
            ttime = treat_time
        
        results.append({
            'patient_id': pid,
            'label': label,
            'treated': treat_flag,
            'treat_time': ttime,
            'cost': cost,
            'tp': tp,
            'fp': fp
        })
    
    df_res   = pd.DataFrame(results)
    total_cost = df_res['cost'].sum()
    
    treated_df = df_res[df_res['treated']==1]
    tp_sum = treated_df['tp'].sum()
    fp_sum = treated_df['fp'].sum()
    if len(treated_df)>0:
        precision = tp_sum / (tp_sum + fp_sum)
    else:
        precision = 0.0
    
    sick_df = df_res[df_res['label']==1]
    total_sick = len(sick_df)
    if total_sick>0:
        recall = tp_sum / total_sick
    else:
        recall = 0.0
    
    if len(treated_df)>0:
        valid_tt = treated_df['treat_time'].dropna()
        avg_tt   = valid_tt.mean() if len(valid_tt)>0 else 0.0
    else:
        avg_tt = 0.0
    
    return {
        'cost': total_cost,
        'precision': precision,
        'recall': recall,
        'avg_treatment_time': avg_tt
    }

###############################################################################
# 4. BENCHMARK THRESHOLD-BASED POLICIES
###############################################################################
def constant_threshold_search(df, thresholds=None):
    if thresholds is None:
        thresholds = np.linspace(0,1,21)
    best_thr, best_cost, best_stats = None, float('inf'), None
    
    for thr in thresholds:
        def policy_func(patient_rows):
            # treat at first time we see risk_score >= thr
            for _, row in patient_rows.iterrows():
                if row['risk_score'] >= thr:
                    return int(row['time'])
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr  = thr
            best_stats= stats
    return best_thr, best_stats

def make_constant_threshold_policy(thr):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            if row['risk_score'] >= thr:
                return int(row['time'])
        return None
    return policy_func

def dynamic_threshold_random_search(df,
                                    time_steps=20,
                                    threshold_candidates=[0.0,0.2,0.4,0.6,0.8,1.0],
                                    n_samples=200,
                                    seed=0):
    rng = np.random.RandomState(seed)
    best_vec = None
    best_cost= float('inf')
    best_stats=None
    
    for _ in range(n_samples):
        thr_vec = rng.choice(threshold_candidates, size=time_steps)
        
        def policy_func(patient_rows):
            for _, row in patient_rows.iterrows():
                t = int(row['time'])
                if t < time_steps and row['risk_score'] >= thr_vec[t]:
                    return t
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_vec  = thr_vec.copy()
            best_stats= stats
    return best_vec, best_stats

def make_dynamic_threshold_policy(thr_vec):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = int(row['time'])
            if t < len(thr_vec):
                if row['risk_score'] >= thr_vec[t]:
                    return t
        return None
    return policy_func

def linear_threshold_search(df,
                            A_candidates=np.linspace(-0.05, 0.01, 7),
                            B_candidates=np.linspace(0,0.6,2)):
    best_A, best_B = None, None
    best_cost, best_stats = float('inf'), None
    
    for A in A_candidates:
        for B in B_candidates:
            def policy_func(patient_rows):
                for _, row in patient_rows.iterrows():
                    t = row['time']
                    thr = A*t + B
                    thr = np.clip(thr,0,1)
                    if row['risk_score'] >= thr:
                        return int(t)
                return None
            
            stats = simulate_policy(df, policy_func)
            if stats['cost'] < best_cost:
                best_cost = stats['cost']
                best_A    = A
                best_B    = B
                best_stats= stats
    return (best_A,best_B), best_stats

def make_linear_threshold_policy(A,B):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = row['time']
            thr = A*t + B
            thr = np.clip(thr,0,1)
            if row['risk_score'] >= thr:
                return int(t)
        return None
    return policy_func

def wait_till_end_search(df, thresholds=None):
    if thresholds is None:
        thresholds = np.linspace(0,1,21)
    best_thr, best_cost, best_stats = None, float('inf'), None
    
    for thr in thresholds:
        def policy_func(patient_rows):
            final_t = patient_rows['time'].max()
            final_row = patient_rows[patient_rows['time']==final_t].iloc[0]
            if final_row['risk_score'] >= thr:
                return int(final_t)
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr  = thr
            best_stats= stats
    return best_thr, best_stats

def make_wait_till_end_policy(thr):
    def policy_func(patient_rows):
        final_t = patient_rows['time'].max()
        final_row = patient_rows[patient_rows['time']==final_t].iloc[0]
        if final_row['risk_score'] >= thr:
            return int(final_t)
        return None
    return policy_func


###############################################################################
# 5. DATA-DRIVEN DP (UNCONSTRAINED)
###############################################################################
def to_bucket(prob):
    """Simple function to map prob into a 5-bucket scale [0..4]."""
    b = int(prob * 5)
    return min(b, 4)

def estimate_transition_and_sick_probs(df_train, T=20, n_buckets=5):
    """
    p_trans[t,b,b_next], p_sick[t,b]
    df_train has columns: patient_id, time, risk_bucket, label
    """
    transition_counts = np.zeros((T-1, n_buckets, n_buckets), dtype=float)
    bucket_counts     = np.zeros((T, n_buckets), dtype=float)
    sick_counts       = np.zeros((T, n_buckets), dtype=float)
    
    df_sorted = df_train.sort_values(['patient_id','time'])
    for pid, grp in df_sorted.groupby('patient_id'):
        grp = grp.sort_values('time')
        rows= grp.to_dict('records')
        
        for i, row in enumerate(rows):
            t = int(row['time'])
            b = int(row['risk_bucket'])
            lbl = row['label']
            
            if t < T:
                bucket_counts[t,b] += 1
                sick_counts[t,b]   += lbl
            
            if i < len(rows)-1:
                nxt = rows[i+1]
                t_next = nxt['time']
                b_next = nxt['risk_bucket']
                if (t_next == t+1) and (t < T-1):
                    transition_counts[t,b,b_next] += 1
    
    p_trans = np.zeros((T-1, n_buckets, n_buckets), dtype=float)
    for t_ in range(T-1):
        for b_ in range(n_buckets):
            denom = transition_counts[t_,b_,:].sum()
            if denom>0:
                p_trans[t_,b_,:] = transition_counts[t_,b_,:] / denom
            else:
                p_trans[t_,b_,b_] = 1.0
    
    p_sick = np.zeros((T, n_buckets), dtype=float)
    for t_ in range(T):
        for b_ in range(n_buckets):
            denom = bucket_counts[t_,b_]
            if denom>0:
                p_sick[t_,b_] = sick_counts[t_,b_] / denom
            else:
                p_sick[t_,b_] = 0.0
    return p_trans, p_sick

def train_data_driven_dp_unconstrained(p_trans, p_sick, 
                                       FP=10, FN=50, D=1, gamma=0.99, T=20):
    """
    Standard DP for unconstrained scenario:
      V[t,b] = min( cost_treat_now, cost_wait )
    """
    n_buckets = p_sick.shape[1]
    V = np.zeros((T+1, n_buckets))
    pi_ = np.zeros((T, n_buckets), dtype=int)
    
    # boundary at t=T
    for b in range(n_buckets):
        cost_treat   = p_sick[T-1,b]*(D*(T-1)) + (1-p_sick[T-1,b])*FP
        cost_notreat = p_sick[T-1,b]*FN
        V[T,b] = min(cost_treat, cost_notreat)
    
    for t in reversed(range(T)):
        for b in range(n_buckets):
            # treat now
            cost_treat = p_sick[t,b]*(D*t) + (1-p_sick[t,b])*FP
            # wait
            if t == T-1:
                cost_wait = gamma * V[T,b]
            else:
                exp_future = 0.0
                for b_next in range(n_buckets):
                    exp_future += p_trans[t,b,b_next]*V[t+1,b_next]
                cost_wait = gamma * exp_future
            
            if cost_treat <= cost_wait:
                V[t,b]   = cost_treat
                pi_[t,b] = 1
            else:
                V[t,b]   = cost_wait
                pi_[t,b] = 0
    return V, pi_

def make_dp_policy(V, pi_, T=20):
    """Return a policy function that treats if pi[t,b]==1."""
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = int(row['time'])
            b = int(row['risk_bucket'])
            if t < T:
                if pi_[t,b] == 1:
                    return t
        return None
    return policy_func

###############################################################################
# 6. ALGORITHM 0 (STANDARD VALIDATION)
###############################################################################
def run_algorithm0_unconstrained(df_all, seed=0):
    """
    1) Split df_all -> G1, G2, G3, G4
    2) ML hyperparam search on (G1->G2)
    3) Retrain best ML on G1+G2
    4) DP hyperparam search on G3
    5) Final evaluation on G4
    """
    # Step 1: Split
    G1, G2, G3, G4 = split_into_four_groups(df_all, seed=seed)
    print(f"G1: {G1['patient_id'].nunique()} patients | G2: {G2['patient_id'].nunique()} | "
          f"G3: {G3['patient_id'].nunique()} | G4: {G4['patient_id'].nunique()}")
    
    # Step 2: ML hyperparam search on (G1->G2)
    X_train = G1[['EIT','NIRS','EIS']].values
    y_train = G1['label'].values
    
    X_val   = G2[['EIT','NIRS','EIS']].values
    y_val   = G2['label'].values
    
    best_model, best_auc, best_name = train_and_select_best_model(
        X_train, y_train, X_val, y_val
    )
    print(f"Best ML model on (G1->G2): {best_name}, AUC={best_auc:.4f}")
    
    # Step 3: Retrain best ML on G1+G2
    G12 = pd.concat([G1, G2], ignore_index=True)
    X_12 = G12[['EIT','NIRS','EIS']].values
    y_12 = G12['label'].values
    # We'll do the same approach => train_and_select_best_model on itself 
    #   or just fit best_model to G1+G2 
    #   (for simplicity, we directly refit best_model).
    
    best_model.fit(X_12, y_12)
    
    # Step 4: DP hyperparam search on G3 => produce risk scores from final ML
    G3 = G3.copy()
    X_3 = G3[['EIT','NIRS','EIS']].values
    prob_3 = best_model.predict_proba(X_3)[:,1]
    G3['risk_score'] = prob_3
    
    # We also need transitions from G12 => so produce risk_score for G12
    G12 = G12.copy()
    prob_12 = best_model.predict_proba(G12[['EIT','NIRS','EIS']])[:,1]
    G12['risk_score'] = prob_12
    
    # For each candidate gamma => train DP => evaluate cost on G3 => pick best gamma
    best_gamma = None
    best_cost_dp = float('inf')
    best_V = None
    best_pi= None
    
    # Bucket the training data for DP
    G12['risk_bucket'] = G12['risk_score'].apply(to_bucket)
    
    for gamma_ in GAMMA_CANDIDATES:
        # estimate transitions
        p_trans, p_sick = estimate_transition_and_sick_probs(G12, T=T_MAX, n_buckets=5)
        V_temp, pi_temp = train_data_driven_dp_unconstrained(
            p_trans, p_sick, FP=FP_COST, FN=FN_COST,
            D=D_COST, gamma=gamma_, T=T_MAX
        )
        # Evaluate on G3
        #   also bucket G3
        G3_temp = G3.copy()
        G3_temp['risk_bucket'] = G3_temp['risk_score'].apply(to_bucket)
        
        dp_policy_func = make_dp_policy(V_temp, pi_temp, T=T_MAX)
        stats_dp = simulate_policy(G3_temp, dp_policy_func)
        
        if stats_dp['cost'] < best_cost_dp:
            best_cost_dp = stats_dp['cost']
            best_gamma   = gamma_
            best_V = V_temp
            best_pi= pi_temp
    
    print(f"Best DP gamma on G3 = {best_gamma}, cost={best_cost_dp:.2f}")
    
    # Step 5: Evaluate on G4
    G4 = G4.copy()
    prob_4 = best_model.predict_proba(G4[['EIT','NIRS','EIS']])[:,1]
    G4['risk_score'] = prob_4
    
    # (A) threshold-based policies
    thr_const, stats_const = constant_threshold_search(G4)
    thr_vec, stats_dyn     = dynamic_threshold_random_search(G4, time_steps=T_MAX)
    (A_lin,B_lin), stats_lin = linear_threshold_search(G4)
    thr_wte, stats_wte     = wait_till_end_search(G4)
    
    # (B) final DP policy using best_gamma => we already have best_V, best_pi
    # re-check transitions from G12 if needed, but we already found them
    # we just build final policy:
    dp_policy_final = make_dp_policy(best_V, best_pi, T=T_MAX)
    
    G4_dp = G4.copy()
    G4_dp['risk_bucket'] = G4_dp['risk_score'].apply(to_bucket)
    stats_dp = simulate_policy(G4_dp, dp_policy_final)
    
    # Build final table
    table = pd.DataFrame({
        'Method': [
            'Constant Threshold',
            'Dynamic Threshold-R',
            'Linear Threshold',
            'Wait Till End',
            f'Dynamic Threshold-DP (gamma={best_gamma})'
        ],
        'Precision (%)': [
            100*stats_const['precision'],
            100*stats_dyn['precision'],
            100*stats_lin['precision'],
            100*stats_wte['precision'],
            100*stats_dp['precision']
        ],
        'Cost': [
            stats_const['cost'],
            stats_dyn['cost'],
            stats_lin['cost'],
            stats_wte['cost'],
            stats_dp['cost']
        ],
        'Recall (%)': [
            100*stats_const['recall'],
            100*stats_dyn['recall'],
            100*stats_lin['recall'],
            100*stats_wte['recall'],
            100*stats_dp['recall']
        ],
        'Treatment Time': [
            stats_const['avg_treatment_time'],
            stats_dyn['avg_treatment_time'],
            stats_lin['avg_treatment_time'],
            stats_wte['avg_treatment_time'],
            stats_dp['avg_treatment_time']
        ]
    })
    
    return table

###############################################################################
# 7. RUN MULTIPLE REPLICATIONS
###############################################################################
def run_multiple_replications(df_all, n_replications=30):
    """
    Run Algorithm 0 multiple times with different random seeds.
    Compute mean and standard deviation for each metric.
    """
    # Define standard method names for consistent reporting
    standard_methods = [
        'Constant Threshold',
        'Dynamic Threshold-R',
        'Linear Threshold',
        'Wait Till End',
        'Dynamic Threshold-DP'
    ]
    
    # Initialize containers for each metric and method
    precision_values = {method: [] for method in standard_methods}
    cost_values = {method: [] for method in standard_methods}
    recall_values = {method: [] for method in standard_methods}
    treatment_time_values = {method: [] for method in standard_methods}
    
    for i in range(n_replications):
        seed = i  # Use a different seed for each replication
        print(f"\nRunning replication {i+1}/{n_replications} with seed={seed}")
        
        # Run algorithm with current seed
        table = run_algorithm0_unconstrained(df_all, seed=seed)
        
        # Extract values for each method
        for _, row in table.iterrows():
            method = row['Method']
            
            # Standardize method name (remove gamma value from DP method)
            standard_method = method
            if 'Dynamic Threshold-DP' in method:
                standard_method = 'Dynamic Threshold-DP'
            
            if standard_method in standard_methods:
                precision_values[standard_method].append(row['Precision (%)'])
                cost_values[standard_method].append(row['Cost'])
                recall_values[standard_method].append(row['Recall (%)'])
                treatment_time_values[standard_method].append(row['Treatment Time'])
    
    # Compute statistics
    final_data = []
    for method in standard_methods:
        if precision_values[method]:  # Check if we have data for this method
            precision_mean = np.mean(precision_values[method])
            precision_std = np.std(precision_values[method])
            cost_mean = np.mean(cost_values[method])
            cost_std = np.std(cost_values[method])
            recall_mean = np.mean(recall_values[method])
            recall_std = np.std(recall_values[method])
            treat_time_mean = np.mean(treatment_time_values[method])
            treat_time_std = np.std(treatment_time_values[method])
            
            final_data.append({
                'Method': method,
                'Precision (%)': f"{precision_mean:.2f} ± {precision_std:.2f}",
                'Cost': f"{cost_mean:.2f} ± {cost_std:.2f}",
                'Recall (%)': f"{recall_mean:.2f} ± {recall_std:.2f}",
                'Treatment Time': f"{treat_time_mean:.2f} ± {treat_time_std:.2f}"
            })
    
    return pd.DataFrame(final_data)

###############################################################################
# 8. MAIN
###############################################################################
def main():
    df_all = pd.read_csv("synthetic_patients_with_features.csv")
    
    # If needed, filter df_all to time < T_MAX:
    df_all = df_all[df_all['time'] < T_MAX].copy()
    
    # Check required columns:
    required = {'patient_id','time','EIT','NIRS','EIS','label'}
    if not required.issubset(df_all.columns):
        raise ValueError(f"Your CSV must have columns at least: {required}. Found: {df_all.columns}")
    
    # Run Algorithm 0 multiple times
    final_results = run_multiple_replications(df_all, n_replications=30)
    
    print("\n=== FINAL RESULTS (Mean ± Std Dev over 30 Replications, Test on G4) ===")
    print(final_results.to_string(index=False))

if __name__ == "__main__":
    main()


####algorithm 1

import numpy as np
import pandas as pd
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

# Sklearn models, metrics, etc.
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import ParameterGrid
# CatBoost
from catboost import CatBoostClassifier

###############################################################################
# 1. GLOBAL PARAMETERS
###############################################################################
FP_COST = 10
FN_COST = 50
D_COST  = 1
GAMMA   = 0.99
T_MAX   = 21   # maximum discrete time steps (0..T_MAX-1)


RF_PARAM_GRID = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5]
}
GB_PARAM_GRID = {
    'n_estimators': [50, 100],
    'learning_rate': [0.05, 0.1],
    'max_depth': [3, 5]
}
CATBOOST_PARAM_GRID = {
    'iterations': [50, 100],
    'learning_rate': [0.05, 0.1],
    'depth': [3, 5]
}

###############################################################################
# 2. HELPER FUNCTIONS: splitting, ML training, DP, etc.
###############################################################################
def make_folds(df, n_folds=5, seed=0):
    """
    Semi Cross-Validation approach:
      - We'll produce n_folds separate sets: G1, G2, ..., G_{n_folds}.
      - We'll treat the last fold G_{n_folds} as the final holdout test.
      - The first (n_folds - 1) folds are used in the "semi cross-val" loops.
    """
    rng = np.random.RandomState(seed)
    
    # We'll shuffle patient IDs, then chunk them into n_folds groups
    unique_pts = df['patient_id'].unique()
    rng.shuffle(unique_pts)
    
    folds = []
    fold_size = int(np.ceil(len(unique_pts) / n_folds))
    
    start_idx = 0
    for k in range(n_folds):
        end_idx = min(start_idx + fold_size, len(unique_pts))
        fold_pids = unique_pts[start_idx:end_idx]
        folds.append(set(fold_pids))
        start_idx = end_idx
    
    return folds

def filter_by_group(df, pid_set):
    """Returns the subset of df whose patient_id is in pid_set."""
    return df[df['patient_id'].isin(pid_set)].copy()

def compute_auc_score(y_true, y_prob):
    """
    Safe AUC computation. If all y_true are the same class,
    AUC is not well-defined, so we'll return 0.5 by default.
    """
    if len(np.unique(y_true)) < 2:
        return 0.5
    return roc_auc_score(y_true, y_prob)

def train_and_select_best_model(X_train, y_train, X_val, y_val):
    """
    Trains multiple models (RandomForest, GradientBoosting, CatBoost)
    over small hyperparameter grids, picks the best by AUC on (X_val,y_val).
    
    Returns:
        best_model   (fitted model with best AUC)
        best_auc     (float)
        best_model_name (str, e.g. "RandomForest")
    """
    best_auc = -1.0
    best_model = None
    best_name  = None
    
    # 1) RandomForest
    for params in ParameterGrid(RF_PARAM_GRID):
        rf = RandomForestClassifier(random_state=0, **params)
        rf.fit(X_train, y_train)
        val_prob = rf.predict_proba(X_val)[:,1]
        auc_val  = compute_auc_score(y_val, val_prob)
        if auc_val > best_auc:
            best_auc = auc_val
            best_model = rf
            best_name  = f"RandomForest_{params}"
    
    # 2) GradientBoosting
    for params in ParameterGrid(GB_PARAM_GRID):
        gb = GradientBoostingClassifier(random_state=0, **params)
        gb.fit(X_train, y_train)
        val_prob = gb.predict_proba(X_val)[:,1]
        auc_val  = compute_auc_score(y_val, val_prob)
        if auc_val > best_auc:
            best_auc = auc_val
            best_model = gb
            best_name  = f"GradientBoosting_{params}"
    
    # 3) CatBoost
    for params in ParameterGrid(CATBOOST_PARAM_GRID):
        # silent mode
        cb = CatBoostClassifier(verbose=0, random_state=0, **params)
        cb.fit(X_train, y_train, eval_set=(X_val,y_val), verbose=0)
        val_prob = cb.predict_proba(X_val)[:,1]
        auc_val  = compute_auc_score(y_val, val_prob)
        if auc_val > best_auc:
            best_auc = auc_val
            best_model = cb
            best_name  = f"CatBoost_{params}"
    
    return best_model, best_auc, best_name


###############################################################################
# 3. POLICY SIMULATION (Unconstrained) 
###############################################################################
def simulate_policy(df, policy_func):
    """
    Evaluate total cost, precision, recall, avg_treatment_time under a 
    given policy_func. The policy_func is a function taking 
       policy_func(subDF_of_single_patient) -> treat_time or None
    """
    results = []
    
    for pid, patient_rows in df.groupby('patient_id'):
        patient_rows = patient_rows.sort_values('time')
        
        label = patient_rows['label'].iloc[0]  # 0 or 1
        treat_time = policy_func(patient_rows)
        
        if treat_time is None:
            # never treated
            if label == 1:
                cost = FN_COST
                tp   = 0
            else:
                cost = 0
                tp   = 0
            fp = 0
            treated_flag = 0
            tt = None
        else:
            treated_flag = 1
            if label == 1:
                cost = D_COST * treat_time  # delay cost
                tp = 1
                fp = 0
            else:
                cost = FP_COST
                tp = 0
                fp = 1
            tt = treat_time
        
        results.append({
            'patient_id': pid,
            'label': label,
            'treated': treated_flag,
            'treat_time': tt,
            'cost': cost,
            'tp': tp,
            'fp': fp
        })
    
    df_res = pd.DataFrame(results)
    total_cost = df_res['cost'].sum()
    
    treated_df = df_res[df_res['treated'] == 1]
    tp_sum = treated_df['tp'].sum()
    fp_sum = treated_df['fp'].sum()
    if len(treated_df) > 0:
        precision = tp_sum / (tp_sum + fp_sum)
    else:
        precision = 0.0
    
    sick_df = df_res[df_res['label'] == 1]
    total_sick = len(sick_df)
    if total_sick > 0:
        recall = tp_sum / total_sick
    else:
        recall = 0.0
    
    if len(treated_df) > 0:
        valid_tt = treated_df['treat_time'].dropna()
        avg_tt = valid_tt.mean() if len(valid_tt) > 0 else 0.0
    else:
        avg_tt = 0.0
    
    return {
        'cost': total_cost,
        'precision': precision,
        'recall': recall,
        'avg_treatment_time': avg_tt
    }

###############################################################################
# 4. BENCHMARK POLICIES (Threshold-based)
###############################################################################
def constant_threshold_search(df, thresholds=None):
    """Grid search over a set of constant thresholds."""
    if thresholds is None:
        thresholds = np.linspace(0, 1, 21)
    best_thr, best_cost, best_stats = None, float('inf'), None
    
    for thr in thresholds:
        def policy_func(patient_rows):
            for _, row in patient_rows.iterrows():
                if row['risk_score'] >= thr:
                    return int(row['time'])
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr = thr
            best_stats = stats
    
    return best_thr, best_stats

def make_constant_threshold_policy(thr):
    """Creates a policy that treats the patient at the first time whose risk_score >= thr."""
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            if row['risk_score'] >= thr:
                return int(row['time'])
        return None
    return policy_func

def dynamic_threshold_random_search(df,
                                    time_steps=20,
                                    threshold_candidates=[0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
                                    n_samples=200,
                                    seed=0):
    """
    Randomly sample vectors of length time_steps from threshold_candidates,
    pick the best by cost on df.
    """
    rng = np.random.RandomState(seed)
    best_vec = None
    best_cost = float('inf')
    best_stats = None
    
    for _ in range(n_samples):
        thr_vec = rng.choice(threshold_candidates, size=time_steps)
        
        def policy_func(patient_rows):
            for _, row in patient_rows.iterrows():
                t = int(row['time'])
                if t < time_steps and row['risk_score'] >= thr_vec[t]:
                    return t
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_vec = thr_vec.copy()
            best_stats = stats
    
    return best_vec, best_stats

def make_dynamic_threshold_policy(thr_vec):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = int(row['time'])
            if t < len(thr_vec):
                if row['risk_score'] >= thr_vec[t]:
                    return t
        return None
    return policy_func

def linear_threshold_search(df, A_candidates=None, B_candidates=None):
    """
    threshold(t) = clamp( A*t + B, [0,1] )
    do grid search
    """
    if A_candidates is None:
        A_candidates = np.linspace(-0.05, 0.05, 11)
    if B_candidates is None:
        B_candidates = np.linspace(0, 1, 11)
    
    best_A, best_B = None, None
    best_cost, best_stats = float('inf'), None
    
    for A in A_candidates:
        for B in B_candidates:
            def policy_func(patient_rows):
                for _, row in patient_rows.iterrows():
                    t = row['time']
                    thr = A*t + B
                    thr = max(0, min(1, thr))
                    if row['risk_score'] >= thr:
                        return int(t)
                return None
            
            stats = simulate_policy(df, policy_func)
            if stats['cost'] < best_cost:
                best_cost = stats['cost']
                best_A = A
                best_B = B
                best_stats = stats
    
    return (best_A, best_B), best_stats

def make_linear_threshold_policy(A, B):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = row['time']
            thr = A*t + B
            thr = max(0, min(1, thr))
            if row['risk_score'] >= thr:
                return int(t)
        return None
    return policy_func

def wait_till_end_search(df, thresholds=None):
    """
    Evaluate policy: treat only at final time if risk_score >= thr.
    """
    if thresholds is None:
        thresholds = np.linspace(0,1,21)
    best_thr, best_cost, best_stats = None, float('inf'), None
    
    for thr in thresholds:
        def policy_func(patient_rows):
            # final row:
            final_t = patient_rows['time'].max()
            final_row = patient_rows[patient_rows['time']==final_t].iloc[0]
            if final_row['risk_score'] >= thr:
                return int(final_t)
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr = thr
            best_stats = stats
    
    return best_thr, best_stats

def make_wait_till_end_policy(thr):
    def policy_func(patient_rows):
        final_t = patient_rows['time'].max()
        final_row = patient_rows[patient_rows['time']==final_t].iloc[0]
        if final_row['risk_score'] >= thr:
            return int(final_t)
        return None
    return policy_func

###############################################################################
# 5. DATA-DRIVEN DP (Unconstrained)
###############################################################################
def estimate_transition_and_sick_probs(df_train, T=20, n_buckets=5):
    """
    We'll estimate:
      p_trans[t, b, b_next]: Probability that a patient in risk-bucket b at time t
                             transitions to bucket b_next at time t+1
      p_sick[t, b]: Probability that a patient is sick given that they are in bucket b at time t.
    This is a naive aggregator (assuming Markov wrt bucket).
    """
    transition_counts = np.zeros((T-1, n_buckets, n_buckets), dtype=float)
    bucket_counts     = np.zeros((T, n_buckets), dtype=float)
    sick_counts       = np.zeros((T, n_buckets), dtype=float)

    df_sorted = df_train.sort_values(['patient_id','time'])
    
    for pid, grp in df_sorted.groupby('patient_id'):
        grp = grp.sort_values('time')
        rows = grp.to_dict('records')
        
        for i in range(len(rows)):
            t  = int(rows[i]['time'])
            b  = int(rows[i]['risk_bucket'])
            lb = int(rows[i]['label'])  # 0 or 1
            if t < T:
                bucket_counts[t, b] += 1
                sick_counts[t, b]   += lb
            
            if i < len(rows) - 1:
                # consider the next row if it's exactly t+1
                t_next = int(rows[i+1]['time'])
                b_next = int(rows[i+1]['risk_bucket'])
                if (t_next == t+1) and (t < T-1):
                    transition_counts[t, b, b_next] += 1.0

    # p_trans
    n_buckets = bucket_counts.shape[1]
    p_trans = np.zeros((T-1, n_buckets, n_buckets), dtype=float)
    for t_ in range(T-1):
        for b_ in range(n_buckets):
            denom = transition_counts[t_, b_, :].sum()
            if denom > 0:
                p_trans[t_, b_, :] = transition_counts[t_, b_, :] / denom
            else:
                # if no data, fallback to identity
                p_trans[t_, b_, b_] = 1.0
    
    # p_sick
    p_sick = np.zeros((T, n_buckets), dtype=float)
    for t_ in range(T):
        for b_ in range(n_buckets):
            denom = bucket_counts[t_, b_]
            if denom > 0:
                p_sick[t_, b_] = sick_counts[t_, b_] / denom
            else:
                p_sick[t_, b_] = 0.0
    
    return p_trans, p_sick

def train_data_driven_dp_unconstrained(p_trans, p_sick, 
                                       FP=10, FN=50, D=1, gamma=0.99, T=20):
    """
    We define states as (t, bucket), and actions: 0=wait, 1=treat now.
    We'll do a simple backward recursion:
       V[t,b] = min( cost_of_treat_now, cost_of_wait )
    cost_of_treat_now = p_sick[t,b]* (D*t) + (1-p_sick[t,b])* FP
    cost_of_wait      = gamma * E_{b_next}[ V[t+1, b_next] ]
    At t=T, we define cost if not treated:
       => p_sick[T-1,b]*FN  vs. cost_of_treat_now at T-1
    We'll store the policy in pi_[t,b].
    """
    n_buckets = p_sick.shape[1]
    # Note: We'll define V[t,b] for t in [0..T], b in [0..n_buckets-1].
    # But we actually only have transitions up to T-1 in p_trans.
    V = np.zeros((T+1, n_buckets))
    pi_ = np.zeros((T, n_buckets), dtype=int)
    
    # boundary at t=T: if we haven't treated yet, the cost is:
    # min( treat at T, not treat at all ).
    # But let's define it simply as "if not treat => FN" or "if treat => cost_treatNow".
    for b in range(n_buckets):
        # "treat now at time T" => D*T?? but actually t goes up to T-1. 
        # We'll define an effective "t = T" as if it's the final step.
        # so cost_treat = p_sick[T-1,b]*(D*(T-1)) + (1-p_sick[T-1,b])*FP
        # cost_notreat  = p_sick[T-1,b]*FN
        # We'll just do that here:
        cost_treat  = p_sick[T-1,b]*(D*(T-1)) + (1 - p_sick[T-1,b])*FP
        cost_notreat= p_sick[T-1,b]*FN
        V[T,b] = min(cost_treat, cost_notreat)
    
    # now go backward:
    for t in reversed(range(T)):
        for b in range(n_buckets):
            # cost if treat now
            cost_treat = p_sick[t,b]*(D*t) + (1 - p_sick[t,b])*FP
            
            # cost if wait
            if t == T-1:
                # if wait at T-1, next is T => no transitions => V[T,b]
                cost_wait = gamma * V[T,b]
            else:
                # compute expected cost from next state
                exp_future = 0.0
                for b_next in range(n_buckets):
                    exp_future += p_trans[t,b,b_next]*V[t+1,b_next]
                cost_wait = gamma * exp_future
            
            if cost_treat <= cost_wait:
                V[t,b] = cost_treat
                pi_[t,b] = 1
            else:
                V[t,b] = cost_wait
                pi_[t,b] = 0
    
    return V, pi_

def make_data_driven_dp_policy_unconstrained(V, pi_, T=20):
    """
    Creates a function that iterates over time steps of a patient.
    As soon as DP says "treat" at (t,b), we do so and stop.
    """
    def policy_func(patient_rows):
        # naive approach: read each row in chronological order
        for _, row in patient_rows.iterrows():
            t = int(row['time'])
            b = int(row['risk_bucket'])
            if t < T:
                if pi_[t,b] == 1:
                    return t
        # if we never treat => None
        return None
    return policy_func

###############################################################################
# 6. ALGORITHM 1: SEMI CROSS-VALIDATION (Unconstrained)
###############################################################################
def semi_crossval_unconstrained(df_all, n_folds=5, seed=0):
    """
    Implements the "semi cross-validation" approach for ML + DP 
    in the unconstrained scenario (Algorithm 1).
    
    Steps (schematic):
      1) Create n_folds. 
         Let G_{n_folds} be final holdout. G_1..G_{n_folds-1} for "semi-CV".
      2) For j in [1..(n_folds-1)]:
          - Validation fold = G_j
          - Training fold = union of G_k for k != j
          - Among that "training fold," we do an (n_folds-2)-fold approach 
            to select best ML hyperparams (AUC).
            (In a simpler "semi" approach, we might skip an inner fold and just train ML on train set.)
          - Evaluate DP hyperparams on G_j, store best result.
      3) Aggregate or pick final hyperparams from these folds.
      4) Retrain ML + DP on union G_1..G_{n_folds-1}, evaluate on G_{n_folds}.

    For brevity, we do a simpler version:
      - For each j in 0..(n_folds-2):
         * Train ML on (all except G_j),
         * Evaluate best threshold or best DP on G_j
      - Then average or pick the median. 
      - Finally, evaluate on G_{n_folds-1} as holdout.
    """
    folds = make_folds(df_all, n_folds=n_folds, seed=seed)
    # final test:
    test_fold_pid = folds[-1]
    # the first n_folds-1 are the "CV folds"
    cv_folds = folds[:-1]
    
    # Lists to store benchmark hyperparams found in each fold j
    best_thr_const_list = []
    best_dyn_vec_list   = []
    best_linAB_list     = []
    best_thr_wait_list  = []
    best_dp_policies    = []
    
    for j, val_pid in enumerate(cv_folds):
        # Validation fold j => G_j
        df_val   = filter_by_group(df_all, val_pid)
        
        # Training = union of all other folds except j
        train_pid = set()
        for k, fold_pids in enumerate(cv_folds):
            if k != j:
                train_pid = train_pid.union(fold_pids)
        df_train = filter_by_group(df_all, train_pid)
        
        # ============== (A) Train ML model on df_train => pick best by AUC on the same df_train ==============
     
        X_train = df_train[['EIT','NIRS','EIS']].values
        y_train = df_train['label'].values
        
        # Just do a train/val = we can do a small split inside df_train, or 
        # let's do the entire df_train for training and the same df_train for ML selection 
        # (not ideal, but simpler).
        X_val = df_train[['EIT','NIRS','EIS']].values
        y_val = df_train['label'].values
        
        ml_model, best_auc, best_mname = train_and_select_best_model(X_train, y_train, X_val, y_val)
        
        # Now, we apply this trained model to produce a risk_score for ALL ROWS in df_train+df_val
        # So we can do threshold tuning, DP, etc.
        # We'll store them back in the main df so we can do the policy searches.
        # But be careful not to pollute folds with each other => for demonstration, it's simpler 
        # to do it just for df_val "on the fly" for cost evaluation.

        # (B) Evaluate on VAL fold => get risk scores
        X_val_fold = df_val[['EIT','NIRS','EIS']].values
        val_probs  = ml_model.predict_proba(X_val_fold)[:,1]
        df_val.loc[:,'risk_score'] = val_probs  # set the model-based risk

        # We do the same for df_train because we need to estimate Markov transitions for the DP
        X_train_fold = df_train[['EIT','NIRS','EIS']].values
        train_probs  = ml_model.predict_proba(X_train_fold)[:,1]
        df_train.loc[:,'risk_score'] = train_probs

        # Also discretize into risk buckets again, e.g. 5 equally sized:
        # We'll do a simple approach:  (0,0.2)->0, [0.2,0.4)->1, ...
        def to_bucket(p):
            return min(int(p*5), 4)
        df_train.loc[:,'risk_bucket'] = df_train['risk_score'].apply(to_bucket)
        df_val.loc[:,'risk_bucket']   = df_val['risk_score'].apply(to_bucket)

        # ============= (C) Benchmark Policies on VAL fold =============
        #  (C.1) Constant threshold
        thr_c, _ = constant_threshold_search(df_train)  # or do it on df_train
        best_thr_const_list.append(thr_c)
        
        #  (C.2) Dynamic threshold
        thr_vec, _ = dynamic_threshold_random_search(df_train, 
                                                     time_steps=T_MAX,
                                                     threshold_candidates=[0,0.2,0.4,0.6,0.8,1.0],
                                                     n_samples=200,
                                                     seed=j)
        best_dyn_vec_list.append(thr_vec)
        
        #  (C.3) Linear threshold
        (A,B), _ = linear_threshold_search(df_train)
        best_linAB_list.append((A,B))
        
        #  (C.4) Wait till end
        thr_wte, _ = wait_till_end_search(df_train)
        best_thr_wait_list.append(thr_wte)
        
        # ============= (D) DP Approach =============
        # We'll fit the Markov chain from df_train => p_trans, p_sick
        p_trans, p_sick = estimate_transition_and_sick_probs(df_train, T=T_MAX, n_buckets=5)
        V, pi_ = train_data_driven_dp_unconstrained(p_trans, p_sick, 
                                                    FP=FP_COST, FN=FN_COST, 
                                                    D=D_COST, gamma=GAMMA, T=T_MAX)
        best_dp_policies.append((V, pi_))
        
    
        
        # End of fold j

    # ----- (E) Combine or pick final hyperparams from these folds ------
    # For demonstration, let's pick the average or the median from the sets we found:

    thr_const_final = np.mean(best_thr_const_list)
    
    mid_idx = len(best_dyn_vec_list)//2
    thr_dyn_final = best_dyn_vec_list[mid_idx]  # pick the "middle" one
    
    A_ave = np.mean([ab[0] for ab in best_linAB_list])
    B_ave = np.mean([ab[1] for ab in best_linAB_list])
    
    thr_wait_final = np.mean(best_thr_wait_list)
    
    # For DP, let's pick the last fold's (V, pi_). 
    # Or we could store them all and pick the one with minimal val cost. 
    # We'll just pick the last for demonstration:
    V_final, pi_final = best_dp_policies[-1]
    
    # ========== (F) Retrain ML model on all CV folds except test fold => final model ==========
    train_pid_all = set()
    for fold_pid in cv_folds:
        train_pid_all = train_pid_all.union(fold_pid)
    df_train_cv = filter_by_group(df_all, train_pid_all)
    
    X_train_cv = df_train_cv[['EIT','NIRS','EIS']].values
    y_train_cv = df_train_cv['label'].values
    
    # We'll do the same "train_and_select_best_model" approach 
    # but we have no separate val set, so we'll just reuse X_train_cv for selection:
    final_model, _, _ = train_and_select_best_model(X_train_cv, y_train_cv,
                                                    X_train_cv, y_train_cv)
    
    # We'll produce final risk scores for test set G_{n_folds}.
    df_test = filter_by_group(df_all, test_fold_pid).copy()
    
    X_test  = df_test[['EIT','NIRS','EIS']].values
    test_probs = final_model.predict_proba(X_test)[:,1]
    df_test.loc[:,'risk_score'] = test_probs
    
    # Re-bucket for DP or threshold logic
    def to_bucket(p):
        return min(int(p*5), 4)
    df_test.loc[:,'risk_bucket'] = df_test['risk_score'].apply(to_bucket)
    
    # (F.1) Build final policies from the chosen final hyperparams:
    const_policy = make_constant_threshold_policy(thr_const_final)
    dyn_policy   = make_dynamic_threshold_policy(thr_dyn_final)
    lin_policy   = make_linear_threshold_policy(A_ave, B_ave)
    wte_policy   = make_wait_till_end_policy(thr_wait_final)
    dp_policy    = make_data_driven_dp_policy_unconstrained(V_final, pi_final, T=T_MAX)
    
    # (F.2) Evaluate on test set
    stats_const = simulate_policy(df_test, const_policy)
    stats_dyn   = simulate_policy(df_test, dyn_policy)
    stats_lin   = simulate_policy(df_test, lin_policy)
    stats_wte   = simulate_policy(df_test, wte_policy)
    stats_dp    = simulate_policy(df_test, dp_policy)
    
    table = pd.DataFrame({
        'Method': [
            'Constant Threshold',
            'Dynamic Threshold-R',
            'Linear Threshold',
            'Wait Till End',
            'Dynamic Threshold-DP'
        ],
        'Precision (%)': [
            100*stats_const['precision'],
            100*stats_dyn['precision'],
            100*stats_lin['precision'],
            100*stats_wte['precision'],
            100*stats_dp['precision']
        ],
        'Cost': [
            stats_const['cost'],
            stats_dyn['cost'],
            stats_lin['cost'],
            stats_wte['cost'],
            stats_dp['cost']
        ],
        'Recall (%)': [
            100*stats_const['recall'],
            100*stats_dyn['recall'],
            100*stats_lin['recall'],
            100*stats_wte['recall'],
            100*stats_dp['recall']
        ],
        'Treatment Time': [
            stats_const['avg_treatment_time'],
            stats_dyn['avg_treatment_time'],
            stats_lin['avg_treatment_time'],
            stats_wte['avg_treatment_time'],
            stats_dp['avg_treatment_time']
        ]
    })
    
    return table

###############################################################################
# 7. RUN MULTIPLE REPLICATIONS 
###############################################################################
def run_multiple_replications(df_all, n_replications=30, n_folds=5):
    """
    Run Algorithm 1 (Semi Cross-Validation) multiple times with different random seeds.
    Compute mean and standard deviation for each metric.
    """
    # Define standard method names for consistent reporting
    standard_methods = [
        'Constant Threshold',
        'Dynamic Threshold-R',
        'Linear Threshold',
        'Wait Till End',
        'Dynamic Threshold-DP'
    ]
    
    # Initialize containers for each metric and method
    precision_values = {method: [] for method in standard_methods}
    cost_values = {method: [] for method in standard_methods}
    recall_values = {method: [] for method in standard_methods}
    treatment_time_values = {method: [] for method in standard_methods}
    
    for i in range(n_replications):
        seed = i  # Use a different seed for each replication
        print(f"\nRunning replication {i+1}/{n_replications} with seed={seed}")
        
        # Run algorithm with current seed
        table = semi_crossval_unconstrained(df_all, n_folds=n_folds, seed=seed)
        
        # Extract values for each method
        for _, row in table.iterrows():
            method = row['Method']
            
            if method in standard_methods:
                precision_values[method].append(row['Precision (%)'])
                cost_values[method].append(row['Cost'])
                recall_values[method].append(row['Recall (%)'])
                treatment_time_values[method].append(row['Treatment Time'])
    
    # Compute statistics
    final_data = []
    for method in standard_methods:
        if precision_values[method]:  # Check if we have data for this method
            precision_mean = np.mean(precision_values[method])
            precision_std = np.std(precision_values[method])
            cost_mean = np.mean(cost_values[method])
            cost_std = np.std(cost_values[method])
            recall_mean = np.mean(recall_values[method])
            recall_std = np.std(recall_values[method])
            treat_time_mean = np.mean(treatment_time_values[method])
            treat_time_std = np.std(treatment_time_values[method])
            
            final_data.append({
                'Method': method,
                'Precision (%)': f"{precision_mean:.2f} ± {precision_std:.2f}",
                'Cost': f"{cost_mean:.2f} ± {cost_std:.2f}",
                'Recall (%)': f"{recall_mean:.2f} ± {recall_std:.2f}",
                'Treatment Time': f"{treat_time_mean:.2f} ± {treat_time_std:.2f}"
            })
    
    return pd.DataFrame(final_data)

###############################################################################
# 8. MAIN
###############################################################################
def main():
    # Load  CSV file
    df_all = pd.read_csv("synthetic_patients_with_features.csv")
    df_all = df_all[df_all['time'] < T_MAX].copy()
    
    # Just ensure columns exist:
    required_cols = {'patient_id','time','risk_bucket','risk_score','EIT','NIRS','EIS','label'}
    missing = required_cols - set(df_all.columns)
    if missing:
        raise ValueError(f" CSV is missing columns: {missing}")
    
    n_folds = 5
    n_replications = 30
    
    final_results = run_multiple_replications(df_all, n_replications=n_replications, n_folds=n_folds)
    
    print("\n=== FINAL RESULTS (Mean ± Std Dev over 30 Replications, Algorithm 1) ===")
    print(final_results.to_string(index=False))

if __name__ == "__main__":
    main()



##algorithm 2

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd
from sklearn.model_selection import ParameterGrid
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import roc_auc_score
import warnings

warnings.simplefilter("ignore", category=UserWarning)


###############################################################################
# 1. GLOBAL PARAMETERS & SETTINGS
###############################################################################
FP_COST = 10    # Penalty for false positive treatment
FN_COST = 50    # Penalty for false negative (never treated but was sick)
D_COST  = 1     # Penalty per time-step of delay before treating a sick patient
GAMMA   = 0.99  # Discount factor
T_MAX   = 20    # Time horizon (discrete steps 0..T_MAX-1 for each patient)

# Example features
FEATURE_COLS = ["time", "EIT", "NIRS", "EIS"]

###############################################################################
# 2. HELPER FUNCTIONS: SPLITTING & FILTERING
###############################################################################
def split_patients_kfold(df, n_splits=4, seed=0):
    """
    Shuffle unique patient IDs, then split into (n_splits+1) groups:
       G1,...,G_{n_splits}, G_{n_splits+1} (the final holdout).
    """
    rng = np.random.RandomState(seed)
    unique_pts = df['patient_id'].unique()
    rng.shuffle(unique_pts)
    
    n = len(unique_pts)
    splits = {}
    
    for i in range(n_splits + 1):
        start_idx = int(i * n / (n_splits + 1))
        end_idx   = int((i + 1) * n / (n_splits + 1))
        group_name = f"G{i+1}"
        splits[group_name] = set(unique_pts[start_idx:end_idx])
    
    return splits

def filter_by_group(df, pid_set):
    """Return the rows of df whose patient_id is in pid_set."""
    return df[df['patient_id'].isin(pid_set)].copy()


###############################################################################
# 3. ML TRAINING & RISK-SCORE PREDICTIONS
###############################################################################
def train_and_predict_model(model_type, hyperparams, df_train, df_val, feature_cols=FEATURE_COLS):
    """
    Train a classification model (model_type in {catboost, rf, gb})
    with given hyperparams on df_train. Then return predicted probabilities
    (risk scores) for df_val (aligned with df_val).
    """
    X_train = df_train[feature_cols]
    y_train = df_train['label']
    
    if model_type == "catboost":
        model = CatBoostClassifier(**hyperparams, verbose=False)
        model.fit(X_train, y_train)
    elif model_type == "rf":
        model = RandomForestClassifier(**hyperparams, random_state=42)
        model.fit(X_train, y_train)
    elif model_type == "gb":
        model = GradientBoostingClassifier(**hyperparams, random_state=42)
        model.fit(X_train, y_train)
    else:
        raise ValueError(f"Unknown model_type={model_type}")
    
    X_val = df_val[feature_cols]
    risk_scores = model.predict_proba(X_val)[:, 1]  # Probability that label=1
    return risk_scores

def generate_risk_scores_via_cv(df_train_splits, i_val, model_list, param_grid_dict, feature_cols=FEATURE_COLS):
    """
    For cross-validation fold i_val, pick the best (model_type, hyperparams) by AUC.
    
    Returns:
      best_val_scores (np.array): risk scores for df_train_splits[i_val]
      best_model_type, best_hparams, best_auc
    """
    from sklearn.metrics import roc_auc_score
    
    df_val = df_train_splits[i_val]
    X_val  = df_val[feature_cols]
    y_val  = df_val['label'].values
    
    # Build a single training set = union of all G_j except G_i_val
    train_parts = []
    for k, v_df in df_train_splits.items():
        if k != i_val:
            train_parts.append(v_df)
    df_train_full = pd.concat(train_parts, ignore_index=True)
    
    best_model_type = None
    best_hparams    = None
    best_auc        = -999
    best_val_scores = None
    
    # Evaluate each combination
    for m_type in model_list:
        for hyperparams in param_grid_dict[m_type]:
            scores_val = train_and_predict_model(m_type, hyperparams, df_train_full, df_val, feature_cols=feature_cols)
            auc_val = roc_auc_score(y_val, scores_val)
            if auc_val > best_auc:
                best_auc = auc_val
                best_model_type = m_type
                best_hparams    = hyperparams
                best_val_scores = scores_val
    
    return best_val_scores, best_model_type, best_hparams, best_auc


###############################################################################
# 4. SIMULATE POLICIES
###############################################################################
def simulate_policy(df, policy_func):
    """
    df has columns: patient_id, time, label, predicted_risk (optionally),
    policy_func(patient_rows) -> treat_time (int) or None.

    Returns: dict of {cost, precision, recall, avg_treatment_time}.
    """
    results = []
    for pid, rows in df.groupby('patient_id'):
        rows = rows.sort_values('time')
        label = rows['label'].iloc[0]  # 0 or 1
        treat_time = policy_func(rows)
        
        if treat_time is None:
            treated_flag = 0
            if label == 1:
                cost = FN_COST
            else:
                cost = 0
            tp = 0
            fp = 0
            tt = None
        else:
            treated_flag = 1
            if label == 1:
                cost = D_COST * treat_time
                tp   = 1
                fp   = 0
            else:
                cost = FP_COST
                tp   = 0
                fp   = 1
            tt = treat_time
        
        results.append({
            'patient_id': pid,
            'label': label,
            'treated': treated_flag,
            'treat_time': tt,
            'cost': cost,
            'tp': tp,
            'fp': fp
        })
    
    df_res   = pd.DataFrame(results)
    total_cost = df_res['cost'].sum()
    
    treated_df = df_res[df_res['treated'] == 1]
    tp_sum = treated_df['tp'].sum()
    fp_sum = treated_df['fp'].sum()
    
    precision = tp_sum / (tp_sum + fp_sum) if (tp_sum+fp_sum) > 0 else 0.0
    sick_df = df_res[df_res['label'] == 1]
    recall = tp_sum / len(sick_df) if len(sick_df) > 0 else 0.0
    
    if len(treated_df) > 0:
        valid_tt = treated_df['treat_time'].dropna()
        avg_tt = valid_tt.mean() if len(valid_tt) > 0 else 0.0
    else:
        avg_tt = 0.0
    
    return {
        'cost': total_cost,
        'precision': precision,
        'recall': recall,
        'avg_treatment_time': avg_tt
    }

###############################################################################
# 5. BENCHMARK DECISION POLICIES
###############################################################################
def constant_threshold_search(df, thresholds=None):
    if thresholds is None:
        thresholds = np.linspace(0,0.5,10)
    best_thr, best_cost = None, float('inf')
    best_stats = None
    
    for thr in thresholds:
        def policy_func(rows):
            for _, row in rows.iterrows():
                if row['predicted_risk'] >= thr:
                    return int(row['time'])
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr  = thr
            best_stats= stats
    
    return best_thr, best_stats

def dynamic_threshold_random_search(df,
                                    time_steps=T_MAX,
                                    threshold_candidates=[0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
                                    n_samples=200,
                                    seed=0):
    rng = np.random.RandomState(seed)
    best_vec = None
    best_cost= float('inf')
    best_stats= None
    
    for _ in range(n_samples):
        thr_vec = rng.choice(threshold_candidates, size=time_steps)
        
        def policy_func(rows):
            for _, row in rows.iterrows():
                t = int(row['time'])
                if t < time_steps and row['predicted_risk'] >= thr_vec[t]:
                    return t
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_vec  = thr_vec.copy()
            best_stats= stats
    return best_vec, best_stats

def linear_threshold_search(df, A_candidates=None, B_candidates=None):
    if A_candidates is None:
        A_candidates = np.linspace(-0.05, 0.05, 5)
    if B_candidates is None:
        B_candidates = np.linspace(0, 0.5, 6)
    
    best_A, best_B = None, None
    best_cost, best_stats = float('inf'), None
    
    for A in A_candidates:
        for B in B_candidates:
            def policy_func(rows):
                for _, row in rows.iterrows():
                    t = row['time']
                    thr = np.clip(A*t + B, 0, 1)
                    if row['predicted_risk'] >= thr:
                        return int(t)
                return None
            
            stats = simulate_policy(df, policy_func)
            if stats['cost'] < best_cost:
                best_cost = stats['cost']
                best_stats= stats
                best_A    = A
                best_B    = B
    
    return (best_A,best_B), best_stats

def wait_till_end_search(df, thresholds=None):
    if thresholds is None:
        thresholds = np.linspace(0,1,21)
    best_thr, best_cost = None, float('inf')
    best_stats = None
    
    for thr in thresholds:
        def policy_func(rows):
            final_row = rows.loc[rows['time'].idxmax()]
            if final_row['predicted_risk'] >= thr:
                return int(final_row['time'])
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr  = thr
            best_stats= stats
    
    return best_thr, best_stats

###############################################################################
# 6. DATA-DRIVEN DP
###############################################################################
def assign_buckets(prob, n_buckets=5):
    edges = np.linspace(0,1,n_buckets+1)
    for b in range(n_buckets):
        if edges[b] <= prob < edges[b+1]:
            return b
    return n_buckets - 1

def estimate_transition_and_sick_probs(df_train, T=T_MAX, n_buckets=5):
    transition_counts = np.zeros((T-1, n_buckets, n_buckets), dtype=float)
    bucket_counts     = np.zeros((T, n_buckets), dtype=float)
    sick_counts       = np.zeros((T, n_buckets), dtype=float)

    df_sorted = df_train.sort_values(['patient_id','time'])
    for pid, grp in df_sorted.groupby('patient_id'):
        grp = grp.sort_values('time')
        rows= grp.to_dict('records')
        
        for i, row in enumerate(rows):
            t = int(row['time'])
            b = int(row['risk_bucket'])
            lbl = row['label']
            
            if t < T:
                bucket_counts[t,b] += 1
                sick_counts[t,b]   += lbl
            
            if i < len(rows)-1:
                nxt = rows[i+1]
                t_next = int(nxt['time'])
                b_next = int(nxt['risk_bucket'])
                if (t_next == t+1) and (t < T-1):
                    transition_counts[t,b,b_next] += 1

    p_trans = np.zeros((T-1, n_buckets, n_buckets), dtype=float)
    for t_ in range(T-1):
        for b_ in range(n_buckets):
            denom = transition_counts[t_, b_, :].sum()
            if denom > 0:
                p_trans[t_, b_, :] = transition_counts[t_, b_, :] / denom
            else:
                p_trans[t_, b_, b_] = 1.0  # fallback

    p_sick = np.zeros((T, n_buckets), dtype=float)
    for t_ in range(T):
        for b_ in range(n_buckets):
            denom = bucket_counts[t_, b_]
            if denom > 0:
                p_sick[t_, b_] = sick_counts[t_, b_] / denom
            else:
                p_sick[t_, b_] = 0.0
    
    return p_trans, p_sick

def train_data_driven_dp(p_trans, p_sick,
                         FP=FP_COST, FN=FN_COST, D=D_COST, gamma=GAMMA, T=T_MAX):
    n_buckets = p_sick.shape[1]
    V = np.zeros((T+1, n_buckets))
    pi_ = np.zeros((T, n_buckets), dtype=int)
    
    for b in range(n_buckets):
        cost_treat   = p_sick[T-1,b]*(D*(T-1)) + (1 - p_sick[T-1,b])*FP
        cost_notreat = p_sick[T-1,b]*FN
        V[T,b] = min(cost_treat, cost_notreat)
    
    for t in reversed(range(T)):
        for b in range(n_buckets):
            cost_treat = p_sick[t,b]*(D*t) + (1 - p_sick[t,b])*FP
            if t == T-1:
                cost_wait = gamma * V[T,b]
            else:
                exp_future = 0.0
                for b_next in range(n_buckets):
                    exp_future += p_trans[t,b,b_next] * V[t+1,b_next]
                cost_wait = gamma * exp_future
            
            if cost_treat <= cost_wait:
                V[t,b]   = cost_treat
                pi_[t,b] = 1
            else:
                V[t,b]   = cost_wait
                pi_[t,b] = 0
    return V, pi_

def make_data_driven_dp_policy(V, pi_, T=T_MAX):
    def policy_func(rows):
        for _, row in rows.iterrows():
            t = int(row['time'])
            if t < T:
                b = int(row['risk_bucket'])
                if pi_[t,b] == 1:  # treat
                    return t
        return None
    return policy_func


###############################################################################
# 7. ALGORITHM 2 (Full CV) IMPLEMENTATION
###############################################################################
def run_experiment_algorithm2(
    df_all,
    n_splits=4,
    seed=42,
    model_list=("catboost","rf","gb"),
    param_grid_dict=None):
    """
    Implement Algorithm 2 (Full Cross-Validation) with integrated ML + DP.

    Returns:
      final_table (pd.DataFrame): results on G_{n_splits+1}
      df_val_stats (pd.DataFrame): details per fold in cross-validation
    """
    if param_grid_dict is None:
        # Example small grids
        param_grid_dict = {
            "catboost": [
                {"iterations":50, "depth":3, "learning_rate":0.1},
                {"iterations":50, "depth":4, "learning_rate":0.05},
            ],
            "rf": [
                {"n_estimators":50, "max_depth":3},
                {"n_estimators":100, "max_depth":5},
            ],
            "gb": [
                {"n_estimators":50, "max_depth":3, "learning_rate":0.1},
                {"n_estimators":100,"max_depth":3, "learning_rate":0.05},
            ]
        }

    # 1) Split => G1..G_{n_splits}, G_{n_splits+1}
    splits = split_patients_kfold(df_all, n_splits=n_splits, seed=seed)
    group_dfs = {}
    for group_name, pid_set in splits.items():
        sub_df = filter_by_group(df_all, pid_set)
        group_dfs[group_name] = sub_df
    
    test_name = f"G{n_splits+1}"
    df_test   = group_dfs[test_name]

    # 2) Cross-validation on folds {G1..G_n}
    all_val_stats = []
    for i_val in range(1, n_splits+1):
        val_name = f"G{i_val}"
        df_val = group_dfs[val_name]

        # pick best ML
        val_scores, best_model_type, best_hparams, best_auc = generate_risk_scores_via_cv(
            df_train_splits=group_dfs,
            i_val=val_name,
            model_list=model_list,
            param_grid_dict=param_grid_dict,
            feature_cols=FEATURE_COLS
        )
        df_val = df_val.copy()
        df_val["predicted_risk"] = val_scores
        group_dfs[val_name] = df_val  # store predictions

        # Evaluate benchmark policies on this fold
        # (a) Constant
        thr_const, stats_const = constant_threshold_search(df_val)
        # (b) Dynamic threshold
        thr_vec, stats_dyn = dynamic_threshold_random_search(df_val, seed=999+ i_val)
        # (c) Linear threshold
        (A_lin,B_lin), stats_lin = linear_threshold_search(df_val)
        # (d) Wait-till-end
        thr_wte, stats_wte = wait_till_end_search(df_val)
        # (e) DP
        #     1) train final_model on training folds => get risk for that training set => DP => apply to val
        train_parts = []
        for j in range(1, n_splits+1):
            if j != i_val:
                train_parts.append(group_dfs[f"G{j}"])
        df_train_fold = pd.concat(train_parts, ignore_index=True).copy()
        
        # Retrain "best model" on df_train_fold => produce risk => bucket => DP
        from sklearn.metrics import roc_auc_score
        X_train_f = df_train_fold[FEATURE_COLS]
        y_train_f = df_train_fold['label']
        if best_model_type == "catboost":
            final_model = CatBoostClassifier(**best_hparams, verbose=False)
            final_model.fit(X_train_f, y_train_f)
        elif best_model_type == "rf":
            final_model = RandomForestClassifier(**best_hparams, random_state=42)
            final_model.fit(X_train_f, y_train_f)
        else:
            final_model = GradientBoostingClassifier(**best_hparams, random_state=42)
            final_model.fit(X_train_f, y_train_f)
        
        df_train_fold["predicted_risk"] = final_model.predict_proba(X_train_f)[:,1]
        df_train_fold["risk_bucket"]    = df_train_fold["predicted_risk"].apply(assign_buckets)
        
        p_trans, p_sick = estimate_transition_and_sick_probs(df_train_fold, T=T_MAX, n_buckets=5)
        V, pi_ = train_data_driven_dp(p_trans, p_sick, 
                                      FP=FP_COST, FN=FN_COST, D=D_COST, gamma=GAMMA, T=T_MAX)
        df_val_dp = df_val.copy()
        df_val_dp["risk_bucket"] = df_val_dp["predicted_risk"].apply(assign_buckets)
        dp_policy = make_data_driven_dp_policy(V, pi_, T=T_MAX)
        stats_dp = simulate_policy(df_val_dp, dp_policy)
        
        all_val_stats.append({
            "fold": i_val,
            "best_model_type": best_model_type,
            "best_hparams": best_hparams,
            "AUC_val": roc_auc_score(df_val['label'], df_val['predicted_risk']),

            "const_cost": stats_const["cost"],
            "dyn_cost":   stats_dyn["cost"],
            "lin_cost":   stats_lin["cost"],
            "wte_cost":   stats_wte["cost"],
            "dp_cost":    stats_dp["cost"],
        })
    
    df_val_stats = pd.DataFrame(all_val_stats)

    # 3) Pick a single final model from among folds or do a separate logic.
    #    For simplicity, pick the fold that had the best dp_cost:
    best_fold_idx = df_val_stats["dp_cost"].idxmin()
    fold_rec = df_val_stats.loc[best_fold_idx]
    final_model_type = fold_rec["best_model_type"]
    final_hparams    = fold_rec["best_hparams"]
    
    # 4) Retrain on G1..G_n => evaluate on G_{n+1}
    train_all = []
    for i in range(1, n_splits+1):
        train_all.append(group_dfs[f"G{i}"])
    df_train_all = pd.concat(train_all, ignore_index=True).copy()
    
    X_train_all = df_train_all[FEATURE_COLS]
    y_train_all = df_train_all["label"]
    
    if final_model_type == "catboost":
        final_model = CatBoostClassifier(**final_hparams, verbose=False)
        final_model.fit(X_train_all, y_train_all)
    elif final_model_type == "rf":
        final_model = RandomForestClassifier(**final_hparams, random_state=42)
        final_model.fit(X_train_all, y_train_all)
    else:
        final_model = GradientBoostingClassifier(**final_hparams, random_state=42)
        final_model.fit(X_train_all, y_train_all)

    df_test = df_test.copy()
    df_test["predicted_risk"] = final_model.predict_proba(df_test[FEATURE_COLS])[:,1]
    
    # Evaluate final table
    # (a) Constant
    thr_const, stats_const = constant_threshold_search(df_test)
    # (b) Dynamic
    thr_vec, stats_dyn = dynamic_threshold_random_search(df_test)
    # (c) Linear
    (A_lin,B_lin), stats_lin = linear_threshold_search(df_test)
    # (d) Wait-till-end
    thr_wte, stats_wte = wait_till_end_search(df_test)
    # (e) DP
    df_train_all["predicted_risk"] = final_model.predict_proba(df_train_all[FEATURE_COLS])[:,1]
    df_train_all["risk_bucket"]    = df_train_all["predicted_risk"].apply(assign_buckets)
    p_trans, p_sick = estimate_transition_and_sick_probs(df_train_all, T=T_MAX, n_buckets=5)
    V, pi_ = train_data_driven_dp(p_trans, p_sick, 
                                  FP=FP_COST, FN=FN_COST, D=D_COST, gamma=GAMMA, T=T_MAX)
    df_test_dp = df_test.copy()
    df_test_dp["risk_bucket"] = df_test_dp["predicted_risk"].apply(assign_buckets)
    dp_policy_func = make_data_driven_dp_policy(V, pi_, T=T_MAX)
    stats_dp = simulate_policy(df_test_dp, dp_policy_func)

    final_table = pd.DataFrame({
        "Method": [
            "Constant Threshold",
            "Dynamic Threshold-R",
            "Linear Threshold",
            "Wait Till End",
            "Dynamic Threshold-DP (DataDriven)"
        ],
        "Precision (%)": [
            100*stats_const['precision'],
            100*stats_dyn['precision'],
            100*stats_lin['precision'],
            100*stats_wte['precision'],
            100*stats_dp['precision']
        ],
        "Cost": [
            stats_const['cost'],
            stats_dyn['cost'],
            stats_lin['cost'],
            stats_wte['cost'],
            stats_dp['cost']
        ],
        "Recall (%)": [
            100*stats_const['recall'],
            100*stats_dyn['recall'],
            100*stats_lin['recall'],
            100*stats_wte['recall'],
            100*stats_dp['recall']
        ],
        "Treatment Time": [
            stats_const['avg_treatment_time'],
            stats_dyn['avg_treatment_time'],
            stats_lin['avg_treatment_time'],
            stats_wte['avg_treatment_time'],
            stats_dp['avg_treatment_time']
        ]
    })

    return final_table, df_val_stats


###############################################################################
# 8. RUN-ONCE FUNCTION (with a given seed)
###############################################################################
def run_experiment_once(df_all, seed=42, n_splits=4):
    """
    Runs the full Algorithm 2 cross-validation approach for a single random seed.
    Returns the final test table (5 methods) and the cross-validation details.
    """
    final_table, df_cv_details = run_experiment_algorithm2(
        df_all=df_all,
        n_splits=n_splits,
        seed=seed,
        model_list=("catboost","rf","gb"),
        param_grid_dict=None  # default small grid
    )
    return final_table, df_cv_details

###############################################################################
# 9. MAIN: 30 Replications
###############################################################################
def main():
    # 1) Load the data once
    df_all = pd.read_csv("synthetic_patients_with_features.csv")

    # df_all = df_all[df_all['time'] < T_MAX].copy()

    N_REPS = 30
    methods = [
        "Constant Threshold",
        "Dynamic Threshold-R",
        "Linear Threshold",
        "Wait Till End",
        "Dynamic Threshold-DP (DataDriven)"
    ]
    
    # 2) Data structure to hold results across runs
    results_over_runs = {
        m: {'precision': [], 'cost': [], 'recall': [], 'time': []}
        for m in methods
    }
    
    # 3) Loop over 30 seeds
    for rep in range(N_REPS):
        seed_value = 1000 + rep  # or any scheme you like
        print(f"\n=== RUN {rep+1}/{N_REPS}, seed={seed_value} ===")
        
        final_table, _ = run_experiment_once(df_all, seed=seed_value, n_splits=4)
        
        # final_table has columns: Method, Precision (%), Cost, Recall (%), Treatment Time
        # We'll accumulate them in results_over_runs
        for idx, row in final_table.iterrows():
            m = row["Method"]
            results_over_runs[m]["precision"].append(row["Precision (%)"])
            results_over_runs[m]["cost"].append(row["Cost"])
            results_over_runs[m]["recall"].append(row["Recall (%)"])
            results_over_runs[m]["time"].append(row["Treatment Time"])
    
    # 4) Compute mean ± std dev across the 30 runs
    final_rows = []
    for i, m in enumerate(methods):
        prec_arr = np.array(results_over_runs[m]["precision"])
        cost_arr = np.array(results_over_runs[m]["cost"])
        rec_arr  = np.array(results_over_runs[m]["recall"])
        time_arr = np.array(results_over_runs[m]["time"])
        
        prec_mean, prec_std = prec_arr.mean(), prec_arr.std()
        cost_mean, cost_std = cost_arr.mean(), cost_arr.std()
        rec_mean,  rec_std  = rec_arr.mean(),  rec_arr.std()
        time_mean, time_std = time_arr.mean(), time_arr.std()
        
        final_rows.append([
            m,
            f"{prec_mean:.2f} ± {prec_std:.2f}",
            f"{cost_mean:.2f} ± {cost_std:.2f}",
            f"{rec_mean:.2f} ± {rec_std:.2f}",
            f"{time_mean:.2f} ± {time_std:.2f}"
        ])
    
    # 5) Print final summary
    print("\n=== FINAL RESULTS (Mean ± Std Dev over 30 Replications) ===")
    print("{:<28s} {:>18s} {:>18s} {:>18s} {:>18s}".format(
        "Method", "Precision(%)", "Cost", "Recall(%)", "Avg Time"))
    for row in final_rows:
        m, prec_str, cost_str, rec_str, time_str = row
        print(f"{m:<28s} {prec_str:>18s} {cost_str:>18s} {rec_str:>18s} {time_str:>18s}")


if __name__ == "__main__":
    main()



#algorithm 3


#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd
from sklearn.model_selection import ParameterGrid
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from catboost import CatBoostClassifier
import warnings

warnings.simplefilter("ignore", category=UserWarning)

###############################################################################
# 1. GLOBAL PARAMETERS & SETTINGS
###############################################################################
FP_COST = 10    # Penalty for false positive (treating a healthy patient)
FN_COST = 50    # Penalty for false negative (never treating a sick patient)
D_COST  = 1     # Penalty per time-step of delay in treating a sick patient
GAMMA   = 0.99  # Default discount factor for DP (may be overridden in DP tuning)
T_MAX   = 20    # Time horizon (discrete steps 0..T_MAX-1 for each patient)

FEATURE_COLS = ["time", "EIT", "NIRS", "EIS"]

###############################################################################
# 2. HELPER FUNCTIONS FOR DATA SPLITTING & FILTERING
###############################################################################
def split_patients_kfold(df, n_splits=4, seed=0):
    """
    Shuffle unique patient IDs, then split into (n_splits+1) groups G1,...,G_{n_splits},G_{n_splits+1}.
    """
    rng = np.random.RandomState(seed)
    unique_pts = df['patient_id'].unique()
    rng.shuffle(unique_pts)
    
    n = len(unique_pts)
    splits = {}
    
    # Partition into n_splits+1 roughly equal groups
    for i in range(n_splits + 1):
        start_idx = int(i * n / (n_splits + 1))
        end_idx   = int((i + 1) * n / (n_splits + 1))
        group_name = f"G{i+1}"
        splits[group_name] = set(unique_pts[start_idx:end_idx])
    
    return splits

def filter_by_group(df, pid_set):
    """Return the subset of df whose patient_id is in pid_set."""
    return df[df['patient_id'].isin(pid_set)].copy()


###############################################################################
# 3. ML TRAINING & RISK-SCORE PREDICTIONS
###############################################################################
def train_and_predict_model(
    model_type,
    hyperparams,
    df_train,
    df_val,
    feature_cols=FEATURE_COLS):
    """
    Train a classification model (CatBoost/RF/GB) on df_train and return predicted
    probabilities for df_val. `hyperparams` is a dict of model-specific hyper-parameters.
    """
    X_train = df_train[feature_cols]
    y_train = df_train['label']
    
    if model_type == "catboost":
        model = CatBoostClassifier(**hyperparams, verbose=False)
        model.fit(X_train, y_train)
    elif model_type == "rf":
        model = RandomForestClassifier(**hyperparams, random_state=42)
        model.fit(X_train, y_train)
    elif model_type == "gb":
        model = GradientBoostingClassifier(**hyperparams, random_state=42)
        model.fit(X_train, y_train)
    else:
        raise ValueError(f"Unknown model_type={model_type}")
    
    X_val = df_val[feature_cols]
    risk_scores = model.predict_proba(X_val)[:,1]  # Probability label=1
    return risk_scores


def select_best_ml_hyperparams_by_auc(
    df_train_splits,
    val_split_name,
    model_list,
    param_grid_dict,
    feature_cols=FEATURE_COLS):
    """
    Perform a grid search over (model_type, hyperparams) to maximize AUC on the 
    validation set = df_train_splits[val_split_name].
    Return:
       best_model_type, best_hyperparams, best_auc, val_preds (predicted_risk for the val set).
    """
    from sklearn.metrics import roc_auc_score
    
    df_val = df_train_splits[val_split_name].copy()
    
    # Combine all other folds for training
    train_df_list = []
    for k, v_df in df_train_splits.items():
        if k != val_split_name:
            train_df_list.append(v_df)
    df_train_full = pd.concat(train_df_list, ignore_index=True)
    
    X_val = df_val[feature_cols]
    y_val = df_val['label'].values
    
    best_model_type = None
    best_hparams    = None
    best_auc        = -999
    best_preds      = None
    
    # Grid search across all candidate (model_type, hyperparam)
    for model_type in model_list:
        for hyperparams in param_grid_dict[model_type]:
            # Train on df_train_full, predict on df_val
            preds = train_and_predict_model(
                model_type=model_type,
                hyperparams=hyperparams,
                df_train=df_train_full,
                df_val=df_val,
                feature_cols=feature_cols
            )
            auc_val = roc_auc_score(y_val, preds)
            if auc_val > best_auc:
                best_auc = auc_val
                best_model_type = model_type
                best_hparams    = hyperparams
                best_preds      = preds
    
    return best_model_type, best_hparams, best_auc, best_preds


###############################################################################
# 4. POLICY SIMULATION (Compute cost, precision, recall, etc.)
###############################################################################
def simulate_policy(df, policy_func):
    """
    df has columns: [patient_id, time, label, predicted_risk].
    policy_func(patient_rows) -> an integer in [0..T_MAX-1] for the 
        time step of treatment, or None if never treated.
    Returns a dict with keys {cost, precision, recall, avg_treatment_time}, etc.
    """
    results = []
    for pid, patient_rows in df.groupby('patient_id'):
        patient_rows = patient_rows.sort_values('time')
        label = patient_rows['label'].iloc[0]  # 0 or 1 (healthy vs sick)
        treat_time = policy_func(patient_rows)
        
        if treat_time is None:
            # never treat
            treated_flag = 0
            if label == 1:
                cost = FN_COST  # missed a sick patient
            else:
                cost = 0
            tp = 0
            fp = 0
            tt = None
        else:
            # treat at treat_time
            treated_flag = 1
            if label == 1:
                cost = D_COST * treat_time  # delay cost
                tp   = 1
                fp   = 0
            else:
                cost = FP_COST
                tp   = 0
                fp   = 1
            tt = treat_time
        
        results.append({
            'patient_id': pid,
            'label': label,
            'treated': treated_flag,
            'treat_time': tt,
            'cost': cost,
            'tp': tp,
            'fp': fp
        })
    
    df_res = pd.DataFrame(results)
    total_cost = df_res['cost'].sum()
    
    treated_df = df_res[df_res['treated'] == 1]
    tp_sum     = treated_df['tp'].sum()
    fp_sum     = treated_df['fp'].sum()
    
    if len(treated_df) > 0:
        precision = tp_sum / (tp_sum + fp_sum)
    else:
        precision = 0.0
    
    sick_df   = df_res[df_res['label'] == 1]
    total_sick= len(sick_df)
    if total_sick > 0:
        recall = tp_sum / total_sick
    else:
        recall = 0.0
    
    if len(treated_df) > 0:
        valid_tt = treated_df['treat_time'].dropna()
        avg_tt   = valid_tt.mean() if len(valid_tt) > 0 else 0.0
    else:
        avg_tt = 0.0
    
    return {
        'cost': total_cost,
        'precision': precision,
        'recall': recall,
        'avg_treatment_time': avg_tt
    }


###############################################################################
# 5. BENCHMARK STRATEGIES (Constant Threshold, Dynamic, Linear, Wait-Till-End)
###############################################################################
def constant_threshold_search(df, thresholds=None):
    """
    Try a grid of constant thresholds for the entire time horizon,
    pick the one minimizing cost on df. Return (best_threshold, best_stats).
    """
    if thresholds is None:
        thresholds = np.linspace(0, 0.5, 8)
    best_thr, best_cost, best_stats = None, float('inf'), None
    
    for thr in thresholds:
        def policy_func(patient_rows):
            for _, row in patient_rows.iterrows():
                if row['predicted_risk'] >= thr:
                    return int(row['time'])
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr  = thr
            best_stats= stats
    
    return best_thr, best_stats


def dynamic_threshold_random_search(df,
                                    time_steps=T_MAX,
                                    threshold_candidates=[0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
                                    n_samples=100,
                                    seed=0):
    """
    Sample random time-varying thresholds (one threshold per time step),
    measure cost, pick the best. For demonstration.
    """
    rng = np.random.RandomState(seed)
    best_vec = None
    best_cost= float('inf')
    best_stats=None
    
    for _ in range(n_samples):
        thr_vec = rng.choice(threshold_candidates, size=time_steps)
        
        def policy_func(patient_rows):
            for _, row in patient_rows.iterrows():
                t = int(row['time'])
                if t < time_steps and row['predicted_risk'] >= thr_vec[t]:
                    return t
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_vec  = thr_vec.copy()
            best_stats= stats
    
    return best_vec, best_stats


def linear_threshold_search(df, A_candidates=None, B_candidates=None):
    """
    threshold(t) = A*t + B, clipped to [0,1].
    Search over A_candidates x B_candidates, pick the best cost.
    """
    if A_candidates is None:
        A_candidates = np.linspace(-0.05, 0.05, 3)
    if B_candidates is None:
        B_candidates = np.linspace(0,1,5)
    
    best_A, best_B = None, None
    best_cost, best_stats = float('inf'), None
    
    for A in A_candidates:
        for B in B_candidates:
            def policy_func(patient_rows):
                for _, row in patient_rows.iterrows():
                    t = row['time']
                    thr = A * t + B
                    thr = max(0, min(1, thr))  # clip to [0,1]
                    if row['predicted_risk'] >= thr:
                        return int(t)
                return None
            
            stats = simulate_policy(df, policy_func)
            if stats['cost'] < best_cost:
                best_cost = stats['cost']
                best_A    = A
                best_B    = B
                best_stats= stats
    
    return (best_A, best_B), best_stats


def wait_till_end_search(df, thresholds=None):
    """
    Treat (if at all) only at the final time step, with a single threshold.
    """
    if thresholds is None:
        thresholds = np.linspace(0, 1, 21)
    best_thr, best_cost, best_stats = None, float('inf'), None
    
    for thr in thresholds:
        def policy_func(patient_rows):
            # Look at the final time row
            final_row = patient_rows.loc[patient_rows['time'].idxmax()]
            if final_row['predicted_risk'] >= thr:
                return int(final_row['time'])
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr  = thr
            best_stats= stats
    return best_thr, best_stats


###############################################################################
# 6. DATA-DRIVEN DP (Bucketed)
###############################################################################
def assign_buckets(prob, n_buckets=5):
    """
    Convert predicted probability into a discrete bucket 0..(n_buckets-1).
    E.g. edges for 5 buckets = [0,0.2,0.4,0.6,0.8,1.0]
    """
    edges = np.linspace(0, 1, n_buckets+1)
    for b in range(n_buckets):
        if edges[b] <= prob < edges[b+1]:
            return b
    return n_buckets-1  # fallback if prob=1.0

def estimate_transition_and_sick_probs(df_train, T=T_MAX, n_buckets=5):
    """
    Given df_train with 'predicted_risk' & 'label' & 'time' (0..T-1),
    compute p_trans[t,b,b'] = P(bucket_{t+1}=b' | bucket_t=b),
    and p_sick[t,b] = Probability of being sick in (t,b).
    """
    transition_counts = np.zeros((T-1, n_buckets, n_buckets))
    bucket_counts     = np.zeros((T, n_buckets))
    sick_counts       = np.zeros((T, n_buckets))
    
    df_sorted = df_train.sort_values(['patient_id','time'])
    
    for pid, grp in df_sorted.groupby('patient_id'):
        rows = grp.to_dict('records')
        for i, row in enumerate(rows):
            t   = int(row['time'])
            b   = int(row['risk_bucket'])
            lbl = int(row['label'])
            if t < T:
                bucket_counts[t, b] += 1
                sick_counts[t, b]   += lbl
            
            if i < len(rows) - 1:
                row_next = rows[i+1]
                t_next = int(row_next['time'])
                b_next = int(row_next['risk_bucket'])
                if (t_next == t+1) and (t < T-1):
                    transition_counts[t, b, b_next] += 1
    
    p_trans = np.zeros((T-1, n_buckets, n_buckets))
    for t_ in range(T-1):
        for b_ in range(n_buckets):
            denom = transition_counts[t_, b_, :].sum()
            if denom > 0:
                p_trans[t_, b_, :] = transition_counts[t_, b_, :] / denom
            else:
                # if no data, remain in the same bucket with prob=1
                p_trans[t_, b_, b_] = 1.0
    
    p_sick = np.zeros((T, n_buckets))
    for t_ in range(T):
        for b_ in range(n_buckets):
            denom = bucket_counts[t_, b_]
            if denom > 0:
                p_sick[t_, b_] = sick_counts[t_, b_] / denom
            else:
                p_sick[t_, b_] = 0.0
    
    return p_trans, p_sick


def train_data_driven_dp(p_trans, p_sick,
                         FP=FP_COST, FN=FN_COST, D=D_COST,
                         gamma=GAMMA, T=T_MAX):
    """
    Standard backward DP for the bucket-based approach:
      - V[t,b] = min( cost of treating now, cost of waiting )
    Return V, pi_ (value function and policy).
    """
    n_buckets = p_sick.shape[1]
    V = np.zeros((T+1, n_buckets))
    pi_ = np.zeros((T, n_buckets), dtype=int)
    
    # Terminal cost at t=T
    # If we reach time T in bucket b, the next step is "end" => we can choose treat or not
    for b in range(n_buckets):
        cost_treat = p_sick[T-1,b]* (D*(T-1)) + (1 - p_sick[T-1,b])*FP
        cost_skip  = p_sick[T-1,b]*FN
        V[T,b]     = min(cost_treat, cost_skip)
    
    # Backward recursion
    for t in reversed(range(T)):
        for b in range(n_buckets):
            # Option A: Treat now
            cost_treat = p_sick[t,b]*(D*t) + (1 - p_sick[t,b])*FP
            
            # Option B: Wait => expected cost of next state
            if t == T-1:
                # next step is t=T
                exp_future = V[T,b]
            else:
                exp_future = 0.0
                for b_next in range(n_buckets):
                    exp_future += p_trans[t,b,b_next] * V[t+1,b_next]
            cost_wait = gamma * exp_future
            
            if cost_treat <= cost_wait:
                V[t,b]   = cost_treat
                pi_[t,b] = 1
            else:
                V[t,b]   = cost_wait
                pi_[t,b] = 0
    
    return V, pi_

def make_data_driven_dp_policy(V, pi_, T=T_MAX):
    """
    Returns a function that uses pi_[t,b] to decide when to treat.
    """
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = int(row['time'])
            if t < T:
                b = int(row['risk_bucket'])
                action = pi_[t,b]  # 0=wait, 1=treat
                if action == 1:
                    return t
        return None
    return policy_func


###############################################################################
# 7. DP Hyper-Parameter Search for Algorithm 3
###############################################################################
def dp_param_search(df_train_fold, df_val_fold,
                    dp_param_grid,  # list of dicts, e.g. [{'gamma':0.95}, {'gamma':0.99}]
                    T=T_MAX):
    """
    Given a training fold & validation fold, we try each DP param set in dp_param_grid,
    build a DP policy, and measure cost on the validation fold.
    
    Return the best_dp_params, the cost, and the predicted risk for the validation set
    (the validation set already should have 'predicted_risk' from the chosen ML).
    """
    # For DP, we need to:
    #  - compute discrete buckets in the training fold
    #  - estimate transitions
    #  - run DP for each param set
    #  - apply the resulting policy on the validation fold
    #  - measure cost
    
    # 1) Assign buckets to training fold
    df_train_fold = df_train_fold.copy()
    df_train_fold["risk_bucket"] = df_train_fold["predicted_risk"].apply(assign_buckets)
    
    # 2) Estimate transitions
    p_trans, p_sick = estimate_transition_and_sick_probs(df_train_fold, T=T)
    
    best_params = None
    best_cost   = float('inf')
    best_stats  = None
    
    # Assign buckets to val fold too (for policy simulation)
    df_val_fold = df_val_fold.copy()
    df_val_fold["risk_bucket"] = df_val_fold["predicted_risk"].apply(assign_buckets)
    
    for param_dict in dp_param_grid:
        gamma_ = param_dict.get("gamma", GAMMA)
        # Potentially we could also vary D, FP, FN, etc. if included in the dictionary
        D_  = param_dict.get("D", D_COST)
        FP_ = param_dict.get("FP", FP_COST)
        FN_ = param_dict.get("FN", FN_COST)
        
        # 3) Train DP
        V, pi_ = train_data_driven_dp(p_trans, p_sick,
                                      FP=FP_, FN=FN_, D=D_, gamma=gamma_, T=T)
        dp_policy_func = make_data_driven_dp_policy(V, pi_, T=T)
        
        # 4) Evaluate on df_val_fold
        stats = simulate_policy(df_val_fold, dp_policy_func)
        if stats['cost'] < best_cost:
            best_cost  = stats['cost']
            best_params= param_dict
            best_stats = stats
    
    return best_params, best_stats


###############################################################################
# 8. ALGORITHM 3: SEQUENTIAL OPTIMIZATION
###############################################################################
def run_experiment_algorithm3(
    df_all,
    n_splits=4,
    seed=42,
    model_list=("catboost","rf","gb"),
    ml_param_grid=None,
    dp_param_grid=None,
    verbose=True
):
    """
    Implement Algorithm 3 (Sequential Optimization):
      1) Cross-validate ML hyperparams (AUC-based).
      2) For each fold's chosen ML model, cross-validate DP hyperparams (cost-based).
      3) Summarize the sets of (mu) found in each fold => define mu_{all}^*.
      4) Use all folds again to pick final ML hyperparams (by AUC).
      5) Then, with that ML fixed, pick final DP hyperparams from mu_{all}^* by cost.
      6) Retrain on G1..G_n with chosen ML, produce predicted_risk, run final DP,
         evaluate on G_{n+1}.
    
    Because there are multiple ways to interpret the text-block pseudo-code,
    this function follows the step-by-step logic common in "sequential" 
    (non-decision-aware => then DP decision) style. 
    """
    from sklearn.metrics import roc_auc_score
    
    if ml_param_grid is None:
        # A small default grid for demonstration
        ml_param_grid = {
            "catboost": [
                {"iterations":50, "depth":3, "learning_rate":0.1},
                {"iterations":50, "depth":4, "learning_rate":0.05},
            ],
            "rf": [
                {"n_estimators":50, "max_depth":3},
                {"n_estimators":100,"max_depth":5},
            ],
            "gb": [
                {"n_estimators":50, "max_depth":3, "learning_rate":0.1},
                {"n_estimators":100,"max_depth":3, "learning_rate":0.05},
            ]
        }
    if dp_param_grid is None:
        # Example DP param grid: vary gamma, or vary others
        dp_param_grid = [
            {"gamma": 0.95},
            {"gamma": 0.99},
        ]
    
    if verbose:
        print(f"\nRunning Algorithm 3 (Sequential Optimization) with {n_splits} folds...")
    
    # 1) Split data => G1..G_{n_splits}, G_{n_splits+1}
    splits = split_patients_kfold(df_all, n_splits=n_splits, seed=seed)
    group_dfs = {}
    for group_name, pid_set in splits.items():
        sub_df = filter_by_group(df_all, pid_set)
        group_dfs[group_name] = sub_df
    
    test_name = f"G{n_splits+1}"
    df_test   = group_dfs[test_name]
    
    # -------------------------------------------------------------------------
    # (A) CROSS-VALIDATE ML => pick best ML hyperparams for each fold
    # -------------------------------------------------------------------------
    ml_cv_details = []
    
    for i_val in range(1, n_splits+1):
        val_name = f"G{i_val}"
        
        # (A1) Find best ML hyperparams by AUC
        best_model_type, best_hparams, best_auc, val_preds = select_best_ml_hyperparams_by_auc(
            df_train_splits=group_dfs,
            val_split_name=val_name,
            model_list=model_list,
            param_grid_dict=ml_param_grid,
            feature_cols=FEATURE_COLS
        )
        
        # (A2) Store predicted_risk for that validation set
        df_val = group_dfs[val_name].copy()
        df_val["predicted_risk"] = val_preds
        
        # Save it back
        group_dfs[val_name] = df_val
        
        ml_cv_details.append({
            "fold": i_val,
            "best_model_type": best_model_type,
            "best_hparams": best_hparams,
            "AUC_val": best_auc
        })
    
    df_ml_cv_details = pd.DataFrame(ml_cv_details)
    # Summarize which ML hyperparams got chosen by each fold ...
    # (We will re-check them in the next step.)
    
    # -------------------------------------------------------------------------
    # (B) For each fold's chosen ML, do a DP hyper-param search => pick DP param
    # -------------------------------------------------------------------------
    dp_cv_details = []
    
    for i_val in range(1, n_splits+1):
        val_name  = f"G{i_val}"
        best_rec  = df_ml_cv_details[df_ml_cv_details['fold'] == i_val].iloc[0]
        ml_model_type = best_rec["best_model_type"]
        ml_hparams    = best_rec["best_hparams"]
        
        # 1) Retrain that ML on "training folds except G_i_val" => get predicted_risk
        #    for the union (train_folds) = G\G_i
        train_folds = []
        for j in range(1, n_splits+1):
            if j != i_val:
                train_folds.append(group_dfs[f"G{j}"])
        df_train_fold = pd.concat(train_folds, ignore_index=True).copy()
        
        # Train & predict on df_train_fold itself for DP transitions
        from sklearn.metrics import roc_auc_score
        
        X_train_f = df_train_fold[FEATURE_COLS]
        y_train_f = df_train_fold['label']
        
        # Rebuild the model
        if ml_model_type == "catboost":
            final_model = CatBoostClassifier(**ml_hparams, verbose=False)
            final_model.fit(X_train_f, y_train_f)
        elif ml_model_type == "rf":
            final_model = RandomForestClassifier(**ml_hparams, random_state=42)
            final_model.fit(X_train_f, y_train_f)
        else:
            final_model = GradientBoostingClassifier(**ml_hparams, random_state=42)
            final_model.fit(X_train_f, y_train_f)
        
        # Store predictions in df_train_fold
        df_train_fold["predicted_risk"] = final_model.predict_proba(X_train_f)[:,1]
        
        # 2) DP hyper-param search on this fold, using the same "train => val" logic
        #    Validation set is group_dfs[val_name], which already has *some* predicted risk 
        #    but that risk was from the *best model for i_val.* We should unify it carefully.
        
        # Actually, to be consistent: The DP sees the same final model that we have for i_val.
        # So let's do a fresh predicted risk for df_val as well. (Because we want consistent 
        # train->val usage for DP.)
        
        df_val_fold = group_dfs[val_name].copy()
        X_val_fold  = df_val_fold[FEATURE_COLS]
        df_val_fold["predicted_risk"] = final_model.predict_proba(X_val_fold)[:,1]
        
        best_dp_params, best_dp_stats = dp_param_search(
            df_train_fold=df_train_fold,
            df_val_fold=df_val_fold,
            dp_param_grid=dp_param_grid,
            T=T_MAX
        )
        
        dp_cv_details.append({
            "fold": i_val,
            "chosen_ML_model": ml_model_type,
            "chosen_ML_hparams": ml_hparams,
            "chosen_DP_params": best_dp_params,
            "dp_val_cost": best_dp_stats["cost"],
            "dp_val_prec": best_dp_stats["precision"],
            "dp_val_rec":  best_dp_stats["recall"],
            "dp_val_avgTT": best_dp_stats["avg_treatment_time"]
        })
    
    df_dp_cv_details = pd.DataFrame(dp_cv_details)
    
    # Collect all DP param sets that got chosen: mu(j) for j=1..n
    mu_all_star = []
    for _, row_ in df_dp_cv_details.iterrows():
        # each fold might have chosen a dictionary like {"gamma":0.95}
        mu_all_star.append(row_["chosen_DP_params"])
    
    # -------------------------------------------------------------------------
    # (C) Now do a second pass to pick the final ML hyperparams \lambda^*
    #     across all folds (by AUC).
    # -------------------------------------------------------------------------
    # The simplest approach: we do a standard cross-validation again for ML 
    # but ignoring DP for the moment, because this is "sequential" approach.
    # => Essentially the same method we used in step (A), but summarizing now 
    #    across all folds. We'll pick the single best (model_type, hyperparams)
    #    that leads to highest average AUC across G1..G_n.
    
    # We'll accumulate fold-level AUC for each candidate, then pick the best overall.
    candidate_list = []
    for model_type in model_list:
        for hyperparams in ml_param_grid[model_type]:
            candidate_list.append((model_type, hyperparams))
    
    results_auc_cv = []
    for (mtype, mhp) in candidate_list:
        fold_aucs = []
        for i_val in range(1, n_splits+1):
            val_name = f"G{i_val}"
            # Train on G\G_i
            train_folds = []
            for j in range(1, n_splits+1):
                if j != i_val:
                    train_folds.append(group_dfs[f"G{j}"])
            df_train_fold = pd.concat(train_folds, ignore_index=True)
            
            # Train model
            X_train_f = df_train_fold[FEATURE_COLS]
            y_train_f = df_train_fold['label']
            
            if mtype == "catboost":
                tmp_model = CatBoostClassifier(**mhp, verbose=False)
                tmp_model.fit(X_train_f, y_train_f)
            elif mtype == "rf":
                tmp_model = RandomForestClassifier(**mhp, random_state=42)
                tmp_model.fit(X_train_f, y_train_f)
            else:
                tmp_model = GradientBoostingClassifier(**mhp, random_state=42)
                tmp_model.fit(X_train_f, y_train_f)
            
            # Predict on validation G_i
            df_val_fold = group_dfs[val_name]
            X_val_fold  = df_val_fold[FEATURE_COLS]
            val_preds   = tmp_model.predict_proba(X_val_fold)[:,1]
            
            auc_val = roc_auc_score(df_val_fold['label'], val_preds)
            fold_aucs.append(auc_val)
        
        avg_auc = np.mean(fold_aucs)
        results_auc_cv.append({
            "model_type": mtype,
            "hyperparams": mhp,
            "avg_auc": avg_auc
        })
    
    df_results_auc_cv = pd.DataFrame(results_auc_cv)
    # pick best by avg_auc
    best_row = df_results_auc_cv.loc[df_results_auc_cv['avg_auc'].idxmax()]
    final_ml_type   = best_row["model_type"]
    final_ml_params = best_row["hyperparams"]
    final_ml_auc    = best_row["avg_auc"]
    
    # -------------------------------------------------------------------------
    # (D) Next, with that final ML type/params fixed, we pick the best DP hyperparams 
    #     from the union mu_all_star we collected above.
    #     We'll evaluate each candidate in mu_all_star with a new cross-validation 
    #     pass for cost, but with the final ML in place.
    # -------------------------------------------------------------------------
    
    # Because multiple folds might produce duplicates in mu_all_star, we can deduplicate:
    import json
    unique_mu = []
    seen_strs = set()
    for mu_dict in mu_all_star:
        s = json.dumps(mu_dict, sort_keys=True)
        if s not in seen_strs:
            seen_strs.add(s)
            unique_mu.append(mu_dict)
    
    dp_candidates = unique_mu
    
    # Evaluate each dp_candidates in cross-validation with final ML
    #  => for each fold i_val, we do: train final ML on G\G_i => predict => 
    #     run the DP with param from dp_candidates => measure cost => average across folds
    results_dp_cv = []
    
    for dp_params in dp_candidates:
        fold_costs = []
        for i_val in range(1, n_splits+1):
            val_name = f"G{i_val}"
            # Train final ML on G\G_i
            train_folds = []
            for j in range(1, n_splits+1):
                if j != i_val:
                    train_folds.append(group_dfs[f"G{j}"])
            df_train_fold = pd.concat(train_folds, ignore_index=True).copy()
            
            X_train_f = df_train_fold[FEATURE_COLS]
            y_train_f = df_train_fold['label']
            
            if final_ml_type == "catboost":
                tmp_model = CatBoostClassifier(**final_ml_params, verbose=False)
                tmp_model.fit(X_train_f, y_train_f)
            elif final_ml_type == "rf":
                tmp_model = RandomForestClassifier(**final_ml_params, random_state=42)
                tmp_model.fit(X_train_f, y_train_f)
            else:
                tmp_model = GradientBoostingClassifier(**final_ml_params, random_state=42)
                tmp_model.fit(X_train_f, y_train_f)
            
            df_train_fold["predicted_risk"] = tmp_model.predict_proba(X_train_f)[:,1]
            
            # Build DP for dp_params
            df_val_fold = group_dfs[val_name].copy()
            X_val_fold  = df_val_fold[FEATURE_COLS]
            df_val_fold["predicted_risk"] = tmp_model.predict_proba(X_val_fold)[:,1]
            
            # train DP on df_train_fold
            df_train_fold["risk_bucket"] = df_train_fold["predicted_risk"].apply(assign_buckets)
            p_trans, p_sick = estimate_transition_and_sick_probs(df_train_fold, T=T_MAX)
            
            gamma_ = dp_params.get("gamma", GAMMA)
            D_  = dp_params.get("D", D_COST)
            FP_ = dp_params.get("FP", FP_COST)
            FN_ = dp_params.get("FN", FN_COST)
            
            V, pi_ = train_data_driven_dp(
                p_trans, p_sick,
                FP=FP_, FN=FN_, D=D_, gamma=gamma_, T=T_MAX
            )
            policy_func = make_data_driven_dp_policy(V, pi_, T=T_MAX)
            
            # evaluate cost on df_val_fold
            df_val_fold["risk_bucket"] = df_val_fold["predicted_risk"].apply(assign_buckets)
            stats = simulate_policy(df_val_fold, policy_func)
            fold_costs.append(stats['cost'])
        
        avg_cost = np.mean(fold_costs)
        results_dp_cv.append({
            "dp_params": dp_params,
            "avg_cost": avg_cost
        })
    
    df_results_dp_cv = pd.DataFrame(results_dp_cv)
    best_dp_idx = df_results_dp_cv['avg_cost'].idxmin()
    final_dp_params = df_results_dp_cv.loc[best_dp_idx, "dp_params"]
    final_dp_cost   = df_results_dp_cv.loc[best_dp_idx, "avg_cost"]
    
    if verbose:
        print(f"Final chosen ML: {final_ml_type} {final_ml_params}, avg AUC={final_ml_auc:.3f}")
        print(f"Final chosen DP params: {final_dp_params}, avg cost={final_dp_cost:.3f}")
    
    # -------------------------------------------------------------------------
    # (E) Retrain on G1..G_n with final ML => evaluate on G_{n+1}
    # -------------------------------------------------------------------------
    train_all = []
    for i in range(1, n_splits+1):
        train_all.append(group_dfs[f"G{i}"])
    df_train_all = pd.concat(train_all, ignore_index=True).copy()
    
    X_train_all = df_train_all[FEATURE_COLS]
    y_train_all = df_train_all['label']
    
    if final_ml_type == "catboost":
        final_model = CatBoostClassifier(**final_ml_params, verbose=False)
        final_model.fit(X_train_all, y_train_all)
    elif final_ml_type == "rf":
        final_model = RandomForestClassifier(**final_ml_params, random_state=42)
        final_model.fit(X_train_all, y_train_all)
    else:
        final_model = GradientBoostingClassifier(**final_ml_params, random_state=42)
        final_model.fit(X_train_all, y_train_all)
    
    # Predict risk for train to build DP transitions
    df_train_all["predicted_risk"] = final_model.predict_proba(X_train_all)[:,1]
    df_train_all["risk_bucket"]    = df_train_all["predicted_risk"].apply(assign_buckets)
    
    # Build DP with final_dp_params
    gamma_ = final_dp_params.get("gamma", GAMMA)
    D_  = final_dp_params.get("D", D_COST)
    FP_ = final_dp_params.get("FP", FP_COST)
    FN_ = final_dp_params.get("FN", FN_COST)
    
    p_trans, p_sick = estimate_transition_and_sick_probs(df_train_all, T=T_MAX)
    V, pi_ = train_data_driven_dp(
        p_trans, p_sick,
        FP=FP_, FN=FN_, D=D_, gamma=gamma_, T=T_MAX
    )
    dp_final_policy = make_data_driven_dp_policy(V, pi_, T=T_MAX)
    
    # Evaluate on G_{n+1}
    df_test_eval = df_test.copy()
    X_test_eval  = df_test_eval[FEATURE_COLS]
    df_test_eval["predicted_risk"] = final_model.predict_proba(X_test_eval)[:,1]
    
    # Benchmark methods:
    best_thr_const, stats_const = constant_threshold_search(df_test_eval)
    best_dyn_vec, stats_dyn     = dynamic_threshold_random_search(df_test_eval)
    (bestA,bestB), stats_lin    = linear_threshold_search(df_test_eval)
    best_thr_wte, stats_wte     = wait_till_end_search(df_test_eval)
    
    df_test_eval["risk_bucket"] = df_test_eval["predicted_risk"].apply(assign_buckets)
    stats_dp = simulate_policy(df_test_eval, dp_final_policy)
    
    final_table = pd.DataFrame({
        "Method": [
            "Constant Threshold",
            "Dynamic Threshold-R",
            "Linear Threshold",
            "Wait Till End",
            "Dynamic Threshold-DP"
        ],
        "Precision (%)": [
            100*stats_const['precision'],
            100*stats_dyn['precision'],
            100*stats_lin['precision'],
            100*stats_wte['precision'],
            100*stats_dp['precision']
        ],
        "Cost": [
            stats_const['cost'],
            stats_dyn['cost'],
            stats_lin['cost'],
            stats_wte['cost'],
            stats_dp['cost']
        ],
        "Recall (%)": [
            100*stats_const['recall'],
            100*stats_dyn['recall'],
            100*stats_lin['recall'],
            100*stats_wte['recall'],
            100*stats_dp['recall']
        ],
        "Treatment Time": [
            stats_const['avg_treatment_time'],
            stats_dyn['avg_treatment_time'],
            stats_lin['avg_treatment_time'],
            stats_wte['avg_treatment_time'],
            stats_dp['avg_treatment_time']
        ]
    })
    
    return {
        "ml_cv_details": df_ml_cv_details,
        "dp_cv_details": df_dp_cv_details,
        "ml_final_choice": (final_ml_type, final_ml_params, final_ml_auc),
        "dp_final_choice": (final_dp_params, final_dp_cost),
        "test_results_table": final_table
    }


###############################################################################
# 9. RUN MULTIPLE REPLICATIONS
###############################################################################
def run_multiple_replications(df_all, n_replications=30, n_splits=4):
    """
    Run Algorithm 3 multiple times with different random seeds.
    Compute mean and standard deviation for each metric.
    """
    # Define standard method names for consistent reporting
    standard_methods = [
        'Constant Threshold',
        'Dynamic Threshold-R',
        'Linear Threshold',
        'Wait Till End',
        'Dynamic Threshold-DP'
    ]
    
    # Initialize containers for each metric and method
    precision_values = {method: [] for method in standard_methods}
    cost_values = {method: [] for method in standard_methods}
    recall_values = {method: [] for method in standard_methods}
    treatment_time_values = {method: [] for method in standard_methods}
    
    for i in range(n_replications):
        seed = i  # Use a different seed for each replication
        print(f"\nRunning replication {i+1}/{n_replications} with seed={seed}")
        
        # Run Algorithm 3 with current seed
        results = run_experiment_algorithm3(
            df_all=df_all, 
            n_splits=n_splits, 
            seed=seed,
            verbose=False  # Turn off verbose output for cleaner console
        )
        
        # Extract final test results table
        test_table = results["test_results_table"]
        
        # Extract values for each method
        for _, row in test_table.iterrows():
            method = row['Method']
            
            if method in standard_methods:
                precision_values[method].append(row['Precision (%)'])
                cost_values[method].append(row['Cost'])
                recall_values[method].append(row['Recall (%)'])
                treatment_time_values[method].append(row['Treatment Time'])
    
    # Compute statistics
    final_data = []
    for method in standard_methods:
        if precision_values[method]:  # Check if we have data for this method
            precision_mean = np.mean(precision_values[method])
            precision_std = np.std(precision_values[method])
            cost_mean = np.mean(cost_values[method])
            cost_std = np.std(cost_values[method])
            recall_mean = np.mean(recall_values[method])
            recall_std = np.std(recall_values[method])
            treat_time_mean = np.mean(treatment_time_values[method])
            treat_time_std = np.std(treatment_time_values[method])
            
            final_data.append({
                'Method': method,
                'Precision (%)': f"{precision_mean:.2f} ± {precision_std:.2f}",
                'Cost': f"{cost_mean:.2f} ± {cost_std:.2f}",
                'Recall (%)': f"{recall_mean:.2f} ± {recall_std:.2f}",
                'Treatment Time': f"{treat_time_mean:.2f} ± {treat_time_std:.2f}"
            })
    
    return pd.DataFrame(final_data)


###############################################################################
# 10. MAIN SCRIPT 
###############################################################################
def main():
   
    df_all = pd.read_csv("synthetic_patients_with_features.csv")
    print(f"Total patients: {df_all['patient_id'].nunique()}")
    print(f"Columns: {list(df_all.columns)}")

    # 2) Run multiple replications
    n_replications = 30
    n_splits = 4
    final_results = run_multiple_replications(df_all, n_replications=n_replications, n_splits=n_splits)
    
    # 3) Print final results
    print(f"\n=== FINAL RESULTS (Mean ± Std Dev over {n_replications} Replications, Algorithm 3) ===")
    print(final_results.to_string(index=False))


if __name__ == "__main__":
    main()




#algorithm 5

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd
from sklearn.model_selection import ParameterGrid
from catboost import CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import roc_auc_score
import warnings
from collections import defaultdict

warnings.simplefilter("ignore", category=UserWarning)

###############################################################################
# 1. GLOBAL PARAMETERS & SETTINGS
###############################################################################
FP_COST = 10    # Penalty for false positive treatment
FN_COST = 50    # Penalty for false negative (never treated but was sick)
D_COST  = 1     # Penalty per time-step of delay before treating a sick patient
GAMMA   = 0.99  # Discount factor
T_MAX   = 20    # Time horizon (discrete steps 0..T_MAX-1 for each patient)

FEATURE_COLS = ["time", "EIT", "NIRS", "EIS"]  # Adjust for your dataset columns

###############################################################################
# 2. DATA SPLITTING & HELPER FUNCTIONS
###############################################################################
def split_patients_kfold(df, n_splits=4, seed=0):
    """
    Shuffle unique patient IDs, then split into n_splits+1 groups:
       G1,...,G_{n_splits}, G_{n_splits+1} (the final holdout).
    """
    rng = np.random.RandomState(seed)
    unique_pts = df['patient_id'].unique()
    rng.shuffle(unique_pts)
    
    n = len(unique_pts)
    splits = {}
    
    for i in range(n_splits + 1):
        start_idx = int(i * n / (n_splits + 1))
        end_idx   = int((i + 1) * n / (n_splits + 1))
        group_name = f"G{i+1}"
        pid_subset = unique_pts[start_idx:end_idx]
        splits[group_name] = set(pid_subset)
    
    return splits

def filter_by_group(df, pid_set):
    """Return rows of df whose patient_id is in pid_set."""
    return df[df['patient_id'].isin(pid_set)].copy()

###############################################################################
# 3. ML TRAINING & RISK-SCORE GENERATION
###############################################################################
def train_and_predict_model(
    depth_val,
    lr_val,
    df_train,
    df_val,
    feature_cols=FEATURE_COLS):
    """
    Train a CatBoost model with specified (depth, learning_rate), 
    then return the predicted probabilities (risk scores) for df_val.
    """
    X_train = df_train[feature_cols]
    y_train = df_train['label']
    
    # We fix some CatBoost parameters for demonstration:
    params = {
        "iterations": 50,
        "depth": int(round(depth_val)),  # ensure integer
        "learning_rate": lr_val,
        "verbose": False,
        "random_seed": 42,
    }
    model = CatBoostClassifier(**params)
    model.fit(X_train, y_train)
    
    X_val = df_val[feature_cols]
    risk_scores = model.predict_proba(X_val)[:,1]  # Probability of label=1
    return risk_scores

###############################################################################
# 4. POLICY SIMULATION & COST CALCULATION
###############################################################################
def simulate_policy(df, policy_func):
    """
    df has columns: [patient_id, time, label, predicted_risk, ...]
    policy_func(patient_rows) -> integer time step to treat, or None if never treat.
    Returns dict of {cost, precision, recall, avg_treatment_time}.
    """
    results = []
    for pid, patient_rows in df.groupby('patient_id'):
        patient_rows = patient_rows.sort_values('time')
        label = patient_rows['label'].iloc[0]  # 0 or 1
        treat_time = policy_func(patient_rows)
        
        if treat_time is None:
            # never treated
            if label == 1:
                cost = FN_COST  # missed sick
            else:
                cost = 0
            tp = 0
            fp = 0
            treated_flag = 0
            tt = None
        else:
            treated_flag = 1
            # If label=1 => cost is D_COST * treat_time
            # else => cost is FP_COST
            if label == 1:
                cost = D_COST * treat_time
                tp   = 1
                fp   = 0
            else:
                cost = FP_COST
                tp   = 0
                fp   = 1
            tt = treat_time
        
        results.append({
            'patient_id': pid,
            'label': label,
            'treated': treated_flag,
            'treat_time': tt,
            'cost': cost,
            'tp': tp,
            'fp': fp
        })
    
    df_res = pd.DataFrame(results)
    total_cost = df_res['cost'].sum()
    
    treated_df = df_res[df_res['treated'] == 1]
    tp_sum = treated_df['tp'].sum()
    fp_sum = treated_df['fp'].sum()
    
    if len(treated_df) > 0:
        precision = tp_sum / (tp_sum + fp_sum)
    else:
        precision = 0.0
    
    sick_df = df_res[df_res['label'] == 1]
    total_sick = len(sick_df)
    if total_sick > 0:
        recall = tp_sum / total_sick
    else:
        recall = 0.0
    
    if len(treated_df) > 0:
        valid_tt = treated_df['treat_time'].dropna()
        avg_tt = valid_tt.mean() if len(valid_tt) > 0 else 0.0
    else:
        avg_tt = 0.0
    
    return {
        'cost': total_cost,
        'precision': precision,
        'recall': recall,
        'avg_treatment_time': avg_tt
    }

###############################################################################
# 5. BENCHMARK POLICIES
###############################################################################
def constant_threshold_search(df, thresholds=None):
    if thresholds is None:
        thresholds = np.linspace(0,0.5,8)
    best_thr, best_cost = None, float('inf')
    best_stats = None
    
    for thr in thresholds:
        def policy_func(patient_rows):
            for _, row in patient_rows.iterrows():
                if row['predicted_risk'] >= thr:
                    return int(row['time'])
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr  = thr
            best_stats= stats
    
    return best_thr, best_stats

def make_constant_threshold_policy(thr):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            if row['predicted_risk'] >= thr:
                return int(row['time'])
        return None
    return policy_func


def dynamic_threshold_random_search(df,
                                    time_steps=T_MAX,
                                    threshold_candidates=[0.0, 0.25, 0.5, 0.75, 1.0],
                                    n_samples=200,
                                    seed=0):
    rng = np.random.RandomState(seed)
    best_vec = None
    best_cost= float('inf')
    best_stats=None
    
    for _ in range(n_samples):
        thr_vec = rng.choice(threshold_candidates, size=time_steps)
        
        def policy_func(patient_rows):
            for _, row in patient_rows.iterrows():
                t = int(row['time'])
                if t < time_steps and row['predicted_risk'] >= thr_vec[t]:
                    return t
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_vec  = thr_vec.copy()
            best_stats= stats
    
    return best_vec, best_stats

def make_dynamic_threshold_policy(thr_vec):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = int(row['time'])
            if t < len(thr_vec):
                if row['predicted_risk'] >= thr_vec[t]:
                    return t
        return None
    return policy_func


def linear_threshold_search(df, A_candidates=None, B_candidates=None):
    if A_candidates is None:
        A_candidates = np.linspace(-0.05, 0.05, 5)
    if B_candidates is None:
        B_candidates = np.linspace(0, 0.5, 6)
    
    best_A, best_B = None, None
    best_cost, best_stats = float('inf'), None
    
    for A in A_candidates:
        for B in B_candidates:
            def policy_func(patient_rows):
                for _, row in patient_rows.iterrows():
                    t = row['time']
                    thr = A * t + B
                    thr = np.clip(thr, 0, 1)
                    if row['predicted_risk'] >= thr:
                        return int(t)
                return None
            
            stats = simulate_policy(df, policy_func)
            if stats['cost'] < best_cost:
                best_cost = stats['cost']
                best_A    = A
                best_B    = B
                best_stats= stats
    return (best_A, best_B), best_stats

def make_linear_threshold_policy(A, B):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = row['time']
            thr = A * t + B
            thr = np.clip(thr, 0, 1)
            if row['predicted_risk'] >= thr:
                return int(t)
        return None
    return policy_func

def wait_till_end_search(df, thresholds=None):
    if thresholds is None:
        thresholds = np.linspace(0, 1, 21)
    best_thr, best_cost = None, float('inf')
    best_stats = None
    
    for thr in thresholds:
        def policy_func(patient_rows):
            # only check final time
            final_row = patient_rows.loc[patient_rows['time'].idxmax()]
            if final_row['predicted_risk'] >= thr:
                return int(final_row['time'])
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr  = thr
            best_stats= stats
    return best_thr, best_stats

def make_wait_till_end_policy(thr):
    def policy_func(patient_rows):
        final_row = patient_rows.loc[patient_rows['time'].idxmax()]
        if final_row['predicted_risk'] >= thr:
            return int(final_row['time'])
        return None
    return policy_func

###############################################################################
# 6. DATA-DRIVEN DP (Unconstrained)
###############################################################################
def assign_buckets(prob, n_buckets=5):
    edges = np.linspace(0, 1, n_buckets+1)
    for b in range(n_buckets):
        if prob >= edges[b] and prob < edges[b+1]:
            return b
    return n_buckets-1  # fallback

def estimate_transition_and_sick_probs(df_train, T=20, n_buckets=5):
    """
    df_train has columns: [patient_id, time, predicted_risk, label].
    We build p_trans[t,b,b'] = Probability of (b' | b) from t->t+1
    p_sick[t,b] = Probability that a patient in (t,b) is sick.
    """
    transition_counts = np.zeros((T-1, n_buckets, n_buckets))
    bucket_counts     = np.zeros((T, n_buckets))
    sick_counts       = np.zeros((T, n_buckets))
    
    df_sorted = df_train.sort_values(['patient_id','time'])
    for pid, grp in df_sorted.groupby('patient_id'):
        grp = grp.sort_values('time')
        rows = grp.to_dict('records')
        
        for i, row in enumerate(rows):
            t = int(row['time'])
            b = int(row['risk_bucket'])
            lbl= int(row['label'])
            if t < T:
                bucket_counts[t, b] += 1
                sick_counts[t, b]   += lbl
            if i < len(rows) - 1:
                row_next = rows[i+1]
                t_next   = int(row_next['time'])
                b_next   = int(row_next['risk_bucket'])
                if (t_next == t+1) and (t < T-1):
                    transition_counts[t, b, b_next] += 1.0
    
    # convert counts to probabilities
    p_trans = np.zeros((T-1, n_buckets, n_buckets))
    for t_ in range(T-1):
        for b_ in range(n_buckets):
            denom = transition_counts[t_, b_, :].sum()
            if denom > 0:
                p_trans[t_, b_, :] = transition_counts[t_, b_, :] / denom
            else:
                p_trans[t_, b_, b_] = 1.0
    
    p_sick = np.zeros((T, n_buckets))
    for t_ in range(T):
        for b_ in range(n_buckets):
            denom = bucket_counts[t_, b_]
            if denom > 0:
                p_sick[t_, b_] = sick_counts[t_, b_] / denom
            else:
                p_sick[t_, b_] = 0.0
    return p_trans, p_sick

def train_data_driven_dp(p_trans, p_sick, FP=FP_COST, FN=FN_COST, D=D_COST, gamma=GAMMA, T=T_MAX):
    """
    Standard backward recursion for a single-patient unconstrained DP.
    V[t,b] = minimal future cost from time t, bucket b.
    pi_[t,b] in {0,1} => 1 = treat, 0 = wait.
    """
    n_buckets = p_sick.shape[1]
    V = np.zeros((T+1, n_buckets))
    pi_ = np.zeros((T, n_buckets), dtype=int)
    
    # Terminal cost at t=T: treat or not treat if still in bucket b
    for b in range(n_buckets):
        # Because we only have T steps 0..T-1, interpret "terminal" at t=T carefully.
        # We'll treat the cost if we decide to wait through time T-1 to the end:
        cost_treat   = p_sick[T-1,b]*(D*(T-1)) + (1-p_sick[T-1,b])*FP
        cost_notreat = p_sick[T-1,b]*FN
        V[T,b] = min(cost_treat, cost_notreat)
    
    # backward recursion
    for t in reversed(range(T)):
        for b in range(n_buckets):
            # cost if treat now
            cost_treat = p_sick[t,b]*(D*t) + (1 - p_sick[t,b])*FP
            
            # cost if wait
            if t == T-1:
                exp_future = V[T,b]
            else:
                exp_future = 0.0
                for b_next in range(n_buckets):
                    exp_future += p_trans[t,b,b_next]* V[t+1,b_next]
            cost_wait = gamma * exp_future
            
            if cost_treat <= cost_wait:
                V[t,b]   = cost_treat
                pi_[t,b] = 1
            else:
                V[t,b]   = cost_wait
                pi_[t,b] = 0
    return V, pi_

def make_data_driven_dp_policy(V, pi_, T=T_MAX):
    """
    Returns a function that decides treat (time) or wait based on DP policy.
    """
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = int(row['time'])
            if t < T:
                b = int(row['risk_bucket'])
                action = pi_[t,b]
                if action == 1:
                    return t
        return None
    return policy_func

###############################################################################
# 7. SPSA OPTIMIZATION (ALGORITHM 5 CORE)
###############################################################################
def spsa_optimization(
    df_train_fold,
    df_val_fold,
    n_iterations=20,
    alpha=0.602,
    gamma=0.101,
    a0=0.2,
    c0=0.1,
    depth_init=4.0,
    lr_init=0.1,
    depth_bounds=(2.0, 8.0),
    lr_bounds=(0.01, 0.3),
    T=T_MAX
):
    """
    Run SPSA to minimize the "actual cost" from data-driven DP,
    w.r.t. 2 continuous hyper-parameters: (depth, learning_rate).
    """
    def evaluate_cost(depth_val, lr_val):
        # 1) Train catboost on train_fold
        # 2) Predict risk on val_fold
        risk_scores = train_and_predict_model(
            depth_val, lr_val,
            df_train_fold, df_val_fold
        )
        df_val_eval = df_val_fold.copy()
        df_val_eval["predicted_risk"] = risk_scores
        
        # 3) data-driven DP transitions from df_train_fold
        risk_train = train_and_predict_model(
            depth_val, lr_val,
            df_train_fold, df_train_fold
        )
        df_train_dp = df_train_fold.copy()
        df_train_dp["predicted_risk"] = risk_train
        df_train_dp["risk_bucket"]    = df_train_dp["predicted_risk"].apply(assign_buckets)
        
        p_trans, p_sick = estimate_transition_and_sick_probs(df_train_dp, T=T, n_buckets=5)
        V, pi_ = train_data_driven_dp(p_trans, p_sick, FP=FP_COST, FN=FN_COST, D=D_COST, gamma=GAMMA, T=T)
        
        # apply DP policy to val
        df_val_eval["risk_bucket"] = df_val_eval["predicted_risk"].apply(assign_buckets)
        dp_policy = make_data_driven_dp_policy(V, pi_, T=T)
        stats = simulate_policy(df_val_eval, dp_policy)
        
        return stats["cost"]

    #--- SPSA main loop ---
    params = np.array([depth_init, lr_init], dtype=float)
    
    best_params = params.copy()
    best_cost   = float('inf')
    log_history = []
    
    for k in range(1, n_iterations+1):
        a_k = 0.2 / (k ** 0.602)
        c_k = 0.1 / (k ** 0.101)
        
        # random perturbation delta in {-1, +1}^2
        delta = np.random.choice([-1,1], size=2)
        
        # param_plus, param_minus
        params_plus  = params + c_k * delta
        params_minus = params - c_k * delta
        
        # Clip to bounds
        params_plus[0]  = np.clip(params_plus[0], depth_bounds[0], depth_bounds[1])
        params_plus[1]  = np.clip(params_plus[1], lr_bounds[0],    lr_bounds[1])
        params_minus[0] = np.clip(params_minus[0],depth_bounds[0], depth_bounds[1])
        params_minus[1] = np.clip(params_minus[1],lr_bounds[0],    lr_bounds[1])
        
        # Evaluate cost
        cost_plus  = evaluate_cost(params_plus[0],  params_plus[1])
        cost_minus = evaluate_cost(params_minus[0], params_minus[1])
        
        # Gradient approx
        g_k = (cost_plus - cost_minus)/(2.0*c_k) * delta
        
        # Update
        params_new = params - a_k * g_k
        
        # Clip new param
        params_new[0] = np.clip(params_new[0], depth_bounds[0], depth_bounds[1])
        params_new[1] = np.clip(params_new[1], lr_bounds[0],    lr_bounds[1])
        
        # Evaluate cost at new param
        cost_new = evaluate_cost(params_new[0], params_new[1])
        
        # Check if better
        if cost_new < best_cost:
            best_cost   = cost_new
            best_params = params_new.copy()
        
        # Prepare next iteration
        params = params_new
        
        log_history.append({
            "iter": k,
            "params": (params[0], params[1]),
            "cost_plus": cost_plus,
            "cost_minus": cost_minus,
            "cost_new": cost_new,
            "best_cost_so_far": best_cost
        })
    
    return best_params, best_cost, pd.DataFrame(log_history)

###############################################################################
# 8. ALGORITHM 5 (SPSA) DRIVER
###############################################################################
def run_experiment_algorithm5(
    df_all,
    n_splits=4,
    seed=42,
    n_spsa_iters=20
):
    """
    Implements Algorithm 5 (SPSA) with cross-validation for the unconstrained scenario.
    Returns final_table (results on final holdout) and df_folds (CV details).
    """
    # 1) Split data
    splits = split_patients_kfold(df_all, n_splits=n_splits, seed=seed)
    group_dfs = {}
    for group_name, pid_set in splits.items():
        sub_df = filter_by_group(df_all, pid_set)
        group_dfs[group_name] = sub_df
    
    test_name = f"G{n_splits+1}"
    df_test = group_dfs[test_name]
    
    # We'll store fold results
    fold_records = []
    
    for i_val in range(1, n_splits+1):
        val_name = f"G{i_val}"
        df_val = group_dfs[val_name]
        
        # train folds = all except i_val (and except final G_{n+1})
        train_sets = []
        for j in range(1, n_splits+1):
            if j != i_val:
                train_sets.append(group_dfs[f"G{j}"])
        df_train_fold = pd.concat(train_sets, ignore_index=True)
        
        # ---- Run SPSA to find best hyperparams on this fold
        best_params, best_cost, df_log = spsa_optimization(
            df_train_fold, df_val,
            n_iterations=n_spsa_iters
        )
        depth_star, lr_star = best_params
        
        # Evaluate final cost on the validation set with those best params:
        cost_val = best_cost
        
        # Optionally compute final AUC on the validation set
        risk_scores_val = train_and_predict_model(
            depth_star, lr_star,
            df_train_fold, df_val
        )
        auc_val = roc_auc_score(df_val["label"], risk_scores_val)
        
        fold_records.append({
            "fold": i_val,
            "best_depth": depth_star,
            "best_lr": lr_star,
            "val_cost": cost_val,
            "val_auc": auc_val
        })
    
    df_folds = pd.DataFrame(fold_records)
    
    # pick final hyperparams => e.g. the fold with the minimal val_cost
    best_fold_idx = df_folds['val_cost'].idxmin()
    best_depth    = df_folds.loc[best_fold_idx, 'best_depth']
    best_lr       = df_folds.loc[best_fold_idx, 'best_lr']
    
    # Retrain final model on union of G1..G_n
    train_all = []
    for i in range(1, n_splits+1):
        train_all.append(group_dfs[f"G{i}"])
    df_train_all = pd.concat(train_all, ignore_index=True)
    
    # Final predictions on holdout G_{n_splits+1}
    risk_test = train_and_predict_model(
        best_depth, best_lr,
        df_train_all, df_test
    )
    df_test_eval = df_test.copy()
    df_test_eval["predicted_risk"] = risk_test
    
    # We'll compare 5 approaches on final holdout:
    # (1) Constant threshold
    thr_c, stats_c = constant_threshold_search(df_test_eval)
    # (2) Dynamic threshold
    thr_vec, stats_dyn = dynamic_threshold_random_search(df_test_eval, seed=seed)
    # (3) Linear threshold
    (bestA,bestB), stats_lin = linear_threshold_search(df_test_eval)
    # (4) Wait till end
    thr_wte, stats_wte = wait_till_end_search(df_test_eval)
    # (5) Data-driven DP
    risk_train_final = train_and_predict_model(
        best_depth, best_lr,
        df_train_all, df_train_all
    )
    df_train_final_dp = df_train_all.copy()
    df_train_final_dp["predicted_risk"] = risk_train_final
    df_train_final_dp["risk_bucket"] = df_train_final_dp["predicted_risk"].apply(assign_buckets)
    p_trans, p_sick = estimate_transition_and_sick_probs(df_train_final_dp, T=T_MAX, n_buckets=5)
    V, pi_ = train_data_driven_dp(p_trans, p_sick, FP=FP_COST, FN=FN_COST, D=D_COST, gamma=GAMMA, T=T_MAX)
    
    df_test_eval["risk_bucket"] = df_test_eval["predicted_risk"].apply(assign_buckets)
    dp_policy_func = make_data_driven_dp_policy(V, pi_, T=T_MAX)
    stats_dp = simulate_policy(df_test_eval, dp_policy_func)
    
    final_table = pd.DataFrame({
        "Method": [
            "Constant Threshold",
            "Dynamic Threshold-R",
            "Linear Threshold",
            "Wait Till End",
            "Data-Driven DP (SPSA-catboost)"
        ],
        "Cost": [
            stats_c["cost"],
            stats_dyn["cost"],
            stats_lin["cost"],
            stats_wte["cost"],
            stats_dp["cost"]
        ],
        "Precision (%)": [
            100*stats_c["precision"],
            100*stats_dyn["precision"],
            100*stats_lin["precision"],
            100*stats_wte["precision"],
            100*stats_dp["precision"]
        ],
        "Recall (%)": [
            100*stats_c["recall"],
            100*stats_dyn["recall"],
            100*stats_lin["recall"],
            100*stats_wte["recall"],
            100*stats_dp["recall"]
        ],
        "Avg. Treat Time": [
            stats_c["avg_treatment_time"],
            stats_dyn["avg_treatment_time"],
            stats_lin["avg_treatment_time"],
            stats_wte["avg_treatment_time"],
            stats_dp["avg_treatment_time"]
        ]
    })
    
    return final_table, df_folds

###############################################################################
# 9. AGGREGATION FUNCTION FOR MULTIPLE REPLICATIONS
###############################################################################
def aggregate_results(list_of_tables):
    """
    Given a list of final_table DataFrames (one per replication),
    compute mean ± std for each method and each metric, then return
    a single aggregated DataFrame.
    """
    # We'll accumulate values for each method in a dictionary
    data_accum = defaultdict(lambda: {
        "Cost": [],
        "Precision (%)": [],
        "Recall (%)": [],
        "Avg. Treat Time": []
    })
    
    # Collect data across all replications
    for df_table in list_of_tables:
        for idx in range(len(df_table)):
            row = df_table.iloc[idx]
            method = row["Method"]
            data_accum[method]["Cost"].append(row["Cost"])
            data_accum[method]["Precision (%)"].append(row["Precision (%)"])
            data_accum[method]["Recall (%)"].append(row["Recall (%)"])
            data_accum[method]["Avg. Treat Time"].append(row["Avg. Treat Time"])
    
    # Now compute mean ± std
    results = []
    method_order = [
        "Constant Threshold",
        "Dynamic Threshold-R",
        "Linear Threshold",
        "Wait Till End",
        "Data-Driven DP (SPSA-catboost)"
    ]
    
    for method in method_order:
        metrics_dict = data_accum[method]
        
        cost_arr = np.array(metrics_dict["Cost"])
        prec_arr = np.array(metrics_dict["Precision (%)"])
        rec_arr  = np.array(metrics_dict["Recall (%)"])
        time_arr = np.array(metrics_dict["Avg. Treat Time"])
        
        cost_mean,  cost_std  = cost_arr.mean(),  cost_arr.std()
        prec_mean,  prec_std  = prec_arr.mean(),  prec_arr.std()
        rec_mean,   rec_std   = rec_arr.mean(),   rec_arr.std()
        time_mean,  time_std  = time_arr.mean(),  time_arr.std()
        
        cost_str  = f"{cost_mean:.2f} ± {cost_std:.2f}"
        prec_str  = f"{prec_mean:.2f} ± {prec_std:.2f}"
        rec_str   = f"{rec_mean:.2f} ± {rec_std:.2f}"
        time_str  = f"{time_mean:.2f} ± {time_std:.2f}"
        
        results.append({
            "Method": method,
            "Precision (%)": prec_str,
            "Cost": cost_str,
            "Recall (%)": rec_str,
            "Treatment Time": time_str  # rename "Avg. Treat Time" -> "Treatment Time"
        })
    
    return pd.DataFrame(results)

###############################################################################
# 10. MAIN SCRIPT (Modified to run multiple seeds and report final table)
###############################################################################
def main():
    # 1) Read CSV once
    df_all = pd.read_csv("synthetic_patients_with_features.csv")
    
    # We will run 30 replications with different seeds (0..29)
    n_replications = 30
    final_tables = []
    
    for rep in range(n_replications):
        seed_val = rep
        print(f"\nRunning replication {rep+1}/{n_replications} with seed={seed_val}")
        
        # Run the entire Algorithm 5 pipeline
        final_table, df_cv_details = run_experiment_algorithm5(
            df_all=df_all,
            n_splits=4,
            seed=seed_val,
            n_spsa_iters=20
        )
        
        # Collect the final holdout results table
        final_tables.append(final_table)
    
    # Aggregate results across all replications
    df_agg = aggregate_results(final_tables)
    
    # Print the aggregated table
    print("\n=== FINAL RESULTS (Mean ± Std Dev over 30 Replications, Algorithm 5) ===")
    print(df_agg.to_string(index=False))


if __name__ == "__main__":
    main()



Running replication 1/30 with seed=0
G1: 150 patients | G2: 150 | G3: 150 | G4: 150
Best ML model on (G1->G2): RandomForest_{'max_depth': 5, 'n_estimators': 100}, AUC=0.8552
Best DP gamma on G3 = 0.99, cost=470.00


KeyboardInterrupt: 