In [1]:

import numpy as np
import pandas as pd

###############################################################################
# 1. GLOBAL PARAMETERS
###############################################################################
FP_COST = 10
FN_COST = 50
D_COST  = 1
GAMMA   = 0.99
T_MAX   = 20  

###############################################################################
# 2. HELPER FUNCTIONS: splitting, simulation, etc.
###############################################################################
def make_folds(df, n_folds=5, seed=0):
    """
    Algorithm 1 requires dividing the dataset into n_folds folds.
    Here, we do n_folds TOTALLY, meaning the last fold is "test set"
    and the first (n_folds-1) folds do the cross-validation.
    Example: n_folds=5 => G1, G2, G3, G4, G5
        - We'll treat G1..G4 for the semi cross-val,
        - G5 is final test set.
    Adjust as you wish.
    """
    rng = np.random.RandomState(seed)
    
    # We'll shuffle patient IDs, then chunk them into n_folds groups
    unique_pts = df['patient_id'].unique()
    rng.shuffle(unique_pts)
    
    folds = []
    fold_size = int(np.ceil(len(unique_pts) / n_folds))
    
    start_idx = 0
    for k in range(n_folds):
        end_idx = min(start_idx + fold_size, len(unique_pts))
        fold_pids = unique_pts[start_idx:end_idx]
        folds.append(set(fold_pids))
        start_idx = end_idx
    
    # If n_folds is large or does not divide the data exactly, 
    # the last fold might be smaller or empty; adapt as needed.
    return folds

def filter_by_group(df, pid_set):
    return df[df['patient_id'].isin(pid_set)].copy()

def simulate_policy(df, policy_func):
    """
    Same as the previous examples:
      - policy_func(patient_rows) -> treat_time in [0..T_MAX-1] or None
    Returns cost, precision, recall, avg_treatment_time
    """
    results = []
    
    for pid, patient_rows in df.groupby('patient_id'):
        patient_rows = patient_rows.sort_values('time')
        
        label = patient_rows['label'].iloc[0]  # 0 or 1
        treat_time = policy_func(patient_rows)
        
        if treat_time is None:
            # never treated
            treated_flag = 0
            if label == 1:
                cost = FN_COST
            else:
                cost = 0
            tp = 0
            fp = 0
            tt = None
        else:
            treated_flag = 1
            if label == 1:
                cost = D_COST * treat_time
                tp = 1
                fp = 0
            else:
                cost = FP_COST
                tp = 0
                fp = 1
            tt = treat_time
        
        results.append({
            'patient_id': pid,
            'label': label,
            'treated': treated_flag,
            'treat_time': tt,
            'cost': cost,
            'tp': tp,
            'fp': fp
        })
    
    df_res = pd.DataFrame(results)
    total_cost = df_res['cost'].sum()
    
    treated_df = df_res[df_res['treated'] == 1]
    tp_sum = treated_df['tp'].sum()
    fp_sum = treated_df['fp'].sum()
    if len(treated_df) > 0:
        precision = tp_sum / (tp_sum + fp_sum)
    else:
        precision = 0.0
    
    sick_df = df_res[df_res['label'] == 1]
    total_sick = len(sick_df)
    if total_sick > 0:
        recall = tp_sum / total_sick
    else:
        recall = 0.0
    
    if len(treated_df) > 0:
        valid_tt = treated_df['treat_time'].dropna()
        avg_tt = valid_tt.mean() if len(valid_tt) > 0 else 0.0
    else:
        avg_tt = 0.0
    
    return {
        'cost': total_cost,
        'precision': precision,
        'recall': recall,
        'avg_treatment_time': avg_tt
    }

###############################################################################
# 3. BENCHMARK POLICIES
###############################################################################
def constant_threshold_search(df, thresholds=None):
    if thresholds is None:
        thresholds = np.linspace(0, 1, 21)
    best_thr, best_cost, best_stats = None, float('inf'), None
    
    for thr in thresholds:
        def policy_func(patient_rows):
            for _, row in patient_rows.iterrows():
                if row['risk_score'] >= thr:
                    return int(row['time'])
            return None
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr = thr
            best_stats = stats
    
    return best_thr, best_stats

def make_constant_threshold_policy(thr):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            if row['risk_score'] >= thr:
                return int(row['time'])
        return None
    return policy_func

def dynamic_threshold_random_search(df,
                                    time_steps=20,
                                    threshold_candidates=[0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
                                    n_samples=200,
                                    seed=0):
    rng = np.random.RandomState(seed)
    best_vec = None
    best_cost = float('inf')
    best_stats = None
    
    for _ in range(n_samples):
        thr_vec = rng.choice(threshold_candidates, size=time_steps)
        
        def policy_func(patient_rows):
            for _, row in patient_rows.iterrows():
                t = int(row['time'])
                if t < time_steps and row['risk_score'] >= thr_vec[t]:
                    return t
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_vec = thr_vec.copy()
            best_stats = stats
    
    return best_vec, best_stats

def make_dynamic_threshold_policy(thr_vec):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = int(row['time'])
            if t < len(thr_vec):
                if row['risk_score'] >= thr_vec[t]:
                    return t
        return None
    return policy_func

def linear_threshold_search(df, A_candidates=None, B_candidates=None):
    if A_candidates is None:
        A_candidates = np.linspace(-0.05, 0.05, 11)
    if B_candidates is None:
        B_candidates = np.linspace(0, 1, 11)
    best_A, best_B = None, None
    best_cost, best_stats = float('inf'), None
    
    for A in A_candidates:
        for B in B_candidates:
            def policy_func(patient_rows):
                for _, row in patient_rows.iterrows():
                    t = row['time']
                    thr = A * t + B
                    thr = max(0, min(1, thr))
                    if row['risk_score'] >= thr:
                        return int(t)
                return None
            
            stats = simulate_policy(df, policy_func)
            if stats['cost'] < best_cost:
                best_cost = stats['cost']
                best_A = A
                best_B = B
                best_stats = stats
    
    return (best_A, best_B), best_stats

def make_linear_threshold_policy(A, B):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = row['time']
            thr = A*t + B
            thr = max(0, min(1, thr))
            if row['risk_score'] >= thr:
                return int(t)
        return None
    return policy_func

def wait_till_end_search(df, thresholds=None):
    if thresholds is None:
        thresholds = np.linspace(0,1,21)
    best_thr, best_cost, best_stats = None, float('inf'), None
    
    for thr in thresholds:
        def policy_func(patient_rows):
            final_row = patient_rows.loc[patient_rows['time'].idxmax()]
            if final_row['risk_score'] >= thr:
                return int(final_row['time'])
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr = thr
            best_stats = stats
    
    return best_thr, best_stats

def make_wait_till_end_policy(thr):
    def policy_func(patient_rows):
        final_row = patient_rows.loc[patient_rows['time'].idxmax()]
        if final_row['risk_score'] >= thr:
            return int(final_row['time'])
        return None
    return policy_func

###############################################################################
# 4. DATA-DRIVEN DP (Unconstrained) Example
###############################################################################
def estimate_transition_and_sick_probs(df_train, T=20, n_buckets=5):
    """
    We'll do a simple approach:
      p_sick[t,b] = fraction of df_train rows that have (time=t, bucket=b) AND label=1
      p_trans[t,b,b'] = fraction of patients at (t,b) who go to (t+1,b') ...
    (But for the unconstrained scenario, we might also just do a simpler policy.)
    """
    transition_counts = np.zeros((T-1, n_buckets, n_buckets), dtype=float)
    bucket_counts     = np.zeros((T, n_buckets), dtype=float)
    sick_counts       = np.zeros((T, n_buckets), dtype=float)

    df_sorted = df_train.sort_values(['patient_id','time'])
    
    for pid, grp in df_sorted.groupby('patient_id'):
        grp = grp.sort_values('time')
        rows = grp.to_dict('records')
        
        for i in range(len(rows)):
            t  = int(rows[i]['time'])
            b  = int(rows[i]['risk_bucket'])
            lb = int(rows[i]['label'])  # 0 or 1
            if t < T:
                bucket_counts[t, b] += 1
                sick_counts[t, b]   += lb
            
            if i < len(rows) - 1:
                t_next = int(rows[i+1]['time'])
                b_next = int(rows[i+1]['risk_bucket'])
                if t_next == t+1 and t < T-1:
                    transition_counts[t, b, b_next] += 1.0

    # Prob of transitions
    p_trans = np.zeros((T-1, n_buckets, n_buckets), dtype=float)
    for t_ in range(T-1):
        for b_ in range(n_buckets):
            denom = transition_counts[t_, b_, :].sum()
            if denom > 0:
                p_trans[t_, b_, :] = transition_counts[t_, b_, :] / denom
            else:
                p_trans[t_, b_, b_] = 1.0  # default identity

    # Probability of sick in (t,b)
    p_sick = np.zeros((T, n_buckets), dtype=float)
    for t_ in range(T):
        for b_ in range(n_buckets):
            denom = bucket_counts[t_, b_]
            if denom > 0:
                p_sick[t_, b_] = sick_counts[t_, b_] / denom
            else:
                p_sick[t_, b_] = 0.0
    return p_trans, p_sick

def train_data_driven_dp_unconstrained(p_trans, p_sick, 
                                       FP=10, FN=50, D=1, gamma=0.99, T=20):
    """
    Similar to the code shown previously for an *expected-cost DP* 
    but focusing on "treat vs. wait" in state (t,bucket).
    We do a single chain for all patients => only p_sick used.
    """
    n_buckets = p_sick.shape[1]
    V = np.zeros((T+1, n_buckets))
    pi_ = np.zeros((T, n_buckets), dtype=int)
    
    # boundary at t=T
    for b in range(n_buckets):
        cost_treat  = p_sick[T-1,b]*(D*(T-1)) + (1-p_sick[T-1,b])*FP
        cost_notreat= p_sick[T-1,b]*FN
        V[T,b] = min(cost_treat, cost_notreat)
    
    for t in reversed(range(T)):
        for b in range(n_buckets):
            cost_treat = p_sick[t,b]*(D*t) + (1-p_sick[t,b])*FP
            # cost_wait
            if t == T-1:
                # next step is T => no transitions => V[T,b]
                exp_future = V[T,b]
            else:
                exp_future = 0.0
                for b_next in range(n_buckets):
                    exp_future += p_trans[t,b,b_next]*V[t+1,b_next]
            cost_wait = gamma * exp_future
            
            if cost_treat <= cost_wait:
                V[t,b] = cost_treat
                pi_[t,b] = 1
            else:
                V[t,b] = cost_wait
                pi_[t,b] = 0
    return V, pi_

def make_data_driven_dp_policy_unconstrained(V, pi_, T=20):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = int(row['time'])
            b = int(row['risk_bucket'])
            if t < T:
                if pi_[t,b] == 1:
                    return t
        return None
    return policy_func


###############################################################################
# 5. ALGORITHM 1: SEMI CROSS-VALIDATION
###############################################################################
def semi_crossval_unconstrained(df_all, n_folds=5, seed=0):
    r"""
    This implements the "Algorithm 1" idea for the unconstrained scenario.

    Steps:
     1) Partition df_all into n_folds folds: G1..G_{n_folds}.
     2) Let G_{n_folds} = final test set.
     3) For each j in [0..(n_folds-2)] (the "CV folds"):
         - Validation set = G_j
         - Training set = union of G_k for k != j
           => pick best hyperparams for benchmark (like constant threshold, etc.)
           => pick best hyperparams for DP
       We store the best hyperparams from each fold j.
     4) Combine or choose a final hyperparam set from these folds.
     5) Evaluate on the final fold G_{n_folds}.
    """
    # 1) Create folds
    folds = make_folds(df_all, n_folds=n_folds, seed=seed)
    # folds[0], folds[1], ..., folds[n_folds-1]
    
    # We'll treat folds[n_folds-1] as G_{n_folds} (the final test).
    test_fold_pid = folds[-1]
    
    # The first (n_folds-1) folds are used for the cross-validation.
    cv_folds = folds[:-1]
    
    # We'll collect best hyperparams for each fold j in [0..n_folds-2].
    best_thr_const_list = []
    best_dyn_vec_list   = []
    best_linAB_list     = []
    best_thr_wait_list  = []
   
    
    for j in range(len(cv_folds)):
        # Validation fold = j
        val_pid = cv_folds[j]
        # Training = union of all other folds except j
        train_pid = set()
        for k in range(len(cv_folds)):
            if k != j:
                train_pid = train_pid.union(cv_folds[k])
        
        df_train = filter_by_group(df_all, train_pid)
        df_val   = filter_by_group(df_all, val_pid)
        
        # 1) Constant threshold
        thr_c, _ = constant_threshold_search(df_train)
        
        best_thr_const_list.append(thr_c)
        
        # 2) Dynamic threshold
        thr_vec, _ = dynamic_threshold_random_search(
            df_train, 
            time_steps=T_MAX,
            threshold_candidates=[0,0.2,0.4,0.6,0.8,1.0],
            n_samples=200,
            seed=j
        )
        best_dyn_vec_list.append(thr_vec)
        
        # 3) Linear threshold
        (A,B), _ = linear_threshold_search(df_train)
        best_linAB_list.append((A,B))
        
        # 4) Wait till end
        thr_wte, _ = wait_till_end_search(df_train)
        best_thr_wait_list.append(thr_wte)
        
    
    
    # constant threshold average:
    thr_const_final = np.mean(best_thr_const_list)
    
    # dynamic threshold => pick the "middle" fold's threshold
    mid_idx = len(best_dyn_vec_list)//2
    thr_dyn_final = best_dyn_vec_list[mid_idx]
    
    # linear threshold => average (A, B)
    A_ave = np.mean([ab[0] for ab in best_linAB_list])
    B_ave = np.mean([ab[1] for ab in best_linAB_list])
    
    thr_wait_final = np.mean(best_thr_wait_list)
    
    train_pid_all = set()
    for j in range(len(cv_folds)):
        train_pid_all = train_pid_all.union(cv_folds[j])
    df_train_cv = filter_by_group(df_all, train_pid_all)
    
    p_trans, p_sick = estimate_transition_and_sick_probs(
        df_train_cv, T=T_MAX, n_buckets=5
    )
    V, pi_ = train_data_driven_dp_unconstrained(
        p_trans, p_sick, FP=FP_COST, FN=FN_COST, D=D_COST, gamma=GAMMA, T=T_MAX
    )
    dp_policy_func = make_data_driven_dp_policy_unconstrained(V, pi_, T=T_MAX)
    
    # Now evaluate everything on final test fold folds[-1].
    df_test = filter_by_group(df_all, test_fold_pid)
    
    # Build final policies
    const_policy = make_constant_threshold_policy(thr_const_final)
    dyn_policy   = make_dynamic_threshold_policy(thr_dyn_final)
    lin_policy   = make_linear_threshold_policy(A_ave, B_ave)
    wte_policy   = make_wait_till_end_policy(thr_wait_final)
    
    stats_const_test = simulate_policy(df_test, const_policy)
    stats_dyn_test   = simulate_policy(df_test, dyn_policy)
    stats_lin_test   = simulate_policy(df_test, lin_policy)
    stats_wte_test   = simulate_policy(df_test, wte_policy)
    stats_dp_test    = simulate_policy(df_test, dp_policy_func)
    
    # Summarize
    table = pd.DataFrame({
        'Method': [
            'Constant Threshold',
            'Dynamic Threshold-R',
            'Linear Threshold',
            'Wait Till End',
            'Dynamic Threshold-DP (DataDriven)'
        ],
        'Precision (%)': [
            100*stats_const_test['precision'],
            100*stats_dyn_test['precision'],
            100*stats_lin_test['precision'],
            100*stats_wte_test['precision'],
            100*stats_dp_test['precision'],
        ],
        'Cost': [
            stats_const_test['cost'],
            stats_dyn_test['cost'],
            stats_lin_test['cost'],
            stats_wte_test['cost'],
            stats_dp_test['cost'],
        ],
        'Recall (%)': [
            100*stats_const_test['recall'],
            100*stats_dyn_test['recall'],
            100*stats_lin_test['recall'],
            100*stats_wte_test['recall'],
            100*stats_dp_test['recall'],
        ],
        'Treatment Time': [
            stats_const_test['avg_treatment_time'],
            stats_dyn_test['avg_treatment_time'],
            stats_lin_test['avg_treatment_time'],
            stats_wte_test['avg_treatment_time'],
            stats_dp_test['avg_treatment_time'],
        ]
    })
    return table

###############################################################################
# 6. MAIN: read CSV, run Algorithm 1
###############################################################################
def main():
    df_all = pd.read_csv("synthetic_patients_with_features.csv")
    df_all = df_all[df_all['time'] < T_MAX].copy()
    
    n_folds = 5
    
    result_table = semi_crossval_unconstrained(df_all, n_folds=n_folds, seed=42)
    
    print("\n=== Algorithm 1 (Semi Cross‐Validation) Results ===")
    print(result_table)

if __name__ == "__main__":
    main()


=== Algorithm 1 (Semi Cross‐Validation) Results ===
                              Method  Precision (%)  Cost  Recall (%)  \
0                 Constant Threshold      76.923077   276   95.238095   
1                Dynamic Threshold-R      17.500000   990  100.000000   
2                   Linear Threshold     100.000000   204  100.000000   
3                      Wait Till End     100.000000   399  100.000000   
4  Dynamic Threshold-DP (DataDriven)     100.000000   141  100.000000   

   Treatment Time  
0        6.384615  
1        0.000000  
2        9.714286  
3       19.000000  
4        6.714286  


In [3]:
import numpy as np
import pandas as pd

###############################################################################
# 1. GLOBAL PARAMETERS
###############################################################################
FP_COST = 10
FN_COST = 50
D_COST  = 1
GAMMA   = 0.99
T_MAX   = 20

###############################################################################
# 2. HELPER FUNCTIONS: splitting, simulation, etc.
###############################################################################
def make_folds(df, n_folds=5, seed=0):
    """
    Algorithm 1 requires dividing the dataset into n_folds folds.
    Here, we do n_folds TOTALLY, meaning the last fold is "test set"
    and the first (n_folds-1) folds do the cross-validation.
    Example: n_folds=5 => G1, G2, G3, G4, G5
        - We'll treat G1..G4 for the semi cross-val,
        - G5 is final test set.
    Adjust as you wish.
    """
    rng = np.random.RandomState(seed)

    # We'll shuffle patient IDs, then chunk them into n_folds groups
    unique_pts = df['patient_id'].unique()
    rng.shuffle(unique_pts)

    folds = []
    fold_size = int(np.ceil(len(unique_pts) / n_folds))

    start_idx = 0
    for k in range(n_folds):
        end_idx = min(start_idx + fold_size, len(unique_pts))
        fold_pids = unique_pts[start_idx:end_idx]
        folds.append(set(fold_pids))
        start_idx = end_idx

    # If n_folds is large or does not divide the data exactly,
    # the last fold might be smaller or empty; adapt as needed.
    return folds

def filter_by_group(df, pid_set):
    return df[df['patient_id'].isin(pid_set)].copy()

def simulate_policy(df, policy_func):
    """
    Same as the previous examples:
      - policy_func(patient_rows) -> treat_time in [0..T_MAX-1] or None
    Returns cost, precision, recall, avg_treatment_time
    """
    results = []

    for pid, patient_rows in df.groupby('patient_id'):
        patient_rows = patient_rows.sort_values('time')

        label = patient_rows['label'].iloc[0]  # 0 or 1
        treat_time = policy_func(patient_rows)

        if treat_time is None:
            # never treated
            treated_flag = 0
            if label == 1:
                cost = FN_COST
            else:
                cost = 0
            tp = 0
            fp = 0
            tt = None
        else:
            treated_flag = 1
            if label == 1:
                cost = D_COST * treat_time
                tp = 1
                fp = 0
            else:
                cost = FP_COST
                tp = 0
                fp = 1
            tt = treat_time

        results.append({
            'patient_id': pid,
            'label': label,
            'treated': treated_flag,
            'treat_time': tt,
            'cost': cost,
            'tp': tp,
            'fp': fp
        })

    df_res = pd.DataFrame(results)
    total_cost = df_res['cost'].sum()

    treated_df = df_res[df_res['treated'] == 1]
    tp_sum = treated_df['tp'].sum()
    fp_sum = treated_df['fp'].sum()
    if len(treated_df) > 0:
        precision = tp_sum / (tp_sum + fp_sum)
    else:
        precision = 0.0

    sick_df = df_res[df_res['label'] == 1]
    total_sick = len(sick_df)
    if total_sick > 0:
        recall = tp_sum / total_sick
    else:
        recall = 0.0

    if len(treated_df) > 0:
        valid_tt = treated_df['treat_time'].dropna()
        avg_tt = valid_tt.mean() if len(valid_tt) > 0 else 0.0
    else:
        avg_tt = 0.0

    return {
        'cost': total_cost,
        'precision': precision,
        'recall': recall,
        'avg_treatment_time': avg_tt
    }

###############################################################################
# 3. BENCHMARK POLICIES
###############################################################################
def constant_threshold_search(df, thresholds=None):
    if thresholds is None:
        thresholds = np.linspace(0, 1, 21)
    best_thr, best_cost, best_stats = None, float('inf'), None

    for thr in thresholds:
        def policy_func(patient_rows):
            for _, row in patient_rows.iterrows():
                if row['risk_score'] >= thr:
                    return int(row['time'])
            return None
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr = thr
            best_stats = stats

    return best_thr, best_stats

def make_constant_threshold_policy(thr):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            if row['risk_score'] >= thr:
                return int(row['time'])
        return None
    return policy_func

def dynamic_threshold_random_search(df,
                                    time_steps=20,
                                    threshold_candidates=[0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
                                    n_samples=200,
                                    seed=0):
    rng = np.random.RandomState(seed)
    best_vec = None
    best_cost = float('inf')
    best_stats = None

    for _ in range(n_samples):
        thr_vec = rng.choice(threshold_candidates, size=time_steps)

        def policy_func(patient_rows):
            for _, row in patient_rows.iterrows():
                t = int(row['time'])
                if t < time_steps and row['risk_score'] >= thr_vec[t]:
                    return t
            return None

        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_vec = thr_vec.copy()
            best_stats = stats

    return best_vec, best_stats

def make_dynamic_threshold_policy(thr_vec):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = int(row['time'])
            if t < len(thr_vec):
                if row['risk_score'] >= thr_vec[t]:
                    return t
        return None
    return policy_func

def linear_threshold_search(df, A_candidates=None, B_candidates=None):
    if A_candidates is None:
        A_candidates = np.linspace(-0.05, 0.05, 11)
    if B_candidates is None:
        B_candidates = np.linspace(0, 1, 11)
    best_A, best_B = None, None
    best_cost, best_stats = float('inf'), None

    for A in A_candidates:
        for B in B_candidates:
            def policy_func(patient_rows):
                for _, row in patient_rows.iterrows():
                    t = row['time']
                    thr = A * t + B
                    thr = max(0, min(1, thr))
                    if row['risk_score'] >= thr:
                        return int(t)
                return None

            stats = simulate_policy(df, policy_func)
            if stats['cost'] < best_cost:
                best_cost = stats['cost']
                best_A = A
                best_B = B
                best_stats = stats

    return (best_A, best_B), best_stats

def make_linear_threshold_policy(A, B):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = row['time']
            thr = A*t + B
            thr = max(0, min(1, thr))
            if row['risk_score'] >= thr:
                return int(t)
        return None
    return policy_func

def wait_till_end_search(df, thresholds=None):
    if thresholds is None:
        thresholds = np.linspace(0,1,21)
    best_thr, best_cost, best_stats = None, float('inf'), None

    for thr in thresholds:
        def policy_func(patient_rows):
            final_row = patient_rows.loc[patient_rows['time'].idxmax()]
            if final_row['risk_score'] >= thr:
                return int(final_row['time'])
            return None

        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr = thr
            best_stats = stats

    return best_thr, best_stats

def make_wait_till_end_policy(thr):
    def policy_func(patient_rows):
        final_row = patient_rows.loc[patient_rows['time'].idxmax()]
        if final_row['risk_score'] >= thr:
            return int(final_row['time'])
        return None
    return policy_func

###############################################################################
# 4. DATA-DRIVEN DP (Unconstrained) Example
###############################################################################
def estimate_transition_and_sick_probs(df_train, T=20, n_buckets=5):
    """
    We'll do a simple approach:
      p_sick[t,b] = fraction of df_train rows that have (time=t, bucket=b) AND label=1
      p_trans[t,b,b'] = fraction of patients at (t,b) who go to (t+1,b') ...
    """
    transition_counts = np.zeros((T-1, n_buckets, n_buckets), dtype=float)
    bucket_counts     = np.zeros((T, n_buckets), dtype=float)
    sick_counts       = np.zeros((T, n_buckets), dtype=float)

    df_sorted = df_train.sort_values(['patient_id','time'])

    for pid, grp in df_sorted.groupby('patient_id'):
        grp = grp.sort_values('time')
        rows = grp.to_dict('records')

        for i in range(len(rows)):
            t  = int(rows[i]['time'])
            b  = int(rows[i]['risk_bucket'])
            lb = int(rows[i]['label'])  # 0 or 1
            if t < T:
                bucket_counts[t, b] += 1
                sick_counts[t, b]   += lb

            if i < len(rows) - 1:
                t_next = int(rows[i+1]['time'])
                b_next = int(rows[i+1]['risk_bucket'])
                if t_next == t+1 and t < T-1:
                    transition_counts[t, b, b_next] += 1.0

    # Prob of transitions
    p_trans = np.zeros((T-1, n_buckets, n_buckets), dtype=float)
    for t_ in range(T-1):
        for b_ in range(n_buckets):
            denom = transition_counts[t_, b_, :].sum()
            if denom > 0:
                p_trans[t_, b_, :] = transition_counts[t_, b_, :] / denom
            else:
                p_trans[t_, b_, b_] = 1.0  # default identity

    # Probability of sick in (t,b)
    p_sick = np.zeros((T, n_buckets), dtype=float)
    for t_ in range(T):
        for b_ in range(n_buckets):
            denom = bucket_counts[t_, b_]
            if denom > 0:
                p_sick[t_, b_] = sick_counts[t_, b_] / denom
            else:
                p_sick[t_, b_] = 0.0
    return p_trans, p_sick

def train_data_driven_dp_unconstrained(p_trans, p_sick,
                                       FP=10, FN=50, D=1, gamma=0.99, T=20):
    """
    DP for unconstrained problem.
    """
    n_buckets = p_sick.shape[1]
    V = np.zeros((T+1, n_buckets))
    pi_ = np.zeros((T, n_buckets), dtype=int)

    # boundary at t=T
    for b in range(n_buckets):
        cost_treat  = p_sick[T-1,b]*(D*(T-1)) + (1-p_sick[T-1,b])*FP
        cost_notreat= p_sick[T-1,b]*FN
        V[T,b] = min(cost_treat, cost_notreat)

    for t in reversed(range(T)):
        for b in range(n_buckets):
            cost_treat = p_sick[t,b]*(D*t) + (1-p_sick[t,b])*FP
            # cost_wait
            if t == T-1:
                exp_future = V[T,b]
            else:
                exp_future = 0.0
                for b_next in range(n_buckets):
                    exp_future += p_trans[t,b,b_next]*V[t+1,b_next]
            cost_wait = gamma * exp_future

            if cost_treat <= cost_wait:
                V[t,b] = cost_treat
                pi_[t,b] = 1
            else:
                V[t,b] = cost_wait
                pi_[t,b] = 0
    return V, pi_

def make_data_driven_dp_policy_unconstrained(V, pi_, T=20):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = int(row['time'])
            b = int(row['risk_bucket'])
            if t < T:
                if pi_[t,b] == 1:
                    return t
        return None
    return policy_func


###############################################################################
# 5. ALGORITHM 1: SEMI CROSS-VALIDATION
###############################################################################
def semi_crossval_unconstrained(df_all, n_folds=5, seed=0):
    r"""
    This implements the "Algorithm 1" idea for the unconstrained scenario.

    Steps:
     1) Partition df_all into n_folds folds: G1..G_{n_folds}.
     2) Let G_{n_folds} = final test set.
     3) For each j in [0..(n_folds-2)] (the "CV folds"):
         - Validation set = G_j
         - Training set = union of G_k for k != j
           => pick best hyperparams for benchmark (like constant threshold, etc.)
           => pick best hyperparams for DP
       We store the best hyperparams from each fold j.
     4) Combine or choose a final hyperparam set from these folds.
     5) Evaluate on the final fold G_{n_folds}.
    """
    # 1) Create folds
    folds = make_folds(df_all, n_folds=n_folds, seed=seed)
    # folds[0], folds[1], ..., folds[n_folds-1]

    # We'll treat folds[n_folds-1] as G_{n_folds} (the final test).
    test_fold_pid = folds[-1]

    # The first (n_folds-1) folds are used for the cross-validation.
    cv_folds = folds[:-1]

    # We'll collect best hyperparams for each fold j in [0..n_folds-2].
    best_thr_const_list = []
    best_dyn_vec_list   = []
    best_linAB_list     = []
    best_thr_wait_list  = []


    for j in range(len(cv_folds)):
        # Validation fold = j
        val_pid = cv_folds[j]
        # Training = union of all other folds except j
        train_pid = set()
        for k in range(len(cv_folds)):
            if k != j:
                train_pid = train_pid.union(cv_folds[k])

        df_train = filter_by_group(df_all, train_pid)
        df_val   = filter_by_group(df_all, val_pid)

        # 1) Constant threshold
        thr_c, _ = constant_threshold_search(df_train)

        best_thr_const_list.append(thr_c)

        # 2) Dynamic threshold
        thr_vec, _ = dynamic_threshold_random_search(
            df_train,
            time_steps=T_MAX,
            threshold_candidates=[0,0.2,0.4,0.6,0.8,1.0],
            n_samples=200,
            seed=j
        )
        best_dyn_vec_list.append(thr_vec)

        # 3) Linear threshold
        (A,B), _ = linear_threshold_search(df_train)
        best_linAB_list.append((A,B))

        # 4) Wait till end
        thr_wte, _ = wait_till_end_search(df_train)
        best_thr_wait_list.append(thr_wte)

        # DP - if we had DP hyperparam search, we would do it here.

    # Now we have "best" hyperparams from each fold.
    # For simplicity, let's just pick some aggregator: average or median, etc.

    # constant threshold average:
    thr_const_final = np.mean(best_thr_const_list)

    # dynamic threshold => pick the "middle" fold's threshold
    mid_idx = len(best_dyn_vec_list)//2
    thr_dyn_final = best_dyn_vec_list[mid_idx]

    # linear threshold => average (A, B)
    A_ave = np.mean([ab[0] for ab in best_linAB_list])
    B_ave = np.mean([ab[1] for ab in best_linAB_list])

    # wait => average
    thr_wait_final = np.mean(best_thr_wait_list)

    # Next, build the data-driven DP with all CV folds combined (except test).
    train_pid_all = set()
    for j in range(len(cv_folds)):
        train_pid_all = train_pid_all.union(cv_folds[j])
    df_train_cv = filter_by_group(df_all, train_pid_all)

    p_trans, p_sick = estimate_transition_and_sick_probs(
        df_train_cv, T=T_MAX, n_buckets=5
    )
    V, pi_ = train_data_driven_dp_unconstrained(
        p_trans, p_sick, FP=FP_COST, FN=FN_COST, D=D_COST, gamma=GAMMA, T=T_MAX
    )
    dp_policy_func = make_data_driven_dp_policy_unconstrained(V, pi_, T=T_MAX)

    # Now evaluate everything on final test fold folds[-1].
    df_test = filter_by_group(df_all, test_fold_pid)

    # Build final policies
    const_policy = make_constant_threshold_policy(thr_const_final)
    dyn_policy   = make_dynamic_threshold_policy(thr_dyn_final)
    lin_policy   = make_linear_threshold_policy(A_ave, B_ave)
    wte_policy   = make_wait_till_end_policy(thr_wait_final)

    stats_const_test = simulate_policy(df_test, const_policy)
    stats_dyn_test   = simulate_policy(df_test, dyn_policy)
    stats_lin_test   = simulate_policy(df_test, lin_policy)
    stats_wte_test   = simulate_policy(df_test, wte_policy)
    stats_dp_test    = simulate_policy(df_test, dp_policy_func)

    # Summarize
    test_stats_results = { # Changed to return a dictionary for easier processing later
        'Constant Threshold': stats_const_test,
        'Dynamic Threshold-R': stats_dyn_test,
        'Linear Threshold': stats_lin_test,
        'Wait Till End': stats_wte_test,
        'Dynamic Threshold-DP': stats_dp_test
    }
    return test_stats_results # Return the dictionary


###############################################################################
# 6. MAIN: read CSV, run Algorithm 1 and repeat 30 times
###############################################################################
def main():
    df_all = pd.read_csv("synthetic_patients_with_features.csv")
    df_all = df_all[df_all['time'] < T_MAX].copy()

    num_replications = 30
    all_replication_results = []

    for rep_idx in range(num_replications):
        print(f"Running Replication {rep_idx+1}/{num_replications}...")
        replication_stats = semi_crossval_unconstrained(df_all, n_folds=5, seed=rep_idx) # Run Algorithm 1 with different seed
        all_replication_results.append(replication_stats) # Store results of each replication

    # Process and print final results (mean and std dev)
    method_names = list(all_replication_results[0].keys())
    final_summary_data = []

    for method_name in method_names:
        precision_values = [res[method_name]['precision'] for res in all_replication_results]
        cost_values      = [res[method_name]['cost']      for res in all_replication_results]
        recall_values    = [res[method_name]['recall']    for res in all_replication_results]
        tt_values      = [res[method_name]['avg_treatment_time'] for res in all_replication_results]

        final_summary_data.append({
            'Method': method_name,
            'Precision (%)': f"{np.mean(precision_values)*100:.2f} ± {np.std(precision_values)*100:.2f}",
            'Cost':          f"{np.mean(cost_values):.2f} ± {np.std(cost_values):.2f}",
            'Recall (%)':    f"{np.mean(recall_values)*100:.2f} ± {np.std(recall_values):.2f}",
            'Treatment Time':f"{np.mean(tt_values):.2f} ± {np.std(tt_values):.2f}",
        })

    result_table_final = pd.DataFrame(final_summary_data)

    print("\n=== Algorithm 1 (Semi Cross-Validation) Results (Mean ± Std Dev over 30 Replications) ===")
    print(result_table_final)


if __name__ == "__main__":
    main()

Running Replication 1/30...
Running Replication 2/30...
Running Replication 3/30...
Running Replication 4/30...
Running Replication 5/30...
Running Replication 6/30...
Running Replication 7/30...
Running Replication 8/30...
Running Replication 9/30...
Running Replication 10/30...
Running Replication 11/30...
Running Replication 12/30...
Running Replication 13/30...
Running Replication 14/30...
Running Replication 15/30...
Running Replication 16/30...
Running Replication 17/30...
Running Replication 18/30...
Running Replication 19/30...
Running Replication 20/30...
Running Replication 21/30...
Running Replication 22/30...
Running Replication 23/30...
Running Replication 24/30...
Running Replication 25/30...
Running Replication 26/30...
Running Replication 27/30...
Running Replication 28/30...
Running Replication 29/30...
Running Replication 30/30...

=== Algorithm 1 (Semi Cross-Validation) Results (Mean ± Std Dev over 30 Replications) ===
                 Method  Precision (%)          

In [19]:
"""
SEMI CROSS-VALIDATION (ALGORITHM 1) FOR UNCONSTRAINED HEMORRHAGE DIAGNOSIS & TREATMENT

Requirements:
  pip install numpy pandas scikit-learn catboost

Data assumptions:
  - CSV file has columns:
      patient_id, time, risk_bucket, risk_score, EIT, NIRS, EIS, label
    where:
      - 'patient_id' identifies each synthetic patient
      - 'time' is an integer time step (0..T_max)
      - 'risk_bucket' is an integer bucket (0..4 or 1..5)
      - 'EIT', 'NIRS', 'EIS' are numeric features
      - 'label' is 0=healthy, 1=sick
"""

import numpy as np
import pandas as pd
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

# Sklearn models, metrics, etc.
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import ParameterGrid
# CatBoost
from catboost import CatBoostClassifier

###############################################################################
# 1. GLOBAL PARAMETERS
###############################################################################
FP_COST = 10
FN_COST = 50
D_COST  = 1
GAMMA   = 0.99
T_MAX   = 21   # maximum discrete time steps (0..T_MAX-1)

# For demonstration, we'll search a small set of hyperparameters for each ML model.
# You can expand these as needed.
RF_PARAM_GRID = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5]
}
GB_PARAM_GRID = {
    'n_estimators': [50, 100],
    'learning_rate': [0.05, 0.1],
    'max_depth': [3, 5]
}
CATBOOST_PARAM_GRID = {
    'iterations': [50, 100],
    'learning_rate': [0.05, 0.1],
    'depth': [3, 5]
}

###############################################################################
# 2. HELPER FUNCTIONS: splitting, ML training, DP, etc.
###############################################################################
def make_folds(df, n_folds=5, seed=0):
    """
    Semi Cross-Validation approach:
      - We'll produce n_folds separate sets: G1, G2, ..., G_{n_folds}.
      - We'll treat the last fold G_{n_folds} as the final holdout test.
      - The first (n_folds - 1) folds are used in the "semi cross-val" loops.
    """
    rng = np.random.RandomState(seed)
    
    # We'll shuffle patient IDs, then chunk them into n_folds groups
    unique_pts = df['patient_id'].unique()
    rng.shuffle(unique_pts)
    
    folds = []
    fold_size = int(np.ceil(len(unique_pts) / n_folds))
    
    start_idx = 0
    for k in range(n_folds):
        end_idx = min(start_idx + fold_size, len(unique_pts))
        fold_pids = unique_pts[start_idx:end_idx]
        folds.append(set(fold_pids))
        start_idx = end_idx
    
    return folds

def filter_by_group(df, pid_set):
    """Returns the subset of df whose patient_id is in pid_set."""
    return df[df['patient_id'].isin(pid_set)].copy()

def compute_auc_score(y_true, y_prob):
    """
    Safe AUC computation. If all y_true are the same class,
    AUC is not well-defined, so we'll return 0.5 by default.
    """
    if len(np.unique(y_true)) < 2:
        return 0.5
    return roc_auc_score(y_true, y_prob)

def train_and_select_best_model(X_train, y_train, X_val, y_val):
    """
    Trains multiple models (RandomForest, GradientBoosting, CatBoost)
    over small hyperparameter grids, picks the best by AUC on (X_val,y_val).
    
    Returns:
        best_model   (fitted model with best AUC)
        best_auc     (float)
        best_model_name (str, e.g. "RandomForest")
    """
    best_auc = -1.0
    best_model = None
    best_name  = None
    
    # 1) RandomForest
    for params in ParameterGrid(RF_PARAM_GRID):
        rf = RandomForestClassifier(random_state=0, **params)
        rf.fit(X_train, y_train)
        val_prob = rf.predict_proba(X_val)[:,1]
        auc_val  = compute_auc_score(y_val, val_prob)
        if auc_val > best_auc:
            best_auc = auc_val
            best_model = rf
            best_name  = f"RandomForest_{params}"
    
    # 2) GradientBoosting
    for params in ParameterGrid(GB_PARAM_GRID):
        gb = GradientBoostingClassifier(random_state=0, **params)
        gb.fit(X_train, y_train)
        val_prob = gb.predict_proba(X_val)[:,1]
        auc_val  = compute_auc_score(y_val, val_prob)
        if auc_val > best_auc:
            best_auc = auc_val
            best_model = gb
            best_name  = f"GradientBoosting_{params}"
    
    # 3) CatBoost
    for params in ParameterGrid(CATBOOST_PARAM_GRID):
        # silent mode
        cb = CatBoostClassifier(verbose=0, random_state=0, **params)
        cb.fit(X_train, y_train, eval_set=(X_val,y_val), verbose=0)
        val_prob = cb.predict_proba(X_val)[:,1]
        auc_val  = compute_auc_score(y_val, val_prob)
        if auc_val > best_auc:
            best_auc = auc_val
            best_model = cb
            best_name  = f"CatBoost_{params}"
    
    return best_model, best_auc, best_name


###############################################################################
# 3. POLICY SIMULATION (Unconstrained) 
###############################################################################
def simulate_policy(df, policy_func):
    """
    Evaluate total cost, precision, recall, avg_treatment_time under a 
    given policy_func. The policy_func is a function taking 
       policy_func(subDF_of_single_patient) -> treat_time or None
    """
    results = []
    
    for pid, patient_rows in df.groupby('patient_id'):
        patient_rows = patient_rows.sort_values('time')
        
        label = patient_rows['label'].iloc[0]  # 0 or 1
        treat_time = policy_func(patient_rows)
        
        if treat_time is None:
            # never treated
            if label == 1:
                cost = FN_COST
                tp   = 0
            else:
                cost = 0
                tp   = 0
            fp = 0
            treated_flag = 0
            tt = None
        else:
            treated_flag = 1
            if label == 1:
                cost = D_COST * treat_time  # delay cost
                tp = 1
                fp = 0
            else:
                cost = FP_COST
                tp = 0
                fp = 1
            tt = treat_time
        
        results.append({
            'patient_id': pid,
            'label': label,
            'treated': treated_flag,
            'treat_time': tt,
            'cost': cost,
            'tp': tp,
            'fp': fp
        })
    
    df_res = pd.DataFrame(results)
    total_cost = df_res['cost'].sum()
    
    treated_df = df_res[df_res['treated'] == 1]
    tp_sum = treated_df['tp'].sum()
    fp_sum = treated_df['fp'].sum()
    if len(treated_df) > 0:
        precision = tp_sum / (tp_sum + fp_sum)
    else:
        precision = 0.0
    
    sick_df = df_res[df_res['label'] == 1]
    total_sick = len(sick_df)
    if total_sick > 0:
        recall = tp_sum / total_sick
    else:
        recall = 0.0
    
    if len(treated_df) > 0:
        valid_tt = treated_df['treat_time'].dropna()
        avg_tt = valid_tt.mean() if len(valid_tt) > 0 else 0.0
    else:
        avg_tt = 0.0
    
    return {
        'cost': total_cost,
        'precision': precision,
        'recall': recall,
        'avg_treatment_time': avg_tt
    }

###############################################################################
# 4. BENCHMARK POLICIES (Threshold-based)
###############################################################################
def constant_threshold_search(df, thresholds=None):
    """Grid search over a set of constant thresholds."""
    if thresholds is None:
        thresholds = np.linspace(0, 1, 21)
    best_thr, best_cost, best_stats = None, float('inf'), None
    
    for thr in thresholds:
        def policy_func(patient_rows):
            for _, row in patient_rows.iterrows():
                if row['risk_score'] >= thr:
                    return int(row['time'])
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr = thr
            best_stats = stats
    
    return best_thr, best_stats

def make_constant_threshold_policy(thr):
    """Creates a policy that treats the patient at the first time whose risk_score >= thr."""
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            if row['risk_score'] >= thr:
                return int(row['time'])
        return None
    return policy_func

def dynamic_threshold_random_search(df,
                                    time_steps=20,
                                    threshold_candidates=[0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
                                    n_samples=200,
                                    seed=0):
    """
    Randomly sample vectors of length time_steps from threshold_candidates,
    pick the best by cost on df.
    """
    rng = np.random.RandomState(seed)
    best_vec = None
    best_cost = float('inf')
    best_stats = None
    
    for _ in range(n_samples):
        thr_vec = rng.choice(threshold_candidates, size=time_steps)
        
        def policy_func(patient_rows):
            for _, row in patient_rows.iterrows():
                t = int(row['time'])
                if t < time_steps and row['risk_score'] >= thr_vec[t]:
                    return t
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_vec = thr_vec.copy()
            best_stats = stats
    
    return best_vec, best_stats

def make_dynamic_threshold_policy(thr_vec):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = int(row['time'])
            if t < len(thr_vec):
                if row['risk_score'] >= thr_vec[t]:
                    return t
        return None
    return policy_func

def linear_threshold_search(df, A_candidates=None, B_candidates=None):
    """
    threshold(t) = clamp( A*t + B, [0,1] )
    do grid search
    """
    if A_candidates is None:
        A_candidates = np.linspace(-0.05, 0.05, 11)
    if B_candidates is None:
        B_candidates = np.linspace(0, 1, 11)
    
    best_A, best_B = None, None
    best_cost, best_stats = float('inf'), None
    
    for A in A_candidates:
        for B in B_candidates:
            def policy_func(patient_rows):
                for _, row in patient_rows.iterrows():
                    t = row['time']
                    thr = A*t + B
                    thr = max(0, min(1, thr))
                    if row['risk_score'] >= thr:
                        return int(t)
                return None
            
            stats = simulate_policy(df, policy_func)
            if stats['cost'] < best_cost:
                best_cost = stats['cost']
                best_A = A
                best_B = B
                best_stats = stats
    
    return (best_A, best_B), best_stats

def make_linear_threshold_policy(A, B):
    def policy_func(patient_rows):
        for _, row in patient_rows.iterrows():
            t = row['time']
            thr = A*t + B
            thr = max(0, min(1, thr))
            if row['risk_score'] >= thr:
                return int(t)
        return None
    return policy_func

def wait_till_end_search(df, thresholds=None):
    """
    Evaluate policy: treat only at final time if risk_score >= thr.
    """
    if thresholds is None:
        thresholds = np.linspace(0,1,21)
    best_thr, best_cost, best_stats = None, float('inf'), None
    
    for thr in thresholds:
        def policy_func(patient_rows):
            # final row:
            final_t = patient_rows['time'].max()
            final_row = patient_rows[patient_rows['time']==final_t].iloc[0]
            if final_row['risk_score'] >= thr:
                return int(final_t)
            return None
        
        stats = simulate_policy(df, policy_func)
        if stats['cost'] < best_cost:
            best_cost = stats['cost']
            best_thr = thr
            best_stats = stats
    
    return best_thr, best_stats

def make_wait_till_end_policy(thr):
    def policy_func(patient_rows):
        final_t = patient_rows['time'].max()
        final_row = patient_rows[patient_rows['time']==final_t].iloc[0]
        if final_row['risk_score'] >= thr:
            return int(final_t)
        return None
    return policy_func

###############################################################################
# 5. DATA-DRIVEN DP (Unconstrained)
###############################################################################
def estimate_transition_and_sick_probs(df_train, T=20, n_buckets=5):
    """
    We'll estimate:
      p_trans[t, b, b_next]: Probability that a patient in risk-bucket b at time t
                             transitions to bucket b_next at time t+1
      p_sick[t, b]: Probability that a patient is sick given that they are in bucket b at time t.
    This is a naive aggregator (assuming Markov wrt bucket).
    """
    transition_counts = np.zeros((T-1, n_buckets, n_buckets), dtype=float)
    bucket_counts     = np.zeros((T, n_buckets), dtype=float)
    sick_counts       = np.zeros((T, n_buckets), dtype=float)

    df_sorted = df_train.sort_values(['patient_id','time'])
    
    for pid, grp in df_sorted.groupby('patient_id'):
        grp = grp.sort_values('time')
        rows = grp.to_dict('records')
        
        for i in range(len(rows)):
            t  = int(rows[i]['time'])
            b  = int(rows[i]['risk_bucket'])
            lb = int(rows[i]['label'])  # 0 or 1
            if t < T:
                bucket_counts[t, b] += 1
                sick_counts[t, b]   += lb
            
            if i < len(rows) - 1:
                # consider the next row if it's exactly t+1
                t_next = int(rows[i+1]['time'])
                b_next = int(rows[i+1]['risk_bucket'])
                if (t_next == t+1) and (t < T-1):
                    transition_counts[t, b, b_next] += 1.0

    # p_trans
    n_buckets = bucket_counts.shape[1]
    p_trans = np.zeros((T-1, n_buckets, n_buckets), dtype=float)
    for t_ in range(T-1):
        for b_ in range(n_buckets):
            denom = transition_counts[t_, b_, :].sum()
            if denom > 0:
                p_trans[t_, b_, :] = transition_counts[t_, b_, :] / denom
            else:
                # if no data, fallback to identity
                p_trans[t_, b_, b_] = 1.0
    
    # p_sick
    p_sick = np.zeros((T, n_buckets), dtype=float)
    for t_ in range(T):
        for b_ in range(n_buckets):
            denom = bucket_counts[t_, b_]
            if denom > 0:
                p_sick[t_, b_] = sick_counts[t_, b_] / denom
            else:
                p_sick[t_, b_] = 0.0
    
    return p_trans, p_sick

def train_data_driven_dp_unconstrained(p_trans, p_sick, 
                                       FP=10, FN=50, D=1, gamma=0.99, T=20):
    """
    We define states as (t, bucket), and actions: 0=wait, 1=treat now.
    We'll do a simple backward recursion:
       V[t,b] = min( cost_of_treat_now, cost_of_wait )
    cost_of_treat_now = p_sick[t,b]* (D*t) + (1-p_sick[t,b])* FP
    cost_of_wait      = gamma * E_{b_next}[ V[t+1, b_next] ]
    At t=T, we define cost if not treated:
       => p_sick[T-1,b]*FN  vs. cost_of_treat_now at T-1
    We'll store the policy in pi_[t,b].
    """
    n_buckets = p_sick.shape[1]
    # Note: We'll define V[t,b] for t in [0..T], b in [0..n_buckets-1].
    # But we actually only have transitions up to T-1 in p_trans.
    V = np.zeros((T+1, n_buckets))
    pi_ = np.zeros((T, n_buckets), dtype=int)
    
    # boundary at t=T: if we haven't treated yet, the cost is:
    # min( treat at T, not treat at all ).
    # But let's define it simply as "if not treat => FN" or "if treat => cost_treatNow".
    for b in range(n_buckets):
        # "treat now at time T" => D*T?? but actually t goes up to T-1. 
        # We'll define an effective "t = T" as if it's the final step.
        # so cost_treat = p_sick[T-1,b]*(D*(T-1)) + (1-p_sick[T-1,b])*FP
        # cost_notreat  = p_sick[T-1,b]*FN
        # We'll just do that here:
        cost_treat  = p_sick[T-1,b]*(D*(T-1)) + (1 - p_sick[T-1,b])*FP
        cost_notreat= p_sick[T-1,b]*FN
        V[T,b] = min(cost_treat, cost_notreat)
    
    # now go backward:
    for t in reversed(range(T)):
        for b in range(n_buckets):
            # cost if treat now
            cost_treat = p_sick[t,b]*(D*t) + (1 - p_sick[t,b])*FP
            
            # cost if wait
            if t == T-1:
                # if wait at T-1, next is T => no transitions => V[T,b]
                cost_wait = gamma * V[T,b]
            else:
                # compute expected cost from next state
                exp_future = 0.0
                for b_next in range(n_buckets):
                    exp_future += p_trans[t,b,b_next]*V[t+1,b_next]
                cost_wait = gamma * exp_future
            
            if cost_treat <= cost_wait:
                V[t,b] = cost_treat
                pi_[t,b] = 1
            else:
                V[t,b] = cost_wait
                pi_[t,b] = 0
    
    return V, pi_

def make_data_driven_dp_policy_unconstrained(V, pi_, T=20):
    """
    Creates a function that iterates over time steps of a patient.
    As soon as DP says "treat" at (t,b), we do so and stop.
    """
    def policy_func(patient_rows):
        # naive approach: read each row in chronological order
        for _, row in patient_rows.iterrows():
            t = int(row['time'])
            b = int(row['risk_bucket'])
            if t < T:
                if pi_[t,b] == 1:
                    return t
        # if we never treat => None
        return None
    return policy_func

###############################################################################
# 6. ALGORITHM 1: SEMI CROSS-VALIDATION (Unconstrained)
###############################################################################
def semi_crossval_unconstrained(df_all, n_folds=5, seed=0):
    """
    Implements the "semi cross-validation" approach for ML + DP 
    in the unconstrained scenario (Algorithm 1).
    
    Steps (schematic):
      1) Create n_folds. 
         Let G_{n_folds} be final holdout. G_1..G_{n_folds-1} for "semi-CV".
      2) For j in [1..(n_folds-1)]:
          - Validation fold = G_j
          - Training fold = union of G_k for k != j
          - Among that "training fold," we do an (n_folds-2)-fold approach 
            to select best ML hyperparams (AUC).
            (In a simpler "semi" approach, we might skip an inner fold and just train ML on train set.)
          - Evaluate DP hyperparams on G_j, store best result.
      3) Aggregate or pick final hyperparams from these folds.
      4) Retrain ML + DP on union G_1..G_{n_folds-1}, evaluate on G_{n_folds}.

    For brevity, we do a simpler version:
      - For each j in 0..(n_folds-2):
         * Train ML on (all except G_j),
         * Evaluate best threshold or best DP on G_j
      - Then average or pick the median. 
      - Finally, evaluate on G_{n_folds-1} as holdout.
    """
    folds = make_folds(df_all, n_folds=n_folds, seed=seed)
    # final test:
    test_fold_pid = folds[-1]
    # the first n_folds-1 are the "CV folds"
    cv_folds = folds[:-1]
    
    # We'll store each fold's best ML model among {RF,GB,CatBoost}
    # Then we'll store the best threshold or DP approach.
    # In a "true" Algorithm 1, you'd do an "inner loop" for each fold as well, 
    # but we’ll keep it simpler for demonstration.

    # Lists to store benchmark hyperparams found in each fold j
    best_thr_const_list = []
    best_dyn_vec_list   = []
    best_linAB_list     = []
    best_thr_wait_list  = []
    best_dp_policies    = []
    
    for j, val_pid in enumerate(cv_folds):
        # Validation fold j => G_j
        df_val   = filter_by_group(df_all, val_pid)
        
        # Training = union of all other folds except j
        train_pid = set()
        for k, fold_pids in enumerate(cv_folds):
            if k != j:
                train_pid = train_pid.union(fold_pids)
        df_train = filter_by_group(df_all, train_pid)
        
        # ============== (A) Train ML model on df_train => pick best by AUC on the same df_train ==============
        # (In a real approach, you'd do a smaller sub-fold split or separate val for ML only, 
        #  but here we do "semi" for brevity.)
        X_train = df_train[['EIT','NIRS','EIS']].values
        y_train = df_train['label'].values
        
        # Just do a train/val = we can do a small split inside df_train, or 
        # let's do the entire df_train for training and the same df_train for ML selection 
        # (not ideal, but simpler).
        X_val = df_train[['EIT','NIRS','EIS']].values
        y_val = df_train['label'].values
        
        ml_model, best_auc, best_mname = train_and_select_best_model(X_train, y_train, X_val, y_val)
        
        # Now, we apply this trained model to produce a risk_score for ALL ROWS in df_train+df_val
        # So we can do threshold tuning, DP, etc.
        # We'll store them back in the main df so we can do the policy searches.
        # But be careful not to pollute folds with each other => for demonstration, it's simpler 
        # to do it just for df_val "on the fly" for cost evaluation.

        # (B) Evaluate on VAL fold => get risk scores
        X_val_fold = df_val[['EIT','NIRS','EIS']].values
        val_probs  = ml_model.predict_proba(X_val_fold)[:,1]
        df_val.loc[:,'risk_score'] = val_probs  # set the model-based risk

        # We do the same for df_train because we need to estimate Markov transitions for the DP
        X_train_fold = df_train[['EIT','NIRS','EIS']].values
        train_probs  = ml_model.predict_proba(X_train_fold)[:,1]
        df_train.loc[:,'risk_score'] = train_probs

        # Also discretize into risk buckets again, e.g. 5 equally sized:
        # We'll do a simple approach:  (0,0.2)->0, [0.2,0.4)->1, ...
        def to_bucket(p):
            return min(int(p*5), 4)
        df_train.loc[:,'risk_bucket'] = df_train['risk_score'].apply(to_bucket)
        df_val.loc[:,'risk_bucket']   = df_val['risk_score'].apply(to_bucket)

        # ============= (C) Benchmark Policies on VAL fold =============
        #  (C.1) Constant threshold
        thr_c, _ = constant_threshold_search(df_train)  # or do it on df_train
        best_thr_const_list.append(thr_c)
        
        #  (C.2) Dynamic threshold
        thr_vec, _ = dynamic_threshold_random_search(df_train, 
                                                     time_steps=T_MAX,
                                                     threshold_candidates=[0,0.2,0.4,0.6,0.8,1.0],
                                                     n_samples=200,
                                                     seed=j)
        best_dyn_vec_list.append(thr_vec)
        
        #  (C.3) Linear threshold
        (A,B), _ = linear_threshold_search(df_train)
        best_linAB_list.append((A,B))
        
        #  (C.4) Wait till end
        thr_wte, _ = wait_till_end_search(df_train)
        best_thr_wait_list.append(thr_wte)
        
        # ============= (D) DP Approach =============
        # We'll fit the Markov chain from df_train => p_trans, p_sick
        p_trans, p_sick = estimate_transition_and_sick_probs(df_train, T=T_MAX, n_buckets=5)
        V, pi_ = train_data_driven_dp_unconstrained(p_trans, p_sick, 
                                                    FP=FP_COST, FN=FN_COST, 
                                                    D=D_COST, gamma=GAMMA, T=T_MAX)
        best_dp_policies.append((V, pi_))
        
        # (In a full search for DP hyperparams, you might loop over gamma in {0.95,0.99}, 
        #  or vary cost ratios, etc. For brevity, we only use the above.)
        
        # End of fold j

    # ----- (E) Combine or pick final hyperparams from these folds ------
    # For demonstration, let's pick the average or the median from the sets we found:

    thr_const_final = np.mean(best_thr_const_list)
    
    mid_idx = len(best_dyn_vec_list)//2
    thr_dyn_final = best_dyn_vec_list[mid_idx]  # pick the "middle" one
    
    A_ave = np.mean([ab[0] for ab in best_linAB_list])
    B_ave = np.mean([ab[1] for ab in best_linAB_list])
    
    thr_wait_final = np.mean(best_thr_wait_list)
    
    # For DP, let's pick the last fold's (V, pi_). 
    # Or we could store them all and pick the one with minimal val cost. 
    # We'll just pick the last for demonstration:
    V_final, pi_final = best_dp_policies[-1]
    
    # ========== (F) Retrain ML model on all CV folds except test fold => final model ==========
    train_pid_all = set()
    for fold_pid in cv_folds:
        train_pid_all = train_pid_all.union(fold_pid)
    df_train_cv = filter_by_group(df_all, train_pid_all)
    
    X_train_cv = df_train_cv[['EIT','NIRS','EIS']].values
    y_train_cv = df_train_cv['label'].values
    
    # We'll do the same "train_and_select_best_model" approach 
    # but we have no separate val set, so we'll just reuse X_train_cv for selection:
    final_model, _, _ = train_and_select_best_model(X_train_cv, y_train_cv,
                                                    X_train_cv, y_train_cv)
    
    # We'll produce final risk scores for test set G_{n_folds}.
    df_test = filter_by_group(df_all, test_fold_pid).copy()
    
    X_test  = df_test[['EIT','NIRS','EIS']].values
    test_probs = final_model.predict_proba(X_test)[:,1]
    df_test.loc[:,'risk_score'] = test_probs
    
    # Re-bucket for DP or threshold logic
    def to_bucket(p):
        return min(int(p*5), 4)
    df_test.loc[:,'risk_bucket'] = df_test['risk_score'].apply(to_bucket)
    
    # (F.1) Build final policies from the chosen final hyperparams:
    const_policy = make_constant_threshold_policy(thr_const_final)
    dyn_policy   = make_dynamic_threshold_policy(thr_dyn_final)
    lin_policy   = make_linear_threshold_policy(A_ave, B_ave)
    wte_policy   = make_wait_till_end_policy(thr_wait_final)
    dp_policy    = make_data_driven_dp_policy_unconstrained(V_final, pi_final, T=T_MAX)
    
    # (F.2) Evaluate on test set
    stats_const = simulate_policy(df_test, const_policy)
    stats_dyn   = simulate_policy(df_test, dyn_policy)
    stats_lin   = simulate_policy(df_test, lin_policy)
    stats_wte   = simulate_policy(df_test, wte_policy)
    stats_dp    = simulate_policy(df_test, dp_policy)
    
    table = pd.DataFrame({
        'Method': [
            'Constant Threshold',
            'Dynamic Threshold-R',
            'Linear Threshold',
            'Wait Till End',
            'Dynamic Threshold-DP'
        ],
        'Precision (%)': [
            100*stats_const['precision'],
            100*stats_dyn['precision'],
            100*stats_lin['precision'],
            100*stats_wte['precision'],
            100*stats_dp['precision']
        ],
        'Cost': [
            stats_const['cost'],
            stats_dyn['cost'],
            stats_lin['cost'],
            stats_wte['cost'],
            stats_dp['cost']
        ],
        'Recall (%)': [
            100*stats_const['recall'],
            100*stats_dyn['recall'],
            100*stats_lin['recall'],
            100*stats_wte['recall'],
            100*stats_dp['recall']
        ],
        'Treatment Time': [
            stats_const['avg_treatment_time'],
            stats_dyn['avg_treatment_time'],
            stats_lin['avg_treatment_time'],
            stats_wte['avg_treatment_time'],
            stats_dp['avg_treatment_time']
        ]
    })
    
    return table

###############################################################################
# 7. MAIN
###############################################################################
def main():
    # Load your CSV file (should have columns patient_id, time, risk_bucket, risk_score, EIT, NIRS, EIS, label)
    df_all = pd.read_csv("synthetic_patients_with_features.csv")
    # Possibly filter to time < T_MAX if your dataset has more time steps
    df_all = df_all[df_all['time'] < T_MAX].copy()
    
    # Just ensure columns exist:
    required_cols = {'patient_id','time','risk_bucket','risk_score','EIT','NIRS','EIS','label'}
    missing = required_cols - set(df_all.columns)
    if missing:
        raise ValueError(f"Your CSV is missing columns: {missing}")
    
    n_folds = 5
    seed = 42
    
    result_table = semi_crossval_unconstrained(df_all, n_folds=n_folds, seed=seed)
    
    print("\n=== Algorithm 1 (Semi Cross-Validation) Results (Unconstrained) ===")
    print(result_table)

if __name__ == "__main__":
    main()


=== Algorithm 1 (Semi Cross-Validation) Results (Unconstrained) ===
                 Method  Precision (%)  Cost  Recall (%)  Treatment Time
0    Constant Threshold      52.500000   282  100.000000        2.350000
1   Dynamic Threshold-R      53.846154   278  100.000000        5.846154
2      Linear Threshold      75.000000   174  100.000000        4.428571
3         Wait Till End     100.000000   450   95.238095       20.000000
4  Dynamic Threshold-DP      80.769231   149  100.000000        4.384615
