In [1]:
"""
CAPACITATED VALIDATION (ALGORITHM 0) FOR HEMORRHAGE DIAGNOSIS & TREATMENT
with an ADP approach that does epsilon-greedy exploration.

Requirements:
  pip install numpy pandas scikit-learn catboost
"""

import numpy as np
import pandas as pd
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

# Sklearn models, metrics, etc.
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import ParameterGrid
# CatBoost
from catboost import CatBoostClassifier

###############################################################################
# 1. GLOBAL PARAMETERS
###############################################################################
# Cost model used by the simulator and by the ADP reward.
FP_COST, FN_COST, D_COST = 10, 50, 1

# Number of discrete time steps; valid steps are 0 .. T_MAX-1.
T_MAX = 21

# Fraction of the distinct sick patients that may be treated in total
# (0.5 => at most 50% of sick patients).
CAPACITY_FACTOR = 0.5

# Deliberately small hyperparameter grids, one per model family.
RF_PARAM_GRID = dict(
    n_estimators=[50, 100],
    max_depth=[3, 5],
)
GB_PARAM_GRID = dict(
    n_estimators=[50, 100],
    learning_rate=[0.05, 0.1],
    max_depth=[3, 5],
)
CATBOOST_PARAM_GRID = dict(
    iterations=[50, 100],
    learning_rate=[0.05, 0.1],
    depth=[3, 5],
)

###############################################################################
# 2. HELPER FUNCTIONS
###############################################################################
def split_into_four_groups(df, seed=0):
    """
    Partition the rows of `df` into four patient-disjoint groups (G1..G4).

    The distinct values of df['patient_id'] are shuffled with a
    RandomState seeded by `seed` and cut at 25% / 50% / 75% of the patient
    count (cut points are truncated to int, so the last group absorbs any
    remainder). Every row of a patient lands in that patient's group.

    Returns a 4-tuple of independent DataFrame copies (G1, G2, G3, G4).
    """
    shuffled_pids = df['patient_id'].unique()
    np.random.RandomState(seed).shuffle(shuffled_pids)

    total = len(shuffled_pids)
    cut_points = [int(frac * total) for frac in (0.25, 0.50, 0.75)]

    # np.split at three cut points yields exactly four consecutive chunks.
    pid_chunks = np.split(shuffled_pids, cut_points)
    return tuple(
        df[df['patient_id'].isin(chunk)].copy()
        for chunk in pid_chunks
    )

def compute_auc_score(y_true, y_prob):
    """
    Return the ROC AUC of `y_prob` against `y_true`.

    When `y_true` holds fewer than two distinct classes the AUC is
    undefined, so the neutral value 0.5 is returned instead.
    """
    has_both_classes = np.unique(y_true).size >= 2
    return roc_auc_score(y_true, y_prob) if has_both_classes else 0.5

def train_and_select_best_model(X_train, y_train, X_val, y_val):
    """
    Grid-search three model families and keep the one with the best
    validation AUC.

    Families are tried in the order RandomForest, GradientBoosting,
    CatBoost, each over its module-level parameter grid. Every candidate
    is fitted on (X_train, y_train) and scored on (X_val, y_val) with
    compute_auc_score. A later candidate replaces the incumbent only when
    its AUC is strictly higher, so ties go to the earliest candidate.

    Returns: (best_model, best_auc, best_model_name)
    """
    # (display name, estimator class, parameter grid, fixed constructor kwargs)
    model_families = (
        ("RandomForest", RandomForestClassifier, RF_PARAM_GRID,
         {'random_state': 0}),
        ("GradientBoosting", GradientBoostingClassifier, GB_PARAM_GRID,
         {'random_state': 0}),
        ("CatBoost", CatBoostClassifier, CATBOOST_PARAM_GRID,
         {'verbose': 0, 'random_state': 0}),
    )

    winner = (None, -1.0, None)
    for family, estimator_cls, grid, fixed_kwargs in model_families:
        for params in ParameterGrid(grid):
            candidate = estimator_cls(**fixed_kwargs, **params)
            candidate.fit(X_train, y_train)
            positive_prob = candidate.predict_proba(X_val)[:, 1]
            score = compute_auc_score(y_val, positive_prob)
            if score > winner[1]:
                winner = (candidate, score, f"{family}_{params}")

    return winner

def to_bucket(prob):
    """Map a probability onto one of five integer buckets, 0 through 4.

    The bucket is int(prob * 5), capped at 4 so that prob == 1.0 falls in
    the top bucket.
    """
    bucket = int(prob * 5)
    return bucket if bucket < 4 else 4

###############################################################################
# 3. CAPACITATED SIMULATION
###############################################################################
def simulate_capacitated_policy(df, policy_func, capacity_factor=0.5):
    """
    Apply a treatment policy to a cohort and score the outcome.

    :param df: DataFrame with columns [patient_id, time, risk_score, label].
               A patient's label is read from their earliest row (by time).
    :param policy_func: callable `policy_func(df) -> {pid: treatment_time}`;
                        patients absent from the dict are never treated.
    :param capacity_factor: accepted for signature symmetry with the policy
                            factories; not used here (the policy itself is
                            responsible for enforcing capacity).
    :return: dict with keys 'cost', 'precision', 'recall',
             'avg_treatment_time'.

    Per-patient cost:
       - treated,   label=1 => D_COST * treat_time
       - treated,   label=0 => FP_COST
       - untreated, label=1 => FN_COST
       - untreated, label=0 => 0

    precision = treated sick / treated, recall = treated sick / all sick,
    each reported as 0.0 when its denominator is empty. An empty cohort
    yields all-zero metrics instead of raising.
    """
    treatment_dict = policy_func(df)

    results = []
    for pid, grp in df.groupby('patient_id'):
        label = grp.sort_values('time')['label'].iloc[0]
        is_sick = label == 1
        treated = pid in treatment_dict
        ttime = treatment_dict[pid] if treated else None

        if treated:
            cost = D_COST * ttime if is_sick else FP_COST
        else:
            cost = FN_COST if is_sick else 0

        results.append({
            'patient_id': pid,
            'label': label,
            'treated': int(treated),
            'treat_time': ttime,
            'cost': cost,
            'tp': int(treated and is_sick),
            'fp': int(treated and not is_sick),
        })

    if not results:
        # No patients at all: a DataFrame built from an empty list has no
        # columns, so the column lookups below would raise KeyError.
        return {
            'cost': 0,
            'precision': 0.0,
            'recall': 0.0,
            'avg_treatment_time': 0.0
        }

    df_res = pd.DataFrame(results)
    total_cost = df_res['cost'].sum()

    treated_df = df_res[df_res['treated'] == 1]
    tp_sum = treated_df['tp'].sum()
    fp_sum = treated_df['fp'].sum()

    # Every treated patient is either a TP or an FP, so the denominator is
    # non-zero whenever anyone was treated.
    precision = tp_sum / (tp_sum + fp_sum) if len(treated_df) > 0 else 0.0

    sick_df = df_res[df_res['label'] == 1]
    recall = tp_sum / len(sick_df) if len(sick_df) > 0 else 0.0

    valid_tt = treated_df['treat_time'].dropna()
    avg_tt = valid_tt.mean() if len(valid_tt) else 0.0

    return {
        'cost': total_cost,
        'precision': precision,
        'recall': recall,
        'avg_treatment_time': avg_tt
    }

###############################################################################
# 4. THRESHOLD-BASED CAPACITATED POLICIES
###############################################################################
def make_capacitated_constant_threshold_policy(thr, capacity_factor=0.5):
    """
    Build a policy that treats patients whose risk_score reaches a fixed
    threshold, subject to a total treatment budget.

    :param thr: risk_score threshold; a patient is eligible at a time step
                when their risk_score at that step is >= thr.
    :param capacity_factor: the budget is int(capacity_factor * number of
                            distinct sick patients in the cohort).
    :return: policy_func(df) -> {patient_id: treatment_time}

    Time steps 0 .. T_MAX-1 are visited in order. At each step the eligible
    patients who have NOT already been treated are ranked by risk_score
    (highest first) and treated until the budget runs out. Each patient is
    treated at most once and keeps their earliest treatment time.
    """
    def policy_func(df):
        # Budget is a fraction of the number of distinct sick patients.
        n_sick_patients = df.groupby('patient_id')['label'].max().sum()
        remaining_capacity = int(capacity_factor * n_sick_patients)

        treatment_dict = {}
        for t in range(T_MAX):
            if remaining_capacity <= 0:
                break

            patients_at_t = df[df['time'] == t]
            # Skip anyone already treated: otherwise a persistently
            # high-risk patient would be re-treated at every step, burning
            # capacity and overwriting their treatment time.
            eligible = patients_at_t[
                (patients_at_t['risk_score'] >= thr)
                & ~patients_at_t['patient_id'].isin(list(treatment_dict))
            ]
            if eligible.empty:
                continue

            chosen = (
                eligible.sort_values('risk_score', ascending=False)
                .drop_duplicates('patient_id')  # one budget unit per patient
                .head(remaining_capacity)
            )
            for pid in chosen['patient_id']:
                treatment_dict[pid] = t

            remaining_capacity -= len(chosen)

        return treatment_dict
    return policy_func

def make_capacitated_linear_threshold_policy(A, B, capacity_factor=0.5):
    """
    Build a policy whose risk threshold changes linearly with time,
    subject to a total treatment budget.

    :param A: slope of the threshold per time step.
    :param B: threshold at t = 0.
    :param capacity_factor: the budget is int(capacity_factor * number of
                            distinct sick patients in the cohort).
    :return: policy_func(df) -> {patient_id: treatment_time}

    At step t the threshold is A * t + B clamped to [0, 1]. Time steps
    0 .. T_MAX-1 are visited in order; at each step the eligible patients
    who have NOT already been treated are ranked by risk_score (highest
    first) and treated until the budget runs out. Each patient is treated
    at most once and keeps their earliest treatment time.
    """
    def policy_func(df):
        n_sick_patients = df.groupby('patient_id')['label'].max().sum()
        remaining_capacity = int(capacity_factor * n_sick_patients)

        treatment_dict = {}
        for t in range(T_MAX):
            if remaining_capacity <= 0:
                break

            patients_at_t = df[df['time'] == t]
            thr = max(0, min(1, A * t + B))
            # Skip anyone already treated: otherwise a persistently
            # high-risk patient would be re-treated at every step, burning
            # capacity and overwriting their treatment time.
            eligible = patients_at_t[
                (patients_at_t['risk_score'] >= thr)
                & ~patients_at_t['patient_id'].isin(list(treatment_dict))
            ]
            if eligible.empty:
                continue

            chosen = (
                eligible.sort_values('risk_score', ascending=False)
                .drop_duplicates('patient_id')  # one budget unit per patient
                .head(remaining_capacity)
            )
            for pid in chosen['patient_id']:
                treatment_dict[pid] = t

            remaining_capacity -= len(chosen)

        return treatment_dict
    return policy_func

def make_capacitated_wait_till_end_policy(thr, capacity_factor=0.5):
    """
    Build a policy that defers every decision to the last observed time
    step.

    The returned policy_func(df) looks only at the rows whose time equals
    df['time'].max(), keeps those with risk_score >= thr, ranks them by
    risk_score (highest first) and treats as many as the budget allows.
    The budget is int(capacity_factor * number of distinct sick patients).
    All treated patients are mapped to that final time step.
    """
    def policy_func(df):
        sick_count = df.groupby('patient_id')['label'].max().sum()
        budget = int(capacity_factor * sick_count)

        last_step = df['time'].max()
        at_last_step = df.loc[df['time'] == last_step]
        flagged = at_last_step.loc[at_last_step['risk_score'] >= thr]
        if flagged.empty:
            return {}

        ranked = flagged.sort_values('risk_score', ascending=False)
        picked = ranked.iloc[:min(budget, len(ranked))]
        return {pid: last_step for pid in picked['patient_id']}
    return policy_func

###############################################################################
# 5. TRAINING AN ADP MODEL WITH EPSILON-GREEDY
###############################################################################
def _adp_state_vector(candidates, remaining_capacity, max_capacity,
                      sick_treated, n_sick):
    """Return the 7 state features: the five risk-bucket fractions among
    `candidates`, the remaining-capacity fraction and the sick-treated
    fraction (each fraction is 0 when its denominator is 0)."""
    buckets = candidates['risk_bucket']
    bucket_fracs = [(buckets == b).mean() for b in range(5)]
    cap_frac = (remaining_capacity / max_capacity) if max_capacity > 0 else 0
    sick_frac = (sick_treated / n_sick) if n_sick > 0 else 0
    return np.array(bucket_fracs + [cap_frac, sick_frac], dtype=float)


def _adp_action_matrix(state_vec, n_actions, max_capacity):
    """Return an (n_actions, 8) matrix: row a is the state features followed
    by the action feature a / max_capacity (0 when max_capacity is 0)."""
    if max_capacity > 0:
        action_frac = np.arange(n_actions, dtype=float) / max_capacity
    else:
        action_frac = np.zeros(n_actions)
    return np.column_stack([np.tile(state_vec, (n_actions, 1)), action_frac])


def train_adp_linear_epsilon_greedy(
    df_train,
    capacity_factor=0.5,
    n_episodes=1000,
    learning_rate=0.01,
    gamma=0.99,
    T=21,
    epsilon=0.1,
    seed=None
):
    """
    Q-learning with linear function approximation and epsilon-greedy exploration.

    Q(s, a) = dot(weights, features) with features =
    [bucket0_frac, ..., bucket4_frac, remain_cap_frac, sick_treated_frac,
     action_frac]. An action is "treat the `a` highest-risk untreated
    patients present at this time step". Capacity is never freed: once a
    patient is treated the budget is permanently reduced, and a treated
    patient is never offered for treatment again.

    Rewards are negative costs: FP_COST per healthy patient treated,
    D_COST * t per sick patient treated at step t, and at the final step
    FN_COST per sick patient left untreated.

    When capacity is exhausted the episode is NOT cut short: the remaining
    steps are played with the single forced action 0 so that the terminal
    false-negative penalty is still backed up into the weights.

    Time steps with no untreated candidates are skipped without an update.

    :param df_train: DataFrame with columns [patient_id, time, risk_bucket, label, risk_score].
                     Must have 0 <= time < T.
    :param capacity_factor: fraction of (distinct) sick patients we can treat in total
    :param n_episodes: number of episodes
    :param learning_rate: step size
    :param gamma: discount factor
    :param T: number of time steps (21 by default)
    :param epsilon: exploration rate
    :param seed: optional int; when given, exploration uses a private
                 RandomState(seed) so training is reproducible. When None
                 (default) the global numpy random state is used.
    :return: learned weight vector (shape = [8])
    """
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Distinct sick patients define the total treatment budget.
    label_by_pid = df_train.groupby('patient_id')['label'].max()
    n_sick_patients = label_by_pid.sum()
    max_capacity = int(capacity_factor * n_sick_patients)
    print(f"Training ADP with max capacity = {max_capacity} (distinct sick)")

    # Loop-invariant: the set of sick patients used for the terminal penalty.
    sick_pids = set(label_by_pid[label_by_pid == 1].index)

    n_features = 8
    weights = np.zeros(n_features)

    # Pre-slice by time, each slice ranked by descending risk_score so that
    # "treat the top a" is a simple head slice.
    patients_by_time = {
        tstep: df_train[df_train['time'] == tstep].sort_values(
            'risk_score', ascending=False
        )
        for tstep in range(T)
    }

    for episode in range(n_episodes):
        remaining_capacity = max_capacity
        treated_pid_set = set()
        sick_treated_so_far = 0

        for t in range(T):
            frame = patients_by_time[t]
            candidates = frame[~frame['patient_id'].isin(treated_pid_set)]
            if len(candidates) == 0:
                # Nobody to decide about at this step.
                continue

            state_vec = _adp_state_vector(
                candidates, remaining_capacity, max_capacity,
                sick_treated_so_far, n_sick_patients,
            )
            # Actions are 0 .. min(capacity, candidates); with no capacity
            # left this collapses to the single forced action 0.
            n_actions = max(0, min(remaining_capacity, len(candidates))) + 1
            feats = _adp_action_matrix(state_vec, n_actions, max_capacity)
            q_vals = feats @ weights

            # Epsilon-greedy action selection.
            if rng.rand() < epsilon:
                action = int(rng.choice(n_actions))
            else:
                action = int(np.argmax(q_vals))

            # Immediate reward and environment update.
            if action > 0:
                chosen = candidates.iloc[:action]
                n_fp = int((chosen['label'] == 0).sum())
                n_tp = int((chosen['label'] == 1).sum())
                reward = -(n_fp * FP_COST + n_tp * (D_COST * t))

                treated_pid_set.update(chosen['patient_id'].unique())
                sick_treated_so_far += n_tp
                remaining_capacity -= action
            else:
                reward = 0.0

            # TD target.
            if t < T - 1:
                nxt = patients_by_time[t + 1]
                nxt = nxt[~nxt['patient_id'].isin(treated_pid_set)]
                if len(nxt) == 0:
                    target = reward
                else:
                    next_state = _adp_state_vector(
                        nxt, remaining_capacity, max_capacity,
                        sick_treated_so_far, n_sick_patients,
                    )
                    n_next = max(0, min(remaining_capacity, len(nxt))) + 1
                    next_q = _adp_action_matrix(
                        next_state, n_next, max_capacity
                    ) @ weights
                    target = reward + gamma * np.max(next_q)
            else:
                # Final step: pay FN_COST for every sick patient never treated.
                n_untreated_sick = len(sick_pids - treated_pid_set)
                target = reward - n_untreated_sick * FN_COST

            # TD update on the features of the (state, action) actually taken.
            feat_curr = feats[action]
            td_error = target - np.dot(weights, feat_curr)
            weights += learning_rate * td_error * feat_curr

        if episode % 100 == 0:
            print(f"Episode {episode}/{n_episodes}")

    return weights

###############################################################################
# 6. MAKING AN ADP POLICY (TEST-TIME) FROM LEARNED WEIGHTS
###############################################################################
def make_adp_policy_linear(weights, capacity_factor=0.5, T=21):
    """
    Create a policy_func(df) that picks actions by argmax Q(s,a),
    with NO exploration (epsilon=0).

    :param weights: length-8 weight vector; Q(s, a) = dot(weights, features)
                    with features = [five risk-bucket fractions,
                    remaining-capacity fraction, sick-treated fraction,
                    a / max_capacity].
    :param capacity_factor: the budget is int(capacity_factor * number of
                            distinct sick patients in the cohort).
    :param T: number of time steps to walk through (0 .. T-1).
    :return: policy_func(df_in) -> {patient_id: treatment_time}

    df_in must contain columns [patient_id, time, risk_score, risk_bucket,
    label]. At each step the untreated patients present are ranked by
    risk_score; action `a` treats the top `a` of them. A treated patient is
    removed from all later steps, so each patient is treated at most once.

    NOTE(review): the budget and the sick-treated feature are computed from
    the true `label` column, so this policy is only usable in simulation.
    """
    def policy_func(df_in):
        n_sick_patients = df_in.groupby('patient_id')['label'].max().sum()
        max_capacity = int(capacity_factor * n_sick_patients)
        remaining_capacity = max_capacity

        treatment_dict = {}
        treated_pid_set = set()
        sick_treated_so_far = 0

        for t in range(T):
            if remaining_capacity <= 0:
                break

            # Rank this step's patients, then drop those already treated.
            at_t = df_in[df_in['time'] == t].sort_values(
                'risk_score', ascending=False
            )
            candidates = at_t[~at_t['patient_id'].isin(treated_pid_set)]
            n_candidates = len(candidates)
            if n_candidates == 0:
                continue

            # remaining_capacity > 0 here, hence max_capacity > 0 and
            # n_sick_patients > 0: the divisions below are safe.
            state = np.array(
                [(candidates['risk_bucket'] == b).mean() for b in range(5)]
                + [remaining_capacity / max_capacity,
                   sick_treated_so_far / n_sick_patients],
                dtype=float,
            )
            n_actions = min(remaining_capacity, n_candidates) + 1
            q_vals = [
                np.dot(weights, np.append(state, a / max_capacity))
                for a in range(n_actions)
            ]

            action = int(np.argmax(q_vals))
            if action > 0:
                chosen = candidates.iloc[:action]
                for pid in chosen['patient_id']:
                    treatment_dict[pid] = t
                # Record who was treated so they are excluded from later
                # steps (otherwise they get re-treated and consume budget
                # twice).
                treated_pid_set.update(chosen['patient_id'])
                sick_treated_so_far += (chosen['label'] == 1).sum()
                remaining_capacity -= action

        return treatment_dict
    return policy_func

###############################################################################
# 7. ALGORITHM 0 FOR CAPACITATED SCENARIO
###############################################################################
def run_algorithm0_capacitated(df_all, capacity_factor=0.5, seed=0):
    """
    Run the capacitated "Algorithm 0" validation pipeline end to end.

    1) Split df_all -> G1, G2, G3, G4 (patient-disjoint)
    2) ML hyperparam search: fit on G1, select by AUC on G2
    3) Refit the selected model on G1+G2 and score G1+G2, G3, G4
       (adds `risk_score` and `risk_bucket` columns)
    4) Tune the three threshold-based policies on G3 (lowest total cost)
    5) Train the ADP weights on G1+G2
    6) Evaluate all four policies on G4

    :param df_all: DataFrame with columns
                   [patient_id, time, EIT, NIRS, EIS, label].
    :param capacity_factor: fraction of distinct sick patients that may be
                            treated; passed to every policy.
    :param seed: seed for the four-way patient split.
    :return: DataFrame with one row per method and columns
             Method, Cost, Precision (%), Recall (%), Avg Treat Time.
    """
    feature_cols = ['EIT', 'NIRS', 'EIS']

    # 1) Split
    G1, G2, G3, G4 = split_into_four_groups(df_all, seed=seed)

    # 2) Hyperparameter search (train on G1, validate on G2)
    best_model, _best_auc, _best_name = train_and_select_best_model(
        G1[feature_cols].values, G1['label'].values,
        G2[feature_cols].values, G2['label'].values,
    )

    # 3) Refit on G1+G2, then score every group. Plain ndarrays are used
    #    for prediction as well as fitting so the model sees the same
    #    input type (and feature naming) in both places.
    G12 = pd.concat([G1, G2], ignore_index=True)
    best_model.fit(G12[feature_cols].values, G12['label'].values)

    G3 = G3.copy()
    G4 = G4.copy()
    for part in (G12, G3, G4):
        part['risk_score'] = best_model.predict_proba(
            part[feature_cols].values
        )[:, 1]
        part['risk_bucket'] = part['risk_score'].apply(to_bucket)

    # 4) Tune threshold-based policies on G3
    def tune_on_g3(param_grid, build_policy):
        """Return the first parameter in `param_grid` with the lowest G3 cost."""
        best_params, best_cost = None, float('inf')
        for params in param_grid:
            stats = simulate_capacitated_policy(
                G3, build_policy(params), capacity_factor
            )
            if stats['cost'] < best_cost:
                best_cost, best_params = stats['cost'], params
        return best_params

    thresholds = np.linspace(0, 1, 21)
    linear_grid = [
        (A, B)
        for A in np.linspace(-0.05, 0.01, 7)
        for B in np.linspace(0, 0.8, 7)
    ]

    best_thr_const = tune_on_g3(
        thresholds,
        lambda thr: make_capacitated_constant_threshold_policy(thr, capacity_factor),
    )
    best_lin_params = tune_on_g3(
        linear_grid,
        lambda ab: make_capacitated_linear_threshold_policy(ab[0], ab[1], capacity_factor),
    )
    best_thr_wte = tune_on_g3(
        thresholds,
        lambda thr: make_capacitated_wait_till_end_policy(thr, capacity_factor),
    )

    # 5) Train ADP on G1+G2
    adp_weights = train_adp_linear_epsilon_greedy(
        df_train=G12,
        capacity_factor=capacity_factor,
        n_episodes=1000,
        learning_rate=0.01,
        gamma=0.99,
        T=T_MAX,
        epsilon=0.1
    )

    # 6) Evaluate on G4
    A_best, B_best = best_lin_params
    policies = [
        ('Capacitated Constant Threshold',
         make_capacitated_constant_threshold_policy(best_thr_const, capacity_factor)),
        ('Capacitated Linear Threshold',
         make_capacitated_linear_threshold_policy(A_best, B_best, capacity_factor)),
        ('Capacitated Wait Till End',
         make_capacitated_wait_till_end_policy(best_thr_wte, capacity_factor)),
        ('Capacitated ADP',
         make_adp_policy_linear(adp_weights, capacity_factor=capacity_factor, T=T_MAX)),
    ]

    rows = []
    for method_name, policy in policies:
        stats = simulate_capacitated_policy(G4, policy, capacity_factor)
        rows.append({
            'Method': method_name,
            'Cost': stats['cost'],
            'Precision (%)': 100 * stats['precision'],
            'Recall (%)': 100 * stats['recall'],
            'Avg Treat Time': stats['avg_treatment_time'],
        })

    table = pd.DataFrame(
        rows,
        columns=['Method', 'Cost', 'Precision (%)', 'Recall (%)', 'Avg Treat Time'],
    )
    return table

###############################################################################
# 8. MAIN
###############################################################################
def main():
    """
    Script entry point.

    Reads "synthetic_patients_with_features.csv" from the working
    directory, checks that the required columns are present, keeps only
    rows with time < T_MAX, runs the capacitated Algorithm 0 pipeline and
    prints the resulting comparison table.

    Raises:
        ValueError: if any required column is missing from the CSV.
    """
    df_all = pd.read_csv("synthetic_patients_with_features.csv")

    # Validate BEFORE touching any column: filtering on 'time' first would
    # surface a missing column as a bare KeyError instead of this message.
    required = {'patient_id','time','EIT','NIRS','EIS','label'}
    if not required.issubset(df_all.columns):
        raise ValueError(f"CSV must have columns {required}, found {df_all.columns}.")

    # Keep only the time steps the policies iterate over.
    df_all = df_all[df_all['time'] < T_MAX].copy()

    print(f"\n=== ALGORITHM 0 RESULTS (CAPACITATED - {int(CAPACITY_FACTOR*100)}% capacity) ===")
    final_table = run_algorithm0_capacitated(df_all, capacity_factor=CAPACITY_FACTOR, seed=4)
    print(final_table.to_string(index=False))

if __name__ == "__main__":
    main()


=== ALGORITHM 0 RESULTS (CAPACITATED - 50% capacity) ===
Training ADP model with max capacity (distinct patients): 28
Episode 0/1000
Episode 100/1000
Episode 200/1000
Episode 300/1000
Episode 400/1000
Episode 500/1000
Episode 600/1000
Episode 700/1000
Episode 800/1000
Episode 900/1000
                        Method  Cost  Precision (%)  Recall (%)  Avg Treat Time
Capacitated Constant Threshold  1800       0.000000    0.000000             0.0
  Capacitated Linear Threshold  1680      27.777778   13.888889             0.0
     Capacitated Wait Till End  1260     100.000000   50.000000            20.0
               Capacitated ADP  1800       0.000000    0.000000             0.0
