In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from collections import deque
import random
import gym
from gym import spaces

# Configuration
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

# Hyperparamètres PPO
GAMMA = 0.99
LR_ACTOR = 0.0003
LR_CRITIC = 0.001
K_EPOCHS = 4
EPS_CLIP = 0.2
UPDATE_TIMESTEP = 2000
BATCH_SIZE = 64
MAX_EPISODES = 1000
MAX_TIMESTEPS = 100

# Chemins
DATA_PATH = "sepsis_data.csv"
MODEL_DIR = "models"
os.makedirs(MODEL_DIR, exist_ok=True)

# Environnement Sepsis
class SepsisEnv(gym.Env):
    def __init__(self, data):
        super(SepsisEnv, self).__init__()

        self.data = data
        self.n_patients = len(data)
        self.current_patient_idx = 0
        self.current_step = 0
        self.max_steps = 50  # Maximum steps per patient

        # Définition des espaces d'observation et d'action
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0, 0, 35, 0, 0, 0, 0, 0]),
            high=np.array([200, 200, 150, 60, 41, 100, 10, 30, 24, 24]),
            dtype=np.float32
        )

        self.action_space = spaces.Discrete(6)  # 6 actions possibles
        self.action_meanings = {
            0: "Ne rien faire",
            1: "Administrer fluides IV",
            2: "Administrer antibiotiques",
            3: "Administrer vasopresseurs",
            4: "Alerter clinicien",
            5: "Combo (fluides + antibiotiques)"
        }

        # Définir les limites de sepsis (simplifiée)
        self.sepsis_thresholds = {
            "HR": (90, 130),  # bpm
            "SysBP": (0, 90),  # mmHg (hypotension)
            "Temp": (38.3, 39.5),  # °C (fièvre)
            "Lactate": (2.0, 10.0),  # mmol/L
            "WBC": (12, 30)  # 10^9/L (leucocytose)
        }

        # Variables d'état internes
        self.reset()

    def reset(self):
        # Sélectionnez un patient au hasard
        self.current_patient_idx = np.random.randint(0, self.n_patients)
        self.current_step = 0

        # Initialiser l'état
        self.current_state = self._get_state()
        return self.current_state

    def _get_state(self):
        # Obtenir les données du patient actuel à l'étape actuelle
        if self.current_patient_idx >= len(self.data):
            self.current_patient_idx = 0

        patient_data = self.data.iloc[self.current_patient_idx]

        # Créer un vecteur d'état
        state = np.array([
            patient_data['HR'],
            patient_data['SysBP'],
            patient_data['DiaBP'],
            patient_data['Temp'],
            patient_data['O2'],
            patient_data['Lactate'],
            patient_data['WBC'],
            patient_data['Hour'],
            patient_data['TimeSinceLastAction'],
            self.current_step
        ], dtype=np.float32)

        return state

    def _calculate_reward(self, action, state_label):
        # Base reward from state
        if state_label == "Stable":
            base_reward = 1.0
        elif state_label == "AtRisk":
            base_reward = -0.3
        else:  # Sepsis
            base_reward = -1.0

        # Action penalty for overtreatment
        action_penalty = 0
        if action > 0:  # Any action except "do nothing"
            # Higher penalty for stronger interventions when stable
            if state_label == "Stable":
                action_penalty = -0.2 * action  # Stronger actions get higher penalties
            elif state_label == "AtRisk":
                # Moderate penalty for strong actions, but encourage appropriate interventions
                if action in [1, 2, 4]:  # Fluids, antibiotics, or alert are appropriate
                    action_penalty = -0.05
                else:
                    action_penalty = -0.1  # Stronger penalties for vasopressors or combos
            else:  # Sepsis
                # In sepsis, we want strong intervention
                if action == 5:  # Combo (fluids + antibiotics)
                    action_penalty = 0.3  # Bonus for appropriate intensive treatment
                elif action in [1, 2, 3]:  # Individual treatments
                    action_penalty = 0.1  # Smaller bonus
                else:
                    action_penalty = -0.1  # Penalty for not treating or just alerting

        return base_reward + action_penalty

    def step(self, action):
        # Execution de l'action et calcul de la récompense
        current_data = self.data.iloc[self.current_patient_idx]
        state_label = current_data['StateLabel']
        reward = self._calculate_reward(action, state_label)

        # Avancer dans le temps
        self.current_step += 1

        # Vérifier si l'épisode est terminé
        done = False
        if self.current_step >= self.max_steps or state_label == "Sepsis":
            done = True

        # Si non terminé, obtenir le nouvel état
        if not done:
            # Dans un environnement réel, l'état suivant dépendrait de l'action
            # Ici, nous simulons en passant au patient suivant
            self.current_patient_idx = (self.current_patient_idx + 1) % self.n_patients
            next_state = self._get_state()
        else:
            next_state = self.current_state  # État terminal

        self.current_state = next_state

        info = {
            "state_label": state_label,
            "action_taken": self.action_meanings[action]
        }

        return next_state, reward, done, info

    def render(self, mode='human'):
        print(f"Patient: {self.current_patient_idx}, Step: {self.current_step}")
        print(f"State: HR={self.current_state[0]:.1f}, SysBP={self.current_state[1]:.1f}, "
              f"Temp={self.current_state[3]:.1f}°C, Lactate={self.current_state[5]:.1f}")

# Définition des réseaux de neurones pour l'acteur et le critique
class ActorCritic(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(ActorCritic, self).__init__()

        # Couche partagée
        self.shared = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU()
        )

        # Acteur (Politique)
        self.actor = nn.Sequential(
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, action_dim),
            nn.Softmax(dim=-1)
        )

        # Critique (Fonction de valeur)
        self.critic = nn.Sequential(
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def act(self, state, memory=None):
        state = torch.from_numpy(state).float().to(DEVICE)
        shared_features = self.shared(state)
        action_probs = self.actor(shared_features)
        dist = Categorical(action_probs)
        action = dist.sample()

        if memory is not None:
            memory.states.append(state)
            memory.actions.append(action)
            memory.logprobs.append(dist.log_prob(action))

        return action.item()

    def evaluate(self, state, action):
        shared_features = self.shared(state)
        action_probs = self.actor(shared_features)
        dist = Categorical(action_probs)

        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()
        state_value = self.critic(shared_features)

        return action_logprobs, torch.squeeze(state_value), dist_entropy

# Mémoire pour stocker l'expérience
class Memory:
    def __init__(self):
        self.states = []
        self.actions = []
        self.logprobs = []
        self.rewards = []
        self.is_terminals = []

    def clear(self):
        self.states = []
        self.actions = []
        self.logprobs = []
        self.rewards = []
        self.is_terminals = []

# Agent PPO
class PPO:
    def __init__(self, state_dim, action_dim):
        self.policy = ActorCritic(state_dim, action_dim).to(DEVICE)
        self.optimizer = torch.optim.Adam([
            {'params': self.policy.shared.parameters(), 'lr': LR_ACTOR},
            {'params': self.policy.actor.parameters(), 'lr': LR_ACTOR},
            {'params': self.policy.critic.parameters(), 'lr': LR_CRITIC}
        ])
        self.policy_old = ActorCritic(state_dim, action_dim).to(DEVICE)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()

    def select_action(self, state, memory):
        return self.policy_old.act(state, memory)

    def update(self, memory):
        # Monte Carlo estimation des récompenses
        rewards = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(memory.rewards), reversed(memory.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (GAMMA * discounted_reward)
            rewards.insert(0, discounted_reward)

        # Normalisation des récompenses
        rewards = torch.tensor(rewards, dtype=torch.float32).to(DEVICE)
        rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-5)

        # Conversion des listes en tensors
        old_states = torch.stack(memory.states).to(DEVICE).detach()
        old_actions = torch.stack(memory.actions).to(DEVICE).detach()
        old_logprobs = torch.stack(memory.logprobs).to(DEVICE).detach()

        # Mise à jour des paramètres de la politique
        for _ in range(K_EPOCHS):
            # Évaluation des actions et des états
            logprobs, state_values, dist_entropy = self.policy.evaluate(old_states, old_actions)

            # Trouver le ratio (pi_theta / pi_theta__old)
            ratios = torch.exp(logprobs - old_logprobs.detach())

            # Fonction objectif surrogate
            advantages = rewards - state_values.detach()
            surr1 = ratios * advantages
            surr2 = torch.clamp(ratios, 1-EPS_CLIP, 1+EPS_CLIP) * advantages

            # Calcul de la perte
            loss = -torch.min(surr1, surr2) + 0.5*self.MseLoss(state_values, rewards) - 0.01*dist_entropy

            # Rétropropagation
            self.optimizer.zero_grad()
            loss.mean().backward()
            self.optimizer.step()

        # Copier les nouveaux poids dans le réseau de politique old
        self.policy_old.load_state_dict(self.policy.state_dict())

# Prétraitement des données
def preprocess_data(data_path):
    df = pd.read_csv(data_path)

    # Normalisation des données numériques
    numeric_columns = ['HR', 'SysBP', 'DiaBP', 'Temp', 'O2', 'Lactate', 'WBC', 'Hour', 'TimeSinceLastAction']
    scaler = StandardScaler()
    df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

    return df

# Fonction principale d'entraînement
def train():
    print("Démarrage de l'entraînement...")

    # Charger et prétraiter les données
    data = pd.read_csv(DATA_PATH)
    print(f"Données chargées: {data.shape}")

    # Créer l'environnement
    env = SepsisEnv(data)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n

    # Créer l'agent et la mémoire
    memory = Memory()
    ppo = PPO(state_dim, action_dim)

    # Variables de suivi
    running_reward = 0
    avg_rewards = []
    frame_idx = 0
    early_stop = False

    # Boucle d'entraînement
    for i_episode in range(1, MAX_EPISODES+1):
        state = env.reset()
        episode_reward = 0

        for t in range(MAX_TIMESTEPS):
            # Sélection d'une action
            action = ppo.select_action(state, memory)

            # Exécution de l'action
            state, reward, done, _ = env.step(action)

            # Stockage des résultats
            memory.rewards.append(reward)
            memory.is_terminals.append(done)

            frame_idx += 1
            episode_reward += reward

            # Mise à jour si on atteint la taille de lot
            if frame_idx % UPDATE_TIMESTEP == 0:
                ppo.update(memory)
                memory.clear()

            if done:
                break

        # Mise à jour de la récompense moyenne
        running_reward = 0.05 * episode_reward + (1 - 0.05) * running_reward
        avg_rewards.append(running_reward)

        # Affichage du progrès
        if i_episode % 10 == 0:
            print(f"Episode {i_episode}\tAverage Reward: {running_reward:.2f}")

        # Sauvegarde du modèle
        if i_episode % 100 == 0:
            model_path = os.path.join(MODEL_DIR, f'sepsis_ppo_model_{i_episode}.pth')
            torch.save(ppo.policy.state_dict(), model_path)
            print(f"Modèle sauvegardé: {model_path}")

        # Arrêt anticipé si la performance est suffisamment bonne
        if running_reward > 95:
            print(f"Problème résolu en {i_episode} épisodes!")
            early_stop = True
            break

    # Sauvegarde du modèle final
    final_model_path = os.path.join(MODEL_DIR, 'sepsis_ppo_model_final.pth')
    torch.save(ppo.policy.state_dict(), final_model_path)
    print(f"Modèle final sauvegardé: {final_model_path}")

    # Tracer la courbe d'apprentissage
    plt.figure(figsize=(10, 5))
    plt.plot(avg_rewards)
    plt.title('Courbe d\'apprentissage PPO')
    plt.xlabel('Episodes')
    plt.ylabel('Récompense moyenne')
    plt.savefig('learning_curve.png')
    plt.close()

    return ppo

# Fonction d'évaluation
def evaluate(policy, env, num_episodes=10):
    all_rewards = []
    all_actions = []
    all_states = []

    for i in range(num_episodes):
        state = env.reset()
        episode_reward = 0
        episode_actions = []
        episode_states = []

        done = False
        while not done:
            # Convertir l'état en tensor et sélectionner une action
            state_tensor = torch.FloatTensor(state).to(DEVICE)
            shared_features = policy.shared(state_tensor)
            action_probs = policy.actor(shared_features)
            dist = Categorical(action_probs)
            action = dist.sample().item()

            # Exécuter l'action
            next_state, reward, done, info = env.step(action)

            # Stocker les informations
            episode_reward += reward
            episode_actions.append(action)
            episode_states.append(state)

            # Passer à l'état suivant
            state = next_state

        all_rewards.append(episode_reward)
        all_actions.append(episode_actions)
        all_states.append(episode_states)

    avg_reward = sum(all_rewards) / len(all_rewards)
    print(f"Récompense moyenne sur {num_episodes} épisodes: {avg_reward:.2f}")

    # Analyser les actions prises
    action_counts = {}
    for actions in all_actions:
        for a in actions:
            if a not in action_counts:
                action_counts[a] = 0
            action_counts[a] += 1

    total_actions = sum(action_counts.values())
    print("\nDistribution des actions:")
    for a, count in sorted(action_counts.items()):
        percentage = (count / total_actions) * 100
        print(f"  {a} ({env.action_meanings[a]}): {count} ({percentage:.1f}%)")

    return avg_reward, action_counts, all_states, all_rewards

# Fonction pour exécuter l'agent en production
def run_agent(model_path, data_path):
    # Charger les données
    data = pd.read_csv(data_path)

    # Créer l'environnement
    env = SepsisEnv(data)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n

    # Créer et charger le modèle
    policy = ActorCritic(state_dim, action_dim).to(DEVICE)
    policy.load_state_dict(torch.load(model_path))
    policy.eval()

    # Évaluer le modèle
    avg_reward, action_counts, states, rewards = evaluate(policy, env, num_episodes=100)

    return avg_reward, action_counts

# Exécuter l'entraînement si lancé directement
if __name__ == "__main__":
    # Entraîner l'agent
    trained_policy = train()

    # Créer l'environnement d'évaluation
    data = pd.read_csv(DATA_PATH)
    eval_env = SepsisEnv(data)

    # Évaluer le modèle final
    print("\nÉvaluation du modèle final:")
    avg_reward, action_counts, states, rewards = evaluate(trained_policy.policy, eval_env)

    print("\nEntraînement et évaluation terminés!")

Using device: cpu
Démarrage de l'entraînement...
Données chargées: (500, 11)
Episode 10	Average Reward: -0.39
Episode 20	Average Reward: -0.53
Episode 30	Average Reward: -0.63
Episode 40	Average Reward: -0.73
Episode 50	Average Reward: -0.77
Episode 60	Average Reward: -0.86
Episode 70	Average Reward: -0.75


  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


Episode 80	Average Reward: -0.73
Episode 90	Average Reward: -0.73
Episode 100	Average Reward: -0.75
Modèle sauvegardé: models/sepsis_ppo_model_100.pth
Episode 110	Average Reward: -0.73
Episode 120	Average Reward: -0.70
Episode 130	Average Reward: -0.82
Episode 140	Average Reward: -0.76
Episode 150	Average Reward: -0.82
Episode 160	Average Reward: -0.79
Episode 170	Average Reward: -0.76
Episode 180	Average Reward: -0.78
Episode 190	Average Reward: -1.00
Episode 200	Average Reward: -0.92
Modèle sauvegardé: models/sepsis_ppo_model_200.pth
Episode 210	Average Reward: -0.80
Episode 220	Average Reward: -0.81
Episode 230	Average Reward: -0.84
Episode 240	Average Reward: -0.83
Episode 250	Average Reward: -0.85
Episode 260	Average Reward: -0.86
Episode 270	Average Reward: -0.77
Episode 280	Average Reward: -0.87
Episode 290	Average Reward: -0.89
Episode 300	Average Reward: -0.87
Modèle sauvegardé: models/sepsis_ppo_model_300.pth
Episode 310	Average Reward: -0.92
Episode 320	Average Reward: -0.93

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [32]:
def evaluate(policy, env, num_episodes=100): # Increased episodes for better statistics
    # This function remains as it was in the previous turn, performing the evaluation
    # and printing the results including reward, action distribution, and intervention analysis.
    # We will call this function from the new reporting function.

    print(f"\n--- Running Evaluation on {num_episodes} episodes ---")
    all_rewards = []
    all_actions_taken = [] # Store the actual action index
    all_state_labels = [] # Store the state label for each step

    # Analysis counters
    at_risk_steps = 0
    sepsis_steps = 0
    total_steps = 0
    timely_interventions = 0 # Actions 1, 2, 4, 5 when AtRisk
    sepsis_treatments = 0 # Actions 1, 2, 3, 5 when Sepsis
    missed_sepsis_treatments = 0 # Action 0 when Sepsis

    # Define which actions are considered interventions
    # intervention_actions = [1, 2, 3, 4, 5] # All except "Do nothing"

    # Define appropriate actions per state for analysis
    appropriate_at_risk_actions = [1, 2, 4, 5] # Fluids, Antibiotics, Alert, Combo
    appropriate_sepsis_actions = [1, 2, 3, 5] # Fluids, Antibiotics, Vasopressors, Combo

    for i_episode in range(num_episodes):
        state = env.reset()
        episode_reward = 0
        episode_actions = []
        episode_state_labels = []

        done = False
        while not done:
            # Select an action using the policy (in evaluation mode)
            state_tensor = torch.FloatTensor(state).unsqueeze(0).to(DEVICE) # Add batch dim
            # Need to get action_probs to sample, or just take the argmax for deterministic eval
            # For evaluation metrics like action distribution, sampling is better.
            # For comparing performance, deterministic (argmax) might be used.
            # Let's sample for action distribution analysis.
            with torch.no_grad(): # No gradients needed during evaluation
                 shared_features = policy.shared(state_tensor)
                 action_probs = policy.actor(shared_features)
                 dist = Categorical(action_probs)
                 action = dist.sample().item() # Sample action

            # Exécuter l'action
            next_state, reward, done, info = env.step(action)

            # Store information for analysis
            episode_reward += reward
            episode_actions.append(action)
            episode_state_labels.append(info['state_label']) # Store the label of the state *before* the action was taken

            # Count state occurrences and interventions
            total_steps += 1
            if info['state_label'] == "AtRisk":
                at_risk_steps += 1
                if action in appropriate_at_risk_actions:
                    timely_interventions += 1
            elif info['state_label'] == "Sepsis":
                sepsis_steps += 1
                if action in appropriate_sepsis_actions:
                    sepsis_treatments += 1
                if action == 0:
                     missed_sepsis_treatments += 1


            # Move to the next state
            state = next_state

        all_rewards.append(episode_reward)
        all_actions_taken.extend(episode_actions) # Flatten the list of actions
        all_state_labels.extend(episode_state_labels) # Flatten the list of state labels

    # --- Print the metrics ---
    print(f"\n--- Evaluation Results ---")

    # 📈 Cumulative reward per episode
    avg_reward = sum(all_rewards) / len(all_rewards) if all_rewards else 0
    print(f"\n📈 Average Cumulative Reward per Episode: {avg_reward:.2f}")

    # Action Distribution
    action_counts = {}
    for a in all_actions_taken:
        action_counts[a] = action_counts.get(a, 0) + 1

    print("\n📊 Action Distribution (total over all steps):")
    total_actions = len(all_actions_taken)
    for a in sorted(env.action_meanings.keys()):
         count = action_counts.get(a, 0)
         percentage = (count / total_actions) * 100 if total_actions > 0 else 0
         print(f"  {a} ({env.action_meanings[a]}): {count} ({percentage:.1f}%)")

    # 🕒 Number of timely interventions made (Proxy) & Sepsis Treatment
    print("\n🔬 Analysis of Interventions based on State:")

    print(f"Total steps evaluated: {total_steps}")
    print(f"Steps in 'AtRisk' state: {at_risk_steps}")
    print(f"Steps in 'Sepsis' state: {sepsis_steps}")

    # Timely interventions when AtRisk
    timely_intervention_rate = (timely_interventions / at_risk_steps) * 100 if at_risk_steps > 0 else 0
    print(f"🕒 Timely interventions (appropriate actions) when 'AtRisk': {timely_interventions} ({timely_intervention_rate:.1f}%)")
    print("(Note: This is a proxy based on state label, not true clinical timeliness)")

    # Sepsis treatments when Sepsis
    sepsis_treatment_rate = (sepsis_treatments / sepsis_steps) * 100 if sepsis_steps > 0 else 0
    print(f"Appropriate sepsis treatments when 'Sepsis': {sepsis_treatments} ({sepsis_treatment_rate:.1f}%)")

    # Missed sepsis treatments (inaction) when Sepsis
    missed_sepsis_treatment_rate = (missed_sepsis_treatments / sepsis_steps) * 100 if sepsis_steps > 0 else 0
    print(f"Inaction ('Ne rien faire') when 'Sepsis': {missed_sepsis_treatments} ({missed_sepsis_treatment_rate:.1f}%)")

    # 🔍 AUC for early sepsis detection (Discussion)
    print("\n🔍 AUC for Early Sepsis Detection:")
    print("Note: Calculating a true AUC is not directly feasible with this environment and agent structure.")
    print("It requires a model outputting a sepsis risk score and a dynamic environment with future outcomes.")

    # 💀 Mortality rate reduction (Discussion)
    print("\n💀 Mortality Rate Reduction:")
    print("Note: Calculating mortality rate reduction is not possible as the environment does not simulate mortality.")

    print(f"\n--- Evaluation Complete ---")

    # You can return these metrics if you want to process them further outside this function
    return avg_reward, action_counts, timely_intervention_rate, sepsis_treatment_rate, missed_sepsis_treatment_rate


def report_evaluation_metrics(policy, env, num_episodes=100):
    """
    Dedicated function to evaluate the trained policy and report the specified metrics.

    Args:
        policy: The trained ActorCritic policy model.
        env: The SepsisEnv environment configured for evaluation.
        num_episodes: Number of episodes to run for evaluation statistics.
    """
    print("\n" + "="*60)
    print("         📊 Reporting Evaluation Metrics 📊")
    print("="*60)

    # The 'evaluate' function already calculates and prints the required metrics
    # including Average Cumulative Reward, Action Distribution, Timely Interventions
    # (proxy), Sepsis Treatment analysis, and notes on AUC/Mortality limitations.
    # We just need to call it here.

    evaluate(policy.policy, env, num_episodes=num_episodes)

    print("\n" + "="*60)
    print("       ✅ Evaluation Reporting Complete ✅")
    print("="*60)

report_evaluation_metrics(trained_policy, eval_env, num_episodes=200)


         📊 Reporting Evaluation Metrics 📊

--- Running Evaluation on 200 episodes ---

--- Evaluation Results ---

📈 Average Cumulative Reward per Episode: -0.83

📊 Action Distribution (total over all steps):
  0 (Ne rien faire): 44 (7.9%)
  1 (Administrer fluides IV): 55 (9.9%)
  2 (Administrer antibiotiques): 62 (11.2%)
  3 (Administrer vasopresseurs): 289 (52.0%)
  4 (Alerter clinicien): 3 (0.5%)
  5 (Combo (fluides + antibiotiques)): 103 (18.5%)

🔬 Analysis of Interventions based on State:
Total steps evaluated: 556
Steps in 'AtRisk' state: 174
Steps in 'Sepsis' state: 200
🕒 Timely interventions (appropriate actions) when 'AtRisk': 65 (37.4%)
(Note: This is a proxy based on state label, not true clinical timeliness)
Appropriate sepsis treatments when 'Sepsis': 181 (90.5%)
Inaction ('Ne rien faire') when 'Sepsis': 17 (8.5%)

🔍 AUC for Early Sepsis Detection:
Note: Calculating a true AUC is not directly feasible with this environment and agent structure.
It requires a model outputti