# Patient-Level ICU Mortality Analysis

Analyze the patient-level ICU mortality rate for both datasets.


In [None]:
# Setup
import sys
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Make the directory two levels above the current working directory importable
notebook_dir = Path().resolve()
project_root = notebook_dir.parent.parent
sys.path.insert(0, str(project_root))

# Plot styling shared by every figure in this notebook
sns.set_style("whitegrid")
plt.rcParams.update({
    'figure.figsize': (10, 6),
    'font.size': 11,
})


In [None]:
# Import required modules
from typing import Dict, List, Optional, Set

import pandas as pd

from src.data.ecg.ecg_loader import build_npy_index
from src.data.ecg.ecg_dataset import extract_subject_id_from_path
from src.data.labeling import load_icustays

# Define functions
def get_unique_patients_from_dataset(data_dir: Path) -> Set[int]:
    """Extract unique subject_ids from ECG files in a dataset directory.

    Args:
        data_dir: Directory passed to ``build_npy_index`` to enumerate ECG
            records.

    Returns:
        The set of subject_ids obtained by calling
        ``extract_subject_id_from_path`` on each record's ``base_path``.
        Empty if ``data_dir`` does not exist or no subject_id could be
        extracted.
    """
    if not data_dir.exists():
        return set()

    print(f"  Scanning ECG files in: {data_dir}")
    records = build_npy_index(data_dir=str(data_dir))
    print(f"  Found {len(records):,} ECG files")

    unique_patients = set()
    skipped = 0
    for record in records:
        base_path = record["base_path"]
        try:
            subject_id = extract_subject_id_from_path(base_path)
        except Exception:
            # Best effort: a file whose path cannot be parsed is skipped, but
            # counted so the loss is visible instead of silently ignored.
            skipped += 1
        else:
            unique_patients.add(subject_id)

    if skipped:
        print(f"  Warning: could not extract subject_id from {skipped:,} ECG files (skipped)")
    print(f"  Extracted {len(unique_patients):,} unique patients")
    return unique_patients

def analyze_patient_mortality(
    dataset_name: str,
    data_dir: Path,
    icustays_df: pd.DataFrame,
    admissions_df: pd.DataFrame
) -> Optional[Dict]:
    """Calculate ICU mortality rate on patient level.

    A patient counts as "died in ICU" when, for at least one of their ICU
    stays, the admission's ``deathtime`` lies within ``[intime, outtime]``
    (both bounds inclusive). Only patients that have at least one row in
    ``icustays_df`` enter the denominator.

    Args:
        dataset_name: Label used in printed output and in the result.
        data_dir: Dataset directory whose ECG files determine the patient set.
        icustays_df: ICU stays; the columns ``subject_id``, ``hadm_id``,
            ``intime`` and ``outtime`` are read.
        admissions_df: Admissions; the columns ``hadm_id`` and ``deathtime``
            are read.

    Returns:
        ``None`` if no patients were found in ``data_dir``; otherwise a dict
        with the keys ``dataset_name``, ``total_patients``, ``died_patients``,
        ``alive_patients`` and ``mortality_rate`` (percentage, ``0.0`` when
        there are no patients with ICU stays).
    """
    print(f"\n{'='*80}")
    print(f"Analyzing dataset: {dataset_name}")
    print(f"{'='*80}")

    unique_patients = get_unique_patients_from_dataset(data_dir)

    if not unique_patients:
        print(f"  Warning: No patients found in dataset {dataset_name}")
        return None

    dataset_icustays = icustays_df[icustays_df['subject_id'].isin(unique_patients)].copy()
    dataset_icustays['intime'] = pd.to_datetime(dataset_icustays['intime'])
    dataset_icustays['outtime'] = pd.to_datetime(dataset_icustays['outtime'])

    # Rows are ICU stays, not patients: report both counts explicitly.
    patients_with_stays = dataset_icustays['subject_id'].nunique()
    print(f"  Found {len(dataset_icustays):,} ICU stays for {patients_with_stays:,} patients")

    admissions_with_deathtime = admissions_df[['hadm_id', 'deathtime']].copy()
    # Unparseable death times become NaT and are treated as "no death recorded".
    admissions_with_deathtime['deathtime'] = pd.to_datetime(admissions_with_deathtime['deathtime'], errors='coerce')

    # Left join keeps every ICU stay, including those without a matching admission.
    icustays_with_death = dataset_icustays.merge(
        admissions_with_deathtime,
        on='hadm_id',
        how='left'
    )

    icustays_with_death['died_in_icu'] = (
        icustays_with_death['deathtime'].notna() &
        (icustays_with_death['deathtime'] >= icustays_with_death['intime']) &
        (icustays_with_death['deathtime'] <= icustays_with_death['outtime'])
    )

    # Collapse stays to patients: one qualifying stay marks the patient as died.
    patient_died_in_icu = icustays_with_death.groupby('subject_id')['died_in_icu'].any()

    # Cast to builtin int so the result holds plain Python numbers throughout.
    total_patients = len(patient_died_in_icu)
    died_patients = int(patient_died_in_icu.sum())
    alive_patients = total_patients - died_patients
    mortality_rate = (died_patients / total_patients * 100) if total_patients > 0 else 0.0

    if total_patients == 0:
        print(f"  Warning: No ICU stays matched the patients of dataset {dataset_name}")

    print(f"\n  Results for {dataset_name}:")
    print(f"    Total patients: {total_patients:,}")
    print(f"    Died: {died_patients:,} ({mortality_rate:.2f}%)")
    print(f"    Survived: {alive_patients:,} ({100 - mortality_rate:.2f}%)")

    return {
        'dataset_name': dataset_name,
        'total_patients': total_patients,
        'died_patients': died_patients,
        'alive_patients': alive_patients,
        'mortality_rate': mortality_rate
    }


In [None]:
# Load the label tables. The paths are relative, so pd.read_csv resolves the
# admissions file against the current working directory.
icustays_path = "data/labeling/labels_csv/icustays.csv"
admissions_path = "data/labeling/labels_csv/admissions.csv"

icustays_df = load_icustays(icustays_path)
admissions_df = pd.read_csv(admissions_path)

# Report the row count of each table
n_icu_stays = len(icustays_df)
n_admissions = len(admissions_df)
print(f"Loaded {n_icu_stays:,} ICU stays")
print(f"Loaded {n_admissions:,} admissions")


In [None]:
# Run the patient-level mortality analysis for every configured dataset
datasets = {
    'all_icu_ecgs': {'name': 'All ICU ECGs', 'data_dir': Path('data/all_icu_ecgs_P1')},
    'icu_24h': {'name': 'ICU 24h', 'data_dir': Path('data/icu_ecgs_24h')},
}

results = []
for dataset_info in datasets.values():
    name = dataset_info['name']
    data_dir = dataset_info['data_dir']

    # Skip (and report) datasets whose directory is missing
    if not data_dir.exists():
        print(f"Dataset {name} not found at {data_dir}")
        continue

    result = analyze_patient_mortality(
        dataset_name=name,
        data_dir=data_dir,
        icustays_df=icustays_df,
        admissions_df=admissions_df,
    )
    # The analysis returns None when it found no patients; keep real results only
    if result:
        results.append(result)


In [None]:
# Visualize results: one bar per analyzed dataset, followed by a text summary.
# Nothing is drawn or printed when no dataset produced a result.
if results:
    fig, ax = plt.subplots(figsize=(10, 6))

    datasets_names = [r['dataset_name'] for r in results]
    mortality_rates = [r['mortality_rate'] for r in results]

    bars = ax.bar(datasets_names, mortality_rates, color=['steelblue', 'coral'], alpha=0.7)

    ax.set_ylabel('Mortality Rate (%)', fontsize=12)
    ax.set_title('ICU Mortality Rate Comparison (Patient-Level)', fontsize=14, fontweight='bold')
    # Leave 20% headroom above the tallest bar for the value labels. If every
    # rate is 0 the limits would be identical (0, 0), so fall back to 1.0.
    y_upper = max(mortality_rates) * 1.2
    ax.set_ylim(0, y_upper if y_upper > 0 else 1.0)
    ax.grid(axis='y', alpha=0.3)

    # Add value labels: rate on the first line, died/total counts on the second
    for bar, rate, result in zip(bars, mortality_rates, results):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height,
                f'{rate:.2f}%\n({result["died_patients"]:,}/{result["total_patients"]:,})',
                ha='center', va='bottom', fontsize=11, fontweight='bold')

    plt.tight_layout()
    plt.show()

    # Print summary table
    print("\n" + "="*60)
    print("SUMMARY TABLE")
    print("="*60)
    df = pd.DataFrame(results)
    print(df.to_string(index=False))
