# ICU Statistics Visualization

Plot ICU statistics: number of patients and length of stay.


In [None]:
# Setup
import sys
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Put the directory two levels above the current working directory at the
# front of the import path, ahead of every other entry.
project_root = Path().resolve().parent.parent
sys.path.insert(0, str(project_root))

# Plot defaults: seaborn "whitegrid" theme, 12x8 inch figures, 11 pt text.
sns.set_style("whitegrid")
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 11,
})


In [None]:
# Import required modules
import numpy as np

# Define functions
def load_and_analyze_icu_data(icustays_path: str) -> pd.DataFrame:
    """Read the ICU stays CSV, print headline statistics, and return the frame.

    The CSV must provide the columns ``intime``, ``outtime``, ``subject_id``
    and ``los``. ``intime`` and ``outtime`` are converted to datetimes in the
    returned frame; all other columns are returned as parsed by pandas.

    Args:
        icustays_path: Path of the CSV file to read.

    Returns:
        The loaded DataFrame, one row per line of the CSV.
    """
    print(f"Loading ICU stays from {icustays_path}...")
    stays = pd.read_csv(icustays_path)

    # Parse both timestamp columns so callers receive datetimes, not strings.
    for time_column in ('intime', 'outtime'):
        stays[time_column] = pd.to_datetime(stays[time_column])

    n_stays = len(stays)
    print(f"Loaded {n_stays} ICU stays")
    n_patients = stays['subject_id'].nunique()
    print(f"Unique patients (subject_id): {n_patients}")
    print(f"Average length of stay: {stays['los'].mean():.2f} days")
    print(f"Median length of stay: {stays['los'].median():.2f} days")

    # Total ICU days per patient: `los` summed over all of that patient's stays.
    days_per_patient = stays.groupby('subject_id')['los'].sum()
    patients_3plus = (days_per_patient >= 3).sum()
    share_3plus = patients_3plus / len(days_per_patient) * 100
    print(f"Patients with >=3 days total in ICU (across all stays): {patients_3plus:,} ({share_3plus:.1f}%)")

    return stays

def _aggregate_patient_stats(df: pd.DataFrame) -> pd.DataFrame:
    """Return one row per subject_id with `num_stays` and `total_days` columns."""
    return df.groupby('subject_id').agg({
        'stay_id': 'count',
        'los': 'sum'
    }).rename(columns={'stay_id': 'num_stays', 'los': 'total_days'})


def _draw_center_lines(ax, values: pd.Series, precision: int) -> None:
    """Mark the median (red) and mean (green) of `values` with dashed vertical lines."""
    median_value = values.median()
    mean_value = values.mean()
    ax.axvline(median_value, color='red', linestyle='--', linewidth=2,
               label=f'Median: {median_value:.{precision}f} days')
    ax.axvline(mean_value, color='green', linestyle='--', linewidth=2,
               label=f'Mean: {mean_value:.{precision}f} days')


def _plot_stays_per_patient(ax, patient_stats: pd.DataFrame) -> None:
    """Draw a labelled bar chart of how many patients had 1, 2, 3, ... ICU stays."""
    stay_counts = patient_stats['num_stays'].value_counts().sort_index()
    ax.bar(stay_counts.index, stay_counts.values, color='steelblue', alpha=0.7)
    ax.set_xlabel('Number of ICU Stays per Patient', fontsize=11)
    ax.set_ylabel('Number of Patients', fontsize=11)
    ax.set_title('Distribution: ICU Stays per Patient', fontsize=12, fontweight='bold')
    ax.grid(axis='y', alpha=0.3)
    # Write the exact patient count on top of every bar.
    for x, y in zip(stay_counts.index, stay_counts.values):
        ax.text(x, y, str(y), ha='center', va='bottom', fontsize=9)


def _plot_total_days_per_patient(ax, patient_stats: pd.DataFrame) -> None:
    """Draw the histogram of total ICU days per patient with reference lines."""
    total_days = patient_stats['total_days']
    bins = np.linspace(0, total_days.max(), 50)
    ax.hist(total_days, bins=bins, color='coral', alpha=0.7, edgecolor='black')
    ax.set_xlabel('Total Days in ICU per Patient (sum across all stays)', fontsize=11)
    ax.set_ylabel('Number of Patients', fontsize=11)
    ax.set_title('Distribution: Total ICU Days per Patient (across all stays)', fontsize=12, fontweight='bold')
    ax.grid(axis='y', alpha=0.3)
    _draw_center_lines(ax, total_days, precision=1)
    ax.axvline(3, color='purple', linestyle=':', linewidth=2, label='3 days threshold')
    ax.legend()


def _plot_length_of_stay(ax, df: pd.DataFrame) -> None:
    """Draw the histogram of per-stay length of stay with median/mean lines."""
    los = df['los']
    # Bins stop at the 95th percentile, so the longest 5% of stays are not
    # drawn; the median/mean lines below are still computed from all stays.
    bins_los = np.linspace(0, los.quantile(0.95), 50)
    ax.hist(los, bins=bins_los, color='mediumseagreen', alpha=0.7, edgecolor='black')
    ax.set_xlabel('Length of Stay (days)', fontsize=11)
    ax.set_ylabel('Number of ICU Stays', fontsize=11)
    ax.set_title('Distribution: Length of Stay per ICU Stay', fontsize=12, fontweight='bold')
    ax.grid(axis='y', alpha=0.3)
    _draw_center_lines(ax, los, precision=2)
    ax.legend()


def _build_summary_rows(df: pd.DataFrame, patient_stats: pd.DataFrame) -> list:
    """Return the [label, value] string pairs shown in the summary table."""
    los = df['los']
    num_stays = patient_stats['num_stays']
    total_days = patient_stats['total_days']

    patients_3plus_days = (total_days >= 3).sum()
    patients_less_3_days = (total_days < 3).sum()
    pct_3plus = (patients_3plus_days / len(patient_stats)) * 100

    return [
        ['Total ICU Stays', f"{len(df):,}"],
        ['Unique Patients', f"{df['subject_id'].nunique():,}"],
        ['Patients with 1 stay', f"{(num_stays == 1).sum():,}"],
        ['Patients with >1 stay', f"{(num_stays > 1).sum():,}"],
        ['', ''],
        ['Patients by Total ICU Days:', ''],
        ['  < 3 days (total)', f"{patients_less_3_days:,} ({100-pct_3plus:.1f}%)"],
        ['  >= 3 days (total)', f"{patients_3plus_days:,} ({pct_3plus:.1f}%)"],
        ['', ''],
        ['Length of Stay (per stay):', ''],
        ['  Mean', f"{los.mean():.2f} days"],
        ['  Median', f"{los.median():.2f} days"],
        ['  Min', f"{los.min():.2f} days"],
        ['  Max', f"{los.max():.2f} days"],
        ['  Q25', f"{los.quantile(0.25):.2f} days"],
        ['  Q75', f"{los.quantile(0.75):.2f} days"],
        ['', ''],
        ['Total ICU Days (per patient):', ''],
        ['  Mean', f"{total_days.mean():.2f} days"],
        ['  Median', f"{total_days.median():.2f} days"],
        ['  Min', f"{total_days.min():.2f} days"],
        ['  Max', f"{total_days.max():.2f} days"],
    ]


def _draw_summary_table(ax, summary_data: list) -> None:
    """Render `summary_data` as a styled two-column table on a blank axes."""
    ax.axis('off')
    table = ax.table(cellText=summary_data, cellLoc='left', loc='center',
                     colWidths=[0.65, 0.35], bbox=[0, 0, 1, 0.95])
    table.auto_set_font_size(False)
    table.set_fontsize(11)
    table.scale(1, 2.2)

    for row_idx, row in enumerate(summary_data):
        for col_idx, text in enumerate(row):
            cell = table[(row_idx, col_idx)]
            cell.set_edgecolor('lightgray')
            cell.set_linewidth(0.5)
            if not text:
                # Spacer cells only get the border styling.
                continue
            # Indented labels and values starting with a digit are detail
            # cells; any other text containing ':' is a section heading.
            is_detail = text[0].isdigit() or text.startswith('  ')
            if ':' in text and not is_detail:
                cell.set_text_props(fontsize=11, weight='bold')
                cell.set_facecolor('#E8F5E9')
            else:
                cell.set_text_props(fontsize=10, weight='normal')

    # The first row is highlighted like a header, overriding the styling above.
    for col_idx in range(2):
        cell = table[(0, col_idx)]
        cell.set_facecolor('#4CAF50')
        cell.set_text_props(weight='bold', color='white', fontsize=11)


def create_plots(df: pd.DataFrame, output_dir: str = None, return_figure: bool = True):
    """Create a 2x2 figure of ICU statistics and the per-patient summary.

    Panels: ICU stays per patient (bar chart), total ICU days per patient
    (histogram), length of stay per ICU stay (histogram), and a text table
    of summary statistics.

    Args:
        df: ICU stays with the columns ``subject_id``, ``stay_id`` and ``los``.
        output_dir: If truthy, the directory (created when missing) in which
            the figure is saved as ``icu_statistics.png`` at 300 dpi.
        return_figure: If True, return the open figure with the statistics;
            if False, close the figure and return only the statistics.

    Returns:
        ``(fig, patient_stats)`` when ``return_figure`` is True, otherwise
        ``patient_stats``: a DataFrame indexed by ``subject_id`` with the
        columns ``num_stays`` and ``total_days``.
    """
    output_path = None
    if output_dir:
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

    patient_stats = _aggregate_patient_stats(df)

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    fig.suptitle('ICU Statistics: Patients and Length of Stay', fontsize=16, fontweight='bold')

    _plot_stays_per_patient(axes[0, 0], patient_stats)
    _plot_total_days_per_patient(axes[0, 1], patient_stats)
    _plot_length_of_stay(axes[1, 0], df)
    _draw_summary_table(axes[1, 1], _build_summary_rows(df, patient_stats))

    # Layout, saving and closing are bound to `fig` explicitly instead of
    # going through pyplot's implicit "current figure".
    fig.tight_layout()

    if output_path is not None:
        output_file = output_path / 'icu_statistics.png'
        fig.savefig(output_file, dpi=300, bbox_inches='tight')
        print(f"\nPlot saved to: {output_file}")

    if return_figure:
        return fig, patient_stats

    plt.close(fig)
    return patient_stats


In [None]:
# Load and analyze data
# The path is relative, so it is resolved against the current working
# directory when the CSV is opened.
icustays_path = "data/labeling/labels_csv/icustays.csv"
# Prints headline statistics and keeps the stays DataFrame for the plotting cell.
df = load_and_analyze_icu_data(icustays_path)


In [None]:
# Build the figure without saving it to disk and render it inline.
fig, patient_stats = create_plots(df, return_figure=True)
plt.show()

# Print a banner followed by the full per-patient table.
# NOTE(review): to_string() emits one line per patient, which may be very
# long for a large cohort - confirm this is intended.
print("\n" + "=" * 60, "PATIENT STATISTICS", "=" * 60, sep="\n")
print(patient_stats.to_string())
