# ECG ICU Location Analysis

Analyze how many ECGs were actually taken in the ICU versus other locations (emergency department, hospital).


In [None]:
# Setup
import sys
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Make project modules importable. The project root is taken to be the
# directory two levels above the current working directory.
project_root = Path().resolve().parent.parent
sys.path[:0] = [str(project_root)]  # prepend so it wins over other entries

# Plot defaults shared by every figure in this notebook
sns.set_style("whitegrid")
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 11,
})


In [None]:
# Load filtered data
#
# The CSV location is written relative to the project root, but the notebook
# may be launched from its own folder (the setup cell treats the directory two
# levels above the working directory as the project root). Try the path as
# written first, then one and two levels up, so either launch location works.
csv_relative = Path("data/labeling/labels_csv/records_w_diag_icd10_filtered_icu.csv")
launch_dir = Path.cwd()
candidate_paths = [
    csv_relative,
    launch_dir.parent / csv_relative,
    launch_dir.parent.parent / csv_relative,
]

# Fall back to the plain relative path when nothing matches, so the
# "File not found" message still shows the location the notebook expects.
filtered_path = next((p for p in candidate_paths if p.exists()), csv_relative)

if filtered_path.exists():
    filtered = pd.read_csv(filtered_path)
    print(f"Loaded {len(filtered):,} filtered records")
else:
    # Later cells check for None and skip their analysis.
    print(f"File not found: {filtered_path}")
    filtered = None


In [None]:
# Analyze location columns
#
# Summarises the per-ECG location flags in `filtered`: prints how many rows
# are flagged for each location, draws one bar per available flag column, and
# prints the first rows of a few columns. The whole cell is skipped when the
# load cell left `filtered` as None.
if filtered is not None:
    total_records = len(filtered)

    # Check available columns.
    # This is a plain substring match, so any column whose name contains
    # "ed", "hosp" or "icu" is listed, not only the flag columns used below.
    location_cols = [
        c for c in filtered.columns
        if any(key in c.lower() for key in ('ed', 'hosp', 'icu'))
    ]
    print("Available location columns:")
    print(location_cols)

    # (flag column, chart label, printed prefix, bar colour).
    # The colour is tied to the location so it stays the same even when an
    # earlier column is absent from the data.
    location_specs = [
        ('ecg_taken_in_ed', 'ED', '\nECGs taken in ED', 'steelblue'),
        ('ecg_taken_in_hosp', 'Hospital', 'ECGs taken in hospital', 'coral'),
        ('ecg_taken_in_ed_or_hosp', 'ED or Hospital',
         'ECGs taken in ED or hospital', 'mediumseagreen'),
    ]

    # Analyze location data
    location_stats = {}   # chart label -> number of flagged rows
    location_pcts = {}    # chart label -> share of all rows, in percent
    bar_colors = []
    for column, label, prefix, color in location_specs:
        if column not in filtered.columns:
            continue
        # Assumes the flag columns are boolean / 0-1 so the sum is a row
        # count -- TODO confirm against the labeling pipeline.
        flagged = filtered[column].sum()
        # An empty frame has nothing to take a share of; report 0% rather
        # than dividing by zero.
        share = flagged / total_records * 100 if total_records else 0.0
        location_stats[label] = flagged
        location_pcts[label] = share
        bar_colors.append(color)
        print(f"{prefix}: {flagged:,} ({share:.1f}%)")

    # Visualize
    if location_stats:
        fig, ax = plt.subplots(figsize=(10, 6))
        locations = list(location_stats)
        counts = list(location_stats.values())
        percentages = [location_pcts[name] for name in locations]

        bars = ax.bar(locations, counts, color=bar_colors, alpha=0.7)
        ax.set_ylabel('Number of ECGs', fontsize=12)
        ax.set_title('ECG Location Distribution', fontsize=14, fontweight='bold')
        ax.grid(axis='y', alpha=0.3)

        # Add value labels just above each bar
        for bar, count, pct in zip(bars, counts, percentages):
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width() / 2., height,
                    f'{count:,}\n({pct:.1f}%)',
                    ha='center', va='bottom', fontsize=11)

        plt.tight_layout()
        plt.show()

    # Show sample data
    print("\n" + "="*60)
    print("SAMPLE DATA")
    print("="*60)
    id_cols = ['subject_id', 'study_id', 'ecg_time']
    flag_cols = ['ecg_taken_in_ed', 'ecg_taken_in_hosp']
    # Only select columns that exist; indexing with a missing name would
    # raise KeyError and hide the rest of the sample.
    sample_cols = [c for c in id_cols + flag_cols if c in filtered.columns]
    missing_ids = [c for c in id_cols if c not in filtered.columns]
    if missing_ids:
        print(f"Columns not present, skipped: {missing_ids}")

    print(filtered[sample_cols].head(10))
