In [33]:
# === Cell 0: Paths & Imports ===
import os, re, sqlite3
import pandas as pd
import numpy as np

# ▼▼▼ 여기만 환경에 맞게 수정 ▼▼▼
DB_PATH = "/content/drive/MyDrive/Colab Notebooks/MIMIC4-hosp-icu.db"   # SQLite 파일 경로
COHORT_CSV = "/content/drive/MyDrive/Colab Notebooks/cohort.csv"        # 팀원이 만든 코호트 CSV
SAVE_PATH  = "/content/drive/MyDrive/Colab Notebooks/final_cohort_with_timevars.csv"  # 결과 저장 경로
# ▲▲▲ 여기만 수정 ▲▲▲
conn = sqlite3.connect(f"file:{DB_PATH}?mode=ro", uri=True)  # 읽기 전용
conn.execute("PRAGMA journal_mode=OFF;")
conn.execute("PRAGMA synchronous=OFF;")
conn.execute("PRAGMA temp_store=MEMORY;")
conn.execute("PRAGMA cache_size=-500000;")  # 약 500MB 캐시
conn.commit()

pd.options.display.max_rows = 10
pd.options.display.max_columns = 200
print("Paths set.\nDB:", DB_PATH, "\nCOHORT:", COHORT_CSV)


Paths set.
DB: /content/drive/MyDrive/Colab Notebooks/MIMIC4-hosp-icu.db 
COHORT: /content/drive/MyDrive/Colab Notebooks/cohort.csv


In [34]:
# === Cell 1: Load cohort.csv ===
cohort = pd.read_csv(COHORT_CSV)

# 시간 컬럼 자동 파싱
for c in cohort.columns:
    if any(k in c.lower() for k in ["time","date","intime","outtime","admit","disch","ecg"]):
        cohort[c] = pd.to_datetime(cohort[c], errors="coerce")

print("cohort shape:", cohort.shape)
display(cohort.head())
print("\nColumns:", list(cohort.columns))

cohort shape: (465, 11)


Unnamed: 0,subject_id,hadm_id,stay_id,intime,outtime,gender,race,arrival_transport,disposition,first_ecg_time,door_to_ecg_min
0,10013310,27682188,35160955,2153-05-06 10:21:00,2153-05-06 18:28:00,F,BLACK/AFRICAN,AMBULANCE,ADMITTED,2153-05-06 10:25:00,1970-01-01 00:00:00.000000004
1,10033552,26487381,32542054,2136-02-29 22:09:00,2136-03-01 02:26:37,F,OTHER,AMBULANCE,ADMITTED,2136-02-29 22:21:00,1970-01-01 00:00:00.000000012
2,10055344,29209451,39410952,2171-10-31 14:50:00,2171-10-31 19:38:25,M,BLACK/CAPE VERDEAN,UNKNOWN,OTHER,2171-10-31 17:59:00,1970-01-01 00:00:00.000000189
3,10096420,25396519,39083351,2204-07-16 12:11:00,2204-07-16 13:58:56,M,WHITE,AMBULANCE,OTHER,2204-07-16 12:19:00,1970-01-01 00:00:00.000000008
4,10130348,24286651,38674082,2197-05-27 06:54:00,2197-05-27 10:41:05,F,BLACK/AFRICAN AMERICAN,WALK IN,ADMITTED,2197-05-27 07:07:00,1970-01-01 00:00:00.000000013



Columns: ['subject_id', 'hadm_id', 'stay_id', 'intime', 'outtime', 'gender', 'race', 'arrival_transport', 'disposition', 'first_ecg_time', 'door_to_ecg_min']


In [35]:
# === Cell 2: admissions merge & Boarding Delay ===
conn = sqlite3.connect(DB_PATH)

admissions = pd.read_sql("""
    SELECT subject_id, hadm_id, admittime, dischtime, admission_location
    FROM admissions
""", conn, parse_dates=["admittime","dischtime"])

conn.close()

cohort2 = cohort.merge(
    admissions, on=["subject_id","hadm_id"], how="left"
)

# Boarding Delay = (admittime - outtime) [분]
cohort2["boarding_delay_min"] = (cohort2["admittime"] - cohort2["outtime"]).dt.total_seconds() / 60.0

print("After admissions merge:", cohort2.shape)
display(cohort2[["subject_id","hadm_id","intime","outtime","admittime","boarding_delay_min"]].head())

After admissions merge: (465, 15)


Unnamed: 0,subject_id,hadm_id,intime,outtime,admittime,boarding_delay_min
0,10013310,27682188,2153-05-06 10:21:00,2153-05-06 18:28:00,2153-05-06 18:03:00,-25.0
1,10033552,26487381,2136-02-29 22:09:00,2136-03-01 02:26:37,2136-03-01 02:25:00,-1.616667
2,10055344,29209451,2171-10-31 14:50:00,2171-10-31 19:38:25,2171-10-31 19:37:00,-1.416667
3,10096420,25396519,2204-07-16 12:11:00,2204-07-16 13:58:56,2204-07-16 13:58:00,-0.933333
4,10130348,24286651,2197-05-27 06:54:00,2197-05-27 10:41:05,2197-05-27 10:40:00,-1.083333


In [36]:
# === Cell 3C: Lab TAT without to_sql — batched read + Python window filter ===
import sqlite3, re, numpy as np, pandas as pd

conn = sqlite3.connect(DB_PATH)

# 1) 타겟 라벨 → itemid
d_labitems = pd.read_sql("SELECT itemid, label FROM d_labitems", conn)
lab_targets = d_labitems[d_labitems["label"].str.contains(r"(?:troponin|ck[-\s]?mb)", flags=re.I, regex=True, na=False)].copy()
if lab_targets.empty:
    lab_targets = pd.DataFrame({"itemid":[51002,51003,50908,52642,51580]})
target_itemids = sorted(set(lab_targets["itemid"].tolist()))
print("Lab target itemids:", target_itemids)

# 2) 코호트 시간창 준비 (ED intime ~ admit+24h)
tmp = cohort2.dropna(subset=["intime","admittime"])[["subject_id","hadm_id","intime","admittime"]].drop_duplicates().copy()
tmp["win_start"] = tmp["intime"]
tmp["win_end"]   = tmp["admittime"] + pd.Timedelta(hours=24)

# 3) hadm_id 배치로 나눠서 labevents 읽기 (SQL에 시간창 조건 안걸고 Python에서 필터)
hadms = tmp["hadm_id"].dropna().unique().tolist()
hadms.sort()
BATCH = 1500  # 환경에 맞게 500~3000 사이 조절

all_parts = []
for i in range(0, len(hadms), BATCH):
    batch = hadms[i:i+BATCH]
    placeholders = ",".join(["?"]*len(batch))
    sql = f"""
        SELECT subject_id, hadm_id, charttime, storetime, itemid
        FROM labevents
        WHERE itemid IN ({",".join(map(str, target_itemids))})
          AND hadm_id IN ({placeholders})
    """
    le = pd.read_sql(sql, conn, params=batch, parse_dates=["charttime","storetime"])
    if le.empty:
        print(f"batch {i//BATCH+1}: rows=0")
        continue

    # 윈도우 조인(파이썬 병합 후 시간필터)
    merged = le.merge(tmp[["subject_id","hadm_id","win_start","win_end"]],
                      on=["subject_id","hadm_id"], how="inner")

    in_win = merged[
        (merged["charttime"].between(merged["win_start"], merged["win_end"])) |
        (merged["storetime"].between(merged["win_start"], merged["win_end"]))
    ].copy()

    if not in_win.empty:
        in_win["lab_tat_min"] = (in_win["storetime"] - in_win["charttime"]).dt.total_seconds()/60.0
        all_parts.append(in_win[["subject_id","hadm_id","lab_tat_min"]])

    print(f"batch {i//BATCH+1}: rows={len(le)}, in_window={0 if in_win is None else len(in_win)}")

conn.close()

if all_parts:
    lab_rows = pd.concat(all_parts, ignore_index=True)
    lab_tat  = lab_rows.groupby(["subject_id","hadm_id"], as_index=False)["lab_tat_min"].median()
else:
    lab_rows = pd.DataFrame(columns=["subject_id","hadm_id","lab_tat_min"])
    lab_tat  = lab_rows.copy()

print("Lab TAT available hadm:", len(lab_tat))
display(lab_tat.head())
labtat_df = lab_tat.copy()  # cols: subject_id, hadm_id, lab_tat_min

Lab target itemids: [50908, 51002, 51003, 51580, 52642]
batch 1: rows=2007, in_window=1390
Lab TAT available hadm: 433


Unnamed: 0,subject_id,hadm_id,lab_tat_min
0,10013310,27682188,61.0
1,10033552,26487381,107.0
2,10055344,29209451,142.5
3,10096420,25396519,142.0
4,10130348,24286651,90.5


In [37]:
# === Cell 4: Door-to-Antithrombotic (IV) — batched, no writes ===
import re, sqlite3

conn = sqlite3.connect(f"file:{DB_PATH}?mode=ro", uri=True)

# 코호트의 hadm 목록 (ED+admission이 붙은 cohort2 가정)
hadms = cohort2["hadm_id"].dropna().unique().tolist()
hadms.sort()

BATCH = 2000  # 필요시 500~5000 범위에서 조절
parts = []

# 대표 키워드 (필요시 병원 약물명 추가)
drug_keywords = [
    r"\bheparin\b", r"\bbivalirudin\b", r"\beptifibatide\b",
    r"\babciximab\b", r"\btirofiban\b", r"\bclopidogrel\b", r"\baspirin\b"
]
pat = re.compile("|".join(drug_keywords), flags=re.I)

for i in range(0, len(hadms), BATCH):
    batch = hadms[i:i+BATCH]
    placeholders = ",".join(["?"]*len(batch))
    sql = f"""
        SELECT subject_id, hadm_id, starttime, drug, route
        FROM prescriptions
        WHERE hadm_id IN ({placeholders})
          AND starttime IS NOT NULL
    """
    df = pd.read_sql(sql, conn, params=batch, parse_dates=["starttime"])
    if df.empty:
        print(f"rx batch {i//BATCH+1}: rows=0")
        continue

    df["drug_lc"] = df["drug"].astype(str).str.lower()
    df["route_lc"] = df["route"].astype(str).str.lower()

    # IV + 항혈전/항혈소판 후보
    cand = df[
        df["drug_lc"].str.contains(pat, na=False) &
        df["route_lc"].str.contains(r"\biv\b|intravenous", na=False)
    ].copy()

    if cand.empty:
        print(f"rx batch {i//BATCH+1}: rows={len(df)}, hits=0")
        continue

    # ED intime과 병합해 시간차 계산
    cand = cand.merge(cohort2[["subject_id","hadm_id","intime"]],
                      on=["subject_id","hadm_id"], how="inner")
    cand["door_to_antithrombotic_min"] = (cand["starttime"] - cand["intime"]).dt.total_seconds()/60.0

    # hadm별 최소(가장 빠른 투여)
    g = cand.groupby(["subject_id","hadm_id"], as_index=False)["door_to_antithrombotic_min"].min()
    parts.append(g)
    print(f"rx batch {i//BATCH+1}: rows={len(df)}, hits={len(cand)}, hadm_with_IV={len(g)}")

conn.close()

if parts:
    anti_time = pd.concat(parts, ignore_index=True)
else:
    anti_time = pd.DataFrame(columns=["subject_id","hadm_id","door_to_antithrombotic_min"])

print("Antithrombotic (IV) available hadm:", len(anti_time))
display(anti_time.head())
anti_df = anti_time.copy()  # cols: subject_id, hadm_id, door_to_antithrombotic_min

rx batch 1: rows=25495, hits=1003, hadm_with_IV=301
Antithrombotic (IV) available hadm: 301


Unnamed: 0,subject_id,hadm_id,door_to_antithrombotic_min
0,10055344,29209451,550.0
1,10096420,25396519,229.0
2,10130348,24286651,546.0
3,10174935,23150740,327.0
4,10185295,25419883,520.0


In [38]:
# === Cell 5 (Improved): Door-to-Cath proxy with ICD prefilter + stronger regex ===
import re, sqlite3
import pandas as pd

# ---- 옵션 플래그 ----
USE_PROC_ICD_PREFILTER = True   # True면 procedures_icd로 cath-가능 hadm 먼저 제한
INCLUDE_CABG_AS_CATH   = False  # True면 CABG도 '시술 개시'로 인정(보조분석용 권장)

# ---- 정규식(라벨/유닛) 보강 ----
# procedureevents.label 후보
label_patterns = [
    r"\bcath\b", r"cath\s*lab", r"catheter(?:ization)?",
    r"coronary", r"angiograph", r"angioplast", r"\bpci\b", r"\bstent\b",
    r"interventional cardiology", r"left heart cath", r"right heart cath",
]
if INCLUDE_CABG_AS_CATH:
    label_patterns += [r"\bcabg\b", r"bypass(?: graft)?"]

kw_label = re.compile("|".join(label_patterns), flags=re.I)

# transfers.careunit 후보
unit_patterns = [
    r"\bcath\b", r"cath\s*lab", r"cardiac cath", r"cardiac catheteriz",
    r"interventional", r"\bccl\b", r"ep/cath", r"coronary angiography",
]
kw_unit = re.compile("|".join(unit_patterns), flags=re.I)

# ---- 0) procedures_icd로 cath/PCI/angio 가능 hadm 먼저 좁히기(선택) ----
def get_likely_cath_hadm(conn, cohort_hadm_df):
    proc_icd = pd.read_sql("""
        SELECT subject_id, hadm_id, icd_version, icd_code
        FROM procedures_icd
        WHERE hadm_id IN (SELECT DISTINCT hadm_id FROM diagnoses_icd)
    """, conn)
    proc_icd["code_clean"] = (proc_icd["icd_code"].astype(str)
                              .str.replace(".","", regex=False)
                              .str.upper())

    # ICD-9: PCI/관상동맥 조영/확장
    mask9 = (proc_icd["icd_version"]==9) & (
        proc_icd["code_clean"].str.startswith(("360","0066","8855"))  # 36.0x(PCI), 00.66(관상동맥 스텐트), 88.55(관상동맥 조영)
    )
    # ICD-10-PCS: 조영(B211/B212), PCI(0270~0273*)
    mask10 = (proc_icd["icd_version"]==10) & (
        proc_icd["code_clean"].str.match(r"(B211|B212|0270|0271|0272|0273)", na=False)
    )

    if INCLUDE_CABG_AS_CATH:
        # ICD-9 CABG 36.1x, ICD-10-PCS 0210~ 등(단, 여기는 간단히 021로 전방일치)
        mask9  = mask9  | ((proc_icd["icd_version"]==9)  & proc_icd["code_clean"].str.startswith("361"))
        mask10 = mask10 | ((proc_icd["icd_version"]==10) & proc_icd["code_clean"].str.startswith("021"))

    likely = proc_icd[mask9 | mask10][["subject_id","hadm_id"]].drop_duplicates()
    likely = likely.merge(cohort_hadm_df[["subject_id","hadm_id"]].drop_duplicates(),
                          on=["subject_id","hadm_id"], how="inner")
    return likely

conn = sqlite3.connect(f"file:{DB_PATH}?mode=ro", uri=True)

if USE_PROC_ICD_PREFILTER:
    likely_cath = get_likely_cath_hadm(conn, cohort2)
    hadms = likely_cath["hadm_id"].unique().tolist()
    print("procedures_icd 기반 cath-가능 hadm:", len(hadms))
else:
    hadms = cohort2["hadm_id"].dropna().unique().tolist()

hadms.sort()

# ---- d_items 라벨 준비(있으면 merge) ----
try:
    d_items = pd.read_sql("SELECT itemid, label FROM d_items", conn)
    d_items["label_lc"] = d_items["label"].astype(str).str.lower()
except Exception:
    d_items = pd.DataFrame(columns=["itemid","label","label_lc"])

# ---- A) procedureevents 기반 탐색 ----
BATCH = 2000
proc_parts = []

for i in range(0, len(hadms), BATCH):
    batch = hadms[i:i+BATCH]
    placeholders = ",".join(["?"]*len(batch))
    sql = f"""
        SELECT subject_id, hadm_id, starttime, endtime, itemid
        FROM procedureevents
        WHERE hadm_id IN ({placeholders})
          AND starttime IS NOT NULL
    """
    pe = pd.read_sql(sql, conn, params=batch, parse_dates=["starttime","endtime"])
    if pe.empty:
        print(f"proc batch {i//BATCH+1}: rows=0")
        continue

    if not d_items.empty:
        pe = pe.merge(d_items[["itemid","label_lc"]], on="itemid", how="left")
    else:
        pe["label_lc"] = ""

    pe_cath = pe[ pe["label_lc"].str.contains(kw_label, na=False) ].copy()
    if pe_cath.empty:
        print(f"proc batch {i//BATCH+1}: rows={len(pe)}, cath=0")
        continue

    pe_cath = pe_cath.merge(cohort2[["subject_id","hadm_id","intime"]],
                            on=["subject_id","hadm_id"], how="inner")
    pe_cath["door_to_cath_min_A"] = (pe_cath["starttime"] - pe_cath["intime"]).dt.total_seconds()/60.0
    gA = pe_cath.groupby(["subject_id","hadm_id"], as_index=False)["door_to_cath_min_A"].min()
    proc_parts.append(gA)
    print(f"proc batch {i//BATCH+1}: rows={len(pe)}, cath={len(pe_cath)}, hadm_A={len(gA)}")

cath_A = (pd.concat(proc_parts, ignore_index=True)
          if proc_parts else
          pd.DataFrame(columns=["subject_id","hadm_id","door_to_cath_min_A"]))

# ---- B) transfers 기반 탐색 ----
trf_parts = []
for i in range(0, len(hadms), BATCH):
    batch = hadms[i:i+BATCH]
    placeholders = ",".join(["?"]*len(batch))
    sql = f"""
        SELECT subject_id, hadm_id, intime, outtime, careunit
        FROM transfers
        WHERE hadm_id IN ({placeholders})
          AND intime IS NOT NULL
    """
    tr = pd.read_sql(sql, conn, params=batch, parse_dates=["intime","outtime"])
    if tr.empty:
        print(f"trf batch {i//BATCH+1}: rows=0")
        continue

    tr["careunit_lc"] = tr["careunit"].astype(str).str.lower()
    tr_cath = tr[ tr["careunit_lc"].str.contains(kw_unit, na=False) ].copy()
    if tr_cath.empty:
        print(f"trf batch {i//BATCH+1}: rows={len(tr)}, cath_unit=0")
        continue

    tr_cath = tr_cath.merge(cohort2[["subject_id","hadm_id","intime"]],
                            on=["subject_id","hadm_id"], how="inner", suffixes=("","_ed"))
    tr_cath["door_to_cath_min_B"] = (tr_cath["intime"] - tr_cath["intime_ed"]).dt.total_seconds()/60.0
    gB = tr_cath.groupby(["subject_id","hadm_id"], as_index=False)["door_to_cath_min_B"].min()
    trf_parts.append(gB)
    print(f"trf batch {i//BATCH+1}: rows={len(tr)}, cath_unit={len(tr_cath)}, hadm_B={len(gB)}")

conn.close()

cath_B = (pd.concat(trf_parts, ignore_index=True)
          if trf_parts else
          pd.DataFrame(columns=["subject_id","hadm_id","door_to_cath_min_B"]))

# ---- A 우선, 없으면 B로 보완 ----
door_to_cath = (cohort2[["subject_id","hadm_id"]]
                .drop_duplicates()
                .merge(cath_A, on=["subject_id","hadm_id"], how="left")
                .merge(cath_B, on=["subject_id","hadm_id"], how="left"))

door_to_cath["door_to_cath_min"] = door_to_cath["door_to_cath_min_A"].combine_first(
    door_to_cath["door_to_cath_min_B"]
)

door_to_cath = door_to_cath[["subject_id","hadm_id","door_to_cath_min",
                             "door_to_cath_min_A","door_to_cath_min_B"]]

print("Door-to-Cath (A-only):", door_to_cath["door_to_cath_min_A"].notna().sum())
print("Door-to-Cath (B-only):", door_to_cath["door_to_cath_min_B"].notna().sum())
print("Door-to-Cath (A or B):", door_to_cath["door_to_cath_min"].notna().sum())
display(door_to_cath.head())
cath_df = door_to_cath.copy()  # cols: subject_id, hadm_id, door_to_cath_min(+_A,_B)

procedures_icd 기반 cath-가능 hadm: 464
proc batch 1: rows=2390, cath=117, hadm_A=75
trf batch 1: rows=2029, cath_unit=0
Door-to-Cath (A-only): 75
Door-to-Cath (B-only): 0
Door-to-Cath (A or B): 75


Unnamed: 0,subject_id,hadm_id,door_to_cath_min,door_to_cath_min_A,door_to_cath_min_B
0,10013310,27682188,484.0,484.0,
1,10033552,26487381,,,
2,10055344,29209451,,,
3,10096420,25396519,,,
4,10130348,24286651,,,


In [39]:
# === Cell 5.5: Boarding Delay (ED outtime → admissions.admittime) — cohort2 기반 ===
import numpy as np
import pandas as pd

# cohort2에는 최소한 아래 컬럼이 있어야 합니다:
# ['subject_id','hadm_id','intime','outtime','admittime']

base_df = cohort2.copy()

# datetime 보정 (이미 datetime이면 영향 없음)
for c in ["intime", "outtime", "admittime"]:
    if c in base_df.columns and not np.issubdtype(base_df[c].dtype, np.datetime64):
        base_df[c] = pd.to_datetime(base_df[c], errors="coerce")

# 계산: boarding_delay_min = (admittime - outtime) [분]
boarding_df = (
    base_df[["subject_id", "hadm_id", "outtime", "admittime"]]
      .dropna(subset=["outtime", "admittime"])
      .copy()
)

boarding_df["boarding_delay_min"] = (
    (boarding_df["admittime"] - boarding_df["outtime"]).dt.total_seconds() / 60.0
)

# 품질 관리(QC): 0~24시간(1440분)만 채택 (원하면 48h로 완화 가능)
boarding_df = boarding_df[
    (boarding_df["boarding_delay_min"] >= 0) &
    (boarding_df["boarding_delay_min"] <= 24 * 60)
].copy()

# 동일 hadm 다수 ED-stay가 있으면 가장 가까운 값(최소 delay) 1건만 채택
boarding_df.sort_values(["hadm_id", "boarding_delay_min"], inplace=True)
boarding_df = boarding_df.drop_duplicates(subset=["subject_id", "hadm_id"], keep="first")

print(f"Boarding Delay available hadm: {len(boarding_df)}")
display(boarding_df.head())

Boarding Delay available hadm: 1


Unnamed: 0,subject_id,hadm_id,outtime,admittime,boarding_delay_min
438,19565020,28427129,2135-02-13 05:46:00,2135-02-13 06:55:00,69.0


In [40]:
# === Cell 6: 시간변수 병합 & 요약(있는 것만 안전 병합) ===
want_cols = ["subject_id","hadm_id","intime","outtime","admittime"]
out = cohort2[want_cols].drop_duplicates().copy()

merge_plan = [
    ("labtat_df",  "lab_tat_min"),
    ("anti_df",    "door_to_antithrombotic_min"),
    ("cath_df",    "door_to_cath_min"),
    ("boarding_df","boarding_delay_min"),
]

for df_name, col in merge_plan:
    df_obj = globals().get(df_name, None)
    if isinstance(df_obj, pd.DataFrame) and col in df_obj.columns:
        out = out.merge(df_obj[["subject_id","hadm_id", col]], on=["subject_id","hadm_id"], how="left")
        print(f"✓ merged: {df_name}.{col} (n={df_obj[col].notna().sum()})")
    else:
        print(f"↪︎ skip: {df_name} (없거나 '{col}' 컬럼 없음)")

print("\nMerged shape:", out.shape)
display(out.head())

def summarize(col, label):
    if col not in out.columns:
        print(f"{label} -> (컬럼 없음)")
        return
    s = out[col].dropna()
    if len(s)==0:
        print(f"{label} -> n=0")
        return
    print(f"{label} -> n={len(s)}, median={s.median():.1f}, p75={s.quantile(0.75):.1f}, p90={s.quantile(0.90):.1f}")

print("\n=== Summary (minutes) ===")
summarize("lab_tat_min",                "Lab TAT")
summarize("door_to_antithrombotic_min", "Door→Antithrombotic")
summarize("door_to_cath_min",           "Door→Cath")
summarize("boarding_delay_min",         "Boarding Delay")

# KPI (Door→Cath)이 있으면만
if "door_to_cath_min" in out.columns:
    d2c = out["door_to_cath_min"].dropna()
    if len(d2c)>0:
        print(f"\nKPI) Door-to-Cath >  90 min: {(d2c>90).mean()*100:.1f}%")
        print(f"KPI) Door-to-Cath > 120 min: {(d2c>120).mean()*100:.1f}%")

# 저장
out.to_csv(SAVE_PATH, index=False)
print("\nSaved to:", SAVE_PATH)

✓ merged: labtat_df.lab_tat_min (n=433)
✓ merged: anti_df.door_to_antithrombotic_min (n=301)
✓ merged: cath_df.door_to_cath_min (n=75)
✓ merged: boarding_df.boarding_delay_min (n=1)

Merged shape: (465, 9)


Unnamed: 0,subject_id,hadm_id,intime,outtime,admittime,lab_tat_min,door_to_antithrombotic_min,door_to_cath_min,boarding_delay_min
0,10013310,27682188,2153-05-06 10:21:00,2153-05-06 18:28:00,2153-05-06 18:03:00,61.0,,484.0,
1,10033552,26487381,2136-02-29 22:09:00,2136-03-01 02:26:37,2136-03-01 02:25:00,107.0,,,
2,10055344,29209451,2171-10-31 14:50:00,2171-10-31 19:38:25,2171-10-31 19:37:00,142.5,550.0,,
3,10096420,25396519,2204-07-16 12:11:00,2204-07-16 13:58:56,2204-07-16 13:58:00,142.0,229.0,,
4,10130348,24286651,2197-05-27 06:54:00,2197-05-27 10:41:05,2197-05-27 10:40:00,90.5,546.0,,



=== Summary (minutes) ===
Lab TAT -> n=434, median=87.2, p75=108.9, p90=157.3
Door→Antithrombotic -> n=302, median=363.0, p75=655.5, p90=2052.7
Door→Cath -> n=75, median=402.0, p75=1839.0, p90=4666.2
Boarding Delay -> n=2, median=69.0, p75=69.0, p90=69.0

KPI) Door-to-Cath >  90 min: 97.3%
KPI) Door-to-Cath > 120 min: 97.3%

Saved to: /content/drive/MyDrive/Colab Notebooks/final_cohort_with_timevars.csv
