In [1]:
# Mount Google Drive at /content/drive (requires the Colab runtime,
# since `google.colab` is only importable there).
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
import os

# Locations of the two input CSV files on the mounted Drive
ver5_path = "/content/drive/MyDrive/cohort_ver5_unity.csv"
ver6_path = "/content/drive/MyDrive/cohort_ver6_icu_ed.csv"

# Read both files into DataFrames (ver5 first, then ver6)
ver5, ver6 = (pd.read_csv(csv_path) for csv_path in (ver5_path, ver6_path))

# Candidate join keys; only the ones present in BOTH frames are used.
KEY_PRIORITY = ["subject_id", "hadm_id", "stay_id", "icustay_id", "edstay_id", "patient_id"]
key_cols = [
    name
    for name in KEY_PRIORITY
    if name in ver5.columns and name in ver6.columns
]

use_merge = bool(key_cols)

if not use_merge:
    # No shared key: start from ver5 and add ver6-only columns as all-missing.
    merged = ver5.copy()
    for extra in ver6.columns:
        if extra not in merged.columns:
            merged[extra] = pd.NA
else:
    # Left merge keeps every ver5 row; clashing ver6 columns get a "_ver6" suffix.
    merged = ver5.merge(ver6, on=key_cols, how="left", suffixes=("", "_ver6"))

    # For columns present in both frames, keep the ver5 value and fall back
    # to the ver6 value only where ver5 is missing, then drop the suffixed copy.
    overlap_nonkeys = [
        name
        for name in ver5.columns
        if name in ver6.columns and name not in key_cols
    ]
    for name in overlap_nonkeys:
        suffixed = name + "_ver6"
        if suffixed not in merged.columns:
            continue
        has_ver5_value = merged[name].notna()
        merged[name] = merged[name].where(has_ver5_value, merged[suffixed])
        merged.drop(columns=[suffixed], inplace=True)

# Append the ver6 rows that the ver5-based left merge could not bring in:
# rows with a missing key value, and rows whose key combination is absent
# from ver5.
if use_merge:
    ver6_key_na = ver6[key_cols].isna().any(axis=1)
    ver6_na_part = ver6[ver6_key_na]
    v6_non_na = ver6[~ver6_key_na]

    # Probe with the key columns only. ver5's keys are de-duplicated, so the
    # left merge yields exactly one row per v6_non_na row, in the same order.
    ver5_keys_only = ver5[key_cols].drop_duplicates()
    probe = v6_non_na[key_cols].merge(
        ver5_keys_only, on=key_cols, how="left", indicator=True
    )

    # merge() returns a fresh 0..n-1 index, whereas v6_non_na still carries
    # ver6's original labels (with gaps once NA-key rows are removed).
    # Applying the mask as a Series would align on those labels and pick the
    # wrong rows or raise; a NumPy array is applied strictly by position.
    unmatched_mask = probe["_merge"].eq("left_only").to_numpy()
    v6_unmatched = v6_non_na[unmatched_mask]

    to_append = pd.concat([ver6_na_part, v6_unmatched], axis=0, ignore_index=True)

    if not to_append.empty:
        # reindex adds any column missing from to_append as NaN and puts the
        # columns in merged's order in a single step.
        to_append = to_append.reindex(columns=merged.columns)
        merged = pd.concat([merged, to_append], axis=0, ignore_index=True)
else:
    # No shared key: stack every ver6 row underneath the ver5 rows.
    for col in ver6.columns:
        if col not in merged.columns:
            merged[col] = pd.NA
    ver6_aligned = ver6.reindex(columns=merged.columns)
    merged = pd.concat([merged, ver6_aligned], axis=0, ignore_index=True)

# Sort by the join keys. This is best effort: a failed sort must not stop the
# result from being saved, but the reason is reported instead of being hidden.
if key_cols:
    try:
        merged = merged.sort_values(by=key_cols).reset_index(drop=True)
    except Exception as exc:
        # Keep the unsorted frame and tell the user why sorting was skipped.
        print("Warning: could not sort by", key_cols, "-", repr(exc))

# Write the combined table back to Drive (no index column)
out_path = "/content/drive/MyDrive/cohort_ver7_add_no_ecg.csv"
merged.to_csv(out_path, index=False)

# Report the output location, the row counts and the keys that were used
print("Saved:", out_path)
for label, frame in (
    ("ver5 rows:", ver5),
    ("ver6 rows:", ver6),
    ("merged rows (after append):", merged),
):
    print(label, len(frame))
print("used keys:", key_cols or "no key; appended ver6 entirely")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Saved: /content/drive/MyDrive/cohort_ver7_add_no_ecg.csv
ver5 rows: 465
ver6 rows: 836
merged rows (after append): 836
used keys: ['subject_id', 'hadm_id', 'stay_id']


  merged = pd.concat([merged, to_append], axis=0, ignore_index=True)
