In [28]:
# Mount Google Drive at /content/drive; the later cells read the project
# files from paths underneath this mount point.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [29]:

import os

# Folder on the mounted Drive that the later cells load the cohort CSV from.
base_path = "/content/drive/MyDrive/Rahul_DTSC5082_Project/Scenario2_MIMIC/outputs"

print("Files in outputs/:")
# Bare last expression so the notebook renders the directory listing.
os.listdir(base_path)


Files in outputs/:


['RA_COHORT_FINAL.csv',
 'RA_COHORT_FINAL.parquet',
 'RA_COHORT_PHASE2_STRUCTURED.parquet',
 'RA_NOTES_CLEANED.parquet',
 'RA_NOTES_EMB_ClinicalBERT.parquet',
 'RA_NOTES_EMB_ClinicalBERT_meanpool.parquet',
 'RA_NOTES_EMB_BioBERT_meanpool.parquet',
 'RA_NOTES_EMB_BERTbase_meanpool.parquet',
 'RA_MODELING_BASE_ClinicalBERT.parquet',
 'RA_severity_labels.csv',
 'RA_MODELING_WITH_SEVERITY.csv']

In [30]:
import pandas as pd

file_path = "/content/drive/MyDrive/Rahul_DTSC5082_Project/Scenario2_MIMIC/outputs/RA_COHORT_FINAL.csv"

# Schema peek only: nrows=0 makes pandas parse just the header row, so the
# column names are listed without reading every record. The full load (with
# date parsing) is done by the "CELL 1: Load RA Cohort" cell, which is the
# only place that should define `df`.
cohort_columns = pd.read_csv(file_path, nrows=0).columns.tolist()

print("Number of columns:", len(cohort_columns))
cohort_columns


Number of columns: 44


['subject_id',
 'hadm_id',
 'admittime',
 'dischtime',
 'deathtime',
 'admission_type',
 'admit_provider_id',
 'admission_location',
 'discharge_location',
 'insurance',
 'language',
 'marital_status',
 'race',
 'edregtime',
 'edouttime',
 'hospital_expire_flag',
 'curr_service',
 'temp_max',
 'temp_min',
 'temp_mean',
 'hr_max',
 'hr_mean',
 'rr_max',
 'rr_mean',
 'o2_min',
 'o2_mean',
 'sbp_min',
 'dbp_min',
 'pain_max',
 'lab_max_CRP',
 'lab_max_Hematocrit',
 'lab_max_Hemoglobin',
 'lab_max_Platelets',
 'lab_max_WBC',
 'lab_mean_CRP',
 'lab_mean_Hematocrit',
 'lab_mean_Hemoglobin',
 'lab_mean_Platelets',
 'lab_mean_WBC',
 'lab_min_CRP',
 'lab_min_Hematocrit',
 'lab_min_Hemoglobin',
 'lab_min_Platelets',
 'lab_min_WBC']

In [31]:
# ==========================
# CELL 1: Load RA Cohort
# ==========================

import pandas as pd

file_path = "/content/drive/MyDrive/Rahul_DTSC5082_Project/Scenario2_MIMIC/outputs/RA_COHORT_FINAL.csv"

# Columns that hold timestamps; they are parsed to datetimes at load time so
# the timeline cells can sort on them directly.
datetime_columns = ['admittime', 'dischtime', 'deathtime', 'edregtime', 'edouttime']

df = pd.read_csv(file_path, parse_dates=datetime_columns, low_memory=False)

print("Loaded shape:", df.shape)
df.head()


Loaded shape: (15462, 44)


Unnamed: 0,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admit_provider_id,admission_location,discharge_location,insurance,...,lab_mean_CRP,lab_mean_Hematocrit,lab_mean_Hemoglobin,lab_mean_Platelets,lab_mean_WBC,lab_min_CRP,lab_min_Hematocrit,lab_min_Hemoglobin,lab_min_Platelets,lab_min_WBC
0,10002443,21329020,2183-10-17 23:20:00,2183-10-20 18:47:00,NaT,EW EMER.,P343TV,TRANSFER FROM HOSPITAL,HOME,Private,...,,40.866667,13.466667,266.0,16.533333,,37.3,12.3,219.0,13.0
1,10003203,25146996,2153-04-26 02:05:00,2153-04-29 14:19:00,NaT,EU OBSERVATION,P57BOT,EMERGENCY ROOM,,Medicare,...,,26.15,7.8,307.0,8.5,,23.3,7.6,296.0,8.4
2,10010718,29947356,2169-01-20 13:21:00,2169-01-27 14:20:00,NaT,OBSERVATION ADMIT,P50GUR,EMERGENCY ROOM,HOME HEALTH CARE,Medicare,...,,37.585714,12.285714,325.571429,10.014286,,34.7,11.3,274.0,7.8
3,10010997,20783870,2139-04-28 16:45:00,2139-05-02 12:20:00,NaT,OBSERVATION ADMIT,P756E2,TRANSFER FROM HOSPITAL,HOME,Private,...,,34.0,11.2,292.0,6.3,,34.0,11.2,292.0,6.3
4,10010997,20783870,2139-04-28 16:45:00,2139-05-02 12:20:00,NaT,OBSERVATION ADMIT,P756E2,TRANSFER FROM HOSPITAL,HOME,Private,...,,34.0,11.2,292.0,6.3,,34.0,11.2,292.0,6.3


In [32]:
# ==========================
# CELL 2: Select one patient
# ==========================

# Use the subject on the first cohort row; assign a different subject_id here
# to inspect another patient.
patient_id = df['subject_id'].iloc[0]

# Boolean mask of every cohort row that belongs to the chosen subject.
is_selected_patient = df['subject_id'].eq(patient_id)
patient_df = df.loc[is_selected_patient].copy()

print("Patient ID:", patient_id)
print("Rows for this patient:", len(patient_df))

patient_df.head()


Patient ID: 10002443
Rows for this patient: 1


Unnamed: 0,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admit_provider_id,admission_location,discharge_location,insurance,...,lab_mean_CRP,lab_mean_Hematocrit,lab_mean_Hemoglobin,lab_mean_Platelets,lab_mean_WBC,lab_min_CRP,lab_min_Hematocrit,lab_min_Hemoglobin,lab_min_Platelets,lab_min_WBC
0,10002443,21329020,2183-10-17 23:20:00,2183-10-20 18:47:00,NaT,EW EMER.,P343TV,TRANSFER FROM HOSPITAL,HOME,Private,...,,40.866667,13.466667,266.0,16.533333,,37.3,12.3,219.0,13.0


In [33]:
# ============================================
# CELL 3: Create timeline event list for patient
# ============================================

row = patient_df.iloc[0]

# (event label, timestamp column, always recorded?)
# Admission and discharge are always added; the other milestones are added
# only when their timestamp is present.
milestones = [
    ('ED Registration', 'edregtime', False),
    ('ED Departure', 'edouttime', False),
    ('Hospital Admission', 'admittime', True),
    ('Hospital Discharge', 'dischtime', True),
    ('Death', 'deathtime', False),
]

timeline = [
    {'event': label, 'time': row[column]}
    for label, column, always_recorded in milestones
    if always_recorded or pd.notna(row[column])
]

timeline_df = pd.DataFrame(timeline).sort_values("time")
timeline_df


Unnamed: 0,event,time
0,ED Registration,2183-10-17 22:11:00
2,Hospital Admission,2183-10-17 23:20:00
1,ED Departure,2183-10-18 00:47:00
3,Hospital Discharge,2183-10-20 18:47:00


In [34]:
# ============================================
# CELL 4: Add vitals alert events
# ============================================

r = row  # patient row (the lab-alert cell reads this alias too)

# Each entry pairs "did this vital cross its threshold?" with the label to
# record. Missing values never trigger because pd.notna() is checked first.
vital_checks = [
    (pd.notna(r['hr_max']) and r['hr_max'] > 100,
     f"High Heart Rate (HR={r['hr_max']})"),
    (pd.notna(r['o2_min']) and r['o2_min'] < 92,
     f"Low Oxygen (O2={r['o2_min']})"),
    (pd.notna(r['pain_max']) and r['pain_max'] >= 6,
     f"High Pain Score (Pain={r['pain_max']})"),
]

# Only summary values are available, so every alert is stamped with the
# admission time.
vital_events = [
    {'event': label, 'time': r['admittime']}
    for triggered, label in vital_checks
    if triggered
]

vitals_df = pd.DataFrame(vital_events)
vitals_df


In [35]:
# ============================================
# CELL 5: Add lab alert events
# ============================================

# (threshold crossed?, label) for each lab summary value; a missing value
# fails the pd.notna() guard and therefore never raises an alert.
lab_checks = [
    (pd.notna(r['lab_max_WBC']) and r['lab_max_WBC'] > 11,
     f"High WBC (max={r['lab_max_WBC']})"),
    (pd.notna(r['lab_min_Hemoglobin']) and r['lab_min_Hemoglobin'] < 10,
     f"Low Hemoglobin (min={r['lab_min_Hemoglobin']})"),
    (pd.notna(r['lab_min_Platelets']) and r['lab_min_Platelets'] < 150,
     f"Low Platelets (min={r['lab_min_Platelets']})"),
    (pd.notna(r['lab_max_CRP']) and r['lab_max_CRP'] > 10,
     f"High CRP (max={r['lab_max_CRP']})"),
]

# Alerts are anchored to the admission time, like the vitals alerts.
lab_events = [
    {'event': label, 'time': r['admittime']}
    for triggered, label in lab_checks
    if triggered
]

labs_df = pd.DataFrame(lab_events)
labs_df


Unnamed: 0,event,time
0,High WBC (max=20.7),2183-10-17 23:20:00


In [36]:
# ============================================
# CELL 6: Merge all timeline events
# ============================================

# Give missing or empty alert frames the shared (event, time) schema so they
# can still be inspected after this cell has run.
if 'vitals_df' not in globals() or vitals_df.empty:
    vitals_df = pd.DataFrame(columns=['event', 'time'])

if 'labs_df' not in globals() or labs_df.empty:
    labs_df = pd.DataFrame(columns=['event', 'time'])

# Concatenate only the frames that actually hold rows. Passing empty frames to
# pd.concat raises a FutureWarning in recent pandas, and once that deprecation
# lands an empty object-typed `time` column would influence the result dtype.
event_frames = [
    frame for frame in (timeline_df, vitals_df, labs_df) if not frame.empty
]

full_timeline = (
    pd.concat(event_frames, ignore_index=True)
    if event_frames
    else pd.DataFrame(columns=['event', 'time'])
)

# Stable sort: alerts share the admission timestamp, so ties must keep their
# concat order (milestones first, then vitals, then labs).
full_timeline = (
    full_timeline
    .sort_values("time", kind="mergesort")
    .reset_index(drop=True)
)

full_timeline


  full_timeline = pd.concat([


Unnamed: 0,event,time
0,ED Registration,2183-10-17 22:11:00
1,Hospital Admission,2183-10-17 23:20:00
2,High WBC (max=20.7),2183-10-17 23:20:00
3,ED Departure,2183-10-18 00:47:00
4,Hospital Discharge,2183-10-20 18:47:00


In [37]:
# ============================================
# CELL 7: Timeline generator function
# ============================================

# (event label, timestamp column, always recorded?) in insertion order.
# Admission and discharge are always added; the rest only when present.
_TIMELINE_MILESTONES = (
    ("ED Registration", 'edregtime', False),
    ("Hospital Admission", 'admittime', True),
    ("ED Departure", 'edouttime', False),
    ("Hospital Discharge", 'dischtime', True),
    ("Death", 'deathtime', False),
)

# (summary column, abnormality test, label template) in insertion order.
_TIMELINE_ALERT_RULES = (
    # ----- VITALS -----
    ('hr_max', lambda value: value > 100, "High Heart Rate (HR={})"),
    ('o2_min', lambda value: value < 92, "Low Oxygen (O2={})"),
    ('pain_max', lambda value: value >= 6, "High Pain (Pain={})"),
    # ----- LABS -----
    ('lab_max_WBC', lambda value: value > 11, "High WBC (max={})"),
    ('lab_min_Hemoglobin', lambda value: value < 10, "Low Hemoglobin (min={})"),
    ('lab_min_Platelets', lambda value: value < 150, "Low Platelets (min={})"),
    ('lab_max_CRP', lambda value: value > 10, "High CRP (max={})"),
)


def generate_timeline(df, patient_id, hadm_id=None):
    """Build a chronological event timeline for one cohort row of a patient.

    Parameters
    ----------
    df : pandas.DataFrame
        Cohort table. Must contain ``subject_id``, the five timestamp columns
        (``edregtime``, ``admittime``, ``edouttime``, ``dischtime``,
        ``deathtime``) and the vitals/lab summary columns used by the alert
        rules. ``hadm_id`` is only required when the ``hadm_id`` argument is
        given.
    patient_id
        Value matched against ``df['subject_id']``.
    hadm_id : optional
        When given, the row is additionally restricted to this admission.
        When omitted, the first matching row for the patient is used, which
        is the original behaviour.

    Returns
    -------
    pandas.DataFrame
        Columns ``event`` and ``time``, sorted by ``time`` with a fresh
        0..n-1 index. Empty (same two columns) when no row matches.

    Notes
    -----
    Vitals and lab alerts come from summary columns, so they carry the
    admission timestamp rather than a measurement time.
    """
    selected = df['subject_id'] == patient_id
    if hadm_id is not None:
        selected = selected & (df['hadm_id'] == hadm_id)

    matches = df[selected]
    if matches.empty:
        return pd.DataFrame(columns=["event", "time"])

    row = matches.iloc[0]

    # Milestones: optional ones are skipped when their timestamp is missing.
    events = [
        {"event": label, "time": row[column]}
        for label, column, always_recorded in _TIMELINE_MILESTONES
        if always_recorded or pd.notna(row[column])
    ]

    # Alerts: a missing summary value never triggers (pd.notna is checked
    # before the threshold comparison).
    admit_time = row['admittime']
    for column, is_abnormal, template in _TIMELINE_ALERT_RULES:
        value = row[column]
        if pd.notna(value) and is_abnormal(value):
            events.append({"event": template.format(value), "time": admit_time})

    # Stable sort so events sharing a timestamp (admission and its alerts,
    # or discharge and death) keep their insertion order; the default
    # algorithm does not guarantee that.
    full = pd.DataFrame(events)
    full = full.sort_values("time", kind="mergesort").reset_index(drop=True)

    return full

# Smoke check: rebuild the timeline for the patient selected earlier and
# display it for comparison with the step-by-step cells.
test_timeline = generate_timeline(df=df, patient_id=patient_id)
test_timeline


Unnamed: 0,event,time
0,ED Registration,2183-10-17 22:11:00
1,Hospital Admission,2183-10-17 23:20:00
2,High WBC (max=20.7),2183-10-17 23:20:00
3,ED Departure,2183-10-18 00:47:00
4,Hospital Discharge,2183-10-20 18:47:00


The continuity-of-care timeline organizes each patient’s key clinical events in chronological order, making it easy to understand their entire hospital journey at a glance. By combining ED registration and departure, admission, discharge, death (when recorded), vital-sign abnormalities, and lab alerts, the timeline highlights important changes in the patient’s condition over time. Note that the cohort table stores only summary values (max/min/mean) for vitals and labs, so these alerts are stamped with the admission time rather than the moment the abnormal value was measured. This helps clinicians rapidly assess clinical progression, identify deterioration patterns, and ensure no important event is missed. It also supports downstream AI models by giving a structured, time-aware view of patient status for better decision-making.