In [260]:
import pandas as pd
import os

# Configuration for limited RAM/Disk environment
HOSP = '../data_samples/hosp/'
ICU = '../data_samples/icu/'
NOTES = '../data_samples/note/'  # Adjust if your notes are elsewhere

In [261]:
os.listdir('../data_samples')

['note', 'hosp', 'icu', 'mimic-iv-bhc.csv']

## Pillars Formatting

In [262]:
def load_discharges(subject_ids=None):
    return filtered_read_csv(NOTES + 'discharge.csv', subject_ids)

def filtered_read_csv(file_path, subject_ids, usecols=None, **kwargs):
    """
    Helper to read large compressed CSVs in chunks and filter by subject_id.
    Uses 'usecols' to minimize memory footprint.
    """
    if usecols is not None and 'subject_id' not in usecols:
        usecols.append('subject_id')

    chunk_list = []
    # compression='infer' is default and handles .gz automatically
    try:
        for chunk in pd.read_csv(file_path, chunksize=250_000, usecols=usecols, engine='c', **kwargs):
            if subject_ids is not None:
                filtered_chunk = chunk[chunk['subject_id'].isin(subject_ids)]
            else:
                filtered_chunk = chunk
            
            if not filtered_chunk.empty:
                chunk_list.append(filtered_chunk)
    except FileNotFoundError:
        print(f"Warning: File not found at {file_path}")
        return pd.DataFrame()

    return pd.concat(chunk_list, ignore_index=True) if chunk_list else pd.DataFrame()

def load_identity_pillar(subject_ids):

    admissions_cols = ['subject_id', 'hadm_id', 'admittime', 'race', 'admission_type', 'admission_location', 'insurance', 'marital_status']
    admissions = filtered_read_csv(HOSP + 'admissions.csv', subject_ids, usecols=admissions_cols)

    patients_cols = ['subject_id', 'gender', 'anchor_age', 'anchor_year', 'dod']
    patients = filtered_read_csv(HOSP + 'patients.csv', subject_ids, usecols=patients_cols)
    if patients.empty:
        patients = pd.DataFrame(columns=patients_cols)

    omr_cols = ['subject_id', 'chartdate', 'result_name', 'result_value']
    omr = filtered_read_csv(HOSP + 'omr.csv', subject_ids, usecols=omr_cols)
    if omr.empty:
        omr = pd.DataFrame(columns=omr_cols)

    admissions['admittime'] = pd.to_datetime(admissions['admittime'])
    patients['anchor_year'] = patients['anchor_year'].astype(int)

    identity_df = admissions.merge(
        patients[['subject_id', 'gender', 'anchor_age', 'anchor_year', 'dod']], 
        on='subject_id', how='left'
    )

    identity_df['admission_year'] = identity_df['admittime'].dt.year
    identity_df['age_at_admission'] = (identity_df['anchor_age'] + 
                                       (identity_df['admission_year'] - identity_df['anchor_year']))

    relevant_omr_names = ['Weight (Lbs)', 'Height (Inches)', 'BMI (kg/m2)', 'Blood Pressure']
    baseline_omr = omr[omr['result_name'].isin(relevant_omr_names)].copy()
    baseline_omr = baseline_omr.sort_values('chartdate').groupby(['subject_id', 'result_name']).head(1)
    
    omr_pivot = baseline_omr.pivot(index='subject_id', columns='result_name', values='result_value').reset_index()
    identity_df = identity_df.merge(omr_pivot, on='subject_id', how='left')
    identity_df['timestamp'] = identity_df['admittime']
    
    identity_df = identity_df.rename(columns={
        'Weight (Lbs)': 'weight_baseline', 'Height (Inches)': 'height_baseline',
        'BMI (kg/m2)': 'bmi_baseline', 'Blood Pressure': 'blood_pressure_baseline',
    })

    relevant_cols = [
        'subject_id', 'hadm_id', 'timestamp', 'age_at_admission', 'gender', 'race',
        'admission_type', 'admission_location', 'insurance', 'marital_status',
        'weight_baseline', 'height_baseline', 'bmi_baseline', 'blood_pressure_baseline', 'dod'
    ]
    return identity_df[[c for c in relevant_cols if c in identity_df.columns]]

def load_logistics_pillar(subject_ids):
    transfers = filtered_read_csv(HOSP + 'transfers.csv', subject_ids, usecols=['subject_id', 'hadm_id', 'careunit', 'intime', 'outtime', 'eventtype'])
    services = filtered_read_csv(HOSP + 'services.csv', subject_ids, usecols=['subject_id', 'hadm_id', 'curr_service', 'transfertime'])
    admissions = filtered_read_csv(HOSP + 'admissions.csv', subject_ids, usecols=['subject_id', 'hadm_id', 'admission_location', 'discharge_location'])

    transfers['intime'], transfers['outtime'] = pd.to_datetime(transfers['intime']), pd.to_datetime(transfers['outtime'])
    services['transfertime'] = pd.to_datetime(services['transfertime'])
    
    logistics_df = transfers.dropna(subset=['hadm_id']).merge(
        admissions[['subject_id', 'hadm_id', 'admission_location', 'discharge_location']], on='hadm_id', how='left'
    )

    services = services.dropna(subset=['hadm_id'])
    logistics_df = logistics_df.dropna(subset=['hadm_id'])
    logistics_df['hadm_id'] = logistics_df['hadm_id'].astype('int64')

    services = services.sort_values('transfertime')
    logistics_df = pd.merge_asof(
        logistics_df.sort_values('intime'), services,
        left_on='intime', right_on='transfertime', by='hadm_id', direction='backward'
    )

    logistics_df = logistics_df.dropna(subset=['careunit'])
    logistics_df['stay_duration_hours'] = (logistics_df['outtime'] - logistics_df['intime']).dt.total_seconds() / 3600
    logistics_df['timestamp'] = logistics_df['intime']
    
    return logistics_df

def load_monitoring_pillar(subject_ids):
    d_items = pd.read_csv(ICU + 'd_items.csv')
    # vital_itemids = [220045, 220179, 220180, 220210, 223761]
    # vitals_dict = d_items[d_items['itemid'].isin(vital_itemids)][['itemid', 'label', 'lownormalvalue', 'highnormalvalue']]
    vitals_dict = d_items[['itemid', 'label', 'lownormalvalue', 'highnormalvalue']]

    # Process Chartevents (ICU)
    icu_vitals_list = []
    icu_cols = ['subject_id', 'hadm_id', 'itemid', 'charttime', 'valuenum', 'valueuom']
    for chunk in pd.read_csv(ICU + 'chartevents.csv', chunksize=200_000, usecols=icu_cols):
        f_chunk = chunk[(chunk['subject_id'].isin(subject_ids))]
        if not f_chunk.empty:
            icu_vitals_list.append(f_chunk)
    
    if len(icu_vitals_list) > 0:
        icu_vitals = pd.concat(icu_vitals_list, ignore_index=True)
        
        icu_vitals = icu_vitals.merge(vitals_dict, on='itemid', how='inner')
        if not icu_vitals.empty:
            icu_vitals['timestamp'] = pd.to_datetime(icu_vitals['charttime'])
    else:
        icu_vitals = pd.DataFrame()

    # Process OMR (Ward)
    omr = filtered_read_csv(HOSP + 'omr.csv', subject_ids, usecols=['subject_id', 'chartdate', 'result_name', 'result_value'])
    admissions = filtered_read_csv(HOSP + 'admissions.csv', subject_ids, usecols=['subject_id', 'hadm_id', 'admittime', 'dischtime'])

    ward_vitals = omr[omr['result_name'].str.contains('Blood Pressure', na=False)].copy()
    ward_vitals['timestamp'] = pd.to_datetime(ward_vitals['chartdate'])
    ward_vitals = ward_vitals.merge(admissions, on='subject_id', how='left')
    
    mask = (ward_vitals['timestamp'] >= pd.to_datetime(ward_vitals['admittime']).dt.normalize()) & \
           (ward_vitals['timestamp'] <= pd.to_datetime(ward_vitals['dischtime']).dt.normalize())
    ward_vitals = ward_vitals[mask].rename(columns={'result_name': 'label', 'result_value': 'valuenum'})

    monitoring_df = pd.concat([icu_vitals, ward_vitals], axis=0, ignore_index=True)
    return monitoring_df.sort_values(['subject_id', 'timestamp'])

def load_investigations_pillar(subject_ids):
    d_labitems = pd.read_csv(HOSP + 'd_labitems.csv')
    lab_cols = ['subject_id', 'hadm_id', 'itemid', 'charttime', 'valuenum', 'valueuom', 'flag']
    
    labs_df = filtered_read_csv(HOSP + 'labevents.csv', subject_ids, usecols=lab_cols)
    if not labs_df.empty:
        labs_df['timestamp'] = pd.to_datetime(labs_df['charttime'])
        labs_final = labs_df.merge(d_labitems[['itemid', 'label', 'fluid', 'category']], on='itemid', how='left')
        labs_final['type'] = 'LAB'
    else:
        labs_final = pd.DataFrame()

    micro_df = filtered_read_csv(HOSP + 'microbiologyevents.csv', subject_ids)
    if not micro_df.empty:
        micro_df['timestamp'] = pd.to_datetime(micro_df['charttime'])
        micro_final = micro_df.rename(columns={'spec_type_desc': 'label'})
        micro_final['type'] = 'MICRO'
    else:
        micro_final = pd.DataFrame()

    df = pd.concat([labs_final, micro_final], axis=0, ignore_index=True).sort_values(['subject_id', 'timestamp'])
    df = df.dropna(subset=['hadm_id'])

    return df

def load_interventions_pillar(subject_ids):
    
    d_items = pd.read_csv(ICU + 'd_items.csv')
    icu_procs = filtered_read_csv(ICU + 'procedureevents.csv', subject_ids)
    if not icu_procs.empty:
        icu_procs['timestamp'] = pd.to_datetime(icu_procs['starttime'])
        icu_procs = icu_procs.merge(d_items[['itemid', 'label']], on='itemid', how='left')
        icu_procs['proc_type'] = 'ICU_BEDSIDE'

    d_icd_procs = pd.read_csv(HOSP + 'd_icd_procedures.csv')
    billed_procs = filtered_read_csv(HOSP + 'procedures_icd.csv', subject_ids)
    admissions = filtered_read_csv(HOSP + 'admissions.csv', subject_ids, usecols=['hadm_id', 'admittime', 'subject_id'])
    
    if not billed_procs.empty:
        billed_procs = billed_procs.merge(d_icd_procs, on=['icd_code', 'icd_version'], how='left')
        billed_procs = billed_procs.merge(admissions, on='hadm_id', how='left')
        billed_procs['timestamp'] = pd.to_datetime(billed_procs['admittime'])
        billed_procs['proc_type'] = 'BILLED_SURGICAL'
        billed_procs = billed_procs.rename(columns={'long_title': 'label'})

    if icu_procs.empty and billed_procs.empty:

        return pd.DataFrame(columns=['subject_id', 'hadm_id', 'timestamp', 'label', 'proc_type'])
    
    combined = pd.concat([icu_procs, billed_procs], axis=0, ignore_index=True).sort_values(['subject_id', 'timestamp'])

    return combined

def load_inputs_pillar(subject_ids):
    d_items = pd.read_csv(ICU + 'd_items.csv')
    icu_cols = ['subject_id', 'hadm_id', 'itemid', 'starttime', 'amount', 'amountuom', 'rate', 'rateuom']
    icu_inputs = filtered_read_csv(ICU + 'inputevents.csv', subject_ids, usecols=icu_cols)
    
    if not icu_inputs.empty:
        icu_inputs['timestamp'] = pd.to_datetime(icu_inputs['starttime'])
        icu_inputs = icu_inputs.merge(d_items[['itemid', 'label']], on='itemid', how='left')
        icu_inputs['input_type'] = 'ICU_INPUT'

    rx_cols = ['subject_id', 'hadm_id', 'starttime', 'drug', 'dose_val_rx', 'dose_unit_rx', 'route']
    ward_inputs = filtered_read_csv(HOSP + 'prescriptions.csv', subject_ids, usecols=rx_cols)
    if not ward_inputs.empty:
        ward_inputs['timestamp'] = pd.to_datetime(ward_inputs['starttime'])
        ward_inputs = ward_inputs.rename(columns={'drug': 'label', 'dose_val_rx': 'amount', 'dose_unit_rx': 'amountuom'})
        ward_inputs['input_type'] = 'WARD_PRESCRIPTION'

    return pd.concat([icu_inputs, ward_inputs], axis=0, ignore_index=True).sort_values(['subject_id', 'timestamp'])

def load_conclusion_pillar(subject_ids):
    d_diag = pd.read_csv('~/Personal/UFMG/Mestrado/dissertacao/mimic-iv-3.1/hosp/' + 'd_icd_diagnoses.csv.gz')
    diagnoses = filtered_read_csv(HOSP + 'diagnoses_icd.csv', subject_ids)
    admissions = filtered_read_csv(HOSP + 'admissions.csv', subject_ids, usecols=['hadm_id', 'dischtime', 'subject_id'])

    required_cols = ['subject_id', 'hadm_id', 'timestamp', 'diagnosis_label', 'seq_num', 'icd_code']

    if not diagnoses.empty:
        diagnoses['icd_code'] = diagnoses['icd_code'].astype(str).str.strip()
        d_diag['icd_code'] = d_diag['icd_code'].astype(str).str.strip()
        
        outcomes = diagnoses.merge(d_diag, on=['icd_code', 'icd_version'], how='left')
        outcomes = outcomes.merge(admissions, on=['hadm_id', 'subject_id'], how='left')
        
        outcomes['timestamp'] = pd.to_datetime(outcomes['dischtime'])
        outcomes = outcomes.rename(columns={'long_title': 'diagnosis_label'})
        
        return outcomes[required_cols].sort_values(['subject_id', 'seq_num'])
    
    return pd.DataFrame(columns=required_cols)


## Text Construction

In [263]:
def textual_context_builder(): ## Unused
    # Placeholder for your specific logic
    return "Sample context string"


def logistics_text(df) -> str:
    text = ""
    transfer_flag = False

    for row in df.itertuples():
        if row.eventtype == 'admit':
            text += f"The patient was formally admitted to {row.careunit} via {row.admission_location}. "
        
        elif row.eventtype == 'ED':
            text += f"The patient initially presented at the Emergency Department ({row.careunit}). "
            
        elif row.eventtype == 'transfer':
            if not transfer_flag:
                text += "\nInternal movements within the hospital included:"
                transfer_flag = True
            duration_str = f" for {int(row.stay_duration_hours)} hours" if hasattr(row, 'stay_duration_hours') else ""
            text += f"\n - Moved to {row.careunit}{duration_str}."
            
        elif row.eventtype == 'discharge':
            loc = getattr(row, 'discharge_location', 'Home')
            text += f"\nFinally, the patient was discharged {loc}."
    return text

def monitoring_text(df) -> str:

    if len(df) < 1 or df.empty:
        return ''

    df = df.dropna(subset=['valuenum']).sort_values('timestamp')

    text = "Most recent vital signs mesured:"
    for i in df.itertuples():
        unit = getattr(i, 'valueuom', '')
        unit_str = "" if pd.isna(unit) else str(unit)
        text += f'\n - {i.label} - {i.valuenum}{unit_str}'   

    return text

def investigations_text(df) -> str:
    if df.empty:
        return ""
    
    # Ensure timestamps are handled for sorting
    if 'timestamp' in df.columns:
        df = df.sort_values('timestamp')

    sections = []
    # --- 1. LAB EVENTS (Grouped by Label) ---
    # We only care about abnormal labs to keep the context concise
    labs = df[(df['type'] == 'LAB') & (df['flag'] == 'abnormal')]
    if not labs.empty:
        lab_lines = []
        for label, group in labs.groupby('label', sort=False):
            values = []
            for r in group.itertuples():
                val = r.valuenum
                unit = getattr(r, 'valueuom', '')
                unit_str = str(unit) if pd.notna(unit) else ""
                
                values.append(f"{val}{unit_str}")
            lab_lines.append(f"- {label}: {', '.join(values)}")
        
        sections.append("Abnormal Lab Findings:\n" + "\n".join(lab_lines))

    # --- 2. MICROBIOLOGY (Grouped by Specimen & Test) ---
    micro = df[df['type'] == 'MICRO']
    if not micro.empty:
        micro_lines = []
        # Group by specimen and test name
        grouped_micro = micro.groupby(['label', 'test_name'], sort=False)
        
        for (spec_type, test_name), group in grouped_micro:
            results = []
            for row in group.itertuples():
                # Priority 1: Organism Name
                if not pd.isna(getattr(row, 'org_name', None)):
                    res = f"Positive for {row.org_name}"
                    results.append(res)
                # Priority 2: Comments (cleaned)
                elif not pd.isna(getattr(row, 'comments', None)) and str(row.comments).strip() not in ["", "___"]:
                    clean_comment = str(row.comments).split('.')[0].strip()
                    results.append(clean_comment)
                else:
                    results.append("Pending/Negative")

            unique_results = ", ".join(dict.fromkeys(results)) # preserves order, removes duplicates
            micro_lines.append(f"- {spec_type} ({test_name}): {unique_results}")
        
        sections.append("Microbiology & Immunology:\n" + "\n".join(micro_lines))

    return "\n".join(sections)

def interventions_text(df) -> str:
    if df.empty:
        return ""

    sections = []

    # 1. Billed / Surgical Procedures (Major events)
    surgical = df[df['proc_type'] == 'BILLED_SURGICAL'].copy()
    if not surgical.empty:
        # Deduplicate and remove NaNs
        s_list = [str(p).strip() for p in surgical['label'].unique() if pd.notna(p)]
        if s_list:
            sections.append(f"Major/Billed Procedures: {'; '.join(s_list)}.")

    # 2. ICU Bedside Procedures (Nursing/Bedside events)
    bedside = df[df['proc_type'] == 'ICU_BEDSIDE'].copy()
    if not bedside.empty:
        # Group by label to show status if relevant, or just a unique list
        b_list = [str(p).strip() for p in bedside['label'].unique() if pd.notna(p)]
        if b_list:
            sections.append(f"ICU Bedside Interventions: {', '.join(b_list)}.")

    return "\n".join(sections)

def medication_text(df):
    if df is None or df.empty:
        return ""
    
    df = df.sort_values('timestamp').copy()
    
    admin_entries = []

    for row in df.itertuples():
        label = str(getattr(row, 'label', 'Unknown Medication'))
        route = str(getattr(row, 'route', 'unspecified route'))

        amount = getattr(row, 'amount', None)
        uom = getattr(row, 'amountuom', '')
        rate = getattr(row, 'rate', None)
        rate_uom = getattr(row, 'rateuom', '')

        # Build the dose string
        if pd.notna(rate) and str(rate) != 'nan':
            dose_info = f"at {rate} {rate_uom}"
        elif pd.notna(amount) and str(amount) != 'nan':
            dose_info = f"dose of {amount} {uom}"
        else:
            dose_info = "administered"

        dose_info = dose_info.replace('nan', '').strip()
        
        admin_entries.append(f"{label} {dose_info} via {route}")

    if not admin_entries:
        return ""

    text = "Medication Administration History:\n- " + "\n- ".join(admin_entries)
    return text

def conclusion_text(df):
    if df is None or df.empty:
        return ""
    diag_df = df.dropna(subset=['diagnosis_label']).sort_values('seq_num').copy()
    
    if diag_df.empty:
        return ""

    primary = diag_df.iloc[0]['diagnosis_label']
    secondary = diag_df.iloc[1:]['diagnosis_label'].unique().tolist()
    
    text_parts = [f"Primary Diagnosis: {primary}."]
    
    if secondary:
        clean_secondary = [str(d) for d in secondary if pd.notna(d)]
        if clean_secondary:
            text_parts.append(f"Secondary Conditions: {', '.join(clean_secondary)}.")

    return "\n".join(text_parts)

def context_builder_v2(window_cuts):
    """
    Inputs: window_cuts -> dict of DataFrames (identity, logistics, monitoring, etc.)
    already sliced for one specific patient and one specific time window.
    Output: A single stitched string.
    """
    paragraphs = []

    # 1. Identity (The Intro)
    # We assume identity is passed in the dict; usually contains 1 row per admission
    if 'identity' in window_cuts and not window_cuts['identity'].empty:
        id_df = window_cuts['identity']
        row = id_df.iloc[0] # Take the most recent/relevant identity row
        paragraphs.append(
            f"Patient is a {row.age_at_admission} year old {row.gender} ({row.race}). "
            f"They were admitted via {row.admission_location} by {row.admission_type}. "
            f"Baseline patient data shows: \nweight {row.weight_baseline}, height {row.height_baseline}, bmi {row.bmi_baseline}, blood pressure {row.blood_pressure_baseline}"
        )

    # # # 2. Logistics (The Journey)
    if 'logistics' in window_cuts and not window_cuts['logistics'].empty:
        paragraphs.append(logistics_text(window_cuts['logistics']))
        

    # 3. Monitoring (The Vitals)
    if 'monitoring' in window_cuts and not window_cuts['monitoring'].empty:
        mon = window_cuts['monitoring'].sort_values('timestamp')
        paragraphs.append(monitoring_text(mon))
        

    # 4. Investigations (The Labs)
    if 'investigations' in window_cuts and not window_cuts['investigations'].empty:
        inv = window_cuts['investigations']
        paragraphs.append(investigations_text(inv))

    # 5. Inputs (The Meds) --> Exception
    if 'inputs' in window_cuts and not window_cuts['inputs'].empty:
        meds = window_cuts['inputs']
        paragraphs.append(medication_text(meds))

    # 6. Interventions (The Actions) --> Exception
    if 'interventions' in window_cuts and not window_cuts['interventions'].empty:
        procs = window_cuts['interventions']
        paragraphs.append(interventions_text(procs))

    # 7. Conclusion (The Diagnoses)
    if 'conclusion' in window_cuts and not window_cuts['conclusion'].empty:
        diag_para = conclusion_text(window_cuts['conclusion'])
        if diag_para:
            paragraphs.append(diag_para)

    # Stitch all paragraphs into one clean block
    return '\n'.join(paragraphs)

In [264]:
def load_clinical_context(subject_ids):

    context = dict()

    context['identity'] = load_identity_pillar(subject_ids)
    print('identity pillar has been loaded')
    context['logistics'] = load_logistics_pillar(subject_ids)
    print('logistics pillar has been loaded')
    context['monitoring'] = load_monitoring_pillar(subject_ids)
    print('monitoring pillar has been loaded')
    context['investigations'] = load_investigations_pillar(subject_ids)
    print('investigations pillar has been loaded')
    context['interventions'] = load_interventions_pillar(subject_ids)
    print('interventions pillar has been loaded')
    context['inputs'] = load_inputs_pillar(subject_ids)
    print('inputs pillar has been loaded')
    context['conclusion'] = load_conclusion_pillar(subject_ids) # Exception
    print('conclusion pillar has been loaded')

    return context

def data_preparation(subject_ids=None):

    print('Loading discharges...')
    discharges = load_discharges(subject_ids)
    if 'storetime' in discharges.columns:
        discharges['storetime'] = pd.to_datetime(discharges['storetime'])
        discharges = discharges.sort_values(['subject_id', 'storetime'])
    
    print('Discharges were loaded.\nLoading the context...')
    clinical_data_dict = load_clinical_context(subject_ids) 
    cases_data = []

    for subject_id, group in discharges.groupby('subject_id'):
        subj_context = {k: df[df['subject_id'] == subject_id].copy() for k, df in clinical_data_dict.items()}
        prev_timestamp = pd.Timestamp.min
        for case in group.itertuples(index=False):
            current_timestamp = case.storetime
            window_cuts = {}
            
            for category, df in subj_context.items():
                if 'timestamp' in df.columns:
                    df['timestamp'] = pd.to_datetime(df['timestamp'])
                    mask = (df['timestamp'] <= current_timestamp) & (df['timestamp'] > prev_timestamp)
                    window_cuts[category] = df[mask]
            
            context_textual = context_builder_v2(window_cuts)
            cases_data.append({
                'subject_id': subject_id,
                'hadm_id': getattr(case, 'hadm_id', None),
                'context_textual': context_textual,
                'aaaa': window_cuts,
            })

            prev_timestamp = current_timestamp

    return pd.DataFrame(cases_data)

In [265]:
df = data_preparation([10000032]) # , 10000117  [10000032, 10001217, 10001725]

Loading discharges...
Discharges were loaded.
Loading the context...
identity pillar has been loaded
logistics pillar has been loaded
monitoring pillar has been loaded
investigations pillar has been loaded
interventions pillar has been loaded
inputs pillar has been loaded


  monitoring_df = pd.concat([icu_vitals, ward_vitals], axis=0, ignore_index=True)


conclusion pillar has been loaded


In [271]:
print(df.iloc[0]['context_textual'])

Patient is a 52 year old F (WHITE). They were admitted via TRANSFER FROM HOSPITAL by URGENT. Baseline patient data shows: 
weight 94, height 60, bmi 18.0, blood pressure 110/65
The patient was formally admitted to Transplant via TRANSFER FROM HOSPITAL. 
Finally, the patient was discharged HOME.
Abnormal Lab Findings:
- Urobilinogen: 4.0mg/dL
- Calcium, Total: 7.8mg/dL
- Creatinine: 0.3mg/dL
- Urea Nitrogen: 25.0mg/dL
- Asparate Aminotransferase (AST): 98.0IU/L
- INR(PT): 1.5
- Alanine Aminotransferase (ALT): 88.0IU/L
- MCH: 33.4pg
- MCV: 99.0fL
- Platelet Count: 71.0K/uL
- Red Blood Cells: 3.8m/uL
- PT: 16.6sec
- RBC, Ascites: 8800.0#/uL
- Eosinophils: 1.0%
- Mesothelial Cell: 2.0%
- Macrophage: 35.0%
- Lymphocytes: 49.0%
- Polys: 13.0%
Microbiology & Immunology:
- URINE (URINE CULTURE): MIXED BACTERIAL FLORA ( >= 3 COLONY TYPES), CONSISTENT WITH SKIN AND/OR GENITAL CONTAMINATION
- SWAB (R/O VANCOMYCIN RESISTANT ENTEROCOCCUS): No VRE isolated
- PERITONEAL FLUID (GRAM STAIN): 2+   (1-5 

In [267]:
for i in range(len(df)):
    print(df.iloc[i]['context_textual'])

Patient is a 52 year old F (WHITE). They were admitted via TRANSFER FROM HOSPITAL by URGENT. Baseline patient data shows: 
weight 94, height 60, bmi 18.0, blood pressure 110/65
The patient was formally admitted to Transplant via TRANSFER FROM HOSPITAL. 
Finally, the patient was discharged HOME.
Abnormal Lab Findings:
- Urobilinogen: 4.0mg/dL
- Calcium, Total: 7.8mg/dL
- Creatinine: 0.3mg/dL
- Urea Nitrogen: 25.0mg/dL
- Asparate Aminotransferase (AST): 98.0IU/L
- INR(PT): 1.5
- Alanine Aminotransferase (ALT): 88.0IU/L
- MCH: 33.4pg
- MCV: 99.0fL
- Platelet Count: 71.0K/uL
- Red Blood Cells: 3.8m/uL
- PT: 16.6sec
- RBC, Ascites: 8800.0#/uL
- Eosinophils: 1.0%
- Mesothelial Cell: 2.0%
- Macrophage: 35.0%
- Lymphocytes: 49.0%
- Polys: 13.0%
Microbiology & Immunology:
- URINE (URINE CULTURE): MIXED BACTERIAL FLORA ( >= 3 COLONY TYPES), CONSISTENT WITH SKIN AND/OR GENITAL CONTAMINATION
- SWAB (R/O VANCOMYCIN RESISTANT ENTEROCOCCUS): No VRE isolated
- PERITONEAL FLUID (GRAM STAIN): 2+   (1-5 

In [268]:
df.iloc[0]['aaaa']['inputs']

Unnamed: 0,subject_id,hadm_id,starttime,itemid,amount,amountuom,rate,rateuom,timestamp,label,input_type,route
9,10000032,22595853,2180-05-07 00:00:00,,3.0,mL,,,2180-05-07 00:00:00,Sodium Chloride 0.9% Flush,WARD_PRESCRIPTION,IV
13,10000032,22595853,2180-05-07 00:00:00,,0.5,mL,,,2180-05-07 00:00:00,Influenza Vaccine Quadrivalent,WARD_PRESCRIPTION,IM
17,10000032,22595853,2180-05-07 00:00:00,,5000.0,UNIT,,,2180-05-07 00:00:00,Heparin,WARD_PRESCRIPTION,SC
7,10000032,22595853,2180-05-07 01:00:00,,20.0,mg,,,2180-05-07 01:00:00,Furosemide,WARD_PRESCRIPTION,PO/NG
8,10000032,22595853,2180-05-07 01:00:00,,40.0,mEq,,,2180-05-07 01:00:00,Potassium Chloride,WARD_PRESCRIPTION,PO
10,10000032,22595853,2180-05-07 01:00:00,,400.0,mg,,,2180-05-07 01:00:00,Raltegravir,WARD_PRESCRIPTION,PO
12,10000032,22595853,2180-05-07 01:00:00,,500.0,mg,,,2180-05-07 01:00:00,Acetaminophen,WARD_PRESCRIPTION,PO/NG
14,10000032,22595853,2180-05-07 01:00:00,,2.0,PUFF,,,2180-05-07 01:00:00,Albuterol Inhaler,WARD_PRESCRIPTION,IH
15,10000032,22595853,2180-05-07 01:00:00,,1.0,TAB,,,2180-05-07 01:00:00,Emtricitabine-Tenofovir (Truvada),WARD_PRESCRIPTION,PO
16,10000032,22595853,2180-05-07 01:00:00,,50.0,mg,,,2180-05-07 01:00:00,Spironolactone,WARD_PRESCRIPTION,PO/NG


In [269]:
df.iloc[2]['aaaa']['conclusion']

Unnamed: 0,subject_id,hadm_id,timestamp,diagnosis_label,seq_num,icd_code
26,10000032,29079034,2180-07-25 17:55:00,Other iatrogenic hypotension,1,45829
27,10000032,29079034,2180-07-25 17:55:00,Chronic hepatitis C with hepatic coma,2,07044
28,10000032,29079034,2180-07-25 17:55:00,Cachexia,3,7994
29,10000032,29079034,2180-07-25 17:55:00,Hyposmolality and/or hyponatremia,4,2761
30,10000032,29079034,2180-07-25 17:55:00,Other ascites,5,78959
31,10000032,29079034,2180-07-25 17:55:00,Hyperpotassemia,6,2767
32,10000032,29079034,2180-07-25 17:55:00,Tobacco use disorder,7,3051
33,10000032,29079034,2180-07-25 17:55:00,Asymptomatic human immunodeficiency virus [HIV...,8,V08
34,10000032,29079034,2180-07-25 17:55:00,Do not resuscitate status,9,V4986
35,10000032,29079034,2180-07-25 17:55:00,"Other dependence on machines, supplemental oxygen",10,V462


In [270]:
if 'logistics' in window_cuts and not window_cuts['logistics'].empty:
        log = window_cuts['logistics'].sort_values('timestamp')
        units = " -> ".join(log['careunit'].unique())
        paragraphs.append(f"Clinical course involved the following units: {units}.")

NameError: name 'window_cuts' is not defined