In [None]:
import re
from pathlib import Path

import numpy as np
import pandas as pd

In [None]:
# Root of the MIMIC-III v1.4 download, resolved relative to the parent of the
# current working directory.
MIMIC_III_DIR = Path.cwd().parent.joinpath(
    "data", "physionet.org", "files", "mimiciii", "1.4"
)

In [None]:
# Load every table used below; files are read in the order they are listed.
physician_notes, patient_df, admissions_df, procedures_df, items_df = (
    pd.read_csv(MIMIC_III_DIR / csv_name)
    for csv_name in (
        "physician_notes.csv",
        "PATIENTS.csv",
        "ADMISSIONS.csv",
        "PROCEDUREEVENTS_MV.csv",
        "D_ITEMS.csv",
    )
)

In [None]:
# Use the first admission yielded by the HADM_ID grouping as the worked example;
# the group's rows themselves are not needed here.
notes_by_admission = iter(physician_notes.groupby("HADM_ID"))
hadm_id, _ = next(notes_by_admission)

In [None]:
# Look up the patient (SUBJECT_ID) that the sample admission belongs to.
is_sample_admission = admissions_df["HADM_ID"] == hadm_id
subject_id = admissions_df.loc[is_sample_admission, "SUBJECT_ID"].iloc[0]
subject_id

In [None]:
# Pull the demographic fields for the sample patient.
is_sample_patient = patient_df["SUBJECT_ID"] == subject_id
sample_patient_row = patient_df.loc[is_sample_patient]
# DOB is stored as "<date> <time>"; keep only the part before the first space.
dob = sample_patient_row["DOB"].iloc[0].partition(" ")[0]
gender = sample_patient_row["GENDER"].iloc[0]
other_identifier = [subject_id]
dob, gender

In [None]:
# Parse the admission and discharge timestamps into datetime columns.
for time_column in ("ADMITTIME", "DISCHTIME"):
    admissions_df[time_column] = pd.to_datetime(
        admissions_df[time_column], format="%Y-%m-%d %H:%M:%S"
    )

In [None]:
# Select the sample admission and read its admission details.
is_sample_admission = admissions_df["HADM_ID"] == hadm_id
admission = admissions_df[is_sample_admission]
admission_method, source_of_admission, date_time_of_admission = (
    admission[column].values[0]
    for column in ("ADMISSION_TYPE", "ADMISSION_LOCATION", "ADMITTIME")
)
admission_method, source_of_admission, date_time_of_admission

In [None]:
# Preview the first rows of the procedure events table (PROCEDUREEVENTS_MV.csv).
procedures_df.head()

In [None]:
# List the columns available in the procedure events table.
procedures_df.columns

In [None]:
def location_to_anatomical_site(location: str) -> str:
    """Return the location text with a leading laterality prefix removed.

    A prefix is one of RIGHT, LEFT, RU, RL, LU, LL, R or L (case-insensitive)
    followed by a single space; words that merely start with those letters
    (e.g. "Lumbar") are left untouched.

    Args:
        location: Free-text location. Missing values are tolerated both as the
            literal string "nan" (what a NaN becomes after a cast to ``str``)
            and as a non-string object such as ``None`` or a float NaN.

    Returns:
        The location without its laterality prefix, or "" when the location is
        missing.
    """
    # Guard against real missing values as well as their stringified form, so
    # the function does not raise TypeError inside ``re.sub``.
    if not isinstance(location, str) or location == "nan":
        return ""
    return re.sub(r"(?i)^(RIGHT|LEFT|RU|RL|LU|LL|R|L) ", "", location)


def location_to_laterality(location: str) -> str:
    """Derive the body side from the prefix of a location string.

    The prefixes RIGHT, RU, RL and R map to "Right"; LEFT, LU, LL and L map to
    "Left". Matching is case-insensitive and the prefix must be followed by a
    space, so a word such as "Lumbar" is not mistaken for "L".

    Args:
        location: Free-text location. Non-string values (``None``, float NaN)
            are treated as missing.

    Returns:
        "Right", "Left", or "" when no laterality prefix is present or the
        location is missing.
    """
    # A real NaN/None would make ``re.match`` raise TypeError; report "no side".
    if not isinstance(location, str):
        return ""
    if re.match(r"(?i)^(RIGHT|RU|RL|R) ", location):
        return "Right"
    if re.match(r"(?i)^(LEFT|LU|LL|L) ", location):
        return "Left"
    return ""


# Sanity check: "Lumbar" starts with "L" but is not followed by a space after
# the prefix, so no laterality should be detected (expected output: "").
location_to_laterality("Lumbar Arm Upper")

In [None]:
# Attach the human-readable item label to every procedure event.
procedures_with_labels = procedures_df.merge(
    items_df[["ITEMID", "LABEL"]], on="ITEMID", how="inner"
)
# Cast LOCATION to text so the helpers below always receive strings.
location_text = procedures_with_labels["LOCATION"].astype(str)
# Split the location into site and side, keep the columns of interest, and
# drop exact repeats.
filtered_procedures_df = (
    procedures_with_labels.assign(
        LOCATION=location_text,
        ANATOMICAL_SITE=location_text.apply(location_to_anatomical_site),
        LATERALITY=location_text.apply(location_to_laterality),
    )[["HADM_ID", "LABEL", "ANATOMICAL_SITE", "LATERALITY", "ORDERCATEGORYNAME"]]
    .drop_duplicates()
)
filtered_procedures_df.head()

In [None]:
# Summarise the sample admission's procedures: one entry per procedure label,
# with every distinct anatomical site / laterality joined by " and ".
admission_procedures = filtered_procedures_df[
    filtered_procedures_df["HADM_ID"] == hadm_id
]

# Collect the non-empty values per label first and join once at the end. This
# avoids a leading " and " when the first row for a label has an empty site or
# laterality. De-duplicating the (label, site, laterality) triples also avoids
# repeats such as "Arm and Arm" from rows that differ only in other columns.
collected_by_label = {}
for label, site, laterality in (
    admission_procedures[["LABEL", "ANATOMICAL_SITE", "LATERALITY"]]
    .drop_duplicates()
    .itertuples(index=False, name=None)
):
    parts = collected_by_label.setdefault(
        label, {"anatomical_site": [], "laterality": []}
    )
    if site:
        parts["anatomical_site"].append(site)
    if laterality:
        parts["laterality"].append(laterality)

# Same shape as before: label -> {"anatomical_site": str, "laterality": str}.
procedure_name_to_info = {
    label: {field: " and ".join(values) for field, values in parts.items()}
    for label, parts in collected_by_label.items()
}
procedures = [
    {
        "name": procedure_name,
        "anatomical_site": info["anatomical_site"],
        "laterality": info["laterality"],
    }
    for procedure_name, info in procedure_name_to_info.items()
]
procedures

In [None]:
# Load the prescriptions table from the MIMIC-III directory.
prescriptions_df = pd.read_csv(MIMIC_III_DIR.joinpath("PRESCRIPTIONS.csv"))

In [None]:
# Reduce both date columns to the text before the first space (the date part).
# Values are passed through str() first, so a missing value becomes "nan".
for date_column in ("STARTDATE", "ENDDATE"):
    prescriptions_df[date_column] = prescriptions_df[date_column].map(
        lambda raw_value: str(raw_value).split(" ")[0]
    )

In [None]:
# Preview the prescriptions after STARTDATE/ENDDATE were reduced to their date part.
prescriptions_df.head()

In [None]:
# Day-resolution strings for the admission and discharge timestamps, in the
# same textual form as the prescription date columns.
admission_date, discharge_date = (
    np.datetime_as_string(admission[time_column].values[0], unit="D")
    for time_column in ("ADMITTIME", "DISCHTIME")
)
admission_date, discharge_date

In [None]:
# Keep only the prescriptions written during the sample admission.
belongs_to_admission = prescriptions_df["HADM_ID"] == hadm_id
hadm_id_prescriptions_df = prescriptions_df.loc[belongs_to_admission]
hadm_id_prescriptions_df.shape[0]

In [None]:
# Drop prescriptions that both started after the admission date and ended
# before the discharge date; everything else is kept.
started_after_admission = hadm_id_prescriptions_df["STARTDATE"] > admission_date
ended_before_discharge = hadm_id_prescriptions_df["ENDDATE"] < discharge_date
hadm_id_prescriptions_df = hadm_id_prescriptions_df.loc[
    ~(started_after_admission & ended_before_discharge)
]
hadm_id_prescriptions_df.shape[0]

In [None]:
# Build two lists from the sample admission's prescriptions, one record per drug:
#   * medication_discontinued_item_entry - drugs whose latest prescription ended
#     before the discharge date;
#   * medication_item_entry - all other drugs.
#
# The date columns are plain strings at this point and a missing date is the
# text "nan" (or similar). Those markers sort *above* every ISO date, so they
# must be excluded before picking the "latest" start or end date.
missing_date_markers = ("nan", "NaT", "None")

medication_item_entry = []
medication_discontinued_item_entry = []
for drug_name, drug_df in hadm_id_prescriptions_df.groupby("DRUG"):
    # Prefer rows with a known start date; fall back to all rows only when no
    # row for this drug has one.
    has_start_date = ~drug_df["STARTDATE"].isin(missing_date_markers)
    dated_rows = drug_df[has_start_date] if has_start_date.any() else drug_df
    most_recent_prescription = dated_rows.sort_values(
        "STARTDATE", ascending=False
    ).iloc[0]

    if most_recent_prescription["ENDDATE"] < discharge_date:
        # Latest known end date across this drug's prescriptions. The most
        # recent prescription has a real end date in this branch, so the
        # selection is never empty.
        known_end_dates = drug_df.loc[
            ~drug_df["ENDDATE"].isin(missing_date_markers), "ENDDATE"
        ]
        medication_discontinued_item_entry.append(
            {
                "name_of_discontinued_medication": drug_name,
                "status": "",
                "indication": "",
                "date_of_latest_change": known_end_dates.max(),
                "description_of_amendment": "Stopped",
                "comment": "",
            }
        )
    else:
        medication_item = {
            "medication_name": most_recent_prescription["DRUG"],
            # "mL" is blanked out of the form field.
            "form": (
                most_recent_prescription["FORM_UNIT_DISP"]
                if most_recent_prescription["FORM_UNIT_DISP"] != "mL"
                else ""
            ),
            # NOTE(review): this key contains a space while every other key is
            # snake_case; kept as-is because downstream consumers may rely on it.
            "quantity supplied": [
                f'{most_recent_prescription["FORM_VAL_DISP"]} {most_recent_prescription["FORM_UNIT_DISP"]}'
            ],
            "route": [most_recent_prescription["ROUTE"]],
            "site": "",
            "method": "",
            "dose_directions_description": "",
            "dose_amount_description": (
                f'{most_recent_prescription["DOSE_VAL_RX"]} {most_recent_prescription["DOSE_UNIT_RX"]}'
            ),
            "dose_timing_description": "",
            "dose_direction_duration": "",
            "additional_instruction": "",
        }
        # More than one prescription row for the drug is recorded as a change,
        # dated at the start of the most recent prescription.
        if len(drug_df) > 1:
            medication_item["medication_change_summary_cluster"] = {
                "status": "",
                "indication": "",
                "date_of_latest_change": most_recent_prescription["STARTDATE"],
                "description_of_amendment": "",
            }

        medication_item_entry.append(medication_item)
medication_item_entry[:2]

In [None]:
# Show the drugs whose most recent prescription ended before the discharge date.
medication_discontinued_item_entry