In [None]:
import re
from pathlib import Path
from typing import Dict, List

import pandas as pd

In [None]:
# Root of the MIMIC-III v1.4 download, resolved relative to the parent of the
# current working directory.
MIMIC_III_DIR = Path.cwd().parent.joinpath(
    "data", "physionet.org", "files", "mimiciii", "1.4"
)

In [None]:
# Load the physician notes and pick one admission to use as a worked example:
# the key of the first group produced by grouping the notes on HADM_ID.
physician_notes = pd.read_csv(MIMIC_III_DIR.joinpath("physician_notes.csv"))
hadm_id, _ = next(iter(physician_notes.groupby(by="HADM_ID")))

In [None]:
class MIMICTableToPRSBFiller:
    """Build sections of a PRSB-style discharge summary from MIMIC-III tables.

    The constructor reads PATIENTS, ADMISSIONS, PROCEDUREEVENTS_MV, D_ITEMS and
    PRESCRIPTIONS from ``mimic_dir`` and pre-processes them once. Each
    ``fill_*`` method then returns one section of the summary for a single
    admission, and ``fill_prsb_discharge_summary`` combines them.

    NOTE(review): ``hadm_id`` is annotated ``str`` throughout, but it is
    compared with ``==`` against the ``HADM_ID`` columns exactly as pandas
    parsed them. Presumably those columns are numeric, in which case callers
    must pass a number rather than a string -- confirm.
    """

    def __init__(self, mimic_dir: Path) -> None:
        """Load and pre-process the MIMIC-III tables used by the fillers.

        Args:
            mimic_dir: Directory containing the MIMIC-III CSV files.
        """
        self.patient_df = pd.read_csv(
            mimic_dir / "PATIENTS.csv", usecols=["SUBJECT_ID", "DOB", "GENDER"]
        )
        self.admissions_df = pd.read_csv(
            mimic_dir / "ADMISSIONS.csv",
            usecols=[
                "HADM_ID",
                "ADMITTIME",
                "DISCHTIME",
                "SUBJECT_ID",
                "ADMISSION_TYPE",
                "ADMISSION_LOCATION",
            ],
        )
        self.procedures_df = pd.read_csv(
            mimic_dir / "PROCEDUREEVENTS_MV.csv",
            usecols=["HADM_ID", "ITEMID", "LOCATION"],
        )
        self.items_df = pd.read_csv(
            mimic_dir / "D_ITEMS.csv", usecols=["ITEMID", "LABEL"]
        )
        self.prescriptions_df = pd.read_csv(
            mimic_dir / "PRESCRIPTIONS.csv",
            usecols=[
                "HADM_ID",
                "DRUG",
                "STARTDATE",
                "ENDDATE",
                "DOSE_VAL_RX",
                "DOSE_UNIT_RX",
                "ROUTE",
                "FORM_VAL_DISP",
                "FORM_UNIT_DISP",
            ],
        )

        self.admissions_df["ADMITTIME"] = pd.to_datetime(
            self.admissions_df["ADMITTIME"], format="%Y-%m-%d %H:%M:%S"
        )
        self.admissions_df["DISCHTIME"] = pd.to_datetime(
            self.admissions_df["DISCHTIME"], format="%Y-%m-%d %H:%M:%S"
        )

        # Attach the human-readable item label to every procedure event, then
        # split the free-text LOCATION into a site and a side.
        self.procedures_df = pd.merge(
            self.procedures_df,
            self.items_df[["ITEMID", "LABEL"]],
            on="ITEMID",
            how="inner",
        )
        self.procedures_df["LOCATION"] = self.procedures_df["LOCATION"].astype(str)
        self.procedures_df["ANATOMICAL_SITE"] = self.procedures_df["LOCATION"].apply(
            self._location_to_anatomical_site
        )
        self.procedures_df["LATERALITY"] = self.procedures_df["LOCATION"].apply(
            self._location_to_laterality
        )
        self.procedures_df = self.procedures_df[
            ["HADM_ID", "LABEL", "ANATOMICAL_SITE", "LATERALITY"]
        ]
        self.procedures_df = self.procedures_df.drop_duplicates()

        # Remove times from dates as all set to 00:00:00
        # NOTE(review): str() turns a missing date into the literal "nan",
        # which compares greater than any "YYYY-MM-DD" string in the date
        # comparisons of fill_medications_and_medical_devices -- confirm that
        # is the intended treatment of missing dates.
        self.prescriptions_df["STARTDATE"] = self.prescriptions_df["STARTDATE"].apply(
            self._date_only
        )
        self.prescriptions_df["ENDDATE"] = self.prescriptions_df["ENDDATE"].apply(
            self._date_only
        )

    @staticmethod
    def _date_only(value) -> str:
        """Return the text of ``value`` up to its first space (the date part)."""
        return str(value).split(" ")[0]

    @staticmethod
    def _location_to_anatomical_site(location: str) -> str:
        """Strip a leading side prefix (e.g. ``"R "``, ``"LEFT "``) from a location.

        Returns ``""`` for the string ``"nan"`` (a missing value after
        ``astype(str)``) and for any non-string input.
        """
        if not isinstance(location, str) or location == "nan":
            return ""
        return re.sub("(?i)^(RIGHT|LEFT|RU|RL|LU|LL|R|L) ", "", location)

    @staticmethod
    def _location_to_laterality(location: str) -> str:
        """Map a location's leading side prefix to ``"Right"``, ``"Left"`` or ``""``."""
        if not isinstance(location, str):
            return ""
        if re.match("(?i)^(RIGHT|RU|RL|R) ", location):
            return "Right"
        if re.match("(?i)^(LEFT|LU|LL|L) ", location):
            return "Left"
        return ""

    @staticmethod
    def _join_unique(values: pd.Series) -> str:
        """Join the distinct non-empty strings in ``values`` with ``" and "``.

        First-seen order is kept; empty strings and non-strings are skipped.
        """
        distinct = dict.fromkeys(
            value for value in values if isinstance(value, str) and value
        )
        return " and ".join(distinct)

    def _admission_row(self, hadm_id: str) -> pd.Series:
        """Return the first ADMISSIONS row whose HADM_ID equals ``hadm_id``.

        Raises:
            IndexError: If no admission matches ``hadm_id``.
        """
        matches = self.admissions_df[self.admissions_df["HADM_ID"] == hadm_id]
        if matches.empty:
            raise IndexError(f"No admission found for HADM_ID {hadm_id!r}")
        return matches.iloc[0]

    def fill_patient_demographics(self, hadm_id: str) -> Dict:
        """Return date of birth, gender and subject id for the admission's patient.

        Raises:
            IndexError: If the admission or its patient row cannot be found.
        """
        subject_id = self._admission_row(hadm_id)["SUBJECT_ID"]
        patient_matches = self.patient_df[self.patient_df["SUBJECT_ID"] == subject_id]
        if patient_matches.empty:
            raise IndexError(f"No patient found for SUBJECT_ID {subject_id!r}")
        patient = patient_matches.iloc[0]

        return {
            # DOB is kept as read from the CSV; drop anything after the date.
            "date_of_birth": patient["DOB"].split(" ")[0],
            "gender": patient["GENDER"],
            "other_identifier": [subject_id],
        }

    def fill_admission_details(self, hadm_id: str) -> Dict:
        """Return admission method, source and admission timestamp.

        Raises:
            IndexError: If no admission matches ``hadm_id``.
        """
        admission = self._admission_row(hadm_id)
        return {
            "admission_method": admission["ADMISSION_TYPE"],
            "source_of_admission": admission["ADMISSION_LOCATION"],
            "date_time_of_admission": admission["ADMITTIME"].strftime(
                "%Y-%m-%d %H:%M:%S"
            ),
        }

    def fill_procedures(self, hadm_id: str) -> List[Dict]:
        """Return one entry per distinct procedure label of the admission.

        The sites and sides recorded for a label are de-duplicated and joined
        with ``" and "``. An admission without procedures yields ``[]``.
        """
        admission_procedures = self.procedures_df[
            self.procedures_df["HADM_ID"] == hadm_id
        ]
        return [
            {
                "procedure_name": procedure_name,
                "anatomical_site": self._join_unique(procedure_df["ANATOMICAL_SITE"]),
                "laterality": self._join_unique(procedure_df["LATERALITY"]),
            }
            for procedure_name, procedure_df in admission_procedures.groupby("LABEL")
        ]

    def _fill_discontinued_medication_item(
        self, most_recent_prescription: pd.Series
    ) -> Dict:
        """Build a discontinued-medication entry dated at the prescription's ENDDATE."""
        return {
            "name_of_discontinued_medication": most_recent_prescription["DRUG"],
            "status": "Discontinued",
            "indication": "",
            "date_of_latest_change": most_recent_prescription["ENDDATE"],
            "description_of_amendment": "",
            "comment": "",
        }

    def _fill_medication_item(self, most_recent_prescription: pd.Series) -> Dict:
        """Build a medication entry from a single prescription row."""
        return {
            "medication_name": most_recent_prescription["DRUG"],
            "form": most_recent_prescription["FORM_UNIT_DISP"],
            # NOTE(review): this key contains a space while every other key
            # uses underscores; left unchanged in case consumers rely on it.
            "quantity supplied": [
                f'{most_recent_prescription["FORM_VAL_DISP"]} {most_recent_prescription["FORM_UNIT_DISP"]}'
            ],
            "route": [most_recent_prescription["ROUTE"]],
            "site": "",
            "method": "",
            "dose_amount_description": (
                f'{most_recent_prescription["DOSE_VAL_RX"]} {most_recent_prescription["DOSE_UNIT_RX"]}'
            ),
            "dose_timing_description": "",
            "dose_direction_duration": "",
            "additional_instruction": "",
        }

    def _fill_medication_change_summary_cluster(
        self, most_recent_prescription: pd.Series, status: str
    ) -> Dict:
        """Build a change-summary cluster dated at the prescription's STARTDATE."""
        return {
            "status": status,
            "indication": "",
            "date_of_latest_change": most_recent_prescription["STARTDATE"],
            "description_of_amendment": "",
        }

    def fill_medications_and_medical_devices(self, hadm_id: str) -> Dict:
        """Classify the admission's prescriptions into current and discontinued.

        Prescriptions are grouped by DRUG. For each drug the earliest STARTDATE
        and the row with the latest ENDDATE are compared, as ``YYYY-MM-DD``
        strings, with the admission and discharge dates:

        * started after admission and ended before discharge: omitted;
        * otherwise ended before discharge: discontinued entry;
        * otherwise: medication entry, with an "Added" change cluster when the
          drug started after the admission date, or an "Amended" cluster when
          it has more than one prescription row.

        Raises:
            IndexError: If no admission matches ``hadm_id``.
        """
        admission = self._admission_row(hadm_id)
        admission_date = admission["ADMITTIME"].strftime("%Y-%m-%d")
        discharge_date = admission["DISCHTIME"].strftime("%Y-%m-%d")

        admission_prescriptions = self.prescriptions_df[
            self.prescriptions_df["HADM_ID"] == hadm_id
        ]

        medication_item_entry = []
        medication_discontinued_item_entry = []
        for _, drug_df in admission_prescriptions.groupby("DRUG"):
            first_start_date = drug_df["STARTDATE"].min()
            most_recent_prescription = drug_df.sort_values(
                "ENDDATE", ascending=False
            ).iloc[0]
            started_in_hospital = first_start_date > admission_date
            ended_before_discharge = (
                most_recent_prescription["ENDDATE"] < discharge_date
            )

            # Discharge summary should NOT include details of medications
            # that were both started and stopped in hospital
            if started_in_hospital and ended_before_discharge:
                continue

            # Medications that were current at the time of admission which
            # were discontinued either during the admission or at discharge
            if ended_before_discharge:
                medication_discontinued_item_entry.append(
                    self._fill_discontinued_medication_item(most_recent_prescription)
                )
                continue

            medication_item = self._fill_medication_item(most_recent_prescription)
            if started_in_hospital:
                # Medicine present on discharge but not on admission. Uses the
                # same "started after admission" test as the skip rule above,
                # so a drug started before the admission date is not "Added".
                medication_item["medication_change_summary_cluster"] = (
                    self._fill_medication_change_summary_cluster(
                        most_recent_prescription, "Added"
                    )
                )
            elif len(drug_df) > 1:
                # Medicine present on both admission and discharge but with
                # amendment(s) since admission
                medication_item["medication_change_summary_cluster"] = (
                    self._fill_medication_change_summary_cluster(
                        most_recent_prescription, "Amended"
                    )
                )
            medication_item_entry.append(medication_item)

        return {
            "medication_item_entry": medication_item_entry,
            "medication_discontinued_item_entry": medication_discontinued_item_entry,
        }

    def fill_prsb_discharge_summary(self, hadm_id: str) -> Dict:
        """Return all supported discharge-summary sections for one admission."""
        return {
            "patient_demographics": self.fill_patient_demographics(hadm_id),
            "admission_details": self.fill_admission_details(hadm_id),
            "procedures": self.fill_procedures(hadm_id),
            "medications_and_medical_devices": (
                self.fill_medications_and_medical_devices(hadm_id)
            ),
        }

In [None]:
# Build the filler once; the constructor reads every required MIMIC-III table.
test = MIMICTableToPRSBFiller(mimic_dir=MIMIC_III_DIR)

In [None]:
# Show the full discharge-summary dict for the sample admission.
test.fill_prsb_discharge_summary(hadm_id=hadm_id)

In [None]:
# Scratch check: take the sample admission's first ADMISSIONS row and show its
# admit time formatted as a date-only string.
hadim_admission_series = test.admissions_df.loc[
    lambda admissions: admissions["HADM_ID"] == hadm_id
].iloc[0]
hadim_admission_series["ADMITTIME"].strftime("%Y-%m-%d")

In [None]:
# Scratch check: display every ADMISSIONS row belonging to the sample admission.
hadm_id_patient = test.admissions_df.loc[
    lambda admissions: admissions["HADM_ID"] == hadm_id
]
hadm_id_patient

In [None]:
# Look up the patient row for the sample admission:
# admission rows -> SUBJECT_ID -> first matching PATIENTS row.
# (The cell previously began with a dangling `.iloc[0]` and read
# `hadm_id_admissions_df` without defining it, so it could not run.)
hadm_id_admissions_df = test.admissions_df[test.admissions_df["HADM_ID"] == hadm_id]

subject_id = hadm_id_admissions_df["SUBJECT_ID"].values[0]

hadm_id_patient = test.patient_df[test.patient_df["SUBJECT_ID"] == subject_id].iloc[0]