In [None]:
import json
import os

import pandas as pd

In [37]:
def _first_or_empty(items):
    """Return the first element of ``items`` when it is a dict, otherwise ``{}``."""
    if isinstance(items, list) and items and isinstance(items[0], dict):
        return items[0]
    return {}


def _extension_text(extension):
    """Return the ``valueString`` of a race/ethnicity extension, or ``""``.

    The sub-extension whose ``url`` is ``"text"`` is preferred, because its
    position in the list is not fixed (any number of category entries may
    precede it). When no such entry exists, the second sub-extension is used
    if present, which is what the earlier positional lookup returned.
    """
    sub_extensions = extension.get("extension") or []
    for sub in sub_extensions:
        if sub.get("url") == "text" and "valueString" in sub:
            return sub["valueString"]
    if len(sub_extensions) > 1:
        return sub_extensions[1].get("valueString", "")
    return ""


def extract_patient_data(patient_json):
    """Flatten one patient record (a dict parsed from JSON) into a flat dict.

    Parameters
    ----------
    patient_json : dict
        A single patient resource.

    Returns
    -------
    dict
        Keys ``PatientID``, ``BirthDate``, ``DeceasedDateTime``,
        ``MaritalStatus``, ``Race``, ``Ethnicity`` and ``Language``.
        Absent scalar fields become ``""``. ``Race`` / ``Ethnicity`` stay
        ``None`` when no matching extension is present, and ``Language`` stays
        ``None`` when the record has no ``communication`` key.
    """
    marital_status = patient_json.get("maritalStatus") or {}
    extracted_data = {
        "PatientID": patient_json.get("id", ""),
        "BirthDate": patient_json.get("birthDate", ""),
        "DeceasedDateTime": patient_json.get("deceasedDateTime", ""),
        "MaritalStatus": _first_or_empty(marital_status.get("coding")).get("code", ""),
        "Race": None,
        "Ethnicity": None,
        "Language": None
    }

    # Race and ethnicity live in top-level extensions identified by their URL
    for ext in patient_json.get("extension") or []:
        url = ext.get("url") or ""
        if "us-core-race" in url:
            extracted_data["Race"] = _extension_text(ext)
        if "us-core-ethnicity" in url:
            extracted_data["Ethnicity"] = _extension_text(ext)

    # Language code of the first communication entry, if the key exists at all
    if "communication" in patient_json:
        communication = _first_or_empty(patient_json["communication"])
        language = communication.get("language") or {}
        extracted_data["Language"] = _first_or_empty(language.get("coding")).get("code", "")

    return extracted_data

# Load the patient records. The encoding is pinned to UTF-8 so the read does
# not depend on the platform's locale default.
# Assumes the file holds a JSON array of patient objects.
with open("./data/Patient.json", "r", encoding="utf-8") as f:
    patients_data = json.load(f)

# Extract required fields
cleaned_patients = [extract_patient_data(patient) for patient in patients_data]

# Convert to DataFrame
df = pd.DataFrame(cleaned_patients)

# Save the cleaned data; to_csv does not create missing directories, so make
# sure the output folder exists first.
os.makedirs("csv", exist_ok=True)
df.to_csv("csv/cleaned_patients.csv", index=False)


In [39]:
def extract_condition_data(condition_data_json):
    """Flatten one condition record (a dict parsed from JSON) into a flat dict.

    Returns a dict with ``ConditionID``, ``PatientID`` (subject reference with
    the ``Patient/`` prefix removed), ``Category`` (code of the first coding of
    the first category) and ``ConditionDescription`` (display of the first
    coding of the condition code). Absent fields become ``""``.
    """
    subject_reference = condition_data_json.get("subject", {}).get("reference", "")

    # Only the first category and the first coding inside it are considered
    first_category = condition_data_json.get("category", [{}])[0]
    category_coding = first_category.get("coding", [{}])[0]

    condition_coding = condition_data_json.get("code", {}).get("coding", [{}])[0]

    return {
        "ConditionID": condition_data_json.get("id", ""),
        "PatientID": subject_reference.replace("Patient/", ""),
        "Category": category_coding.get("code", ""),
        "ConditionDescription": condition_coding.get("display", ""),
    }

# Load the condition records with an explicit UTF-8 encoding so the read does
# not depend on the platform's locale default.
# Assumes the file holds a JSON array of condition objects.
with open("./data/Condition.json", "r", encoding="utf-8") as f:
    conditions_data = json.load(f)

cleaned_conditions = [extract_condition_data(condition) for condition in conditions_data]

df_conditions = pd.DataFrame(cleaned_conditions)

# to_csv does not create missing directories, so make sure the folder exists
os.makedirs("csv", exist_ok=True)
df_conditions.to_csv("csv/cleaned_conditions.csv", index=False)

# Disabled patient/condition join, kept for reference.
# NOTE(review): `df_patients` is not defined in the cells visible here (the
# patient cell binds its frame to `df`), so adjust the name before re-enabling.
# df_merged = df_patients.merge(df_conditions, on="PatientID", how="left")

# df_merged.to_csv("merged_patients_conditions.csv", index=False)




In [40]:
def extract_procedure_data(procedure_json):
    """Flatten one procedure record (a dict parsed from JSON) into a flat dict.

    The patient and encounter references have their ``Patient/`` and
    ``Encounter/`` prefixes removed. Code and description both come from the
    first coding of ``code``. Absent fields become ``""``.
    """
    # Only the first coding of the procedure code is used
    primary_coding = procedure_json.get("code", {}).get("coding", [{}])[0]
    subject_reference = procedure_json.get("subject", {}).get("reference", "")
    encounter_reference = procedure_json.get("encounter", {}).get("reference", "")

    record = {"ProcedureID": procedure_json.get("id", "")}
    record["PatientID"] = subject_reference.replace("Patient/", "")
    record["EncounterID"] = encounter_reference.replace("Encounter/", "")
    record["ProcedureCode"] = primary_coding.get("code", "")
    record["ProcedureDescription"] = primary_coding.get("display", "")
    record["Status"] = procedure_json.get("status", "")
    record["PerformedDate"] = procedure_json.get("performedDateTime", "")
    return record

# Load the procedure records with an explicit UTF-8 encoding so the read does
# not depend on the platform's locale default.
# Assumes the file holds a JSON array of procedure objects.
with open("./data/Procedure.json", "r", encoding="utf-8") as f:
    procedures_data = json.load(f)

cleaned_procedures = [extract_procedure_data(procedure) for procedure in procedures_data]

df_procedures = pd.DataFrame(cleaned_procedures)

# to_csv does not create missing directories, so make sure the folder exists
os.makedirs("csv", exist_ok=True)
df_procedures.to_csv("csv/cleaned_procedures.csv", index=False)

In [41]:
def extract_procedure_icu_data(procedure_json):
    """Flatten one ICU procedure record (a dict parsed from JSON) into a flat dict.

    ``category`` is read as a single coded concept and ``bodySite`` as a list
    whose first entry is used; in every case only the first coding counts.
    Start and end come from ``performedPeriod``. Absent fields become ``""``.
    """
    def first_coding(concept):
        # First coding entry of a coded concept, with an empty-dict default
        return concept.get("coding", [{}])[0]

    subject_reference = procedure_json.get("subject", {}).get("reference", "")
    encounter_reference = procedure_json.get("encounter", {}).get("reference", "")
    category_coding = first_coding(procedure_json.get("category", {}))
    code_coding = first_coding(procedure_json.get("code", {}))
    performed_period = procedure_json.get("performedPeriod", {})
    body_site_coding = first_coding(procedure_json.get("bodySite", [{}])[0])

    return {
        "ProcedureICUID": procedure_json.get("id", ""),
        "PatientID": subject_reference.replace("Patient/", ""),
        "EncounterID": encounter_reference.replace("Encounter/", ""),
        "ProcedureCategory": category_coding.get("code", ""),
        "ProcedureCode": code_coding.get("code", ""),
        "ProcedureDescription": code_coding.get("display", ""),
        "PerformedStartDate": performed_period.get("start", ""),
        "PerformedEndDate": performed_period.get("end", ""),
        "BodySite": body_site_coding.get("code", ""),
    }

# Load ICU Procedure JSON file. The encoding is pinned to UTF-8 so the read
# does not depend on the platform's locale default.
# Assumes the file holds a JSON array of procedure objects.
with open("./data/ProcedureICU.json", "r", encoding="utf-8") as f:
    procedures_icu_data = json.load(f)

# Extract useful fields
cleaned_procedures_icu = [extract_procedure_icu_data(procedure) for procedure in procedures_icu_data]

# Convert to DataFrame
df_procedures_icu = pd.DataFrame(cleaned_procedures_icu)

# Save to CSV; to_csv does not create missing directories, so make sure the
# output folder exists first.
os.makedirs("csv", exist_ok=True)
df_procedures_icu.to_csv("csv/cleaned_procedures_icu.csv", index=False)

In [49]:
def extract_encounter_data(encounter_json):
    """Flatten one encounter record (a dict parsed from JSON) into a flat dict.

    ``EncounterType`` is the display of the first coding of the first ``type``
    entry. Admission source and discharge disposition are read from
    ``hospitalization``, and the dates from ``period``. Absent fields become
    ``""``.
    """
    subject_reference = encounter_json.get("subject", {}).get("reference", "")
    type_coding = encounter_json.get("type", [{}])[0].get("coding", [{}])[0]
    service_coding = encounter_json.get("serviceType", {}).get("coding", [{}])[0]

    # Both admission fields hang off the same hospitalization sub-object
    hospitalization = encounter_json.get("hospitalization", {})
    admit_coding = hospitalization.get("admitSource", {}).get("coding", [{}])[0]
    discharge_coding = hospitalization.get("dischargeDisposition", {}).get("coding", [{}])[0]

    period = encounter_json.get("period", {})

    return {
        "EncounterID": encounter_json.get("id", ""),
        "PatientID": subject_reference.replace("Patient/", ""),
        "EncounterClass": encounter_json.get("class", {}).get("code", ""),
        "EncounterType": type_coding.get("display", ""),
        "ServiceType": service_coding.get("code", ""),
        "AdmissionSource": admit_coding.get("code", ""),
        "DischargeDisposition": discharge_coding.get("code", ""),
        "EncounterStartDate": period.get("start", ""),
        "EncounterEndDate": period.get("end", ""),
    }

# Load Encounter JSON file. The encoding is pinned to UTF-8 so the read does
# not depend on the platform's locale default.
# Assumes the file holds a JSON array of encounter objects.
with open("./data/Encounter.json", "r", encoding="utf-8") as f:
    encounters_data = json.load(f)

# Extract useful fields
cleaned_encounters = [extract_encounter_data(encounter) for encounter in encounters_data]

# Convert to DataFrame
df_encounters = pd.DataFrame(cleaned_encounters)

# Save to CSV; to_csv does not create missing directories, so make sure the
# output folder exists first.
os.makedirs("csv", exist_ok=True)
df_encounters.to_csv("csv/cleaned_encounters.csv", index=False)


In [43]:
def extract_medication_data(medication_json):
    """Flatten one medication record (a dict parsed from JSON) into a flat dict.

    Parameters
    ----------
    medication_json : dict
        A single medication resource.

    Returns
    -------
    dict
        ``MedicationID`` (``""`` when absent) plus ``MedicationName``,
        ``MedicationNDC`` and ``MedicationFormularyCode``, each taken from the
        identifier whose ``system`` matches the corresponding code-system URL
        (``None`` when no such identifier exists; the last match wins).
        ``Ingredients`` is a ``", "``-joined string of ingredient references,
        or ``None`` when there are none.

    Identifiers lacking ``system`` or ``value`` and ingredients without an
    ``itemReference`` are skipped instead of raising ``KeyError``.
    """
    system_to_field = {
        "http://fhir.mimic.mit.edu/CodeSystem/medication-name": "MedicationName",
        "http://fhir.mimic.mit.edu/CodeSystem/medication-ndc": "MedicationNDC",
        "http://fhir.mimic.mit.edu/CodeSystem/medication-formulary-drug-cd": "MedicationFormularyCode",
    }

    record = {
        "MedicationID": medication_json.get("id", ""),
        "MedicationName": None,
        "MedicationNDC": None,
        "MedicationFormularyCode": None,
        "Ingredients": None,
    }

    for identifier in medication_json.get("identifier") or []:
        field = system_to_field.get(identifier.get("system"))
        if field is not None and "value" in identifier:
            record[field] = identifier["value"]

    # Ingredients may be described without a reference; keep only real references
    references = []
    for ingredient in medication_json.get("ingredient") or []:
        reference = (ingredient.get("itemReference") or {}).get("reference")
        if reference:
            references.append(reference)
    if references:
        record["Ingredients"] = ", ".join(references)

    return record

# Load Medication JSON file. The encoding is pinned to UTF-8 so the read does
# not depend on the platform's locale default.
# Assumes the file holds a JSON array of medication objects.
with open("./data/Medication.json", "r", encoding="utf-8") as f:
    medications_data = json.load(f)

# Extract useful fields
cleaned_medications = [extract_medication_data(medication) for medication in medications_data]

# Convert to DataFrame
df_medications = pd.DataFrame(cleaned_medications)

# Save to CSV; to_csv does not create missing directories, so make sure the
# output folder exists first.
os.makedirs("csv", exist_ok=True)
df_medications.to_csv("csv/cleaned_medications.csv", index=False)


In [44]:
def extract_medication_administration_data(med_admin_json):
    """Flatten one medication-administration record into a flat dict.

    The encounter ID is read from ``context``; dose value/unit and the method
    code are read from ``dosage``. Only the first coding of a coded concept is
    used. Absent fields become ``""``.
    """
    subject_reference = med_admin_json.get("subject", {}).get("reference", "")
    context_reference = med_admin_json.get("context", {}).get("reference", "")
    medication_coding = med_admin_json.get("medicationCodeableConcept", {}).get("coding", [{}])[0]

    # Dose and method both live under the single "dosage" object
    dosage = med_admin_json.get("dosage", {})
    dose = dosage.get("dose", {})
    method_coding = dosage.get("method", {}).get("coding", [{}])[0]

    return dict(
        MedicationAdminID=med_admin_json.get("id", ""),
        PatientID=subject_reference.replace("Patient/", ""),
        EncounterID=context_reference.replace("Encounter/", ""),
        Status=med_admin_json.get("status", ""),
        MedicationCode=medication_coding.get("code", ""),
        EffectiveDate=med_admin_json.get("effectiveDateTime", ""),
        DosageValue=dose.get("value", ""),
        DosageUnit=dose.get("unit", ""),
        Method=method_coding.get("code", ""),
    )

# Load the medication-administration records with an explicit UTF-8 encoding
# so the read does not depend on the platform's locale default.
# Assumes the file holds a JSON array of administration objects.
with open("./data/MedicationAdministration.json", "r", encoding="utf-8") as f:
    med_admin_data = json.load(f)

# Extract useful fields
cleaned_med_admin = [extract_medication_administration_data(med_admin) for med_admin in med_admin_data]

# Convert to DataFrame
df_med_admin = pd.DataFrame(cleaned_med_admin)

# Save to CSV; to_csv does not create missing directories, so make sure the
# output folder exists first.
os.makedirs("csv", exist_ok=True)
df_med_admin.to_csv("csv/cleaned_medication_administration.csv", index=False)


In [46]:
def extract_medication_dispense_data(med_disp_json):
    """Flatten one medication-dispense record into a flat dict.

    ``MedicationName`` holds the *code* of the first medication coding.
    ``AuthorizingPrescription`` keeps the full reference string of the first
    prescription (no prefix is stripped, unlike the patient and encounter
    IDs). Route and frequency come from the first dosage instruction.
    Absent fields become ``""``.
    """
    subject_reference = med_disp_json.get("subject", {}).get("reference", "")
    context_reference = med_disp_json.get("context", {}).get("reference", "")
    medication_coding = med_disp_json.get("medicationCodeableConcept", {}).get("coding", [{}])[0]
    first_prescription = med_disp_json.get("authorizingPrescription", [{}])[0]

    # Route and frequency are both taken from the first dosage instruction only
    first_instruction = med_disp_json.get("dosageInstruction", [{}])[0]
    route_coding = first_instruction.get("route", {}).get("coding", [{}])[0]
    timing_code = first_instruction.get("timing", {}).get("code", {})
    frequency_coding = timing_code.get("coding", [{}])[0]

    record = {}
    record["MedicationDispenseID"] = med_disp_json.get("id", "")
    record["PatientID"] = subject_reference.replace("Patient/", "")
    record["EncounterID"] = context_reference.replace("Encounter/", "")
    record["Status"] = med_disp_json.get("status", "")
    record["MedicationName"] = medication_coding.get("code", "")
    record["AuthorizingPrescription"] = first_prescription.get("reference", "")
    record["Route"] = route_coding.get("code", "")
    record["Frequency"] = frequency_coding.get("code", "")
    return record

# Load MedicationDispense JSON file. The encoding is pinned to UTF-8 so the
# read does not depend on the platform's locale default.
# Assumes the file holds a JSON array of dispense objects.
with open("./data/MedicationDispense.json", "r", encoding="utf-8") as f:
    med_disp_data = json.load(f)

# Extract useful fields
cleaned_med_disp = [extract_medication_dispense_data(med_disp) for med_disp in med_disp_data]

# Convert to DataFrame
df_med_disp = pd.DataFrame(cleaned_med_disp)

# Save to CSV; to_csv does not create missing directories, so make sure the
# output folder exists first.
os.makedirs("csv", exist_ok=True)
df_med_disp.to_csv("csv/cleaned_medication_dispense.csv", index=False)

In [47]:
def extract_medication_request_data(med_req_json):
    """Flatten one medication-request record into a flat dict.

    All dosage fields come from the first dosage instruction, and the dose
    quantity from that instruction's first ``doseAndRate`` entry.
    ``MedicationReferenceID`` keeps the full reference string (no prefix is
    stripped, unlike the patient and encounter IDs). Absent fields become
    ``""``.
    """
    subject_reference = med_req_json.get("subject", {}).get("reference", "")
    encounter_reference = med_req_json.get("encounter", {}).get("reference", "")

    # Everything dosage-related is read from the first instruction only
    first_instruction = med_req_json.get("dosageInstruction", [{}])[0]
    route_coding = first_instruction.get("route", {}).get("coding", [{}])[0]
    dose_quantity = first_instruction.get("doseAndRate", [{}])[0].get("doseQuantity", {})
    frequency_coding = first_instruction.get("timing", {}).get("code", {}).get("coding", [{}])[0]

    validity_period = med_req_json.get("dispenseRequest", {}).get("validityPeriod", {})

    return {
        "MedicationRequestID": med_req_json.get("id", ""),
        "PatientID": subject_reference.replace("Patient/", ""),
        "EncounterID": encounter_reference.replace("Encounter/", ""),
        "Status": med_req_json.get("status", ""),
        "Intent": med_req_json.get("intent", ""),
        "MedicationReferenceID": med_req_json.get("medicationReference", {}).get("reference", ""),
        "AuthoredDate": med_req_json.get("authoredOn", ""),
        "DosageDescription": first_instruction.get("text", ""),
        "Route": route_coding.get("code", ""),
        "DoseQuantity": dose_quantity.get("value", ""),
        "DoseUnit": dose_quantity.get("unit", ""),
        "Frequency": frequency_coding.get("code", ""),
        "DispenseValidityStart": validity_period.get("start", ""),
        "DispenseValidityEnd": validity_period.get("end", ""),
    }

# Load the medication-request records with an explicit UTF-8 encoding so the
# read does not depend on the platform's locale default.
# Assumes the file holds a JSON array of request objects.
with open("./data/MedicationRequest.json", "r", encoding="utf-8") as f:
    med_req_data = json.load(f)

cleaned_med_req = [extract_medication_request_data(med_req) for med_req in med_req_data]

# Convert to DataFrame
df_med_req = pd.DataFrame(cleaned_med_req)

# Save to CSV; to_csv does not create missing directories, so make sure the
# output folder exists first.
os.makedirs("csv", exist_ok=True)
df_med_req.to_csv("csv/cleaned_medication_request.csv", index=False)

In [48]:
def extract_medication_admin_icu_data(med_admin_json):
    """Flatten one ICU medication-administration record into a flat dict.

    ``EffectiveStartDate`` is ``effectiveDateTime`` when that key is present,
    otherwise the start of ``effectivePeriod``. Dose, rate and method are read
    from ``dosage``. Only the first coding of a coded concept is used. Absent
    fields become ``""``.
    """
    subject_reference = med_admin_json.get("subject", {}).get("reference", "")
    context_reference = med_admin_json.get("context", {}).get("reference", "")
    category_coding = med_admin_json.get("category", {}).get("coding", [{}])[0]
    medication_coding = med_admin_json.get("medicationCodeableConcept", {}).get("coding", [{}])[0]

    # A point-in-time timestamp wins over the period start whenever the key exists
    effective_period = med_admin_json.get("effectivePeriod", {})
    if "effectiveDateTime" in med_admin_json:
        effective_start = med_admin_json["effectiveDateTime"]
    else:
        effective_start = effective_period.get("start", "")

    dosage = med_admin_json.get("dosage", {})
    method_coding = dosage.get("method", {}).get("coding", [{}])[0]
    dose = dosage.get("dose", {})
    rate = dosage.get("rateQuantity", {})

    return {
        "MedicationICUAdminID": med_admin_json.get("id", ""),
        "PatientID": subject_reference.replace("Patient/", ""),
        "EncounterID": context_reference.replace("Encounter/", ""),
        "Status": med_admin_json.get("status", ""),
        "Category": category_coding.get("code", ""),
        "MedicationCode": medication_coding.get("code", ""),
        "MedicationName": medication_coding.get("display", ""),
        "EffectiveStartDate": effective_start,
        "EffectiveEndDate": effective_period.get("end", ""),
        "AdministrationMethod": method_coding.get("code", ""),
        "DosageValue": dose.get("value", ""),
        "DosageUnit": dose.get("unit", ""),
        "RateQuantity": rate.get("value", ""),
        "RateUnit": rate.get("unit", ""),
    }

# Load the ICU medication-administration records with an explicit UTF-8
# encoding so the read does not depend on the platform's locale default.
# Assumes the file holds a JSON array of administration objects.
with open("./data/MedicationAdministrationICU.json", "r", encoding="utf-8") as f:
    med_admin_icu_data = json.load(f)

# Extract useful fields
cleaned_med_admin_icu = [extract_medication_admin_icu_data(med_admin) for med_admin in med_admin_icu_data]

# Convert to DataFrame
df_med_admin_icu = pd.DataFrame(cleaned_med_admin_icu)

# Save to CSV; to_csv does not create missing directories, so make sure the
# output folder exists first.
os.makedirs("csv", exist_ok=True)
df_med_admin_icu.to_csv("csv/cleaned_medication_administration_icu.csv", index=False)
