In [1]:
#Importing Libraries

import json
import csv
from datetime import datetime

In [2]:
def print_sample_json(file_path, num_samples=3):
    """Print the first ``num_samples`` JSON records of an NDJSON file.

    Each record is parsed with ``json.loads`` and printed as a Python object,
    followed by an empty line.

    Args:
        file_path: Path to a newline-delimited JSON file.
        num_samples: Maximum number of records to print. Values of zero or
            less print nothing.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8; do not rely on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        shown = 0
        for line in file:
            if shown >= num_samples:
                break
            # Blank lines (e.g. a trailing newline at end of file) are not
            # records; json.loads would raise on them.
            if not line.strip():
                continue
            print(json.loads(line))
            print()
            shown += 1

# Preview the first few records of each input file (print_sample_json's
# default sample count applies). Each entry pairs the heading to print with
# the file to sample.
sample_targets = [
    ("Patient.ndjson:", "Patient.ndjson"),
    ("\nCondition.ndjson:", "Condition.ndjson"),
    ("\nEncounter.ndjson:", "Encounter.ndjson"),
    ("\nEncounterICU.ndjson:", "EncounterICU.ndjson"),
]
for heading, ndjson_path in sample_targets:
    print(heading)
    print_sample_json(ndjson_path)


Patient.ndjson:
{'resourceType': 'Patient', 'id': '0a8eebfd-a352-522e-89f0-1d4a13abdebc', 'meta': {'versionId': '1', 'lastUpdated': '2022-05-24T15:14:55.471-04:00', 'source': '#V0XlSRZTewCRRSjY', 'profile': ['http://fhir.mimic.mit.edu/StructureDefinition/mimic-patient']}, 'text': {'status': 'generated', 'div': '<div xmlns="http://www.w3.org/1999/xhtml"><div class="hapiHeaderText"><b>PATIENT_10000032 </b></div><table class="hapiPropertyTable"><tbody><tr><td>Identifier</td><td>10000032</td></tr><tr><td>Date of birth</td><td><span>06 May 2128</span></td></tr></tbody></table></div>'}, 'extension': [{'url': 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-race', 'extension': [{'url': 'ombCategory', 'valueCoding': {'system': 'urn:oid:2.16.840.1.113883.6.238', 'code': '2106-3', 'display': 'White'}}, {'url': 'text', 'valueString': 'White'}]}, {'url': 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity', 'extension': [{'url': 'ombCategory', 'valueCoding': {'system': 'urn:

In [3]:
def read_ndjson(file_path):
    """Read a newline-delimited JSON file into a list of parsed objects.

    Args:
        file_path: Path to the NDJSON file.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8; do not rely on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        # Skip blank lines (commonly a trailing newline at end of file);
        # json.loads would raise on them.
        return [json.loads(line) for line in file if line.strip()]

def iso8601_to_unix(iso_string):
    """Convert an ISO 8601 datetime string to a Unix timestamp.

    Args:
        iso_string: Datetime text such as ``2022-05-24T15:14:55.471-04:00``.
            A trailing ``Z`` is accepted as UTC.

    Returns:
        Whole seconds since the Unix epoch as an ``int``; any fractional
        second is dropped by ``int()``.

    Raises:
        ValueError: If the string cannot be parsed by
            ``datetime.fromisoformat``.

    Note:
        A string without a UTC offset yields a naive datetime, which
        ``datetime.timestamp()`` interprets in the machine's local timezone.
    """
    # datetime.fromisoformat only accepts a trailing "Z" from Python 3.11 on;
    # spell it as an explicit UTC offset so older interpreters parse it too.
    if iso_string.endswith('Z'):
        iso_string = iso_string[:-1] + '+00:00'
    dt = datetime.fromisoformat(iso_string)
    return int(dt.timestamp())

def read_patients(file_path):
    """Load patient records from an NDJSON file, keyed by their ``id`` field.

    Args:
        file_path: Path to the NDJSON file of patient records.

    Returns:
        A dict mapping each record's ``'id'`` value to the full record. If an
        id appears more than once, the last record with that id is kept.
    """
    return {record['id']: record for record in read_ndjson(file_path)}

# Index every patient record by its id.
patients = read_patients("Patient.ndjson")

# Group conditions by patient id, keeping only conditions whose subject
# reference resolves to a known patient.
patient_conditions = {}
condition_data = read_ndjson("Condition.ndjson")
for condition in condition_data:
    patient_id = condition['subject']['reference'].split('/')[-1]
    if patient_id in patients:
        patient_conditions.setdefault(patient_id, []).append(condition)

# Index hospital and ICU encounters by id. ICU encounters are added last, so
# an ICU record replaces a hospital record that shares the same id.
encounters = {}
encounter_data = read_ndjson("Encounter.ndjson")
encounter_icu_data = read_ndjson("EncounterICU.ndjson")
for encounter in encounter_data:
    encounters[encounter['id']] = encounter
for encounter in encounter_icu_data:
    encounters[encounter['id']] = encounter

# Estimate each condition's time as the start of its encounter (Unix seconds).
# The time is None when it cannot be resolved: the condition has no encounter
# reference, the referenced encounter is not loaded, or the encounter has no
# period start. (The encounter reference and the encounter period are optional
# elements in FHIR, so a missing key must not abort the whole export.)
condition_estimated_time = {}
for condition in condition_data:
    encounter_ref = (condition.get('encounter') or {}).get('reference')
    encounter = encounters.get(encounter_ref.split('/')[-1]) if encounter_ref else None
    start_time = (encounter.get('period') or {}).get('start') if encounter else None
    condition_estimated_time[condition['id']] = (
        iso8601_to_unix(start_time) if start_time else None
    )

# Write one CSV row per (patient, condition). UTF-8 is set explicitly so the
# file does not depend on the platform's default encoding; newline='' is what
# the csv module requires to control line endings itself.
with open('patient_conditions.csv', mode='w', newline='', encoding='utf-8') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=['pid', 'time', 'code', 'description'])
    writer.writeheader()
    for patient_id, conditions in patient_conditions.items():
        for condition in conditions:
            # Only the first coding is exported. A missing code/display is
            # written as an empty cell (csv writes None as '').
            coding = (condition.get('code') or {}).get('coding') or [{}]
            primary = coding[0]
            writer.writerow({
                'pid': patient_id,
                'time': condition_estimated_time[condition['id']],
                'code': primary.get('code'),
                'description': primary.get('display'),
            })


In [4]:
import pandas as pd
# Values in the 'code' column (e.g. '49390', 'Z8546') are identifiers, not
# numbers. Pin the column to str so numeric-looking codes keep any leading
# zeros and the column never ends up with mixed int/str values.
df = pd.read_csv('patient_conditions.csv', dtype={'code': str})
df.head()

Unnamed: 0,pid,time,code,description
0,b410dd44-7d65-56f9-974f-2751e8aa80e2,5616018000,Z8546,Personal history of malignant neoplasm of pros...
1,b410dd44-7d65-56f9-974f-2751e8aa80e2,5387190060,V1072,Personal history of hodgkin's disease
2,b410dd44-7d65-56f9-974f-2751e8aa80e2,5639393940,Z7902,Long term (current) use of antithrombotics/ant...
3,b410dd44-7d65-56f9-974f-2751e8aa80e2,5426582400,49390,"Asthma, unspecified type, unspecified"
4,b410dd44-7d65-56f9-974f-2751e8aa80e2,5426582400,2724,Other and unspecified hyperlipidemia
