# In [1]:
import json
import csv
from datetime import datetime

# Step 1: Read and Understand JSON Data

# Function to read and parse JSONL files
def read_jsonl(file_path):
    """Lazily yield one parsed JSON value per non-blank line of a JSONL/NDJSON file.

    Args:
        file_path: Path of the newline-delimited JSON file to read.

    Yields:
        The value decoded by ``json.loads`` for each non-blank line, in file
        order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding (e.g. cp1252 on Windows), which can mis-decode non-ASCII text.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank or whitespace-only lines (e.g. extra newlines at the end
            # of the file) are not records; json.loads would raise on them.
            if not line.strip():
                continue
            yield json.loads(line)

# Step 2: Creating Array of Conditions for Each Patient

# Group every Condition record under the patient id it refers to.
patient_conditions = {}
conditions_path = r"F:\mugs_assignment\mimic-iv-clinical-database-demo-on-fhir-2.0\mimic-fhir\Condition.ndjson"
for resource in read_jsonl(conditions_path):
    # The patient id is the last '/'-separated segment of the subject reference.
    subject_ref = resource['subject']['reference']
    pid = subject_ref.split('/')[-1]
    # setdefault creates the patient's list on first sight, then appends.
    patient_conditions.setdefault(pid, []).append(resource)

# Step 3: Assigning Estimated Time for Conditions

# Map each encounter id to its parsed period start. Encounter.ndjson is read
# first and EncounterICU.ndjson second, so an id present in both files ends up
# with the value from the second file.
encounter_start_times = {}
encounter_files = (
    r"F:\mugs_assignment\mimic-iv-clinical-database-demo-on-fhir-2.0\mimic-fhir\Encounter.ndjson",
    r"F:\mugs_assignment\mimic-iv-clinical-database-demo-on-fhir-2.0\mimic-fhir\EncounterICU.ndjson",
)
# Timestamps are parsed with an explicit UTC offset (%z), yielding aware datetimes.
timestamp_format = '%Y-%m-%dT%H:%M:%S%z'
for ndjson_path in encounter_files:
    for resource in read_jsonl(ndjson_path):
        enc_id = resource['id']
        period_start = resource['period']['start']
        encounter_start_times[enc_id] = datetime.strptime(period_start, timestamp_format)

# Step 4: Creating CSV File
# Write one CSV row per condition whose encounter has a known start time.
# NOTE(review): no encoding is passed to open(), so the platform default is
# used for the output file -- confirm this is intended.
with open('output_file.csv', 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=['pid', 'time', 'code', 'description'])
    writer.writeheader()

    for patient_id, conditions in patient_conditions.items():
        for condition in conditions:
            # The encounter id is the last '/'-separated segment of the reference.
            encounter_ref = condition['encounter']['reference']
            encounter_id = encounter_ref.split('/')[-1]
            # Conditions whose encounter was not loaded are skipped silently.
            if encounter_id not in encounter_start_times:
                continue
            # Only the first coding entry of the condition is exported.
            first_coding = condition['code']['coding'][0]
            writer.writerow({
                'pid': patient_id,
                # Encounter start expressed as a POSIX timestamp (float seconds).
                'time': encounter_start_times[encounter_id].timestamp(),
                'code': first_coding['code'],
                'description': first_coding['display'],
            })
