In [1]:
import os
import json
import random
from collections import defaultdict

# **Folders for the raw FHIR export (input) and the processed files (output)**
data_folder = "synthea/output/fhir"
output_folder = "synthea/output/processed/"
os.makedirs(output_folder, exist_ok=True)

# **Collect every JSON file in the input folder**
# os.listdir() returns names in arbitrary, filesystem-dependent order. Sorting
# keeps the patient order -- and the random pairing that is later derived from
# it -- stable from one machine/run to the next.
json_files = sorted(f for f in os.listdir(data_folder) if f.endswith(".json"))

# **Resources grouped by their `resourceType`**
resources = defaultdict(list)

# **Read every file and file each resource under its type**
for file in json_files:
    file_path = os.path.join(data_folder, file)
    print(f"📂 Membaca file: {file}")

    # Only the parse itself is guarded; an unreadable file is reported and
    # skipped so the remaining files are still loaded.
    with open(file_path, "r", encoding="utf-8") as f:
        try:
            data = json.load(f)
        except json.JSONDecodeError as e:
            print(f"⚠️ Error membaca JSON di {file}: {e}")
            continue

    if isinstance(data, dict) and "entry" in data:
        # FHIR Bundle: the resources live under `entry[*].resource`.
        candidates = [
            entry.get("resource") if isinstance(entry, dict) else None
            for entry in (data["entry"] or [])
        ]
    else:
        # Otherwise the document is treated as one bare resource.
        candidates = [data]

    for resource in candidates:
        # Entries without a resource / resourceType cannot be grouped; skip
        # them with a warning instead of aborting the whole load on KeyError.
        resource_type = resource.get("resourceType") if isinstance(resource, dict) else None
        if not resource_type:
            print(f"⚠️ Melewati resource tanpa resourceType di {file}")
            continue
        resources[resource_type].append(resource)

# **Patient list and lookup tables used by the later cells**
patients = resources["Patient"]
patient_ids = [p["id"] for p in patients]
patient_data = {p["id"]: p for p in patients}  # full Patient record by id

print(f"\n✅ Jumlah pasien ditemukan: {len(patients)}")


📂 Membaca file: Adell482_Runolfsdottir785_7da148be-b73e-73e3-ed5c-67d7c712a253.json
📂 Membaca file: Alease857_Jerde200_d4f1d88b-aecc-493e-2977-44a72e0de2d9.json
📂 Membaca file: Alesha810_Sanford861_9f7675c1-1f29-10ac-92e5-8aaf367f05c3.json
📂 Membaca file: Alfred550_Ruecker817_839e461d-9a4d-a110-1fe9-97bd16378bfd.json
📂 Membaca file: Allene83_Cathryn51_Kerluke267_7e101445-eafd-cd17-0e6b-57f85baa3f44.json
📂 Membaca file: Alvaro283_Robel940_c498075d-c7cc-69ba-23c3-0e6a6c188592.json
📂 Membaca file: Angelo118_Halvorson124_3ada63c8-2ff1-1032-9ed5-39b5fba54c89.json
📂 Membaca file: Anitra287_Elena945_Doyle959_06671679-d2c8-8426-da09-7017cc0bda53.json
📂 Membaca file: Berna338_Paulita78_Bode78_4f370d5a-e3c8-369d-d70e-1f57ff23406a.json
📂 Membaca file: Bev675_Bins636_876d955b-0c84-3d32-cba7-f342a8432b47.json
📂 Membaca file: Brett333_Rutherford999_423b2ac4-4424-4faa-743c-cf41352c3845.json
📂 Membaca file: Burma963_Terry864_86f20cc9-62f7-e23e-c66b-a8acc5819976.json
📂 Membaca file: Caitlin552_Gislason

In [2]:

# **Per-disease probability that a generated relative is assigned the disease**
# (the original note says the figures are based on epidemiological data; no
# source is cited in this notebook)
disease_probabilities = dict([
    ("Diabetes", 0.125),
    ("Hypertension", 0.35),
    ("Cancer", 0.075),
    ("Heart Disease", 0.225),
    ("Alzheimer", 0.03),
    ("Asthma", 0.10),
])

# **Build a RelatedPerson resource**
def create_related_person(patient_id, related_id, relation_code, relation_display):
    """Describe patient `related_id` as a relative of patient `patient_id`.

    Name, gender and birth date are copied from the related patient's own
    record, looked up in the module-level `patient_data` mapping.

    Args:
        patient_id: id of the patient the relative belongs to; written as the
            `Patient/<id>` reference.
        related_id: id of the patient acting as the relative; must be a key
            of `patient_data`.
        relation_code: code placed in the v3-RoleCode coding (the notebook
            passes e.g. "FTH", "MTH", "BRO", "SIS").
        relation_display: human-readable label for `relation_code`.

    Returns:
        dict: the RelatedPerson resource. `birthDate` is present only when the
        related patient record has one.

    Raises:
        KeyError: if `related_id` is not in `patient_data`.
    """
    related_patient = patient_data[related_id]

    related_person = {
        "resourceType": "RelatedPerson",
        # NOTE(review): the FHIR `id` type only allows [A-Za-z0-9.-], so the
        # "urn:uuid:" prefix is not a valid logical id, and the value is the
        # same for every patient this relative is attached to. Left unchanged
        # because the generated Condition resources use the same string as
        # their subject reference -- confirm before changing either side.
        "id": f"urn:uuid:{related_id}",
        "patient": {"reference": f"Patient/{patient_id}"},
        "relationship": [{
            "coding": [{
                "system": "http://terminology.hl7.org/CodeSystem/v3-RoleCode",
                "code": relation_code,
                "display": relation_display
            }],
            "text": relation_display
        }],
        "name": related_patient.get("name", [{"use": "official", "family": "Unknown"}]),
        # "unknown" is a legal AdministrativeGender code, so it is a safe default.
        "gender": related_patient.get("gender", "unknown"),
    }

    # A FHIR `date` must look like YYYY, YYYY-MM or YYYY-MM-DD. The former
    # "unknown" placeholder is not a valid date, so the optional element is
    # simply left out when the source record has no birth date.
    birth_date = related_patient.get("birthDate")
    if birth_date:
        related_person["birthDate"] = birth_date

    return related_person

# **SNOMED CT concept id for each disease label used in `disease_probabilities`**
# Previously every disease was coded as 22298006 (Myocardial infarction),
# regardless of its label.
DISEASE_SNOMED_CODES = {
    "Diabetes": "73211009",       # Diabetes mellitus
    "Hypertension": "38341003",   # Hypertensive disorder
    "Cancer": "363346000",        # Malignant neoplastic disease
    "Heart Disease": "56265001",  # Heart disease
    "Alzheimer": "26929004",      # Alzheimer's disease
    "Asthma": "195967001",        # Asthma
}


def _disease_codeable_concept(disease):
    """Return the CodeableConcept dict for a disease label.

    Labels without an entry in `DISEASE_SNOMED_CODES` get a text-only concept:
    no coding is better than a coding that asserts the wrong disease.
    """
    snomed_code = DISEASE_SNOMED_CODES.get(disease)
    if snomed_code is None:
        return {"text": disease}
    return {
        "coding": [{
            "system": "http://snomed.info/sct",
            "code": snomed_code,
            "display": disease
        }],
        "text": disease
    }


# **Build a FamilyMemberHistory resource**
def create_family_member_history(patient_id, related_id, relation_code, relation_display,
                                 probabilities=None):
    """Randomly assign diseases to a relative and wrap them in a FamilyMemberHistory.

    Each disease is drawn independently with its own probability, in the
    iteration order of the probability mapping.

    Args:
        patient_id: id of the patient whose family history this is.
        related_id: id of the relative; used to build the resource id.
        relation_code: code placed in the v3-RoleCode coding.
        relation_display: human-readable label for `relation_code`.
        probabilities: optional mapping of disease label -> probability in
            [0, 1]. Defaults to the module-level `disease_probabilities`.

    Returns:
        tuple: `(family_history, inherited_conditions)` where `family_history`
        is the resource dict and `inherited_conditions` the list of drawn
        disease labels, or `(None, None)` when no disease was drawn.
    """
    if probabilities is None:
        probabilities = disease_probabilities

    # random.random() is uniform on [0, 1), so `< p` succeeds with probability
    # exactly p (p == 0 never fires, p == 1 always fires).
    inherited_conditions = [
        disease
        for disease, probability in probabilities.items()
        if random.random() < probability
    ]

    if not inherited_conditions:
        return None, None

    family_history = {
        "resourceType": "FamilyMemberHistory",
        # NOTE(review): the id ends in a dangling hyphen and depends only on
        # the relative, so it repeats when the same relative is attached to
        # several patients -- looks like a suffix is missing; confirm intent.
        # NOTE(review): FHIR R4 also requires a `status` element on this
        # resource; none is set here.
        "id": f"family-{related_id}-",
        "patient": {"reference": f"Patient/{patient_id}"},
        "relationship": {
            "coding": [{
                "system": "http://terminology.hl7.org/CodeSystem/v3-RoleCode",
                "code": relation_code,
                "display": relation_display
            }]
        },
        "condition": [
            {"code": _disease_codeable_concept(disease)}
            for disease in inherited_conditions
        ]
    }

    return family_history, inherited_conditions


# **Build a Condition resource for a relative**
def create_condition_for_related_person(related_id, disease):
    """Return a Condition resource recording `disease` for the relative `related_id`.

    Args:
        related_id: id of the relative; used in the resource id and in the
            `urn:uuid:<id>` subject reference.
        disease: disease label; lower-cased with spaces replaced by hyphens
            to form the id suffix.

    Returns:
        dict: the Condition resource.
    """
    return {
        "resourceType": "Condition",
        "id": f"condition-related-{related_id}-{disease.lower().replace(' ', '-')}",
        "subject": {"reference": f"urn:uuid:{related_id}"},
        "code": _disease_codeable_concept(disease)
    }



In [3]:
# **Add `FamilyMemberHistory`, `RelatedPerson` and `Condition` resources to the dataset**
# Every patient is randomly given up to two parents and up to two siblings,
# each played by another patient from the same dataset.
# NOTE(review): roles are assigned without looking at the relative's gender or
# birth date, so a "Father" may be a younger female patient -- confirm whether
# that matters for the intended use of the data.

# Seed the generator so Restart & Run All reproduces the same families.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Pairs already linked, stored in both directions so a pair is used only once.
used_relationships = set()

# Count only what this cell adds. `resources["Condition"]` already holds the
# conditions loaded from the input files, so its length is not the number of
# conditions generated here.
added_related_persons = 0
added_family_histories = 0
added_conditions = 0

for patient in patients:
    patient_id = patient["id"]

    possible_relations = [p for p in patient_ids if p != patient_id and (patient_id, p) not in used_relationships]
    if not possible_relations:
        continue

    family_members = []

    # Parents: 70% both, 20% exactly one (picked at random), 10% none.
    rand_parent = random.random()
    if rand_parent <= 0.7:
        family_members.append(("FTH", "Father"))
        family_members.append(("MTH", "Mother"))
    elif rand_parent <= 0.9:
        family_members.append(random.choice([("FTH", "Father"), ("MTH", "Mother")]))

    # Siblings: 50% chance of one or two, each independently brother or sister.
    rand_sibling = random.random()
    if rand_sibling <= 0.5:
        num_siblings = random.randint(1, 2)
        for _ in range(num_siblings):
            family_members.append(random.choice([("BRO", "Brother"), ("SIS", "Sister")]))

    # Draw as many distinct candidates as there are roles (or as many as
    # exist); zip() below drops the trailing roles when candidates run short.
    chosen_family_members = random.sample(possible_relations, min(len(family_members), len(possible_relations)))

    for related_person_id, (relation_code, relation_display) in zip(chosen_family_members, family_members):
        if (related_person_id, patient_id) not in used_relationships:
            used_relationships.add((patient_id, related_person_id))
            used_relationships.add((related_person_id, patient_id))

            related_person = create_related_person(patient_id, related_person_id, relation_code, relation_display)
            family_member_history, inherited_diseases = create_family_member_history(patient_id, related_person_id, relation_code, relation_display)

            resources["RelatedPerson"].append(related_person)
            added_related_persons += 1
            if family_member_history:
                resources["FamilyMemberHistory"].append(family_member_history)
                added_family_histories += 1
            if inherited_diseases:
                for disease in inherited_diseases:
                    condition_for_related = create_condition_for_related_person(related_person_id, disease)
                    print(patient_id, related_person_id, disease)
                    resources["Condition"].append(condition_for_related)
                    added_conditions += 1

print(f"\n✅ `RelatedPerson` ditambahkan: {added_related_persons}")
print(f"✅ `FamilyMemberHistory` ditambahkan: {added_family_histories}")
print(f"✅ `Condition` untuk RelatedPerson ditambahkan: {added_conditions}")



7da148be-b73e-73e3-ed5c-67d7c712a253 3a644dcd-672c-9579-cdeb-65ce6783da97 Asthma
7da148be-b73e-73e3-ed5c-67d7c712a253 8463087b-be64-1139-b779-97d09881e034 Hypertension
7da148be-b73e-73e3-ed5c-67d7c712a253 8463087b-be64-1139-b779-97d09881e034 Heart Disease
d4f1d88b-aecc-493e-2977-44a72e0de2d9 00a4d481-551d-9741-dd8f-fa88fe29ab79 Hypertension
d4f1d88b-aecc-493e-2977-44a72e0de2d9 8c97920a-fc41-8150-f54e-9dcfc1f48fef Diabetes
d4f1d88b-aecc-493e-2977-44a72e0de2d9 8c97920a-fc41-8150-f54e-9dcfc1f48fef Hypertension
9f7675c1-1f29-10ac-92e5-8aaf367f05c3 2b27a9c6-3b32-83fe-c4eb-ff271de3536b Cancer
9f7675c1-1f29-10ac-92e5-8aaf367f05c3 6afaf446-c5f5-8967-0c74-9174ec37994d Diabetes
9f7675c1-1f29-10ac-92e5-8aaf367f05c3 921dde19-cf57-df8d-5079-556b79c1c12b Asthma
839e461d-9a4d-a110-1fe9-97bd16378bfd 24b71f9a-cda7-8c08-2df2-4f9c9ae5db55 Heart Disease
839e461d-9a4d-a110-1fe9-97bd16378bfd 9f2b7772-a77d-9323-806a-e15deeb08d98 Cancer
c498075d-c7cc-69ba-23c3-0e6a6c188592 be222f9e-05e3-7c64-349b-02949d6222c7

In [4]:
# Preview only the first few Condition resources; displaying the whole list
# floods the cell output (it holds every condition in the dataset).
resources["Condition"][:3]

[{'resourceType': 'Condition',
  'id': 'ded1426d-62e2-77ad-0c8b-5b34075c89a9',
  'meta': {'profile': ['http://hl7.org/fhir/us/core/StructureDefinition/us-core-condition-encounter-diagnosis']},
  'clinicalStatus': {'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/condition-clinical',
     'code': 'resolved'}]},
  'verificationStatus': {'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/condition-ver-status',
     'code': 'confirmed'}]},
  'category': [{'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/condition-category',
      'code': 'encounter-diagnosis',
      'display': 'Encounter Diagnosis'}]}],
  'code': {'coding': [{'system': 'http://snomed.info/sct',
     'code': '314529007',
     'display': 'Medication review due (situation)'}],
   'text': 'Medication review due (situation)'},
  'subject': {'reference': 'urn:uuid:7da148be-b73e-73e3-ed5c-67d7c712a253'},
  'encounter': {'reference': 'urn:uuid:879a2d85-2d6a-6d2b-c3e0-8ab3527f2f47'},
  'onsetDateTi

In [5]:
# **Write every resource type to its own JSON file**
for resource_type, resource_list in resources.items():
    # One file per type, named `<resourceType>.json`, inside the output folder.
    output_path = os.path.join(
        output_folder,
        f"{resource_type}.json",
    )
    with open(output_path, mode="w", encoding="utf-8") as f:
        json.dump(resource_list, fp=f, indent=4)
    print(f"✅ Data {resource_type} disimpan di {output_path}")

✅ Data Patient disimpan di synthea/output/processed/Patient.json
✅ Data Encounter disimpan di synthea/output/processed/Encounter.json
✅ Data Condition disimpan di synthea/output/processed/Condition.json
✅ Data DiagnosticReport disimpan di synthea/output/processed/DiagnosticReport.json
✅ Data DocumentReference disimpan di synthea/output/processed/DocumentReference.json
✅ Data Claim disimpan di synthea/output/processed/Claim.json
✅ Data ExplanationOfBenefit disimpan di synthea/output/processed/ExplanationOfBenefit.json
✅ Data Observation disimpan di synthea/output/processed/Observation.json
✅ Data Immunization disimpan di synthea/output/processed/Immunization.json
✅ Data Procedure disimpan di synthea/output/processed/Procedure.json
✅ Data SupplyDelivery disimpan di synthea/output/processed/SupplyDelivery.json
✅ Data MedicationRequest disimpan di synthea/output/processed/MedicationRequest.json
✅ Data CareTeam disimpan di synthea/output/processed/CareTeam.json
✅ Data CarePlan disimpan di s