In [0]:
%restart_python

In [0]:
%pip install faker
from faker import Faker
import json
import uuid
import time
import os

# Shared Faker instance used by generate_healthcare_bundle() for all synthetic values.
fake = Faker()
# Directory that receives the generated JSON bundles
# (presumably a Unity Catalog volume path, given the /Volumes prefix — confirm).
landing_path = "/Volumes/healthcare_catalog/raw_data/landing_zone/"
# Create the directory if it is missing; exist_ok=True makes re-running this cell safe.
os.makedirs(landing_path, exist_ok=True)

def generate_healthcare_bundle():
    """Generate one synthetic patient bundle and write it as a JSON file.

    The bundle contains:
      * ``patient``    - one patient record (id, name, dob, gender),
      * ``encounters`` - 1 to 2 encounters for that patient, each carrying a
        nested ``procedure`` list of 1 to 3 procedure records,
      * ``metadata``   - the source name and an ``event_time`` string.

    The file is written to the module-level ``landing_path`` directory as
    ``bundle_<6 hex chars>.json`` and a confirmation line is printed.

    Returns:
        None. The only effects are the file written and the printed message.
    """

    def _short_id():
        # First 8 characters of a random UUID, upper-cased.
        # NOTE: truncation leaves only 32 bits of randomness, so these IDs
        # are not guaranteed unique across a large number of bundles.
        return str(uuid.uuid4())[:8].upper()

    # 1. Patient data
    patient_id = _short_id()
    patient = {
        "patient_id": patient_id,
        "name": fake.name(),
        "dob": str(fake.date_of_birth(minimum_age=18, maximum_age=90)),
        "gender": fake.random_element(["Male", "Female"]),
    }

    # 2. Encounters - a patient can have 1 to 2 visits
    encounters = []
    for _ in range(fake.random_int(min=1, max=2)):
        encounter_id = _short_id()
        admission_date = fake.date_this_month()

        encounter = {
            "encounter_id": encounter_id,
            "patient_id": patient_id,
            "admission_date": str(admission_date),
            "encounter_type": fake.random_element(["Inpatient", "Outpatient", "Emergency"]),
            "diagnosis": fake.random_element(["Hypertension", "Diabetes", "Asthma", "COVID-19"]),
        }

        # 3. Procedures - each encounter has 1 to 3, linked back by encounter_id.
        # The key stays "procedure" (singular) because it is part of the
        # emitted JSON schema that downstream readers see.
        encounter["procedure"] = [
            {
                "procedure_id": _short_id(),
                "encounter_id": encounter_id,
                "procedure_code": f"CPT-{fake.random_int(min=10000, max=99999)}",
                "description": fake.bs().title(),
            }
            for _ in range(fake.random_int(min=1, max=3))
        ]
        encounters.append(encounter)

    # Final data. event_time is formatted by time.strftime with no explicit
    # time tuple and carries no timezone offset.
    bundle = {
        "patient": patient,
        "encounters": encounters,
        "metadata": {
            "source": "Northtell_Hospital",
            "event_time": time.strftime('%Y-%m-%d %H:%M:%S'),
        },
    }

    # Save as JSON.
    # NOTE: the name has only 6 hex characters of randomness and the file is
    # opened in "w" mode, so a name collision would overwrite an older bundle.
    file_name = f"bundle_{uuid.uuid4().hex[:6]}.json"
    full_path = os.path.join(landing_path, file_name)  # Best practice for paths

    # Explicit encoding so the output does not depend on the platform default.
    with open(full_path, "w", encoding="utf-8") as f:
        json.dump(bundle, f)

    print(f"✅ Generated and dropped: {full_path}")

# Create a batch of 5 files: each call to generate_healthcare_bundle()
# writes one JSON bundle into landing_path and prints its path.
for _ in range(5):
    generate_healthcare_bundle()

In [0]:
# List the landing zone directory to check that the generated bundle files are there.
# The path is the same literal that landing_path is set to.
dbutils.fs.ls("/Volumes/healthcare_catalog/raw_data/landing_zone/")