In [0]:
# 99_bootstrap_raw_patients_encounters.ipynb
# Perform one-time setup for Patients and Encounters data.
# Creates the raw input folders and moves in the initial test files.

# 1. Manually upload the test files `patients_part_1.csv` and `encounters_part_1.avro` to DBFS.

# DBFS locations used by this notebook: the folder the test files are copied
# from, and the raw folders they are copied into.
UPLOADS_DIR = "dbfs:/FileStore/tables/"
RAW_PATIENTS_DIR = "dbfs:/kardia/raw/patients/"
RAW_ENCOUNTERS_DIR = "dbfs:/kardia/raw/encounters/"

# Step 1 – Ensure both raw destination folders exist before anything is copied
for raw_dir in [RAW_PATIENTS_DIR, RAW_ENCOUNTERS_DIR]:
    dbutils.fs.mkdirs(raw_dir)

# Step 2 – Manifest of initial test files as (file name, destination folder)
INITIAL_TEST_FILES = [
    ("patients_part_1.csv", RAW_PATIENTS_DIR),
    ("encounters_part_1.avro", RAW_ENCOUNTERS_DIR),
]

# Step 3 – Copy each file from the uploads folder into its destination,
# leaving alone any file whose name is already present there
for file_name, target_dir in INITIAL_TEST_FILES:
    source = f"{UPLOADS_DIR}{file_name}"
    target = f"{target_dir}{file_name}"

    # Names of the entries currently in the destination folder
    present = {entry.name for entry in dbutils.fs.ls(target_dir)}

    if file_name in present:
        print(f"Skipped (already exists): {target}")
        continue

    # Best-effort copy: a failure is reported and the loop moves on
    try:
        dbutils.fs.cp(source, target)
        print(f"Bootstrapped: {file_name} to {target}")
    except Exception as err:
        print(f"Failed to copy {file_name}: {err}")