In [None]:
# bootstrap_dir.ipynb
# Creates the five raw-input folders in ADLS
# and drops in the first sample file for each dataset.

from pathlib import Path

# 1  Root URI of the ADLS container that holds every dataset folder
lake_root = "abfss://lake@kardiaadlsdemo.dfs.core.windows.net"

# 2  Ensure one raw/source folder per dataset exists (idempotent, safe to re-run)
dataset_names = ("encounters", "claims", "patients", "providers", "feedback")
for ds in dataset_names:
    raw_dir = "/".join((lake_root, ds))
    dbutils.fs.mkdirs(raw_dir)
    print("Ensured", raw_dir)

# 3  Copy one local sample into each raw folder
# NOTE: assumes the notebook's working directory is the repo root, so the
# relative sample paths below resolve against it.
repo_root = Path.cwd()
samples = {
    "encounters": ("data/encounters/encounters_part_1.avro",  "avro"),
    "claims":     ("data/claims/claims_part_1.parquet",       "parquet"),
    "feedback":   ("data/feedback/feedback_part_1.jsonl",     "json"),
    "patients":   ("data/patients/patients_part_1.csv",       "csv"),
    "providers":  ("data/providers/providers_part_1.tsv",     "tsv"),
}

# Sample format -> (Spark data source, reader options, writer options).
# TSV is not a Spark source of its own: it is CSV with a tab separator.
spark_io = {
    "parquet": ("parquet", {}, {}),
    "avro":    ("avro",    {}, {}),
    "json":    ("json",    {"multiLine": False}, {}),
    "csv":     ("csv",     {"header": True}, {"header": True}),
    "tsv":     ("csv",     {"header": True, "sep": "\t"},
                           {"header": True, "sep": "\t"}),
}

skipped = []  # datasets for which no sample could be written

for ds, (rel_path, fmt) in samples.items():
    src_file   = repo_root / rel_path
    target_dir = f"{lake_root}/{ds}"

    if not src_file.exists():
        print(f"SKIP {ds}: {src_file} not found")
        skipped.append(ds)
        continue

    # Report an unknown format instead of silently writing nothing.
    if fmt not in spark_io:
        print(f"SKIP {ds}: unsupported format {fmt!r}")
        skipped.append(ds)
        continue

    source, read_opts, write_opts = spark_io[fmt]

    # Spark on Databricks resolves scheme-less paths against the DBFS root,
    # so the driver-local file that pathlib just found must be addressed
    # explicitly with the file: scheme.
    src_uri = f"file:{src_file}"

    print(f"→ {ds}: loading {src_file.name} and writing to {target_dir}")

    df = spark.read.format(source).options(**read_opts).load(src_uri)
    # "overwrite" replaces whatever is already in the dataset's raw folder.
    (df.write.mode("overwrite")
       .format(source)
       .options(**write_opts)
       .save(target_dir))

if skipped:
    print(f"\nWARNING: no sample written for: {', '.join(skipped)}")

print("\nBootstrap complete – raw folders ready for Auto Loader.")