In [0]:
# 99_bootstrap_raw_dirs_and_files.ipynb
# One-time setup script for a fresh Databricks workspace.
# Run immediately after starting the cluster (no init script yet).
#
# - Verifies presence of test files
# - Copies claims_10.avro into the raw ingestion directory
# - Saves Postgres password to DBFS for use in init script and JDBC

# --- DBFS locations and demo credential used by the steps below ---

# Directory where files uploaded through the workspace UI are expected.
UPLOADS = "dbfs:/FileStore/tables/"
# Target directory for raw claims ingestion.
RAW_CLAIMS = "dbfs:/kardia/raw/claims/"

# Full paths of the two uploaded test files (UPLOADS already ends with "/").
UPLOADS_CLAIMS_AVRO = UPLOADS + "claims_10.avro"
UPLOADS_PROVIDERS_CSV = UPLOADS + "providers_10.csv"

# Location of the password file written in step 3.
DBFS_SECRET_DIR = "dbfs:/secrets"
DBFS_PW_FILE = "/".join((DBFS_SECRET_DIR, "pg_pw"))

# Demo-only Postgres password written to DBFS_PW_FILE.
DEMO_PASSWORD = "demo123"

# 1. Copy claims Avro into raw folder
# Fail fast with a clear error when the uploaded file is missing, using the
# same listing-based check as the providers CSV step, so that the raw
# directory is not created before the source file is known to exist.
if not any(f.name == "claims_10.avro" for f in dbutils.fs.ls(UPLOADS)):
    raise FileNotFoundError("claims_10.avro not found in /FileStore/tables/")

# Create the raw ingestion directory, then copy the uploaded file into it
# under the same file name.
dbutils.fs.mkdirs(RAW_CLAIMS)
dbutils.fs.cp(UPLOADS_CLAIMS_AVRO, f"{RAW_CLAIMS}claims_10.avro", recurse=True)
print(f"Copied: {UPLOADS_CLAIMS_AVRO} → {RAW_CLAIMS}claims_10.avro")

# 2. Verify providers CSV exists
# List the uploads directory once and look for the expected file name among
# the returned entries; abort the bootstrap if it is absent.
uploaded_names = [entry.name for entry in dbutils.fs.ls(UPLOADS)]
if "providers_10.csv" not in uploaded_names:
    raise FileNotFoundError("providers_10.csv not found in /FileStore/tables/")
print("providers_10.csv found at " + UPLOADS_PROVIDERS_CSV)

# 3. Save password to DBFS secret file
# Ensure the secret directory exists, then (over)write the password file so
# that re-running this script replaces any previous contents.
# NOTE(review): the password is written as plain text to a DBFS path; this
# looks intended for demo use only — confirm before reusing outside a demo.
dbutils.fs.mkdirs(DBFS_SECRET_DIR)
dbutils.fs.put(
    DBFS_PW_FILE,
    DEMO_PASSWORD,
    overwrite=True,
)
print("Password saved to: " + DBFS_PW_FILE)
