In [0]:
# Databricks Notebook: setup_kardia_autoloader_env.ipynb
# -------------------------------------------------------
# Manual setup script — not part of the automated Bronze–Silver–Gold pipeline.
#
# Creates required DBFS folders for Auto Loader:
# - Raw landing zones: /kardia/raw/patients/, /kardia/raw/encounters/
# - Schema tracking:   /kardia/_schemas/
# - Stream checkpoints: /kardia/_checkpoints/
#
# Copies small test files from the Git-backed Repos directory into DBFS.
# Optionally copies uploaded files (e.g., from /FileStore) into raw folders.
#
# Run after deploying a new workspace or when testing ingestion locally.

# 1. Ensure both raw landing folders exist before anything is copied into them
for landing_dir in ("dbfs:/kardia/raw/patients/", "dbfs:/kardia/raw/encounters/"):
    dbutils.fs.mkdirs(landing_dir)

# 2. Seed each landing folder with its small test CSV from the workspace checkout
# NOTE(review): this path is tied to one user's workspace folder — adjust it when
# running under a different account.
repo_base = "file:/Workspace/Users/matthew.databrickslab2@outlook.com/kardiaflow/data/raw"

seed_copies = [
    (f"{repo_base}/ehr/patients_10.csv",   "dbfs:/kardia/raw/patients/"),
    (f"{repo_base}/ehr/encounters_10.csv", "dbfs:/kardia/raw/encounters/"),
]
for source_file, landing_dir in seed_copies:
    dbutils.fs.cp(source_file, landing_dir, recurse=True)

# 3. Create the root folders for schema tracking and stream checkpoints
for support_dir in ("dbfs:/kardia/_schemas/", "dbfs:/kardia/_checkpoints/"):
    dbutils.fs.mkdirs(support_dir)

# 4. List each landing folder so the copied test files can be checked by eye
landing_reports = [
    ("Patients test files:",   "dbfs:/kardia/raw/patients/"),
    ("Encounters test files:", "dbfs:/kardia/raw/encounters/"),
]
for heading, landing_dir in landing_reports:
    print(heading)
    display(dbutils.fs.ls(landing_dir))


In [0]:
# Step 1: Upload a new patients file via the workspace UI. This cell expects it
#         at dbfs:/FileStore/tables/patients_more_10.csv.

# Step 2: Copy the uploaded file into the patients Auto Loader landing folder
uploaded_file = "dbfs:/FileStore/tables/patients_more_10.csv"
patients_landing = "dbfs:/kardia/raw/patients/"
dbutils.fs.cp(uploaded_file, patients_landing, recurse=True)

# List the landing folder to confirm the file is now in place
display(dbutils.fs.ls(patients_landing))
