In [0]:
# 99_move_new_patient_encounter_files_to_raw.ipynb
# Copy newly uploaded Patients and Encounters test files into their raw folders for Auto Loader.
# (The files are copied with dbutils.fs.cp, so the originals remain in the uploads folder.)
# Matches any file that begins with 'patients_part_' or 'encounters_part_'.
# Files that are already present in the destination folder will be skipped.

# 1. Prerequisite (manual step): upload the Patient and Encounter test files to DBFS
#    so that they appear under UPLOADS_DIR below.

# Source folder that is scanned for uploaded files, and the two raw landing folders.
UPLOADS_DIR = "dbfs:/FileStore/tables/"
RAW_PATIENTS_DIR = "dbfs:/kardia/raw/patients/"
RAW_ENCOUNTERS_DIR = "dbfs:/kardia/raw/encounters/"

# 2. Filename prefix -> destination raw folder.
#    Insertion order is preserved, so prefixes are tried in the order listed here.
PREFIX_MAP = dict(
    (
        ("patients_part_", RAW_PATIENTS_DIR),
        ("encounters_part_", RAW_ENCOUNTERS_DIR),
    )
)

def _exists(dir_path: str, fname: str) -> bool:
    """Return True if an entry named ``fname`` is listed directly under ``dir_path``.

    Args:
        dir_path: DBFS directory to list.
        fname: Bare file name, compared for equality against each entry's ``name``.

    Returns:
        True when the listing contains an entry with exactly that name; False when
        it does not, or when ``dir_path`` itself does not exist yet.

    Raises:
        Exception: Any ``dbutils.fs.ls`` failure other than a missing directory is
            re-raised unchanged.
    """
    try:
        entries = dbutils.fs.ls(dir_path)
    except Exception as err:
        # dbutils reports a missing path as a wrapped Java FileNotFoundException,
        # which has no dedicated Python exception type, so match on the message.
        # A destination folder that has not been created yet simply contains
        # nothing; every other failure (permissions, bad URI, ...) must surface.
        if "FileNotFoundException" in str(err):
            return False
        raise
    return any(entry.name == fname for entry in entries)

# 3. For each uploaded file, pick the raw folder whose prefix matches the filename
#    and copy the file there. dbutils.fs.cp leaves the upload in place, so this cell
#    can be re-run: files already present in the destination are skipped.
#    (If the uploads should be removed afterwards, dbutils.fs.mv would be needed.)
for obj in dbutils.fs.ls(UPLOADS_DIR):
    # First destination whose prefix the filename starts with, or None if no match.
    dest_dir = next(
        (path for prefix, path in PREFIX_MAP.items() if obj.name.startswith(prefix)),
        None,
    )

    if dest_dir is None:
        continue  # This file doesn't match any expected prefix

    if _exists(dest_dir, obj.name):
        print(f"Skipped (already exists): {obj.name}")
        continue

    dest_path = dest_dir + obj.name
    dbutils.fs.cp(obj.path, dest_path)
    # The operation is a copy, so report it as one rather than as a move.
    print(f"Copied: {obj.name} to {dest_path}")