In [None]:
# raw_file_ops/move_new_files.ipynb
# 1. Install helpers once per cluster/session (Option A)
# Installs the package found under `src/` of the KardiaFlow GitHub repository
# (presumably the source of the `kflow` imports below).
# NOTE(review): `@main` follows a moving branch, so two runs may install different
# code; pin a tag or commit SHA if reproducible runs are required.
%pip install -q git+https://github.com/okv627/KardiaFlow@main#subdirectory=src

from kflow.config import raw_path, adls_raw_path
from kflow.display_utils import banner
from kflow.adls import set_sas

# 2. ADLS auth
# Read the SAS token from the "kardia" secret scope (key "adls_raw_sas") and pass
# it, together with the storage account name, to kflow's set_sas helper.
ADLS_ACCOUNT = "kardiaadlsdemo"
sas_token = dbutils.secrets.get(scope="kardia", key="adls_raw_sas")
set_sas(ADLS_ACCOUNT, sas_token)

In [None]:
# 3. Config
# Folder that is scanned for newly uploaded files.
UPLOADS_DIR = "dbfs:/FileStore/tables/"

# Routing table: file-name prefix -> (destination directory, accepted suffixes).
# Suffixes are written in lower case because the mover compares them against the
# lower-cased file name; prefixes are matched case-sensitively.
PREFIX_MAP = {
    # Destinations resolved through raw_path
    "patients_part_": (
        raw_path("patients"),
        (".csv",),
    ),
    "encounters_part_": (
        raw_path("encounters"),
        (".avro",),
    ),
    "claims_part_": (
        raw_path("claims"),
        (".parquet",),
    ),
    # Destinations resolved through adls_raw_path
    "providers_part_": (
        adls_raw_path("providers"),
        (".tsv", ".avro"),
    ),
    "feedback_part_": (
        adls_raw_path("feedback"),
        (".jsonl",),
    ),
}

In [None]:
# 4. Helpers
def _exists(dir_path: str, fname: str) -> bool:
    """Report whether ``dir_path`` has an entry named exactly ``fname``.

    The directory is listed with ``dbutils.fs.ls`` and the entry names are
    compared to ``fname`` verbatim. Any exception raised while listing (for
    example because the directory is not there yet) is treated as "not
    present" and yields ``False`` rather than propagating.
    """
    try:
        names = [entry.name for entry in dbutils.fs.ls(dir_path)]
        found = fname in names
    except Exception:
        # Best-effort check: a failed listing counts as "file not there".
        found = False
    return found

In [None]:
# 5. Execution
# Scan UPLOADS_DIR and copy every file that matches a PREFIX_MAP rule into that
# rule's destination directory. Files already present at the destination are
# counted as skipped; files matching no rule are ignored.
# NOTE(review): the transfer uses dbutils.fs.cp, so the source file stays in
# UPLOADS_DIR even though the log line says "Moved" - confirm whether
# dbutils.fs.mv is what is actually wanted.
moved = skipped = 0
for obj in dbutils.fs.ls(UPLOADS_DIR):
    fname = obj.name
    # First rule whose prefix (case-sensitive) and suffix (case-insensitive) both
    # match; None when the file is not one this notebook routes.
    target_dir = next(
        (
            dest
            for prefix, (dest, exts) in PREFIX_MAP.items()
            if fname.startswith(prefix) and fname.lower().endswith(exts)
        ),
        None,
    )
    if not target_dir:
        continue
    if _exists(target_dir, fname):
        skipped += 1
        print(f"Skipped (already exists): {fname}")
    else:
        # Join with exactly one "/" so the copy lands inside target_dir whether or
        # not the configured directory ends with a slash; plain concatenation
        # would otherwise glue the file name onto the directory name.
        dest_path = target_dir.rstrip("/") + "/" + fname
        dbutils.fs.cp(obj.path, dest_path)
        moved += 1
        print(f"Moved: {fname} to {target_dir}")

banner(f"Move complete. Moved: {moved}, skipped: {skipped}", ok=True)