In [0]:
%run ./02_autoloader_ingestion

In [0]:
%run ./04_custom_ingestion

In [0]:
# Databricks notebook source
# MAGIC %run "./02_autoloader_ingestion"
# NOTE: Everything between the triple quotes below is a single string literal,
# not executed code. It holds an Auto-Loader-only variant of the ingestion
# pipeline (discover sources -> ingest supported ones -> copy unsupported ones)
# that has been disabled by wrapping it in a string; none of the log(),
# ingest_with_autoloader() or dbutils.fs.cp() calls inside it ever run.
# Because the string is the last expression of the cell, the notebook may echo
# its text as the cell output.
# NOTE(review): this looks like an earlier draft of the pipeline cell further
# down in this notebook -- confirm and consider deleting this cell.
'''
log("üöÄ Starting Retail Org Auto Loader ingestion pipeline...")

# Step 1: Identify valid and unsupported folders/files
supported_sources, unsupported_sources = list_source_items(SOURCE_BASE)
log(f"Found {len(supported_sources)} supported and {len(unsupported_sources)} unsupported sources.")

# Step 2: Ingest supported datasets
for src in supported_sources:
    folder_name = src.rstrip("/").split("/")[-1]
    tgt = f"{VOLUME_BASE}{folder_name}_autoloader/"
    log(f"Ingesting {folder_name} -> {tgt}")
    ingest_with_autoloader(src, tgt)

# Step 3: Copy unsupported (non-data) files for archival
for src in unsupported_sources:
    folder_name = src.rstrip("/").split("/")[-1]
    tgt = f"{VOLUME_BASE}{folder_name}_raw_copy/"
    log(f"Copying unsupported files from {src} -> {tgt}")
    try:
        dbutils.fs.cp(src, tgt, recurse=True)
        log(f"‚úÖ Copied {src} successfully.")
    except Exception as e:
        log(f"‚ùå Failed to copy {src}: {e}")

log("üéâ All ingestion and copies completed.")'''


In [0]:
# List the entries under the retail-org sales_orders sample folder and render
# the listing in the notebook output.
display(
    dbutils.fs.ls(
        "dbfs:/databricks-datasets/retail-org/sales_orders/"
    )
)

In [0]:
# Databricks notebook source
# MAGIC %run "./02_autoloader_ingestion"
# MAGIC %run "./04_custom_ingestion"  # Added this line to include custom ingestion logic

# COMMAND ----------

# Retail Org ingestion driver. Runs four stages in order:
#   1. discover supported / unsupported sources under SOURCE_BASE,
#   2. ingest every supported source with Auto Loader,
#   3. ingest a hand-picked list of folders with the custom PySpark ingester,
#   4. copy unsupported (non-data) sources verbatim for archival.
# Each task is best-effort: an exception is logged and the run continues. Caught
# failures are collected so the closing log line reflects the real outcome.
#
# NOTE(review): inside a notebook cell the "# MAGIC %run" lines above are plain
# Python comments. The helpers used here (log, list_source_items,
# ingest_with_autoloader, custom_ingest, SOURCE_BASE, VOLUME_BASE) are presumably
# provided by the dedicated %run cells at the top of this notebook -- confirm.


def _leaf_folder(path):
    """Return the last component of ``path``, ignoring any trailing slashes."""
    return path.rstrip("/").split("/")[-1]


# (stage, source) pairs for every task whose exception was caught below.
failed_tasks = []

log("🚀 Starting Retail Org ingestion pipeline (Auto Loader + Custom PySpark)...")

# Step 1: Identify valid and unsupported folders/files
supported_sources, unsupported_sources = list_source_items(SOURCE_BASE)
log(f"Found {len(supported_sources)} supported and {len(unsupported_sources)} unsupported sources.")

# Step 2: AUTOLOADER INGESTION
for src in supported_sources:
    folder_name = _leaf_folder(src)
    tgt = f"{VOLUME_BASE}{folder_name}_autoloader/"
    log(f"⚡ Auto Loader Ingesting: {folder_name} -> {tgt}")
    try:
        ingest_with_autoloader(src, tgt)
    except Exception as e:
        failed_tasks.append(("autoloader", src))
        log(f"❌ Auto Loader failed for {src}: {e}")

# Step 3: CUSTOM INGESTION (manual PySpark logic)
# You can specify a few folders you want to test custom ingestion for
custom_folders = [
    "customers",
    "products",
    "suppliers",
]

for folder_name in custom_folders:
    # SOURCE_BASE / VOLUME_BASE are concatenated directly, so they are assumed
    # to end with "/" -- TODO confirm against their definitions.
    src = f"{SOURCE_BASE}{folder_name}/"
    tgt = f"{VOLUME_BASE}{folder_name}_custom_ingest/"
    log(f"🔥 Custom PySpark ingestion for {folder_name} -> {tgt}")
    try:
        custom_ingest(src, tgt, load_type="full")  # You can change to incremental if needed
    except Exception as e:
        failed_tasks.append(("custom", src))
        log(f"❌ Custom ingestion failed for {src}: {e}")

# Step 4: Copy unsupported (non-data) files for archival
for src in unsupported_sources:
    folder_name = _leaf_folder(src)
    tgt = f"{VOLUME_BASE}{folder_name}_raw_copy/"
    log(f"📂 Copying unsupported files from {src} -> {tgt}")
    try:
        dbutils.fs.cp(src, tgt, recurse=True)
        log(f"✅ Copied {src} successfully.")
    except Exception as e:
        failed_tasks.append(("raw_copy", src))
        log(f"❌ Failed to copy {src}: {e}")

# Only claim success when nothing was swallowed above. The run stays
# best-effort (no exception is raised here); the summary makes failures visible.
if failed_tasks:
    failed_summary = ", ".join(f"{stage}:{source}" for stage, source in failed_tasks)
    log(f"⚠️ Ingestion pipeline finished with {len(failed_tasks)} failed task(s): {failed_summary}")
else:
    log("🎉 All Auto Loader and Custom Ingestion tasks completed successfully.")
