# Lakeflow Jobs Meta - Orchestrator Example

This notebook demonstrates how to use the Lakeflow Jobs Meta framework to create and manage metadata-driven Databricks Lakeflow Jobs.

## ⚠️ Before Running
**Update the configuration below to match your environment:**
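- `DEFAULT_CONTROL_TABLE`: Fully qualified control table name (`catalog.schema.table`)
- `DEFAULT_YAML_PATH`: Path to your YAML file, a folder of YAML files, or a Unity Catalog volume
- `DEFAULT_QUERIES_PATH`: Path where SQL queries are saved
- `DEFAULT_WAREHOUSE_ID`: Default SQL warehouse ID used for SQL tasks
- Update paths in your YAML metadata file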

## Features
- Supports multiple task types: Notebook, SQL Query, SQL File
- Dynamic job generation from metadata
- Job lifecycle management (create/update/track)
- Execution order and dependency management
- Optional continuous monitoring


In [None]:
%pip install --upgrade databricks-sdk

In [None]:
# Restart the Python process so packages installed by the %pip cell above are picked up.
dbutils.library.restartPython()

In [0]:
# Import framework modules
import sys
import os
import logging

# Configure logging once: INFO for everything (keeps noise down).
# A second basicConfig() call would be a no-op once the root logger has handlers.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Set DEBUG only for lakeflow_jobs_meta.orchestrator
logging.getLogger('lakeflow_jobs_meta.orchestrator').setLevel(logging.DEBUG)

# Dynamically detect project root from notebook location so that the
# `lakeflow_jobs_meta` package can be imported without being installed.
try:
    # Get notebook path from Databricks context
    notebook_path = dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()
    # Extract project root directory (go up from examples/)
    project_root = os.path.dirname(os.path.dirname(notebook_path))
    # NOTE(review): notebookPath() typically returns a workspace-relative path
    # (e.g. /Users/... or /Repos/...), while workspace files are exposed on the
    # driver filesystem under /Workspace. Only switch to the prefixed path when
    # the raw one is not a directory and the prefixed one is.
    workspace_root = "/Workspace" + project_root
    if not os.path.isdir(project_root) and os.path.isdir(workspace_root):
        project_root = workspace_root
    sys.path.insert(0, project_root)
    logger.info(f"✅ Added project root to path: {project_root}")
except Exception as e:
    # Best-effort detection: report why it failed instead of hiding it, then
    # fall back to the current directory.
    logger.info(f"Could not detect project root from notebook context ({e!r}); trying current directory")
    current_dir = os.path.abspath('.')
    if os.path.exists(os.path.join(current_dir, 'lakeflow_jobs_meta')):
        sys.path.insert(0, current_dir)
        logger.info(f"✅ Added current directory to path: {current_dir}")
    else:
        # If the package is installed, the import below still succeeds.
        logger.info("lakeflow_jobs_meta not found in current directory; relying on an installed package")

# Import framework
from lakeflow_jobs_meta import JobOrchestrator, MetadataMonitor
from lakeflow_jobs_meta.constants import SUPPORTED_TASK_TYPES

logger.info(f"Supported task types: {', '.join(SUPPORTED_TASK_TYPES)}")

In [None]:
# Environment-specific defaults -- edit these before running.
DEFAULT_CONTROL_TABLE = "fe_ppark_demo.lakeflow_jobs_metadata.jobs_metadata_control_table"
DEFAULT_YAML_PATH = "/Workspace/Users/peter.park@databricks.com/lakeflow_jobs_meta/examples/metadata_examples.yaml"
# Alternative: a folder path (loads all YAML files):
# DEFAULT_YAML_PATH = "/Workspace/Users/peter.park@databricks.com/lakeflow_jobs_meta/examples/"
# Alternative: a volume path:
# DEFAULT_YAML_PATH = "/Volumes/fe_ppark_demo/lakeflow_jobs_metadata/config_folder/"
DEFAULT_QUERIES_PATH = "/Workspace/Users/peter.park@databricks.com/queries"
DEFAULT_WAREHOUSE_ID = "4b9b953939869799"

# Register one text widget per setting: (widget name, default value, label).
for _widget_name, _widget_default, _widget_label in (
    ("control_table", DEFAULT_CONTROL_TABLE, "Control Table"),
    ("yaml_path", DEFAULT_YAML_PATH, "YAML Path (file, folder, or volume)"),
    ("default_warehouse_id", DEFAULT_WAREHOUSE_ID, "Default SQL Warehouse ID (optional, for SQL tasks)"),
    ("default_queries_path", DEFAULT_QUERIES_PATH, "Default Queries Save Path (optional)"),
):
    dbutils.widgets.text(_widget_name, _widget_default, _widget_label)


def _optional_widget(name):
    """Return the widget's value, or None when it was left empty."""
    return dbutils.widgets.get(name) or None


# Read the effective values back; empty optional widgets become None.
# The DEFAULT_* names are rebound to the widget values here.
CONTROL_TABLE = dbutils.widgets.get("control_table")
YAML_PATH = _optional_widget("yaml_path")
DEFAULT_WAREHOUSE_ID = _optional_widget("default_warehouse_id")
DEFAULT_QUERIES_PATH = _optional_widget("default_queries_path")

# The control table is the only mandatory setting.
if not CONTROL_TABLE:
    raise ValueError("control_table widget is required. Please set it in the widget or via base_parameters.")

# Work out the detail lines describing how the YAML path will be treated.
if not YAML_PATH:
    _yaml_notes = ("    Behavior: Process ALL jobs in control table",)
elif YAML_PATH.startswith("/Volumes/"):
    _yaml_notes = (
        "    Path Type: Unity Catalog Volume",
        "    Behavior: Load from volume and process ONLY those jobs",
    )
elif YAML_PATH.endswith((".yaml", ".yml")):
    _yaml_notes = (
        "    Path Type: YAML File",
        "    Behavior: Load from YAML file and process ONLY those jobs",
    )
else:
    _yaml_notes = (
        "    Path Type: Folder (will load all YAML files)",
        "    Behavior: Load all YAML files from folder and process ONLY those jobs",
    )

_yaml_display = YAML_PATH or 'Not configured (will process all jobs in control table)'
_warehouse_display = DEFAULT_WAREHOUSE_ID or 'Not configured (SQL tasks must specify warehouse_id)'
_queries_display = DEFAULT_QUERIES_PATH or 'Not configured'

# Summarise the effective configuration in the log.
logger.info("Configuration:")
logger.info(f"  Control Table: {CONTROL_TABLE}")
logger.info(f"  YAML Path: {_yaml_display}")
for _note in _yaml_notes:
    logger.info(_note)
logger.info(f"  Default Warehouse ID: {_warehouse_display}")
logger.info(f"  Default Queries Path: {_queries_display}")

In [None]:

# Build the JobOrchestrator from the widget-derived configuration.
orchestrator_settings = {
    "control_table": CONTROL_TABLE,
    "default_warehouse_id": DEFAULT_WAREHOUSE_ID,
    "default_queries_path": DEFAULT_QUERIES_PATH,
}
orchestrator = JobOrchestrator(**orchestrator_settings)

In [None]:
# Create and update jobs from the metadata.
# - yaml_path provided: loads metadata and processes ONLY those jobs
# - yaml_path NOT provided: processes ALL jobs in control table
# The path may be a YAML file, a folder (recursive), or a Unity Catalog volume;
# the kind of path is detected automatically.

# False = new manual jobs auto-run, scheduled jobs active;
# True = no auto-run, scheduled jobs paused
pause_new_jobs = False

jobs = orchestrator.create_or_update_jobs(yaml_path=YAML_PATH, default_pause_status=pause_new_jobs)

managed_job_count = len(jobs)
logger.info(f"✅ Managed {managed_job_count} jobs successfully")

In [0]:
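# Uncomment the block below to enable continuous monitoring.
# The monitor checks for metadata changes every 60 seconds and auto-updates jobs.
# With max_iterations=None it runs indefinitely, so expect this cell to keep running until cancelled.
# Note: volume_path must be a Unity Catalog volume path (e.g., "/Volumes/catalog/schema/volume")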

# volume_path = YAML_PATH if YAML_PATH and YAML_PATH.startswith("/Volumes/") else None
# if volume_path:
#     monitor = MetadataMonitor(
#         control_table=CONTROL_TABLE,
#         check_interval_seconds=60,
#         volume_path=volume_path,  # Watch Unity Catalog volume for YAML files
#         auto_update_jobs=True
#     )
#     monitor.run_continuous(max_iterations=None)  # None = run indefinitely
# else:
#     logger.warning("Volume path not configured. Monitoring requires a Unity Catalog volume path.")