# Lakeflow Job Meta - Orchestrator Example

This notebook demonstrates how to use the Lakeflow Job Meta framework to create and manage metadata-driven Databricks Lakeflow Jobs.

## Features
- Supports multiple task types: Notebook, SQL Query, SQL File
- Dynamic job generation from metadata
- Job lifecycle management (create/update/track)
- Execution order and dependency management
- Optional continuous monitoring


In [0]:
# Import framework modules
import sys
import os
import logging

# Configure logging FIRST. With the default root-logger configuration,
# INFO-level records are discarded, so any message logged before this call
# (such as the path-detection result below) would never be shown.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Make the `lakeflow_job_meta` package importable by putting the project root
# on sys.path. The project root is taken to be two directory levels above
# this notebook (i.e. the parent of the examples/ folder).
try:
    # Get notebook path from the Databricks context (`dbutils` only exists
    # inside a Databricks runtime; elsewhere this raises and we fall back).
    notebook_path = dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()
    # NOTE(review): this looks like a workspace path; on some runtimes the
    # filesystem location may need a "/Workspace" prefix to be importable —
    # confirm against the target Databricks runtime.
    project_root = os.path.dirname(os.path.dirname(notebook_path))
    sys.path.insert(0, project_root)
    logger.info(f"✅ Added project root to path: {project_root}")
except Exception as e:
    # Best-effort fallback: do not fail the notebook, but say why we fell back
    # instead of swallowing the error silently.
    logger.info(f"Could not detect notebook path from Databricks context ({type(e).__name__}: {e}); trying current directory")
    current_dir = os.path.abspath('.')
    if os.path.exists(os.path.join(current_dir, 'lakeflow_job_meta')):
        sys.path.insert(0, current_dir)
        logger.info(f"Added current directory to path: {current_dir}")
    # Otherwise nothing is added: the import below only succeeds if the
    # package is already installed / importable.

# Import framework
from lakeflow_job_meta import JobOrchestrator, MetadataManager, MetadataMonitor
from lakeflow_job_meta.constants import SUPPORTED_TASK_TYPES

logger.info(f"Supported task types: {', '.join(SUPPORTED_TASK_TYPES)}")

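# ---------------------------------------------------------------------------
# Configuration Widgets
#
# Configure parameters using Databricks widgets. These can also be set via
# base_parameters when running as a job.
# ---------------------------------------------------------------------------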

# Declare the text widgets that parameterize this notebook.
# Arguments are (name, default value, label).
dbutils.widgets.text("control_table", "", "Control Table (required)")
dbutils.widgets.text("yaml_path", "", "YAML Path (optional)")
dbutils.widgets.text("volume_path", "", "Volume Path (optional)")
dbutils.widgets.text("sync_yaml", "false", "Sync YAML (true/false)")

# Read widget values. Surrounding whitespace is stripped so that a value such
# as " " is treated as "not set" and " true " still enables syncing.
CONTROL_TABLE = dbutils.widgets.get("control_table").strip()
YAML_PATH = dbutils.widgets.get("yaml_path").strip() or None      # empty -> None
VOLUME_PATH = dbutils.widgets.get("volume_path").strip() or None  # empty -> None
SYNC_YAML = dbutils.widgets.get("sync_yaml").strip().lower() == "true"

# Validate required parameter: fail fast here rather than later with a less
# obvious error from the framework.
if not CONTROL_TABLE:
    raise ValueError("control_table widget is required. Please set it in the widget or via base_parameters.")

# Echo the effective configuration for the run log.
logger.info("Configuration:")
logger.info(f"  Control Table: {CONTROL_TABLE}")
logger.info(f"  YAML Path: {YAML_PATH or 'Not configured'}")
logger.info(f"  Volume Path: {VOLUME_PATH or 'Not configured'}")
logger.info(f"  Sync YAML: {SYNC_YAML}")

# Initialize MetadataManager and JobOrchestrator against the same control table
metadata_manager = MetadataManager(CONTROL_TABLE)
orchestrator = JobOrchestrator(CONTROL_TABLE)

# Ensure control table exists
metadata_manager.ensure_exists()

# Optionally load YAML into the control table when SYNC_YAML is enabled.
# Every outcome is logged, and none of them aborts the notebook: on any
# problem we continue with whatever is already in the control table.
if SYNC_YAML:
    if not YAML_PATH:
        # Previously this misconfiguration was ignored without any message.
        logger.warning("⚠️ Sync YAML is enabled but no YAML path is configured. Using existing table data.")
    elif os.path.exists(YAML_PATH):  # single filesystem check decides the branch
        try:
            # Presumably returns the number of sources loaded (it is reported
            # as such in the message below) — verify against MetadataManager.
            sources_loaded = metadata_manager.load_yaml(YAML_PATH)
            logger.info(f"✅ Loaded {sources_loaded} sources from YAML: {YAML_PATH}")
        except Exception as e:
            # Deliberate best-effort: keep going, but attach the traceback so
            # the failure can be diagnosed from the log.
            logger.warning(f"⚠️ Failed to load YAML: {str(e)}. Using existing table data.", exc_info=True)
    else:
        logger.info(f"ℹ️ YAML file not found at {YAML_PATH}. Using existing table data.")

In [0]:
# Run the orchestrator over the control table so that jobs are created or
# updated for all modules.

# The YAML path is only forwarded when YAML syncing was requested.
yaml_path_for_run = None
if SYNC_YAML:
    yaml_path_for_run = YAML_PATH

jobs = orchestrator.run_all_modules(
    auto_run=True,                # start each job right after it is created
    yaml_path=yaml_path_for_run,
    sync_yaml=False,              # the sync (if any) already happened earlier
)

managed_job_count = len(jobs)
logger.info(f"✅ Managed {managed_job_count} jobs successfully")


## Optional: Continuous Monitoring Mode

If you want to continuously monitor for metadata changes, uncomment and run the cell below.

This will:
- Watch the control table for changes
- Optionally sync YAML files from a Unity Catalog volume
- Automatically update jobs when metadata changes


In [None]:
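# Uncomment the lines below to enable continuous monitoring.
# The monitor checks for metadata changes every 60 seconds and auto-updates jobs.
# CONTROL_TABLE and VOLUME_PATH are taken from the widgets configured above.
# Note: with max_iterations=None the monitor runs indefinitely, so this cell
# will not finish on its own.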

# monitor = MetadataMonitor(
#     control_table=CONTROL_TABLE,
#     check_interval_seconds=60,
#     volume_path=VOLUME_PATH,  # Optional: watch Unity Catalog volume for YAML files
#     auto_update_jobs=True
# )
# monitor.run_continuous(max_iterations=None)  # None = run indefinitely
