# Week 6 — Dataflows, Lineage, Orchestration & Automation

## Learning Objectives
By the end of this session, you will:
- 🔍 Master Unity Catalog lineage for end-to-end data traceability
- 🚀 Build declarative pipelines using Delta Live Tables
- 📊 Implement comprehensive data quality expectations
- 🔄 Design multi-task workflows with Databricks Jobs
- 🌐 Establish Git-based development and deployment workflows
- ✅ Apply observability best practices for production systems

## 1. Unity Catalog Lineage & Data Governance

### Exploring Data Lineage

In [None]:
# Query table-level lineage: the ten most recent lineage events whose
# target table name contains "orders".
# system.access.table_lineage timestamps each event in `event_time`
# (it has no `created_at` column), so select and sort on that column.
lineage_df = spark.sql("""
    SELECT 
        source_table_full_name,
        target_table_full_name,
        source_type,
        target_type,
        event_time
    FROM system.access.table_lineage 
    WHERE target_table_full_name LIKE '%orders%'
    ORDER BY event_time DESC
    LIMIT 10
""")

display(lineage_df)

In [None]:
# Query column-level lineage: which source columns feed the columns of
# tables whose name contains "customer".
# system.access.column_lineage exposes no `transformation_type` column, so
# the event timestamp is selected instead; ordering by it makes the LIMIT
# return the newest events rather than an arbitrary sample.
column_lineage = spark.sql("""
    SELECT 
        source_table_full_name,
        source_column_name,
        target_table_full_name,
        target_column_name,
        event_time
    FROM system.access.column_lineage 
    WHERE target_table_full_name LIKE '%customer%'
    ORDER BY event_time DESC
    LIMIT 20
""")

display(column_lineage)

### Lineage Analysis Functions

In [None]:
def analyze_table_dependencies(table_name):
    """Summarize the direct upstream and downstream tables of a table.

    Args:
        table_name: Table name compared for equality against the
            ``*_table_full_name`` columns of ``system.access.table_lineage``.
            It is interpolated into the SQL text as-is, so pass trusted
            values only.

    Returns:
        A dict with ``upstream_tables`` / ``downstream_tables`` (sorted lists
        of distinct table names) and ``upstream_count`` /
        ``downstream_count`` (the lengths of those lists).
    """

    def _distinct_tables(wanted_column, filter_column):
        # DISTINCT / IS NOT NULL keep repeated lineage rows and NULL names
        # out of the result, so each related table is reported once.
        rows = spark.sql(f"""
            SELECT DISTINCT {wanted_column} AS related_table
            FROM system.access.table_lineage 
            WHERE {filter_column} = '{table_name}'
            AND {wanted_column} IS NOT NULL
        """).collect()
        return sorted(row["related_table"] for row in rows)

    # Each query is collected exactly once; the counts are derived from the
    # collected lists instead of triggering a second Spark action per side.
    upstream_tables = _distinct_tables(
        "source_table_full_name", "target_table_full_name"
    )
    downstream_tables = _distinct_tables(
        "target_table_full_name", "source_table_full_name"
    )

    return {
        'upstream_count': len(upstream_tables),
        'downstream_count': len(downstream_tables),
        'upstream_tables': upstream_tables,
        'downstream_tables': downstream_tables
    }

# Test the function
# dependencies = analyze_table_dependencies('your_catalog.your_schema.your_table')
# print(dependencies)

In [None]:
def perform_impact_analysis(source_table, max_depth=10):
    """Find every table reachable downstream of ``source_table``.

    Args:
        source_table: Table name matched against ``source_table_full_name``
            in ``system.access.table_lineage``. It is interpolated into the
            SQL text as-is, so pass trusted values only.
        max_depth: Maximum number of lineage hops to follow (default 10).
            Bounding the depth also stops the recursion if the lineage
            graph contains a cycle.

    Returns:
        A DataFrame with ``table_name``, ``level`` (hop distance from the
        source) and ``impact_count`` (number of distinct lineage paths that
        reach the table at that level), ordered by level and table name.

    Raises:
        ValueError: If ``max_depth`` is smaller than 1.
    """
    max_depth = int(max_depth)
    if max_depth < 1:
        raise ValueError("max_depth must be at least 1")

    impact_query = f"""
    WITH RECURSIVE edges AS (
        -- One row per distinct source -> target pair; without this every
        -- repeated lineage row would multiply the rows at each level
        SELECT DISTINCT source_table_full_name, target_table_full_name
        FROM system.access.table_lineage
        WHERE source_table_full_name IS NOT NULL
          AND target_table_full_name IS NOT NULL
          AND source_table_full_name <> target_table_full_name
    ),
    lineage_tree AS (
        -- Base case: direct dependencies
        SELECT target_table_full_name as table_name, 1 as level
        FROM edges
        WHERE source_table_full_name = '{source_table}'

        UNION ALL

        -- Recursive case: indirect dependencies
        SELECT e.target_table_full_name, lt.level + 1
        FROM edges e
        JOIN lineage_tree lt ON e.source_table_full_name = lt.table_name
        WHERE lt.level < {max_depth}  -- Prevent infinite recursion
    )
    SELECT table_name, level, COUNT(*) as impact_count
    FROM lineage_tree 
    GROUP BY table_name, level
    ORDER BY level, table_name
    """

    return spark.sql(impact_query)

# Test impact analysis
# impact_result = perform_impact_analysis('your_catalog.your_schema.source_table')
# display(impact_result)

### Data Classification and Governance

In [None]:
# Apply data classification tags (example - adjust table name)
# spark.sql("""
#     ALTER TABLE your_catalog.your_schema.customer_table 
#     SET TAGS ('classification' = 'PII', 'retention' = '7_years')
# """)

# Find tables whose comment mentions PII or sensitive data.
# The information_schema.tables column holding the table comment is named
# `comment`; it is aliased to `table_comment` so the result keeps the
# column name used in this notebook.
classified_tables = spark.sql("""
    SELECT table_name, table_catalog, table_schema, comment AS table_comment
    FROM system.information_schema.tables 
    WHERE comment LIKE '%PII%' OR comment LIKE '%sensitive%'
    LIMIT 10
""")

display(classified_tables)

In [None]:
# Monitor data access patterns: the 20 newest audit events from the last
# 7 days whose action_name is 'read', with the acting user's email and the
# `full_name_arg` request parameter.
audit_sql = """
    SELECT 
        user_identity.email,
        request_params.full_name_arg,
        event_time,
        action_name
    FROM system.access.audit 
    WHERE action_name = 'read' 
    AND event_time >= current_date() - INTERVAL 7 DAYS
    ORDER BY event_time DESC
    LIMIT 20
"""
access_audit = spark.sql(audit_sql)

display(access_audit)

## 2. Delta Live Tables (DLT) Pipeline Development

### Creating Sample Data for DLT Demo

In [None]:
# Create sample bronze data for DLT demonstration
from pyspark.sql.functions import *
from pyspark.sql.types import *
import builtins
import random
from datetime import datetime, timedelta

# Generate sample orders data: 1000 rows with random customer, product,
# amount, quantity, order date (within the last year) and status.
sample_orders = []
for i in range(1000):
    sample_orders.append((
        f"order_{i+1}",
        f"customer_{random.randint(1, 100)}",
        f"product_{random.randint(1, 50)}",
        # The wildcard import above rebinds `round` to
        # pyspark.sql.functions.round, which only accepts Columns; call the
        # Python builtin explicitly to round the plain float.
        builtins.round(random.uniform(10, 1000), 2),
        random.randint(1, 5),
        (datetime.now() - timedelta(days=random.randint(0, 365))).date(),
        random.choice(["pending", "completed", "cancelled"]),
        datetime.now()
    ))

# Explicit schema; field order matches the tuple order built above.
schema = StructType([
    StructField("order_id", StringType(), False),
    StructField("customer_id", StringType(), False),
    StructField("product_id", StringType(), True),
    StructField("amount", DoubleType(), False),
    StructField("quantity", IntegerType(), False),
    StructField("order_date", DateType(), False),
    StructField("status", StringType(), False),
    StructField("created_at", TimestampType(), False)
])

bronze_orders_df = spark.createDataFrame(sample_orders, schema)

# Save as bronze table (overwrites any previous run of this cell)
bronze_orders_df.write.format("delta").mode("overwrite").saveAsTable("bronze.sample_orders")

print(f"Created bronze.sample_orders with {bronze_orders_df.count()} records")
display(bronze_orders_df.limit(10))

### DLT Pipeline SQL Examples

**Note**: The following SQL would be used in a DLT pipeline notebook:

In [None]:
# DLT SQL Examples (for reference - would be in DLT pipeline).
# The statements are kept as plain strings; this cell only prints a preview
# of each and does not execute them.

# Bronze: streaming table that adds an ingestion timestamp to the raw rows.
dlt_bronze_sql = """
-- Bronze Layer
CREATE OR REFRESH STREAMING LIVE TABLE bronze_orders
COMMENT "Raw order data from source systems"
AS SELECT 
    *,
    current_timestamp() as ingestion_time
FROM STREAM(bronze.sample_orders)
"""

# Silver: three expectations (two drop the row, one fails the update) plus
# type casts on amount and order_date.
dlt_silver_sql = """
-- Silver Layer with Data Quality
CREATE OR REFRESH LIVE TABLE silver_orders (
    CONSTRAINT valid_order_id EXPECT (order_id IS NOT NULL) ON VIOLATION DROP ROW,
    CONSTRAINT positive_amount EXPECT (amount > 0) ON VIOLATION FAIL UPDATE,
    CONSTRAINT valid_date EXPECT (order_date <= current_date()) ON VIOLATION DROP ROW
)
COMMENT "Cleaned and validated order data"
AS SELECT 
    order_id,
    customer_id,
    CAST(amount AS DECIMAL(10,2)) as amount,
    CAST(order_date AS DATE) as order_date,
    status,
    created_at
FROM LIVE.bronze_orders
WHERE order_id IS NOT NULL
"""

# Gold: per-customer aggregates computed from the silver table.
dlt_gold_sql = """
-- Gold Layer Business Metrics
CREATE OR REFRESH LIVE TABLE gold_customer_metrics
COMMENT "Customer lifetime value and behavior metrics"
AS SELECT 
    customer_id,
    COUNT(*) as total_orders,
    SUM(amount) as total_revenue,
    AVG(amount) as avg_order_value,
    MIN(order_date) as first_order_date,
    MAX(order_date) as last_order_date,
    DATEDIFF(MAX(order_date), MIN(order_date)) as customer_lifespan_days
FROM LIVE.silver_orders
GROUP BY customer_id
"""

print("DLT SQL Examples:")
# Preview only the first 100 characters of each statement.
layer_statements = [
    ("Bronze", dlt_bronze_sql),
    ("Silver", dlt_silver_sql),
    ("Gold", dlt_gold_sql),
]
for position, (layer, statement) in enumerate(layer_statements, start=1):
    print(f"{position}. {layer} Layer:", statement[:100] + "...")

### Python DLT Implementation Example

In [None]:
# Python DLT Example (for reference - would be in DLT pipeline)
# The pipeline code is held in a plain string and only printed by this cell;
# nothing in it is executed here.
# Because this is a regular (non-raw) string, the doubled backslash in the
# email pattern is printed as a single backslash before the dot.
dlt_python_example = """
import dlt
from pyspark.sql.functions import *

@dlt.table(
    comment="Enhanced customer data with derived attributes",
    table_properties={
        "quality": "silver",
        "pipelines.autoOptimize.managed": "true"
    }
)
@dlt.expect_or_fail("valid_email", "email RLIKE '^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$'")
@dlt.expect_or_drop("future_signup", "signup_date <= current_date()")
def silver_customers():
    return (
        dlt.read("bronze_customers")
        .withColumn("email_domain", regexp_extract("email", "@(.+)", 1))
        .withColumn("customer_age_days", datediff(current_date(), "signup_date"))
        .withColumn("customer_segment", 
            when(col("total_orders") > 10, "High Value")
            .when(col("total_orders") > 5, "Medium Value")
            .otherwise("New Customer")
        )
    )
"""

# Show the example source in the cell output.
print("Python DLT Implementation Example:")
print(dlt_python_example)

### Simulating Silver and Gold Layer Processing

In [None]:
# Simulate Silver layer processing with data quality checks
bronze_df = spark.table("bronze.sample_orders")

# A row is kept only when it has an order id, a positive amount and an
# order date that is not in the future.
is_valid_order = (
    col("order_id").isNotNull()
    & (col("amount") > 0)
    & (col("order_date") <= current_date())
)

# Keep the valid rows and cast amount to decimal(10,2) in the projection.
silver_df = bronze_df.where(is_valid_order).select(
    "order_id",
    "customer_id",
    "product_id",
    col("amount").cast("decimal(10,2)").alias("amount"),
    "quantity",
    "order_date",
    "status",
    "created_at",
)

# Save silver table
silver_df.write.format("delta").mode("overwrite").saveAsTable("silver.orders")

print(f"Silver layer: {silver_df.count()} records (filtered from {bronze_df.count()})")
display(silver_df.limit(10))

In [None]:
# Create Gold layer customer metrics
# count/sum/avg/min/max here are the Spark column functions (they are called
# with column names and aliased), not the Python builtins.
order_aggregates = [
    count("*").alias("total_orders"),
    sum("amount").alias("total_revenue"),
    avg("amount").alias("avg_order_value"),
    min("order_date").alias("first_order_date"),
    max("order_date").alias("last_order_date"),
]

# Days between a customer's first and last order.
lifespan_days = datediff(col("last_order_date"), col("first_order_date"))

# Revenue-based segment: > 1000 High Value, > 500 Medium Value, else Standard.
revenue_segment = (
    when(col("total_revenue") > 1000, "High Value")
    .when(col("total_revenue") > 500, "Medium Value")
    .otherwise("Standard")
)

gold_customer_metrics = (
    silver_df.groupBy("customer_id")
    .agg(*order_aggregates)
    .withColumn("customer_lifespan_days", lifespan_days)
    .withColumn("customer_segment", revenue_segment)
)

# Save gold table
gold_customer_metrics.write.format("delta").mode("overwrite").saveAsTable("gold.customer_metrics")

print(f"Gold layer: {gold_customer_metrics.count()} customer records")
display(gold_customer_metrics.orderBy(col("total_revenue").desc()).limit(10))

## 3. Databricks Jobs & Workflow Orchestration

### Job Configuration Examples

In [None]:
# Multi-task job configuration example
import json

# Three notebook tasks chained ingest -> silver -> gold through depends_on.
# Only the ingestion task declares its own cluster.
pipeline_tasks = [
    {
        "task_key": "ingest_raw_data",
        "notebook_task": {
            "notebook_path": "/pipelines/01_data_ingestion",
            "base_parameters": {"source_table": "bronze.orders"},
        },
        "new_cluster": {
            "spark_version": "13.3.x-scala2.12",
            "node_type_id": "i3.xlarge",
            "num_workers": 2,
        },
    },
    {
        "task_key": "process_silver_layer",
        "depends_on": [{"task_key": "ingest_raw_data"}],
        "notebook_task": {"notebook_path": "/pipelines/02_silver_processing"},
    },
    {
        "task_key": "create_gold_metrics",
        "depends_on": [{"task_key": "process_silver_layer"}],
        "notebook_task": {"notebook_path": "/pipelines/03_gold_aggregations"},
    },
]

# Job-level settings: notifications, a 3600-second timeout and at most one
# concurrent run.
job_config = {
    "name": "customer_analytics_pipeline",
    "email_notifications": {
        "on_failure": ["data-team@company.com"],
        "on_success": ["stakeholders@company.com"],
    },
    "timeout_seconds": 3600,
    "max_concurrent_runs": 1,
    "tasks": pipeline_tasks,
}

print("Job Configuration:")
print(json.dumps(job_config, indent=2))

In [None]:
# Task dependency management
# Maps each task key to the task keys that must finish before it starts.
task_dependencies = dict(
    bronze_ingestion=[],  # No dependencies
    silver_customers=["bronze_ingestion"],
    silver_orders=["bronze_ingestion"],
    gold_customer_metrics=["silver_customers", "silver_orders"],
    gold_revenue_analysis=["silver_orders"],
    final_reporting=["gold_customer_metrics", "gold_revenue_analysis"],
)

def create_job_with_dependencies(tasks, dependencies):
    """Create job configuration with proper task dependencies.

    Args:
        tasks: Not used by this function; kept so existing callers that pass
            it positionally keep working.
        dependencies: Mapping of task key -> list of task keys it depends on.
            Every task of the job must appear as a key.

    Returns:
        ``{"tasks": [...]}`` with one notebook task per key of
        ``dependencies`` (in mapping order), each pointing at
        ``/pipelines/<task key>`` and carrying its ``depends_on`` list.

    Raises:
        ValueError: If a dependency names a task that is not a key of
            ``dependencies``; such a job would reference a missing task.
    """
    referenced = {dep for deps in dependencies.values() for dep in deps}
    unknown = sorted(referenced.difference(dependencies))
    if unknown:
        raise ValueError(f"Dependencies reference undefined tasks: {unknown}")

    job_tasks = [
        {
            "task_key": task_name,
            "notebook_task": {"notebook_path": f"/pipelines/{task_name}"},
            "depends_on": [{"task_key": dep} for dep in deps],
        }
        for task_name, deps in dependencies.items()
    ]

    return {"tasks": job_tasks}

# Generate job configuration
# Build the job spec from the dependency map and list each task with the
# task keys it waits on.
dependency_job = create_job_with_dependencies([], task_dependencies)
print("Task Dependencies:")
for task in dependency_job["tasks"]:
    deps = [upstream["task_key"] for upstream in task["depends_on"]]
    print(f"{task['task_key']}: depends on {deps or 'nothing'}")

### Scheduling and Error Handling

In [None]:
# Scheduling configurations
# Quartz cron fields: second minute hour day-of-month month day-of-week.
# Quartz requires exactly one of day-of-month / day-of-week to be `?`.
scheduling_configs = {
    "daily_schedule": {
        "quartz_cron_expression": "0 0 2 * * ?",  # Daily at 2 AM
        "timezone_id": "UTC"
    },
    "business_hours_schedule": {
        # Day-of-month is `?` because day-of-week (MON-FRI) is specified.
        "quartz_cron_expression": "0 0 9-17 ? * MON-FRI",  # Hourly during business hours
        "timezone_id": "America/New_York"
    },
    "table_trigger": {
        "table_update": {
            "table_names": ["bronze.orders", "bronze.customers"],
            "condition": "ANY"
        }
    }
}

# Error handling configurations: retry policy and who is emailed on each
# run outcome.
error_handling_configs = {
    "retry_config": {
        "max_retries": 3,
        "min_retry_interval_millis": 60000,  # 1 minute
        "retry_on_timeout": True
    },
    "notification_config": {
        "email_notifications": {
            "on_start": ["team@company.com"],
            "on_success": ["stakeholders@company.com"],
            "on_failure": ["oncall@company.com", "team@company.com"]
        }
    }
}

print("Scheduling Configurations:")
print(json.dumps(scheduling_configs, indent=2))
print("\nError Handling Configurations:")
print(json.dumps(error_handling_configs, indent=2))

## 4. Environment Configuration & Git Integration

### Environment-Specific Configurations

In [None]:
# Environment configuration examples
# Development settings: dev catalog and table, small auto-terminating
# compute, notifications off.
_dev_environment = {
    "environment": "development",
    "database": {"catalog": "dev_catalog", "schema": "analytics"},
    "storage": {"root_table": "bronze.dev_orders"},
    "compute": {"cluster_size": "small", "auto_terminate": True},
    "notifications": {"enabled": False},
}

# Production settings: prod catalog and table, large compute without
# auto-termination, notifications sent to the listed channel.
_prod_environment = {
    "environment": "production",
    "database": {"catalog": "prod_catalog", "schema": "analytics"},
    "storage": {"root_table": "bronze.orders"},
    "compute": {"cluster_size": "large", "auto_terminate": False},
    "notifications": {"enabled": True, "channels": ["#data-alerts"]},
}

# Lookup table keyed by short environment name.
environment_configs = {
    "dev": _dev_environment,
    "prod": _prod_environment,
}

def load_environment_config(env_name):
    """Return the configuration dict registered for ``env_name``.

    Names that are not keys of ``environment_configs`` fall back to the
    "dev" entry. The returned dict is the stored object, not a copy.
    """
    fallback = environment_configs["dev"]
    return environment_configs.get(env_name, fallback)

# Example usage: resolve the active environment's settings and print them
# as indented JSON.
current_env = "dev"  # This would come from a widget or parameter
config = load_environment_config(current_env)

print(f"Configuration for {current_env} environment:")
print(json.dumps(config, indent=2))

## 5. Observability & Monitoring

### System Tables Monitoring

In [None]:
# Monitor job execution patterns: per job and day over the last 7 days,
# the number of runs, how many ended in 'SUCCESS', and the average and
# maximum execution_duration.
# NOTE(review): confirm that system.lakeflow.job_runs and these column
# names exist in your workspace's system schema before relying on this.
job_metrics_sql = """
    SELECT 
        job_name,
        DATE(start_time) as execution_date,
        COUNT(*) as total_runs,
        SUM(CASE WHEN result_state = 'SUCCESS' THEN 1 ELSE 0 END) as successful_runs,
        AVG(execution_duration) as avg_duration_seconds,
        MAX(execution_duration) as max_duration_seconds
    FROM system.lakeflow.job_runs 
    WHERE start_time >= current_date() - INTERVAL 7 DAYS
    GROUP BY job_name, DATE(start_time)
    ORDER BY execution_date DESC, job_name
    LIMIT 20
"""
job_metrics = spark.sql(job_metrics_sql)

print("Job Execution Metrics:")
display(job_metrics)

In [None]:
# Monitor DLT pipeline health: the ten most recent pipeline updates started
# within the last 24 hours.
# - Subtracting two timestamps yields an INTERVAL, so the duration is
#   computed from epoch seconds to really be a number of seconds (NULL
#   while end_time is NULL).
# - The window is anchored on current_timestamp(); anchoring on
#   current_date() would measure 24 hours back from midnight instead.
# NOTE(review): confirm that system.lakeflow.pipeline_updates and these
# column names exist in your workspace's system schema.
dlt_metrics = spark.sql("""
    SELECT 
        pipeline_name,
        update_id,
        state,
        start_time,
        end_time,
        (unix_timestamp(end_time) - unix_timestamp(start_time)) as duration_seconds
    FROM system.lakeflow.pipeline_updates 
    WHERE start_time >= current_timestamp() - INTERVAL 24 HOURS
    ORDER BY start_time DESC
    LIMIT 10
""")

print("DLT Pipeline Metrics:")
display(dlt_metrics)

In [None]:
# Track data quality results: daily pass rate per dataset and expectation
# over the last 30 days, keeping only the groups below a 95% pass rate and
# listing the newest and worst first.
# NOTE(review): confirm that system.lakeflow.data_quality_results and these
# column names exist in your workspace's system schema.
quality_trends_sql = """
    SELECT 
        dataset_name,
        expectation_name,
        DATE(timestamp) as check_date,
        COUNT(*) as total_checks,
        SUM(CASE WHEN passed THEN 1 ELSE 0 END) as passed_checks,
        (SUM(CASE WHEN passed THEN 1 ELSE 0 END) * 100.0 / COUNT(*)) as pass_rate
    FROM system.lakeflow.data_quality_results
    WHERE timestamp >= current_date() - INTERVAL 30 DAYS
    GROUP BY dataset_name, expectation_name, DATE(timestamp)
    HAVING pass_rate < 95  -- Alert on low pass rates
    ORDER BY check_date DESC, pass_rate ASC
    LIMIT 10
"""
quality_trends = spark.sql(quality_trends_sql)

print("Data Quality Trends (Issues Only):")
display(quality_trends)

### Custom Monitoring Functions

In [None]:
def create_pipeline_health_dashboard():
    """Return per-job success rates for the last 7 days with a health label.

    The result has one row per job with ``total_runs``, ``success_count``,
    ``success_rate`` (percent) and ``health_status``: 'Healthy' at 95% or
    more, 'Warning' at 80% or more, otherwise 'Critical'. Rows are ordered
    with the lowest success rate first.
    """
    # Job success rates
    job_health_sql = """
        WITH job_stats AS (
            SELECT 
                job_name,
                COUNT(*) as total_runs,
                SUM(CASE WHEN result_state = 'SUCCESS' THEN 1 ELSE 0 END) as success_count
            FROM system.lakeflow.job_runs 
            WHERE start_time >= current_date() - INTERVAL 7 DAYS
            GROUP BY job_name
        )
        SELECT 
            job_name,
            total_runs,
            success_count,
            (success_count * 100.0 / total_runs) as success_rate,
            CASE 
                WHEN (success_count * 100.0 / total_runs) >= 95 THEN 'Healthy'
                WHEN (success_count * 100.0 / total_runs) >= 80 THEN 'Warning'
                ELSE 'Critical'
            END as health_status
        FROM job_stats
        ORDER BY success_rate ASC
    """
    return spark.sql(job_health_sql)

# Generate pipeline health report and render it; `health_report` is reused
# by the alerting cell that follows.
health_report = create_pipeline_health_dashboard()
print("Pipeline Health Dashboard:")
display(health_report)

In [None]:
def generate_health_alerts(health_df):
    """Build alert dicts for jobs labelled 'Critical' or 'Warning'.

    Args:
        health_df: DataFrame providing ``job_name``, ``success_rate`` and
            ``health_status`` columns.

    Returns:
        A list of dicts with ``severity``, ``message``, ``job_name`` and
        ``success_rate``; all CRITICAL alerts come before WARNING alerts.
    """
    alerts = []

    # (health_status value, alert severity), in the order alerts are emitted.
    status_to_severity = (("Critical", "CRITICAL"), ("Warning", "WARNING"))

    for status, severity in status_to_severity:
        flagged_jobs = health_df.filter(col("health_status") == status)
        alerts.extend(
            {
                "severity": severity,
                "message": f"Job {job.job_name} has {job.success_rate:.1f}% success rate",
                "job_name": job.job_name,
                "success_rate": job.success_rate,
            }
            for job in flagged_jobs.collect()
        )

    return alerts

# Generate alerts from the health report and print one line per alert,
# prefixed with its severity.
alerts = generate_health_alerts(health_report)
print(f"Generated {len(alerts)} alerts:")
for alert in alerts:
    print(f"[{alert['severity']}] {alert['message']}")

## 6. Troubleshooting & Debugging

### Common Issue Diagnosis

In [None]:
def diagnose_lineage_issues(table_name):
    """Diagnose common lineage tracking problems for a table.

    Args:
        table_name: Unqualified table name. It is matched exactly against
            ``system.information_schema.tables.table_name`` and as a
            substring of ``target_table_full_name`` in the lineage table.
            It is interpolated into the SQL text as-is, so pass trusted
            values only.

    Returns:
        One of three human-readable diagnosis strings: table not registered,
        no lineage found, or lineage looks normal.
    """

    # Check if table exists in Unity Catalog.
    # The value is read by key: attribute access `row.count` would return
    # the tuple method `count` (Row is a tuple), not the column value, and
    # comparing that method with 0 raises TypeError.
    matching_tables = spark.sql(f"""
        SELECT COUNT(*) as table_count 
        FROM system.information_schema.tables 
        WHERE table_name = '{table_name}'
    """).collect()[0]["table_count"]

    if matching_tables == 0:
        return "Table not registered in Unity Catalog"

    # Check for recent lineage updates; lineage events are timestamped in
    # `event_time` (the lineage table has no `created_at` column).
    last_update = spark.sql(f"""
        SELECT MAX(event_time) as last_update
        FROM system.access.table_lineage 
        WHERE target_table_full_name LIKE '%{table_name}%'
    """).collect()[0]["last_update"]

    # MAX over zero rows is NULL, i.e. no lineage event targets the table.
    if last_update is None:
        return "No lineage information found - check if operations use Unity Catalog"

    return "Lineage tracking appears normal"

# Test lineage diagnosis for the table named "orders" and print the verdict.
diagnosis = diagnose_lineage_issues("orders")
print(f"Lineage diagnosis: {diagnosis}")

In [None]:
def debug_dlt_pipeline_failures(pipeline_id):
    """Fetch the last 24 hours of events and an error summary for a pipeline.

    Args:
        pipeline_id: Pipeline identifier, interpolated into the SQL text
            as-is; pass trusted values only.

    Returns:
        A ``(pipeline_events, error_summary)`` tuple of DataFrames: up to
        100 most recent events, and per event type containing 'ERROR' the
        event count with the collected messages, most frequent first.
    """
    # Get recent pipeline events
    recent_events_sql = f"""
        SELECT event_type, message, timestamp, details
        FROM system.lakeflow.pipeline_events 
        WHERE pipeline_id = '{pipeline_id}'
        AND timestamp >= current_timestamp() - INTERVAL 24 HOURS
        ORDER BY timestamp DESC
        LIMIT 100
    """

    # Analyze error patterns
    error_summary_sql = f"""
        SELECT 
            event_type,
            COUNT(*) as error_count,
            COLLECT_LIST(message) as error_messages
        FROM system.lakeflow.pipeline_events 
        WHERE pipeline_id = '{pipeline_id}'
        AND event_type LIKE '%ERROR%'
        AND timestamp >= current_timestamp() - INTERVAL 24 HOURS
        GROUP BY event_type
        ORDER BY error_count DESC
    """

    return spark.sql(recent_events_sql), spark.sql(error_summary_sql)

# Example usage (would need actual pipeline_id).
# The function is only defined above; this cell just confirms it is ready.
print("DLT debugging function ready - use with actual pipeline_id")

In [None]:
def troubleshoot_job_failures(job_name, hours_back=24):
    """Collect recent unsuccessful runs of a job and group their errors.

    Args:
        job_name: Job name, interpolated into the SQL text as-is; pass
            trusted values only.
        hours_back: Size of the look-back window in hours (default 24),
            interpolated into the INTERVAL expression.

    Returns:
        A ``(failure_analysis, failure_patterns)`` tuple of DataFrames: the
        FAILED / TIMEOUT / CANCELLED runs in the window (newest first), and
        the FAILED runs bucketed into a failure category by error-message
        substring, with counts and the distinct messages per category.
    """
    failed_runs_sql = f"""
        SELECT 
            run_id,
            start_time,
            end_time,
            result_state,
            error_message
        FROM system.lakeflow.job_runs 
        WHERE job_name = '{job_name}'
        AND start_time >= current_timestamp() - INTERVAL {hours_back} HOURS
        AND result_state IN ('FAILED', 'TIMEOUT', 'CANCELLED')
        ORDER BY start_time DESC
    """

    # Common failure patterns
    failure_categories_sql = f"""
        SELECT 
            CASE 
                WHEN error_message LIKE '%OutOfMemoryError%' THEN 'Memory Issues'
                WHEN error_message LIKE '%timeout%' THEN 'Timeout Issues'
                WHEN error_message LIKE '%TableNotFoundException%' THEN 'Table Availability'
                WHEN error_message LIKE '%AnalysisException%' THEN 'Schema Issues'
                ELSE 'Other'
            END as failure_category,
            COUNT(*) as occurrence_count,
            COLLECT_LIST(DISTINCT error_message) as sample_errors
        FROM system.lakeflow.job_runs 
        WHERE job_name = '{job_name}'
        AND start_time >= current_timestamp() - INTERVAL {hours_back} HOURS
        AND result_state = 'FAILED'
        GROUP BY failure_category
        ORDER BY occurrence_count DESC
    """

    return spark.sql(failed_runs_sql), spark.sql(failure_categories_sql)

# Example usage (would need actual job_name).
# The function is only defined above; this cell just confirms it is ready.
print("Job troubleshooting function ready - use with actual job_name")

## 7. Summary & Best Practices

### Key Takeaways

In [None]:
# Summary of what we've covered
# Each section is built separately and then assembled in display order.
unity_catalog_section = {
    "capabilities": [
        "Table-level and column-level lineage tracking",
        "Impact analysis for changes",
        "Data classification and governance",
        "Access audit and monitoring",
    ],
    "key_tables": [
        "system.access.table_lineage",
        "system.access.column_lineage",
        "system.access.audit",
    ],
}

delta_live_tables_section = {
    "benefits": [
        "Declarative pipeline development",
        "Built-in data quality expectations",
        "Automatic dependency resolution",
        "Comprehensive monitoring and observability",
    ],
    "expectation_types": [
        "@dlt.expect - warn on violation",
        "@dlt.expect_or_drop - drop invalid rows",
        "@dlt.expect_or_fail - fail pipeline on violation",
    ],
}

databricks_jobs_section = {
    "orchestration_features": [
        "Multi-task workflows with dependencies",
        "Flexible scheduling options",
        "Error handling and retry policies",
        "Comprehensive notifications",
    ],
    "monitoring_tables": [
        "system.lakeflow.job_runs",
        "system.lakeflow.pipeline_updates",
        "system.lakeflow.pipeline_events",
    ],
}

best_practices = [
    "Use Unity Catalog for comprehensive governance",
    "Implement proper data quality expectations in DLT",
    "Design robust error handling and retry logic",
    "Monitor pipeline health and performance regularly",
    "Use environment-specific configurations",
    "Implement proper Git workflows for deployment",
]

summary = {
    "Unity Catalog Lineage": unity_catalog_section,
    "Delta Live Tables": delta_live_tables_section,
    "Databricks Jobs": databricks_jobs_section,
    "Best Practices": best_practices,
}

# Print a title, a 70-character rule, then the summary as indented JSON.
print("Week 6 Summary - Dataflows, Lineage, Orchestration & Automation")
print("=" * 70)
print(json.dumps(summary, indent=2))

## 🎯 Week 6 Completion Checklist

- ✅ Explored Unity Catalog lineage capabilities
- ✅ Created sample DLT pipeline with data quality expectations
- ✅ Designed multi-task job configurations
- ✅ Implemented monitoring and observability patterns
- ✅ Developed troubleshooting and debugging approaches
- ✅ Established best practices for production deployment

**Key Skills Acquired:**
- Data lineage analysis and impact assessment
- Declarative pipeline development with DLT
- Workflow orchestration and error handling
- Production monitoring and alerting
- Environment management and deployment automation