##### Local Environment Setup

In [1]:

import os
import sys
# Local (Windows-style) environment setup for PySpark.
# NOTE(review): every absolute path below is machine-specific; adjust per workstation.

# Set JAVA env variable
os.environ["JAVA_HOME"] = r"C:\Program Files\Eclipse Adoptium\jdk-11.0.26.4-hotspot"

# Set Hadoop environment variables
os.environ['HADOOP_HOME'] = r'C:\hadoop'

# Prepend Hadoop's bin directory to PATH only if it is not already there, so that
# re-running this cell does not keep growing PATH with duplicate entries.
hadoop_bin = os.environ['HADOOP_HOME'] + r'\bin'
if hadoop_bin not in os.environ['PATH'].split(';'):
    os.environ['PATH'] = hadoop_bin + ';' + os.environ['PATH']

# Set the Python executable path explicitly (driver and workers use this interpreter)
os.environ['PYSPARK_PYTHON'] = sys.executable
os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable

# Make the pipeline tools importable; guard against duplicates on cell re-run
pipeline_tools_dir = r'C:\GitHub\Tools\de'
if pipeline_tools_dir not in sys.path:
    sys.path.append(pipeline_tools_dir)


##### Libraries

In [2]:
import time
import logging
from datetime import datetime

import numpy as np
import pandas as pd

from typing import Dict


from pyspark.sql import SparkSession
from pyspark.sql import DataFrame
from pyspark.sql import functions as F
from pyspark.sql.types import (StructType, StructField, StringType, 
                            DoubleType, IntegerType, TimestampType, 
                            DateType)
from delta.tables import DeltaTable


In [3]:
from de_pipeline_tools import *

##### Spark Session

In [4]:
# Create the Spark session used by every cell below. The helper is not defined in
# this notebook (presumably it comes from the de_pipeline_tools star import); the
# string argument looks like the Spark application name — TODO confirm.
spark = initialize_local_spark_delta_lake("Financial Data Pipeline")

2025-04-24 15:43:32,260 - INFO - ---Spark session initialized with Delta Lake support---


In [5]:
# Create the de_pipelines database at a fixed warehouse location if it does not
# already exist (the statement is a no-op when the database is present).
spark.sql("CREATE DATABASE IF NOT EXISTS de_pipelines LOCATION 'C:/hive-warehouse/de_pipelines'")

DataFrame[]


## SCD Type 1:
##### Initial Creation (Overwrite)

##### Inputs/Outputs

In [6]:
# Initial creation: source files for the first (overwrite) load
data_dir = "../../data"
file_list = [
    "financial_transactions_20250409_113413.csv",
    "financial_transactions_20250410_113413.csv"
]

# Resolve every file name against data_dir and convert it to an absolute path
abs_file_list = [
    os.path.abspath(os.path.join(data_dir, csv_name))
    for csv_name in file_list
]

# Target tables, one per pipeline layer
bronze_table = 'de_pipelines.financial_osb_bronze_type1'
silver_table = 'de_pipelines.financial_osb_silver_type1'
gold_table   = 'de_pipelines.financial_osb_gold_type1'


In [7]:
def get_schema():
    """Return the explicit Spark schema used to read the transaction CSV files.

    Returns:
        StructType: twelve fields; only ``transaction_id`` is declared
        non-nullable. ``timestamp`` is deliberately read as a string and
        parsed later in the silver step.
    """
    # (column name, Spark data type, nullable)
    column_specs = [
        ("transaction_id", StringType(), False),
        ("timestamp", StringType(), True),  # load in as string type, handle in silver step
        ("customer_id", StringType(), True),
        ("account_number", StringType(), True),
        ("transaction_type", StringType(), True),
        ("amount", DoubleType(), True),
        ("currency", StringType(), True),
        ("balance_after", DoubleType(), True),
        ("status", StringType(), True),
        ("merchant", StringType(), True),
        ("category", StringType(), True),
        ("location", StringType(), True),
    ]
    fields = [
        StructField(col_name, col_type, is_nullable)
        for col_name, col_type, is_nullable in column_specs
    ]
    return StructType(fields)

#### Define: Validation Rules, Transformations, Write Executions

In [8]:
def bronze_writer(df: DataFrame, table_name, bronze_transform=None):
    """Overwrite the bronze Delta table with ``df``, optionally transformed first.

    Args:
        df: Incoming batch to persist.
        table_name: Name of the target table passed to ``saveAsTable``.
        bronze_transform: Optional callable taking and returning a DataFrame;
            applied to ``df`` before the write when truthy.

    Returns:
        None.

    Raises:
        Exception: Any error from the transform or the write is logged and re-raised.
    """
    try:
        output_df = df
        if bronze_transform:
            logger.info("Applying transformation within bronze writer")
            output_df = bronze_transform(df)

        # Full overwrite, allowing the table schema to be replaced as well
        delta_writer = output_df.write.format("delta").mode("overwrite")
        delta_writer = delta_writer.option("overwriteSchema", "true")
        delta_writer.saveAsTable(table_name)

    except Exception as exc:
        logger.error(f"Error in bronze_writer: {str(exc)}")
        raise

    return None

In [9]:
# Define bronze validation rules.
# Each rule is a dict with a short "name", a SQL boolean "condition" and a
# human-readable "description". Descriptions are kept in step with the conditions:
# both the amount and timestamp rules reject NULL values.
bronze_validation_rules = [
    {
        "name": "has_transaction_id",
        "condition": "transaction_id IS NOT NULL",
        "description": "Transaction ID must be present"
    },
    {
        "name": "valid_amount",
        "condition": "amount IS NOT NULL AND amount > 0",
        "description": "Amount must be present and positive"
    },
    {
        "name": "valid_timestamp",
        "condition": "timestamp IS NOT NULL AND timestamp <= current_timestamp()",
        "description": "Timestamp must not be NULL or in the future"
    }
]

In [10]:
def silver_transform(df:DataFrame) -> DataFrame:
    """Clean, standardise and filter bronze transactions for the silver layer.

    Steps, in order:
      1. Strip characters that cannot appear in a ``yyyy-MM-dd HH:mm:ss`` string
         from ``timestamp`` and cast it to a Spark timestamp.
      2. Drop duplicate rows on (``transaction_id``, ``timestamp``).
      3. Take the absolute value of ``amount`` and lower-case
         ``transaction_type``, ``category`` and ``status``.
      4. Keep only rows with a transaction id, an account number, a strictly
         positive amount and a timestamp that is NULL or not in the future.
      5. Derive ``transaction_date``, ``transaction_time`` and ``year_month``,
         and stamp each row with ``processing_timestamp``.

    Args:
        df: Bronze-layer DataFrame with ``timestamp`` stored as a string.

    Returns:
        The transformed DataFrame with the four derived columns appended.
    """

    # Clean any non-timestamp characters first
    df = df.withColumn(
        "timestamp", 
        F.regexp_replace(F.col("timestamp"), "[^0-9\\-: ]", "")
    )

    # Cast to timestamp type
    df = df.withColumn("timestamp", F.col("timestamp").cast("timestamp"))

    # Remove duplicates
    df = df.dropDuplicates(subset=["transaction_id","timestamp"])

    # Standardize Data
    df = (df
            .withColumn("amount", F.abs(F.col("amount")))
            .withColumn("transaction_type", F.lower(F.col("transaction_type")))
            .withColumn("category", F.lower(F.col("category")))
            .withColumn("status", F.lower(F.col("status")))
    )

    # Filter Data
    # Address bronze layer data validation check concerns.
    # The upper bound is current_timestamp(), not current_date(): a date bound is
    # treated as midnight today and would wrongly drop valid same-day transactions.
    df = df.filter(
                    (F.col('transaction_id').isNotNull()) # transaction id must exist
                    & (F.col('account_number').isNotNull()) # account number must exist
                    & (F.col('amount') > 0) # amount must be positive (also drops NULL amounts)
                    & ((F.col('timestamp') <= F.current_timestamp()) # no future timestamps
                    |(F.col('timestamp').isNull()))# NULL timestamps are allowed through
    ) 
    
    # Split timestamp into date, time and year_month (used for partitioning)
    df = (df
            .withColumn("transaction_date", F.to_date("timestamp"))
            .withColumn("transaction_time", F.date_format("timestamp", "HH:mm:ss"))
            .withColumn("year_month", F.date_format(F.col("transaction_date"), "yyyy-MM"))
    )
    
    # Add processing timestamp for bookkeeping
    df = (df
            .withColumn("processing_timestamp", F.current_timestamp())
    )

    return df

In [11]:
def silver_writer(df: DataFrame, table_name, silver_transform=None):
    """Overwrite the silver Delta table with ``df``, partitioned by ``year_month``.

    Args:
        df: Incoming batch to persist.
        table_name: Name of the target table passed to ``saveAsTable``.
        silver_transform: Optional callable taking and returning a DataFrame;
            applied to ``df`` before the write when truthy.

    Returns:
        None.

    Raises:
        Exception: Any error from the transform or the write is logged and re-raised.
    """
    try:
        output_df = df
        if silver_transform:
            logger.info("Applying transformation within silver writer")
            output_df = silver_transform(df)

        # Full overwrite (schema included), one partition per year_month value
        delta_writer = output_df.write.format("delta").mode("overwrite")
        delta_writer = delta_writer.option("overwriteSchema", "true")
        delta_writer = delta_writer.partitionBy("year_month")
        delta_writer.saveAsTable(table_name)

    except Exception as exc:
        logger.error(f"Error in silver_writer: {str(exc)}")
        raise

    return None

In [12]:
# Define silver validation rules.
# Each rule is a dict with a short "name", a SQL boolean "condition" and a
# human-readable "description". Transaction type and status tolerate NULL;
# currency and timestamp do not, and their descriptions say so.
silver_validation_rules = [
    {
        "name": "valid_transaction_type",
        "condition": "transaction_type IN ('debit', 'credit', 'transfer', 'payment', 'withdrawal', 'deposit') OR transaction_type IS NULL",
        "description": "Transaction type must be one of the valid types"
    },
    {
        "name": "valid_status",
        "condition": "status IN ('completed', 'pending', 'failed', 'cancelled', 'refunded') OR status IS NULL",
        "description": "Status must be one of the valid statuses"
    },
    {
        "name": "valid_currency",
        "condition": "currency IS NOT NULL AND length(currency) = 3",
        "description": "Currency code must be present and exactly 3 characters"
    },
    {
        "name": "valid_timestamp",
        "condition": "timestamp IS NOT NULL AND timestamp <= current_timestamp()",
        "description": "Timestamp must not be NULL or in the future"
    }
]

In [13]:
def gold_transform(df:DataFrame) -> Dict:
    """Build the three gold-layer aggregate DataFrames from silver data.

    Args:
        df: Silver-layer DataFrame containing at least ``transaction_id``,
            ``amount``, ``customer_id``, ``category``, ``transaction_type``
            and ``transaction_date``.

    Returns:
        Dict mapping ``"daily_category"``, ``"customer_summary"`` and
        ``"transaction_type_summary"`` to their aggregate DataFrames. Every
        aggregate carries a ``processing_timestamp`` column.
    """
    def _with_processing_ts(frame):
        # Bookkeeping column shared by every gold table
        return frame.withColumn("processing_timestamp", F.current_timestamp())

    # Aggregate expressions common to all three summaries
    txn_count = F.count("transaction_id").alias("transaction_count")
    amount_total = F.sum("amount").alias("total_amount")
    amount_mean = F.avg("amount").alias("avg_amount")

    # Gold aggregation 1: Daily summary by category
    per_day_category = df.groupBy("transaction_date", "category").agg(
        txn_count,
        amount_total,
        amount_mean,
        F.min("amount").alias("min_amount"),
        F.max("amount").alias("max_amount"),
        F.countDistinct("customer_id").alias("unique_customers"),
    )

    # Gold aggregation 2: Customer summary
    per_customer = df.groupBy("customer_id").agg(
        txn_count,
        amount_total,
        amount_mean,
        F.min("transaction_date").alias("first_transaction_date"),
        F.max("transaction_date").alias("last_transaction_date"),
        F.approx_count_distinct("category").alias("category_count"),
    )
    days_idle = F.datediff(F.current_date(), F.col("last_transaction_date"))
    per_customer = _with_processing_ts(per_customer).withColumn(
        "days_since_last_transaction", days_idle
    )

    # Gold aggregation 3: Transaction type summary
    per_transaction_type = df.groupBy("transaction_type").agg(
        txn_count,
        amount_total,
        amount_mean,
    )

    return {
        "daily_category": _with_processing_ts(per_day_category),
        "customer_summary": per_customer,
        "transaction_type_summary": _with_processing_ts(per_transaction_type),
    }

In [14]:
def gold_writer(df: DataFrame, table_name: str):
    """Overwrite a gold Delta table with ``df``.

    Args:
        df: Aggregate DataFrame to persist.
        table_name: Name of the target table passed to ``saveAsTable``.

    Returns:
        None.
    """
    # Full overwrite, allowing the table schema to be replaced as well
    delta_writer = df.write.format("delta").mode("overwrite")
    delta_writer = delta_writer.option("overwriteSchema", "true")
    delta_writer.saveAsTable(table_name)

    return None

In [15]:
# Gold validation rules: each entry has a short name, a SQL boolean condition
# and a human-readable description.
gold_validation_rules = [
    dict(
        name="positive_transaction_counts",
        condition="transaction_count > 0",
        description="Transaction counts should be positive",
    ),
    dict(
        name="valid_total_amounts",
        condition="total_amount >= 0",
        description="Total amounts should not be negative",
    ),
]

### Running Full Batch Pipeline

In [16]:
# Run the full batch pipeline (bronze, silver and gold) over the initial CSV files,
# passing the overwrite-mode writers and the per-layer validation rules defined in
# this notebook. No bronze transform is supplied; the silver and gold transforms are.
# The call's return value (a status/metrics dict) is displayed as the cell output.
run_batch_de_pipeline(spark, 'csv', abs_file_list, get_schema(), 
                     bronze_table, silver_table, gold_table, 
                     bronze_transform=None, silver_transform=silver_transform, gold_transform=gold_transform,
                     bronze_writer=bronze_writer, silver_writer=silver_writer, gold_writer=gold_writer,
                     bronze_validation_rules=bronze_validation_rules, 
                     silver_validation_rules=silver_validation_rules, 
                     gold_validation_rules=gold_validation_rules,
                     pipeline_name='Financial_DE_Pipeline')

2025-04-24 15:43:54,487 - INFO - --Starting data pipeline execution with ID: Financial_DE_Pipeline_20250424_154354--
2025-04-24 15:43:54,489 - INFO - Starting bronze layer processing
2025-04-24 15:43:55,189 - INFO - Successfully read CSV data from: 
  - c:\GitHub\DE_Pipelines\data\financial_transactions_20250409_113413.csv
  - c:\GitHub\DE_Pipelines\data\financial_transactions_20250410_113413.csv
2025-04-24 15:43:55,190 - INFO - Writing to bronze table: de_pipelines.financial_osb_bronze_type1
2025-04-24 15:44:27,379 - INFO - Successfully wrote to bronze table: de_pipelines.financial_osb_bronze_type1
2025-04-24 15:44:27,384 - INFO - Write Metrics: 
[
  {
    "numOutputRows": "1000",
    "numOutputBytes": "52336",
    "numFiles": "2"
  }
]
2025-04-24 15:44:28,639 - INFO - Running data quality checks for bronze layer
2025-04-24 15:44:34,219 - INFO - Data Quality Metrics for bronze layer:
2025-04-24 15:44:34,221 - INFO -   - Shape: [15,1000] (approx. row count)
2025-04-24 15:44:34,222 - IN

{'status': 'success',
 'pipeline_id': 'Financial_DE_Pipeline_20250424_154354',
 'bronze_version': 3,
 'silver_version': 4,
 'timestamp': '2025-04-24T15:47:04.300840',
 'duration_seconds': 189.81175589561462,
 'metrics': {'pipeline_id': 'Financial_DE_Pipeline_20250424_154354',
  'start_time': '2025-04-24T15:43:54.487088',
  'stages': {'bronze': {'duration_seconds': 40.788586378097534,
    'version': 3,
    'status': 'success'},
   'bronze_optimize': {'layer': 'bronze',
    'duration_seconds': 11.606544971466064,
    'status': 'success'},
   'silver': {'duration_seconds': 40.32490658760071,
    'version': 4,
    'status': 'success',
    'source_bronze_version': 3},
   'silver_optimize': {'layer': 'silver',
    'duration_seconds': 12.360381364822388,
    'status': 'success'},
   'gold': {'duration_seconds': 78.08485698699951,
    'status': 'success',
    'source_silver_version': 4,
    'tables': ['daily_category',
     'customer_summary',
     'transaction_type_summary']},
   'gold_optimi

### Running Layers In Isolation

In [17]:
# Rebind `spark` for the layer-by-layer test runs below, using a different name
# string. NOTE(review): whether this creates a new session or reuses the existing
# one depends on the helper's implementation — confirm in de_pipeline_tools.
spark = initialize_local_spark_delta_lake("Financial Data Pipeline - Testing")

2025-04-24 15:47:05,349 - INFO - ---Spark session initialized with Delta Lake support---


#### Bronze

In [18]:
# Run only the bronze layer with mode='test' and no writer or transform.
# The call returns the bronze DataFrame and a version value.
# NOTE(review): the captured log shows no "Writing to bronze table" line, so test
# mode appears to skip the table write — confirm in de_pipeline_tools.
bronzedf, bronze_version = process_batch_bronze_layer(spark, 'csv', abs_file_list, get_schema(), bronze_table,
                               bronze_transform=None, validation_rules=bronze_validation_rules,
                               pipeline_id='test', mode='test', bronze_writer=None)

2025-04-24 15:47:06,861 - INFO - Starting bronze layer processing
2025-04-24 15:47:06,924 - INFO - Successfully read CSV data from: 
  - c:\GitHub\DE_Pipelines\data\financial_transactions_20250409_113413.csv
  - c:\GitHub\DE_Pipelines\data\financial_transactions_20250410_113413.csv
2025-04-24 15:47:06,928 - INFO - No transformation function defined
2025-04-24 15:47:06,929 - INFO - Running data quality checks for bronze layer
2025-04-24 15:47:11,475 - INFO - Data Quality Metrics for bronze layer:
2025-04-24 15:47:11,476 - INFO -   - Shape: [15,1000] (approx. row count)
2025-04-24 15:47:11,478 - INFO -   - Schema: 
root
 |-- transaction_id: string (nullable = true)
 |-- timestamp: string (nullable = true)
 |-- customer_id: string (nullable = true)
 |-- account_number: string (nullable = true)
 |-- transaction_type: string (nullable = true)
 |-- amount: double (nullable = true)
 |-- currency: string (nullable = true)
 |-- balance_after: double (nullable = true)
 |-- status: string (nullab

In [19]:
# Preview the first 5 rows of the bronze test-run result
bronzedf.show(5)

+--------------+-------------------+-----------+--------------+----------------+-------+--------+-------------+--------+--------------------+---------+---------------+--------------------+--------------------+--------+
|transaction_id|          timestamp|customer_id|account_number|transaction_type| amount|currency|balance_after|  status|            merchant| category|       location| ingestion_timestamp|         source_file|batch_id|
+--------------+-------------------+-----------+--------------+----------------+-------+--------+-------------+--------+--------------------+---------+---------------+--------------------+--------------------+--------+
|   TXN00000001|2023-05-05 03:42:36| CUST001179| ACCT-39450273|        interest|1378.52|     USD|      9544.56|reversed|                NULL|     NULL|Los Angeles, CA|2025-04-24 15:47:...|file:///c:/GitHub...|    test|
|   TXN00000002|2023-08-11 06:24:46| CUST001188| ACCT-55298556|          refund| 1626.4|     JPY|      9598.17|disputed|    

#### Silver

In [20]:
# Run only the silver layer with mode='test' and no writer: reads from the bronze
# table (bronze_version=None; the captured log reports which version was read),
# applies silver_transform and runs the silver validation rules.
silverdf, silver_version = process_batch_silver_layer(spark, bronze_table, bronze_version=None, 
                                                   silver_table=silver_table, 
                                                   silver_transform=silver_transform, 
                                                   validation_rules=silver_validation_rules,
                                                   pipeline_id='test', mode='test', 
                                                   silver_writer=None)

2025-04-24 15:47:12,201 - INFO - Starting silver layer processing
2025-04-24 15:47:12,914 - INFO - Successfully read bronze data version 4
2025-04-24 15:47:13,062 - INFO - Transformation function applied in test mode
2025-04-24 15:47:13,064 - INFO - Running data quality checks for silver layer
2025-04-24 15:47:24,774 - INFO - Data Quality Metrics for silver layer:
2025-04-24 15:47:24,776 - INFO -   - Shape: [19,950] (approx. row count)
2025-04-24 15:47:24,777 - INFO -   - Schema: 
root
 |-- transaction_id: string (nullable = true)
 |-- timestamp: timestamp (nullable = true)
 |-- customer_id: string (nullable = true)
 |-- account_number: string (nullable = true)
 |-- transaction_type: string (nullable = true)
 |-- amount: double (nullable = true)
 |-- currency: string (nullable = true)
 |-- balance_after: double (nullable = true)
 |-- status: string (nullable = true)
 |-- merchant: string (nullable = true)
 |-- category: string (nullable = true)
 |-- location: string (nullable = true)
 

In [21]:
# Preview the first 5 rows of the silver test-run result
silverdf.show(5)

+--------------+-------------------+-----------+--------------+----------------+-------+--------+-------------+--------+--------------------+---------+---------------+--------------------+--------------------+--------------------+
|transaction_id|          timestamp|customer_id|account_number|transaction_type| amount|currency|balance_after|  status|            merchant| category|       location| ingestion_timestamp|         source_file|            batch_id|
+--------------+-------------------+-----------+--------------+----------------+-------+--------+-------------+--------+--------------------+---------+---------------+--------------------+--------------------+--------------------+
|   TXN00000001|2023-05-05 03:42:36| CUST001179| ACCT-39450273|        interest|1378.52|     USD|      9544.56|reversed|                NULL|     NULL|Los Angeles, CA|2025-04-24 15:44:...|file:///c:/GitHub...|Financial_DE_Pipe...|
|   TXN00000002|2023-08-11 06:24:46| CUST001188| ACCT-55298556|          ref

##### Investigating Silver Validation Check Failures:
Corrupt "status" column

In [22]:
# Count rows per distinct status value to surface the misspelled/corrupt statuses
# behind the silver validation failures
silverdf.groupBy('status').count().show()

+---------+-----+
|   status|count|
+---------+-----+
| dqsputed|    1|
|  pgnding|    1|
|completed|  206|
|   failen|    1|
|   failed|  186|
|bompleted|    1|
|     NULL|   19|
| reyersed|    1|
|comcleted|    1|
| disputad|    1|
| dmsputed|    1|
| diiputed|    1|
|  pending|  171|
| disputed|  204|
| reverszd|    1|
| reversed|  204|
+---------+-----+



#### Gold

In [23]:
# Run only the gold layer with mode='test' and no writer: reads from the silver
# table (silver_version=None; the captured log reports which version was read) and
# applies gold_transform. The result is used below as a dict of DataFrames.
gold_dfs = process_batch_gold_layer(spark, silver_table, silver_version=None, 
                                 gold_table=gold_table, 
                                 gold_transform=gold_transform, 
                                 validation_rules=gold_validation_rules, 
                                 pipeline_id='test', 
                                 mode='test',
                                 gold_writer=None)

2025-04-24 15:47:38,415 - INFO - Starting gold layer processing
2025-04-24 15:47:39,148 - INFO - Successfully read silver data version 5
2025-04-24 15:47:39,278 - INFO - Transformation function applied
2025-04-24 15:47:39,282 - INFO - Running data quality checks for de_pipelines.financial_osb_gold_type1_daily_category layer
2025-04-24 15:47:51,276 - INFO - Data Quality Metrics for de_pipelines.financial_osb_gold_type1_daily_category layer:
2025-04-24 15:47:51,277 - INFO -   - Shape: [9,702] (approx. row count)
2025-04-24 15:47:51,278 - INFO -   - Schema: 
root
 |-- transaction_date: date (nullable = true)
 |-- category: string (nullable = true)
 |-- transaction_count: long (nullable = false)
 |-- total_amount: double (nullable = true)
 |-- avg_amount: double (nullable = true)
 |-- min_amount: double (nullable = true)
 |-- max_amount: double (nullable = true)
 |-- unique_customers: long (nullable = false)
 |-- processing_timestamp: timestamp (nullable = false)

2025-04-24 15:47:51,280 -

In [24]:
# Preview the first 5 rows of every gold aggregate, in dict order
for gold_df in gold_dfs.values():
    gold_df.show(5)

+----------------+----------+-----------------+------------+----------+----------+----------+----------------+--------------------+
|transaction_date|  category|transaction_count|total_amount|avg_amount|min_amount|max_amount|unique_customers|processing_timestamp|
+----------------+----------+-----------------+------------+----------+----------+----------+----------------+--------------------+
|      2023-05-25|      fees|                1|       30.89|     30.89|     30.89|     30.89|               1|2025-04-24 15:48:...|
|      2023-09-25|    income|                1|      1384.1|    1384.1|    1384.1|    1384.1|               1|2025-04-24 15:48:...|
|      2023-06-22|      NULL|                1|       64.69|     64.69|     64.69|     64.69|               1|2025-04-24 15:48:...|
|      2023-12-18|investment|                1|      626.53|    626.53|    626.53|    626.53|               1|2025-04-24 15:48:...|
|      2023-08-28|   housing|                1|       37.49|     37.49|     


## SCD Type 1: 
##### Change Data Capture (Merge)

##### Inputs/Outputs

In [25]:
# Change Data Capture: incremental files to merge into the existing tables
file_list = [
    "financial_transactions_20250414_113413.csv",
    "financial_transactions_20250417_113413.csv"
]

# Resolve every file name against data_dir and convert it to an absolute path
abs_file_list = [
    os.path.abspath(os.path.join(data_dir, csv_name))
    for csv_name in file_list
]


#### Define: Validation Rules(Same), Transformations(Same), Write Executions (Merge)

In [26]:
def bronze_writer(df: DataFrame, table_name, bronze_transform=None):
    """Merge (upsert) a batch into the bronze Delta table, SCD Type 1 style.

    The batch is first reduced to the latest record per
    (``transaction_id``, ``account_number``), optionally transformed, and then
    merged into ``table_name``: matching rows whose ``amount`` or
    ``balance_after`` differ are overwritten, unmatched rows are inserted.

    Args:
        df: Incoming batch.
        table_name: Existing Delta table to merge into.
        bronze_transform: Optional callable taking and returning a DataFrame;
            applied after de-duplication when truthy.

    Returns:
        None.

    Raises:
        Exception: Any error from the transform or the merge is logged and re-raised.

    Note:
        Uses the notebook-level ``spark`` session and registers the batch as the
        temporary view ``source``.
    """
    # Local import: Window is not part of the notebook's import cell.
    from pyspark.sql import Window

    # Apply Pre-Transform Filters:

    # Keep only the latest record per key. A row_number() window is used because
    # orderBy() followed by dropDuplicates() does not guarantee which duplicate
    # survives once the data is shuffled.
    latest_first = (Window
                    .partitionBy("transaction_id", "account_number")
                    .orderBy(F.col("timestamp").desc()))
    df = (df
            .withColumn("_row_rank", F.row_number().over(latest_first))
            .filter(F.col("_row_rank") == 1)
            .drop("_row_rank")  # helper column must not reach UPDATE SET * / INSERT *
        )

    try:
        if bronze_transform:
            logger.info("Applying transformation within bronze writer")
            transformed_df = bronze_transform(df)
        else:
            transformed_df = df

        # Merge the source and target DataFrame
        transformed_df.createOrReplaceTempView("source")

        # Execute merge. Change detection uses the null-safe operator <=> so a
        # value changing to or from NULL is also treated as an update; with a
        # plain != the comparison yields NULL and the row is silently skipped.
        spark.sql(f"""
            MERGE INTO {table_name} t
            USING source s
            ON s.transaction_id = t.transaction_id AND s.account_number = t.account_number
            WHEN MATCHED AND (NOT (t.amount <=> s.amount) OR NOT (t.balance_after <=> s.balance_after)) THEN 
                UPDATE SET *
            WHEN NOT MATCHED THEN
                INSERT *
        """)

    except Exception as e:
        logger.error(f"Error in bronze_writer: {str(e)}")
        raise

    return None

In [27]:
def silver_writer(df: DataFrame, table_name, silver_transform=None):
    """Merge (upsert) newly ingested rows into the silver Delta table, SCD Type 1 style.

    Only rows ingested after the newest ``ingestion_timestamp`` already in
    ``table_name`` are considered. They are reduced to the latest record per
    (``transaction_id``, ``account_number``), optionally transformed, and then
    merged: matching rows whose ``amount`` or ``balance_after`` differ are
    overwritten, unmatched rows are inserted.

    Args:
        df: Incoming batch; must contain ``ingestion_timestamp``.
        table_name: Existing Delta table to merge into.
        silver_transform: Optional callable taking and returning a DataFrame;
            applied after filtering and de-duplication when truthy.

    Returns:
        None.

    Raises:
        Exception: Any error from the transform or the merge is logged and re-raised.

    Note:
        Uses the notebook-level ``spark`` session and registers the batch as the
        temporary view ``source``.
    """
    # Local import: Window is not part of the notebook's import cell.
    from pyspark.sql import Window

    # Apply Pre-Transform Filters:

    # get max ingested timestamp from silver table
    max_ts = spark.sql(f"SELECT MAX(ingestion_timestamp) FROM {table_name}").collect()[0][0]

    # filter incoming data to only include records with a later ingested timestamp.
    # MAX() is NULL on an empty table; comparing against NULL would discard every
    # row, so the filter is skipped in that case and the whole batch is loaded.
    if max_ts is not None:
        df = df.filter(F.col("ingestion_timestamp") > max_ts)

    # Keep only the latest record per key. A row_number() window is used because
    # orderBy() followed by dropDuplicates() does not guarantee which duplicate
    # survives once the data is shuffled.
    latest_first = (Window
                    .partitionBy("transaction_id", "account_number")
                    .orderBy(F.col("timestamp").desc()))
    df = (df
            .withColumn("_row_rank", F.row_number().over(latest_first))
            .filter(F.col("_row_rank") == 1)
            .drop("_row_rank")  # helper column must not reach UPDATE SET * / INSERT *
        )

    try:
        if silver_transform:
            logger.info("Applying transformation within silver writer")
            transformed_df = silver_transform(df)
        else:
            transformed_df = df

        # Merge the source and target DataFrame
        transformed_df.createOrReplaceTempView("source")

        # Execute merge. Change detection uses the null-safe operator <=> so a
        # value changing to or from NULL is also treated as an update; with a
        # plain != the comparison yields NULL and the row is silently skipped.
        spark.sql(f"""
            MERGE INTO {table_name} t
            USING source s
            ON s.transaction_id = t.transaction_id 
                AND s.account_number = t.account_number 
            WHEN MATCHED AND (NOT (t.amount <=> s.amount) OR NOT (t.balance_after <=> s.balance_after)) THEN 
                UPDATE SET *
            WHEN NOT MATCHED THEN
                INSERT *
        """)

    except Exception as e:
        logger.error(f"Error in silver_writer: {str(e)}")
        raise

    return None

#### Running CDC Full Batch Pipeline (Bronze, Silver)

In [28]:
# CDC run over the incremental files: bronze and silver only (every gold argument
# is None). `bronze_writer` and `silver_writer` now refer to the merge-based
# versions, because the later definitions replace the overwrite ones of the same name.
run_batch_de_pipeline(spark, 'csv', abs_file_list, get_schema(), 
                     bronze_table=bronze_table, silver_table=silver_table, gold_table=None, 
                     bronze_transform=None, silver_transform=silver_transform, gold_transform=None,
                     bronze_writer=bronze_writer, silver_writer=silver_writer, gold_writer=None,
                     bronze_validation_rules=bronze_validation_rules, 
                     silver_validation_rules=silver_validation_rules, 
                     gold_validation_rules=None,
                     pipeline_name='Financial_DE_Pipeline')

2025-04-24 15:48:32,863 - INFO - --Starting data pipeline execution with ID: Financial_DE_Pipeline_20250424_154832--
2025-04-24 15:48:32,864 - INFO - Starting bronze layer processing
2025-04-24 15:48:33,003 - INFO - Successfully read CSV data from: 
  - c:\GitHub\DE_Pipelines\data\financial_transactions_20250414_113413.csv
  - c:\GitHub\DE_Pipelines\data\financial_transactions_20250417_113413.csv
2025-04-24 15:48:33,006 - INFO - Writing to bronze table: de_pipelines.financial_osb_bronze_type1
2025-04-24 15:48:42,881 - INFO - Successfully wrote to bronze table: de_pipelines.financial_osb_bronze_type1
2025-04-24 15:48:42,883 - INFO - Write Metrics: 
[
  {
    "numOutputRows": "1540",
    "numTargetBytesAdded": "59467",
    "numTargetRowsInserted": "540",
    "numTargetFilesAdded": "1",
    "materializeSourceTimeMs": "1716",
    "numTargetRowsMatchedDeleted": "0",
    "numTargetFilesRemoved": "1",
    "numTargetRowsMatchedUpdated": "217",
    "executionTimeMs": "4798",
    "numTargetDelet

{'status': 'success',
 'pipeline_id': 'Financial_DE_Pipeline_20250424_154832',
 'bronze_version': 5,
 'silver_version': 6,
 'timestamp': '2025-04-24T15:49:22.222828',
 'duration_seconds': 49.357311964035034,
 'metrics': {'pipeline_id': 'Financial_DE_Pipeline_20250424_154832',
  'start_time': '2025-04-24T15:48:32.863503',
  'stages': {'bronze': {'duration_seconds': 15.616908073425293,
    'version': 5,
    'status': 'success'},
   'bronze_optimize': {'layer': 'bronze',
    'duration_seconds': 2.0020623207092285,
    'status': 'success'},
   'silver': {'duration_seconds': 21.71529245376587,
    'version': 6,
    'status': 'success',
    'source_bronze_version': 5},
   'silver_optimize': {'layer': 'silver',
    'duration_seconds': 10.017763137817383,
    'status': 'success'}},
  'status': 'success',
  'end_time': '2025-04-24T15:49:22.220815',
  'total_duration_seconds': 49.357311964035034}}