In [0]:
from pyspark.sql.functions import col, lit, current_timestamp, sum as _sum
from delta.tables import DeltaTable
from pydeequ.checks import Check, CheckLevel
from pydeequ.verification import VerificationSuite, VerificationResult
import os

In [0]:
# Import necessary libraries
from delta.tables import DeltaTable
from pyspark.sql import SparkSession

# Build or refresh the merged (third) table from the customer and bookings
# staging tables.

# Staging inputs, read from their Delta tables.
customer_stage_table = spark.read.format("delta").table("databricks2.zoom.zoom_staging_customer_delta")
bookings_stage_table = spark.read.format("delta").table("databricks2.zoom.zoom_staging_bookings_delta")

merged_table = "databricks2.zoom.zoom_merged_table"
# Existence check goes through the JVM session's catalog object.
merged_table_exists = spark._jsparkSession.catalog().tableExists(merged_table)

if not merged_table_exists:
    # First run: seed the target with a full outer join of customers and
    # bookings on customer_id, so customers without bookings (and bookings
    # without a staged customer) are both kept.
    merged_data = customer_stage_table.join(bookings_stage_table, "customer_id", "outer")
    merged_data.write.format("delta").mode("overwrite").saveAsTable(merged_table)

    # Show the rows that were written. Passing `merged_table` itself would
    # only render the table-name string.
    display(spark.table(merged_table))

    # Bind the handle on this path too, so later cells that use
    # `target_delta_table` do not fail with NameError on a first run.
    target_delta_table = DeltaTable.forName(spark, merged_table)

else:
    # Load the existing target Delta table.
    print('Reading Delta Table')
    target_delta_table = DeltaTable.forName(spark, merged_table)

    # Upsert the bookings staging data into the target.
    print(' Performing Merge')
    (
        target_delta_table.alias("target")
        .merge(
            bookings_stage_table.alias("source"),
            "target.booking_id = source.booking_id AND target.customer_id = source.customer_id",
        )
        # whenMatched clauses are evaluated in order and only the first one
        # whose condition holds is applied, so the delete must come before
        # the update. Previously the update came first with a condition that
        # reduced to "source not cancelled OR target not cancelled", which
        # left the delete reachable only when both sides were already
        # cancelled.
        .whenMatchedDelete(
            condition="source.book_status = 'cancelled'"  # Delete if status is 'cancelled'
        )
        # Every remaining matched row takes the latest booking fields. The
        # booking_id / customer_id equality is already guaranteed by the
        # merge key, so no extra condition is needed here.
        .whenMatchedUpdate(
            set={
                "target.car_id": "source.car_id",
                "target.booking_date": "source.booking_date",
                "target.start_time": "source.start_time",
                "target.end_time": "source.end_time",
                "target.total_amount": "source.total_amount",
                "target.book_status": "source.book_status",
                "target.duration_book": "source.duration_book",
            }
        )
        # Do not insert bookings that are already cancelled; otherwise a
        # cancelled booking still present in staging would be re-inserted
        # on the run after it was deleted. NULL statuses are still inserted.
        .whenNotMatchedInsert(
            condition="source.book_status IS NULL OR source.book_status <> 'cancelled'",
            values={
                "booking_id": "source.booking_id",
                "customer_id": "source.customer_id",
                "car_id": "source.car_id",
                "booking_date": "source.booking_date",
                "start_time": "source.start_time",
                "end_time": "source.end_time",
                "total_amount": "source.total_amount",
                "book_status": "source.book_status",
                "duration_book": "source.duration_book",
            },
        )
        .execute()
    )

# The merge with the customer data is performed in the next cell.


Reading Delta Table
 Performing Merge


In [0]:
# Upsert the customer staging data into the merged table.

# Resolve the Delta table handle in this cell so it does not depend on which
# branch of the previous cell ran (a freshly created table would otherwise
# leave `target_delta_table` undefined here).
target_delta_table = DeltaTable.forName(spark, merged_table)

(
    target_delta_table.alias("target")
    .merge(
        customer_stage_table.alias("source"),
        "target.customer_id = source.customer_id",
    )
    # Matched rows already satisfy the customer_id equality through the merge
    # key, so the update needs no additional condition. Every target row for
    # the customer (one per booking) receives the refreshed customer fields.
    .whenMatchedUpdate(
        set={
            "target.name": "source.name",
            "target.email": "source.email",
            "target.phone_number": "source.phone_number",
            "target.signup_date": "source.signup_date",
            "target.customer_status": "source.customer_status",
            "target.days_difference": "source.days_difference",
        }
    )
    # Customers not yet present are inserted with only their customer
    # columns; the columns not listed here are left unset for that row.
    .whenNotMatchedInsert(
        values={
            "customer_id": "source.customer_id",
            "name": "source.name",
            "email": "source.email",
            "phone_number": "source.phone_number",
            "signup_date": "source.signup_date",
            "customer_status": "source.customer_status",
            "days_difference": "source.days_difference",
        }
    )
    .execute()
)
