# In [None]:

from pyspark.sql.functions import col, when, lit, length, concat, substring

try:
    # Build the masked frame from the SOURCE table and write the clone once.
    # The previous flow copied the raw data into the clone first and then
    # overwrote the clone while reading from it, which (a) left an unmasked
    # copy behind whenever masking failed, (b) is rejected by Spark for
    # non-Delta tables ("cannot overwrite a table that is also being read
    # from"), and (c) could hit a schema mismatch on overwrite if the source
    # invoice_number is not already a string.
    df_source = spark.table("purgo_playground.d_product_revenue")

    # Work on the string form so numeric invoice numbers can be masked too.
    invoice_str = col("invoice_number").cast("string")

    # Column.substr accepts Column arguments for both start and length;
    # functions.substring() requires plain ints on older PySpark releases,
    # so passing `length(...) - 4` to it can fail before any data is read.
    visible_prefix = invoice_str.substr(lit(1), length(invoice_str) - lit(4))

    masked_invoice = (
        when(invoice_str.isNull(), lit(None).cast("string"))  # nulls stay null
        .when(length(invoice_str) < 4, invoice_str)  # too short to mask: keep as is
        .otherwise(concat(visible_prefix, lit("****")))  # hide the last 4 characters
    )

    # withColumn replaces the column in place, so column order matches the source.
    df_masked = df_source.withColumn("invoice_number", masked_invoice)

    # Drop any previous clone, then materialise the masked data as the new clone.
    spark.sql("DROP TABLE IF EXISTS purgo_playground.d_product_revenue_clone")
    df_masked.write.mode("overwrite").saveAsTable("purgo_playground.d_product_revenue_clone")

    # Keep the `df_clone` name bound to the clone table for any later cells.
    df_clone = spark.table("purgo_playground.d_product_revenue_clone")

except Exception as e:
    print(f"An error occurred: {e}")
    # Re-raise so a failed masking run is not mistaken for a successful one.
    raise


# Persist the validation SQL (two CTEs joined together) to a temp file so it
# can be run separately against the source and clone tables.
#
# NOTE(review): the join condition compares the masked invoice_number with the
# original one, so it looks like it can only return rows whose value was left
# unchanged by masking -- confirm whether a key-based join was intended.
validation_query_path = '/tmp/data_validation_query.sql'

validation_query_sql = """
    WITH MaskedInvoiceNumbers AS (
        SELECT invoice_number
        FROM purgo_playground.d_product_revenue_clone
    ),
    OriginalInvoiceNumbers AS (
        SELECT invoice_number
        FROM purgo_playground.d_product_revenue
    )
    SELECT 
        mi.invoice_number AS masked_invoice_number,
        oi.invoice_number AS original_invoice_number
    FROM MaskedInvoiceNumbers mi
    JOIN OriginalInvoiceNumbers oi ON mi.invoice_number = oi.invoice_number; -- Check if the masked invoice numbers match expected format
    """

# Mode 'w' truncates any file left over from a previous run.
with open(validation_query_path, 'w') as sql_file:
    sql_file.write(validation_query_sql)




