In [0]:
# Notebook parameter: the processing date to load. The widget defaults to an
# empty string when no value is supplied.
PROC_DATE_WIDGET = "proc_date"
dbutils.widgets.text(PROC_DATE_WIDGET, "")
proc_date = dbutils.widgets.get(PROC_DATE_WIDGET)

In [0]:
# Layer identifier set before the shared utilities notebook is run.
# Not referenced again in the visible cells — presumably read by
# ../../../utils/common (run in the next cell); TODO confirm.
next_layer = "silver"

In [0]:
%run ../../../utils/common

In [0]:
# Notebook parameter: key used to pick the per-environment entry in `settings`.
# Default value and label are both empty strings.
ENVIRONMENT_WIDGET = "environment"
dbutils.widgets.text(ENVIRONMENT_WIDGET, "", "")
environment = dbutils.widgets.get(ENVIRONMENT_WIDGET)


In [0]:
# Resolve the catalog name for the selected environment.
# `settings` is not defined in the visible cells — presumably provided by the
# shared utilities notebook run earlier; TODO confirm.
# The `environment` widget defaults to "", so check explicitly and fail with a
# readable message instead of a bare `KeyError: ''`.
if environment not in settings:
    raise KeyError(
        f"Unknown environment {environment!r}; expected one of {list(settings)}"
    )
catalog_name = settings[environment]['catalog_name']

In [0]:
# Register a temp view with the bronze rows for the requested proc_date.
# ProcDate is parsed elsewhere in this notebook with the 'yyyyMMddHHmmss'
# pattern, so the dash-less proc_date is used here as a prefix filter.
proc_date_prefix = str(proc_date).strip().replace('-', '')

# The value is interpolated into SQL text, so accept digits only:
#  * an empty value would become LIKE '%' and select the whole bronze table;
#  * quotes or other characters would change the statement itself.
if not (proc_date_prefix.isascii() and proc_date_prefix.isdigit()):
    raise ValueError(
        "proc_date must contain only digits and dashes (e.g. '2024-01-31'), "
        f"got {proc_date!r}"
    )

# NOTE(review): the source catalog/schema is hard-coded here while the target
# table is built from catalog_name — confirm this is intended for every
# environment.
dynamic_query = f""" CREATE OR REPLACE TEMP VIEW temp_cap_customer_add AS SELECT *
FROM udp_wcm_pro.udp_wcm_bronze_pro.cap_customer_add
WHERE ProcDate like '{proc_date_prefix}%'
"""

# Execute the query
spark.sql(dynamic_query)

In [0]:
# DDL for the silver target table. IF NOT EXISTS makes the statement a no-op
# when the table is already present, so this cell can be re-run safely.
silver_table_ddl = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_realtime.capillary_customer_add_event (
  event_name STRING,
  event_id STRING,
  event_log_id STRING,
  org_id BIGINT,
  ref_id STRING,
  api_request_id STRING,
  created_at BIGINT,
  data STRUCT<loyaltyType: STRING, source: STRING, accountId: STRING, firstName: STRING, enteredAt: BIGINT, enteredBy: STRUCT<id: BIGINT, till: STRUCT<code: STRING, name: STRING>, store: STRUCT<code: STRING, name: STRING, externalId: STRING, externalId1: STRING, externalId2: STRING>>, customerIdentifiers: STRUCT<customerId: BIGINT, loyaltyType: STRING, instore: STRUCT<mobile: BIGINT, email: STRING, externalId: STRING>>, customFields: ARRAY<STRUCT<key: STRING, value: STRING>>, extendedFields: ARRAY<STRUCT<key: STRING, value: STRING>>>,
  loyalty_event_id STRING,
  proc_date TIMESTAMP)
TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'true',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'true'
)
"""

spark.sql(silver_table_ddl)

In [0]:
# Temp view that renames the camelCase bronze columns to the snake_case names
# of the silver table and parses ProcDate into a timestamp.
# The statement has no placeholders, so a plain (non-f) string is sufficient.
customer_add_view_sql = """
CREATE OR REPLACE TEMP VIEW view_capillary_customer_add_event as
SELECT
eventName as event_name,
eventID as event_id,
eventLogId as event_log_id,
orgId as org_id,
refId as ref_id,
apiRequestId as api_request_id,
createdAt as created_at,
data as data,
loyaltyEventId as loyalty_event_id,
TO_TIMESTAMP(ProcDate, 'yyyyMMddHHmmss') AS proc_date
FROM temp_cap_customer_add
"""

spark.sql(customer_add_view_sql)

In [0]:
# Load the renamed/parsed rows for this run.
# The temp view view_capillary_customer_add_event (registered in the previous
# cell) already holds exactly this projection over temp_cap_customer_add:
# snake_case column names plus proc_date parsed with 'yyyyMMddHHmmss'.
# Reading it here keeps a single definition of the mapping instead of a second,
# copy-pasted SELECT that could drift from the view.
df_raw = spark.table("view_capillary_customer_add_event")

# Rebuild the nested `data` struct field by field, in the field order declared
# for the target table, working from the innermost structs outwards.

# data.enteredBy.till
till_col = struct(
    col("data.enteredBy.till.code").alias("code"),
    col("data.enteredBy.till.name").alias("name"),
).alias("till")

# data.enteredBy.store
store_col = struct(
    col("data.enteredBy.store.code").alias("code"),
    col("data.enteredBy.store.name").alias("name"),
    col("data.enteredBy.store.externalId").alias("externalId"),
    col("data.enteredBy.store.externalId1").alias("externalId1"),
    col("data.enteredBy.store.externalId2").alias("externalId2"),
).alias("store")

# data.enteredBy
entered_by_col = struct(
    col("data.enteredBy.id").alias("id"),
    till_col,
    store_col,
).alias("enteredBy")

# data.customerIdentifiers.instore — mobile is the only field that is cast
# (to LongType); the others are passed through unchanged.
instore_col = struct(
    col("data.customerIdentifiers.instore.mobile").cast(LongType()).alias("mobile"),
    col("data.customerIdentifiers.instore.email").alias("email"),
    col("data.customerIdentifiers.instore.externalId").alias("externalId"),
).alias("instore")

# data.customerIdentifiers
customer_identifiers_col = struct(
    col("data.customerIdentifiers.customerId").alias("customerId"),
    col("data.customerIdentifiers.loyaltyType").alias("loyaltyType"),
    instore_col,
).alias("customerIdentifiers")

# Top-level data struct
data_col = struct(
    col("data.loyaltyType").alias("loyaltyType"),
    col("data.source").alias("source"),
    col("data.accountId").alias("accountId"),
    col("data.firstName").alias("firstName"),
    col("data.enteredAt").alias("enteredAt"),
    entered_by_col,
    customer_identifiers_col,
    col("data.customFields").alias("customFields"),
    col("data.extendedFields").alias("extendedFields"),
)

df_fixed = df_raw.withColumn("data", data_col)

# Append the reshaped rows to the silver Delta table.
# Append mode adds to whatever the table already holds; nothing is replaced.
target_table = f"{catalog_name}.udp_wcm_silver_realtime.capillary_customer_add_event"
(
    df_fixed.write
    .format("delta")
    .mode("append")
    .saveAsTable(target_table)
)
