In [ ]:
# Bronze ingestion: load the ticket landing JSON files, shape them to the
# Bronze schema, and write the result to the Bronze "tickets" Delta table.
from pyspark.sql import functions as F

# 1) Read all ticket landing files
# The glob matches every part file under the yyyy=/mm=/dd=/partition= folders.
ticket_glob = "Files/landing/tickets/yyyy=*/mm=*/dd=*/partition=*/part-*.json"
# multiLine mode parses JSON records that span several lines.
# NOTE(review): presumably each part file holds one JSON document (or array),
# not JSON Lines — confirm against the landing format.
raw_tickets = spark.read.json(ticket_glob, multiLine=True)

# 2) Transform to Bronze schema
# Only opened_utc needs a conversion; every other column is passed through
# unchanged, so one select replaces the chain of no-op withColumn calls.
#
# The trailing "Z" is parsed with the zone-offset pattern letter X instead of
# a quoted literal 'Z'. A quoted literal throws the UTC designator away, so
# Spark would interpret the wall-clock value in the session time zone and
# shift the timestamp whenever that zone is not UTC.
#
# NOTE(review): dropDuplicates keeps an arbitrary row per ticket_id — confirm
# duplicates are exact copies, otherwise choose a deterministic winner.
tickets_df = (
    raw_tickets
    .select(
        "ticket_id",
        "customer_id",
        "contact_name",
        "contact_email",
        "account_name",
        F.to_timestamp(F.col("opened_utc"), "yyyy-MM-dd'T'HH:mm:ssX").alias("opened_utc"),
        "title",
        "description",
        "category",
    )
    .dropDuplicates(["ticket_id"])
)

# 3) Save as Delta table in Bronze layer
# Overwrite mode replaces the existing data; overwriteSchema also lets the
# stored schema be replaced by this run's schema.
tickets_df.write.save(
    "Tables/Bronze/tickets",
    format="delta",
    mode="overwrite",
    overwriteSchema="true",
)

# Render the transformed rows in the notebook output.
display(tickets_df)