In [0]:
from pyspark.sql.functions import col, explode, from_json, lit, current_timestamp, input_file_name, regexp_extract
from pyspark.sql.types import StructType, StructField, ArrayType, StringType, MapType

In [0]:
# Glob over the raw event dumps: every `.json` file sitting exactly one directory
# below `raw_events/`. The transformation cell later pulls that directory name out
# of each file path and stores it as `report_id`.
raw_json_path = "/Volumes/01_bronze/warcraftlogs/raw_api_calls/raw_events/*/*.json"

In [0]:
# Load every file matched by `raw_json_path` as JSON. The "multiline" option lets a
# single JSON document span several lines instead of requiring one record per line.
raw_json_df = (
    spark.read
    .format("json")
    .option("multiline", True)
    .load(raw_json_path)
)

In [0]:
# Flatten each file's event array into one row per event and attach lineage columns.
#
# `_metadata.file_path` is a hidden column that is only reachable on the DataFrame
# returned by the file reader. It must therefore be selected in the very first
# projection (next to the events array) and carried through the explode; referencing
# `raw_json_df["_metadata.file_path"]` after the array has been projected out cannot
# be resolved because the projection no longer exposes `_metadata`.
#
# Resulting columns, in order: all top-level event attributes, then
# `bronze_ingestion_time`, `source_file`, `report_id`.
events_df = (
    raw_json_df
    .select(
        col("data.reportData.report.events.data").alias("events"),
        col("_metadata.file_path").alias("source_file"),
    )
    # One output row per array element; `source_file` is repeated for each event.
    .withColumn("event", explode("events"))
    .select(
        "event.*",  # flatten top-level event attributes
        current_timestamp().alias("bronze_ingestion_time"),
        "source_file",
        # The path segment directly after `raw_events/` is used as the report id.
        regexp_extract("source_file", r"/raw_events/([^/]+)/", 1).alias("report_id"),
    )
)

In [0]:
# Persist the flattened events to the bronze Delta table. The write mode is "append",
# so each execution of this cell adds the current rows to whatever the table already holds.
(
    events_df.write
    .format("delta")
    .mode("append")
    .saveAsTable("01_bronze.warcraftlogs.api_calls_events")
)