In [0]:
# Ensure the landing volume exists; IF NOT EXISTS makes this safe to re-run.
sql = (
    "\n"
    "CREATE VOLUME IF NOT EXISTS test_catalog.landing.parquet_source_volume;\n"
)
spark.sql(sql)

# Root of the volume and a listing of whatever it currently contains.
volume_path = "/Volumes/test_catalog/landing/parquet_source_volume"
files = dbutils.fs.ls(volume_path)

# Seed the volume only when the listing came back empty, so a re-run does not
# copy the dataset a second time.
if not files:
    seed_source = "dbfs:/mnt/dbacademy-datasets/data-engineer-learning-path/v02/ecommerce/raw/users-historical/"
    seed_target = volume_path + "/sales_historical"
    dbutils.fs.cp(seed_source, seed_target, recurse=True)

In [0]:
# Create the parquet_checkpoint_volume volume under test_catalog.landing.
# IF NOT EXISTS keeps the statement a no-op when the volume is already there.
sql = (
    "\n"
    "CREATE VOLUME IF NOT EXISTS test_catalog.landing.parquet_checkpoint_volume\n"
)
spark.sql(sql)

In [0]:
from pyspark.sql.functions import col, from_unixtime, current_timestamp
from pyspark.sql.types import DateType

# Read the parquet files from the landing volume. Anything the reader cannot
# fit into the schema is routed to the `_rescued_data` column.
parquet_reader = (
    spark.read
    .format("parquet")
    .option("rescuedDataColumn", "_rescued_data")
)
df = parquet_reader.load(
    "/Volumes/test_catalog/landing/parquet_source_volume/sales_historical"
)

# Column expressions for the derived / metadata columns.
# The raw timestamp is scaled down by 1_000_000 before from_unixtime (which
# works in seconds), i.e. it is treated as microseconds since the epoch.
first_touch_date = from_unixtime(
    col("user_first_touch_timestamp") / 1_000_000
).cast(DateType())
file_modified_at = col("_metadata.file_modification_time")
file_name = col("_metadata.file_name")

# Attach the derived date plus file-level and ingest-time metadata.
df_with_meta = (
    df
    .withColumn("user_first_touch_date", first_touch_date)
    .withColumn("file_modification_time", file_modified_at)
    .withColumn("source_file", file_name)
    .withColumn("ingest_time", current_timestamp())
)

# Overwrite the bronze Delta table with the enriched frame.
delta_writer = (
    df_with_meta.write
    .format("delta")
    .option("mergeSchema", "true")
    .mode("overwrite")
)
delta_writer.saveAsTable("test_catalog.bronze.historical_users")

# Read the table back and render it to confirm the load.
test_catalog_bronze_historical_users = spark.table(
    "test_catalog.bronze.historical_users"
)
display(test_catalog_bronze_historical_users)