In [1]:
## This notebook ingests the raw "lap times" data from the bronze layer into the silver layer
### Ingest f1_lap_times_dlt

In [1]:
# Job parameters. A workflow job can override these; the second argument is
# presumably the default used when the job does not supply a value.
target_type = oidlUtils.parameters.getParameter("TARGET_TYPE", "table")
target_format = oidlUtils.parameters.getParameter("TARGET_FORMAT", "delta")

# Source (bronze) location of the raw lap times.
bronze_catalog = "f1_bronze"
bronze_schema = "bronze"
bronze_table_dlt = "f1_lap_times_dlt"
bronze_table_par = "f1_lap_times_par"

# Target (silver) location of the cleaned lap times.
silver_catalog = "f1_silver"
silver_schema = "silver"
silver_table_dlt = "f1_lap_times_dlt"
silver_table_par = "f1_lap_times_par"


# ----------
##### Step 1 - Read the bronze table into a DataFrame, then select, rename, and cast columns
# ----------

In [1]:
# Load the raw lap times from the fully qualified bronze table
# (catalog.schema.table).
lap_times_df = (
    spark.read
    .table(f"{bronze_catalog}.{bronze_schema}.{bronze_table_dlt}")
)

In [1]:
# Preview the raw bronze rows (first 20 rows, long values truncated).
lap_times_df.show(n=20, truncate=True)

+------+--------+---+--------+--------+------------+
|RACEID|DRIVERID|LAP|POSITION|    TIME|MILLISECONDS|
+------+--------+---+--------+--------+------------+
|  1022|     840| 37|      14|1:49.070|      109070|
|  1022|     840| 38|      14|1:49.480|      109480|
|  1022|     840| 39|      14|1:50.079|      110079|
|  1022|     840| 40|      12|1:51.250|      111250|
|  1022|     840| 41|      12|1:49.152|      109152|
|  1022|     840| 42|      12|1:49.013|      109013|
|  1022|     840| 43|      11|1:49.758|      109758|
|  1022|     840| 44|      10|1:50.957|      110957|
|  1022|     826|  1|      11|2:35.504|      155504|
|  1022|     826|  2|      11|2:58.103|      178103|
|  1022|     826|  3|      11|2:58.429|      178429|
|  1022|     826|  4|      11|2:45.928|      165928|
|  1022|     826|  5|      11|1:54.167|      114167|
|  1022|     826|  6|      11|1:52.687|      112687|
|  1022|     826|  7|      11|1:52.631|      112631|
|  1022|     826|  8|      11|1:52.031|      1

In [1]:
from pyspark.sql.functions import col, floor, format_string, current_timestamp, when

# Build the silver-layer frame from the bronze lap times:
#   * cast the bronze columns to integers under snake_case names,
#   * rebuild the lap-time text ("m:ss.mmm") from the integer millisecond
#     count instead of copying the bronze TIME column,
#   * stamp every row with the ingestion timestamp,
#   * keep only the silver columns, in a fixed order.
lap_times_selected_df = (
    lap_times_df
    .withColumn("race_id", col("RACEID").cast("int"))
    .withColumn("driver_id", col("DRIVERID").cast("int"))
    .withColumn("lap", col("LAP").cast("int"))
    .withColumn("position", col("POSITION").cast("int"))
    .withColumn("milliseconds", col("MILLISECONDS").cast("int"))
    .withColumn(
        "time",
        # format_string renders NULL arguments as the literal text "null", so a
        # missing or unparseable millisecond value would otherwise be stored as
        # the string "null:null.null". `when` without `otherwise` yields NULL,
        # which keeps such rows NULL instead.
        when(
            col("milliseconds").isNotNull(),
            format_string(
                "%d:%02d.%03d",
                floor(col("milliseconds") / 60000),           # whole minutes
                floor((col("milliseconds") % 60000) / 1000),  # remaining whole seconds
                col("milliseconds") % 1000                    # remaining milliseconds
            )
        )
    )
    .withColumn("ingestion_date", current_timestamp())
    .select("race_id", "driver_id", "lap", "position", "time", "milliseconds", "ingestion_date")
)


# ----------
##### Step 2 - Write the output to the silver layer as a Delta/Parquet table or as Parquet files
# ----------

In [1]:
# Persist the transformed lap times to the silver layer, overwriting any
# previous output. Supported TARGET_TYPE/TARGET_FORMAT combinations:
#   file/parquet  -> Parquet files under {silver_folder_path}/lap_times
#   table/parquet -> managed Parquet table silver_table_par
#   table/delta   -> managed Delta table silver_table_dlt
# NOTE(review): `silver_folder_path` is not assigned in any cell visible in
# this notebook; confirm it is provided before running with TARGET_TYPE='file'.
if target_type == 'file' and target_format == 'parquet':
    lap_times_selected_df.write.mode("overwrite").parquet(f"{silver_folder_path}/lap_times")
elif target_type == 'table' and target_format == 'parquet':
    lap_times_selected_df.write.mode("overwrite").format("parquet").saveAsTable(f"{silver_catalog}.{silver_schema}.{silver_table_par}")
elif target_type == 'table' and target_format == 'delta':
    lap_times_selected_df.write.mode("overwrite").format("delta").saveAsTable(f"{silver_catalog}.{silver_schema}.{silver_table_dlt}")
else:
    # Fail loudly instead of silently writing nothing (and letting the job
    # report success) for an unrecognised combination such as file/delta.
    raise ValueError(
        f"Unsupported target: TARGET_TYPE={target_type!r}, TARGET_FORMAT={target_format!r}. "
        "Supported combinations: file/parquet, table/parquet, table/delta."
    )

In [1]:
# Read the silver output back for verification, from the same location the
# selected TARGET_TYPE/TARGET_FORMAT combination writes to.
# NOTE(review): `silver_folder_path` is not assigned in any cell visible in
# this notebook; confirm it is provided before running with TARGET_TYPE='file'.
if target_type == 'file' and target_format == 'parquet':
    lap_times_read_df = spark.read.parquet(f"{silver_folder_path}/lap_times")
elif target_type == 'table' and target_format == 'parquet':
    lap_times_read_df = spark.read.table(f"{silver_catalog}.{silver_schema}.{silver_table_par}")
elif target_type == 'table' and target_format == 'delta':
    lap_times_read_df = spark.read.table(f"{silver_catalog}.{silver_schema}.{silver_table_dlt}")
else:
    # Without this branch `lap_times_read_df` would be left unassigned (or
    # stale from an earlier run), and the failure would only surface later
    # as an unrelated NameError or a misleading preview.
    raise ValueError(
        f"Unsupported target: TARGET_TYPE={target_type!r}, TARGET_FORMAT={target_format!r}. "
        "Supported combinations: file/parquet, table/parquet, table/delta."
    )

In [1]:
# Preview the rows read back from the silver layer (first 20 rows, long
# values truncated).
lap_times_read_df.show(n=20, truncate=True)

+-------+---------+---+--------+--------+------------+--------------------+
|race_id|driver_id|lap|position|    time|milliseconds|      ingestion_date|
+-------+---------+---+--------+--------+------------+--------------------+
|     40|       25|  3|      10|1:21.085|       81085|2025-11-14 10:27:...|
|     40|       25|  4|      10|1:19.951|       79951|2025-11-14 10:27:...|
|     40|       25|  5|      10|1:19.877|       79877|2025-11-14 10:27:...|
|     40|       25|  6|      10|1:20.000|       80000|2025-11-14 10:27:...|
|     40|       25|  7|      10|1:19.826|       79826|2025-11-14 10:27:...|
|     40|       25|  8|      10|1:19.734|       79734|2025-11-14 10:27:...|
|     40|       25|  9|      10|1:20.288|       80288|2025-11-14 10:27:...|
|     40|       25| 10|      10|1:19.945|       79945|2025-11-14 10:27:...|
|     40|       25| 11|      10|1:20.325|       80325|2025-11-14 10:27:...|
|     40|       25| 12|      10|1:19.434|       79434|2025-11-14 10:27:...|
|     40|   