In [1]:
## This notebook ingests the raw "driver standings" data from the bronze layer into the silver layer
### Ingest f1_driver_standings_dlt

In [1]:
# Notebook parameters. TARGET_TYPE and TARGET_FORMAT can be set from a workflow job.
# NOTE(review): the second argument to getParameter is presumably the fallback used
# when the job does not supply the parameter — confirm against the oidlUtils docs.
target_type = oidlUtils.parameters.getParameter("TARGET_TYPE", "table")
target_format = oidlUtils.parameters.getParameter("TARGET_FORMAT", "delta")

# Catalog and schema of the source (bronze) and destination (silver) layers.
bronze_catalog = "f1_bronze"
bronze_schema = "bronze"
silver_catalog = "f1_silver"
silver_schema = "silver"

# Table names. On the silver side the write step uses the "_dlt" table for the
# delta format and the "_par" table for the parquet format.
bronze_table_dlt = "f1_driver_standings_dlt"
bronze_table_par = "f1_driver_standings_par"
silver_table_dlt = "f1_driver_standings_dlt"
silver_table_par = "f1_driver_standings_par"


In [1]:
%sql
-- Preview the raw bronze driver standings table before it is transformed
SELECT *
FROM f1_bronze.bronze.f1_driver_standings_dlt

# ----------
##### Step 1 - Read the Bronze table into a DataFrame; select, rename and cast columns
# ----------

In [1]:
# Build the fully qualified bronze table name (catalog.schema.table), then load it.
bronze_table_name = ".".join((bronze_catalog, bronze_schema, bronze_table_dlt))
drivers_df = spark.read.table(bronze_table_name)

In [1]:
from pyspark.sql.functions import col, floor, format_string, current_timestamp
from pyspark.sql.types import IntegerType, StringType, DoubleType, FloatType

# Project the bronze columns onto the silver schema in one select: cast each
# source column, give it its snake_case name, and stamp the row with the load time.
drivers_selected_df = drivers_df.select(
    col("RACEID").cast("int").alias("race_id"),
    col("DRIVERID").cast("int").alias("driver_id"),
    col("POINTS").cast("float").alias("points"),
    col("WINS").cast("int").alias("wins"),
    current_timestamp().alias("ingestion_date"),
)


In [1]:
# Preview the transformed rows (first 20, long values truncated) before writing.
drivers_selected_df.show(n=20, truncate=True)

+-------+---------+------+----+--------------------+
|race_id|driver_id|points|wins|      ingestion_date|
+-------+---------+------+----+--------------------+
|     18|        1|  10.0|   1|2025-11-14 10:48:...|
|     18|        2|   8.0|   0|2025-11-14 10:48:...|
|     18|        3|   6.0|   0|2025-11-14 10:48:...|
|     18|        4|   5.0|   0|2025-11-14 10:48:...|
|     18|        5|   4.0|   0|2025-11-14 10:48:...|
|     18|        6|   3.0|   0|2025-11-14 10:48:...|
|     18|        7|   2.0|   0|2025-11-14 10:48:...|
|     18|        8|   1.0|   0|2025-11-14 10:48:...|
|     19|        1|  14.0|   1|2025-11-14 10:48:...|
|     19|        2|  11.0|   0|2025-11-14 10:48:...|
|     19|        3|   6.0|   0|2025-11-14 10:48:...|
|     19|        4|   6.0|   0|2025-11-14 10:48:...|
|     19|        5|  10.0|   0|2025-11-14 10:48:...|
|     19|        6|   3.0|   0|2025-11-14 10:48:...|
|     19|        7|   2.0|   0|2025-11-14 10:48:...|
|     19|        8|  11.0|   1|2025-11-14 10:4

# ----------
##### Step 2 - Write the output to the silver layer as a Delta or Parquet table, or as Parquet files
# ----------

In [1]:
# Persist the silver DataFrame to the destination selected by the notebook parameters.
# Supported combinations: file + parquet, table + parquet, table + delta.
# Any other combination raises instead of silently writing nothing, so a
# mis-set parameter cannot leave stale data in place unnoticed.
if target_type == 'file':
    if target_format == 'parquet':
        # NOTE(review): silver_folder_path is not defined in the parameter cell of
        # this notebook; it must be provided before this branch can run — confirm
        # where it is expected to come from.
        drivers_selected_df.write.mode("overwrite").parquet(f"{silver_folder_path}/driver_standings")
    else:
        raise ValueError(
            f"Unsupported TARGET_FORMAT {target_format!r} for TARGET_TYPE 'file'; expected 'parquet'"
        )
elif target_type == 'table':
    if target_format == 'parquet':
        drivers_selected_df.write.mode("overwrite").format("parquet").saveAsTable(f"{silver_catalog}.{silver_schema}.{silver_table_par}")
    elif target_format == 'delta':
        drivers_selected_df.write.mode("overwrite").format("delta").saveAsTable(f"{silver_catalog}.{silver_schema}.{silver_table_dlt}")
    else:
        raise ValueError(
            f"Unsupported TARGET_FORMAT {target_format!r} for TARGET_TYPE 'table'; expected 'parquet' or 'delta'"
        )
else:
    raise ValueError(
        f"Unsupported TARGET_TYPE {target_type!r}; expected 'file' or 'table'"
    )

In [1]:
# Read back the output selected by the notebook parameters so this check reflects
# what the current run wrote, rather than always reading the delta table.
if target_type == 'file' and target_format == 'parquet':
    # Same path the write step uses for the parquet file target.
    driver_standings_df = spark.read.parquet(f"{silver_folder_path}/driver_standings")
else:
    # The parquet table target has its own table; every other case reads the
    # delta table, which is also what the default parameters (table/delta) select.
    if target_type == 'table' and target_format == 'parquet':
        silver_table_name = silver_table_par
    else:
        silver_table_name = silver_table_dlt
    driver_standings_df = spark.read.table(f"{silver_catalog}.{silver_schema}.{silver_table_name}")

# Display the first rows of the silver output for a visual sanity check
driver_standings_df.show()

+-------+---------+------+----+--------------------+
|race_id|driver_id|points|wins|      ingestion_date|
+-------+---------+------+----+--------------------+
|     18|        1|  10.0|   1|2025-11-14 10:49:...|
|     18|        2|   8.0|   0|2025-11-14 10:49:...|
|     18|        3|   6.0|   0|2025-11-14 10:49:...|
|     18|        4|   5.0|   0|2025-11-14 10:49:...|
|     18|        5|   4.0|   0|2025-11-14 10:49:...|
|     18|        6|   3.0|   0|2025-11-14 10:49:...|
|     18|        7|   2.0|   0|2025-11-14 10:49:...|
|     18|        8|   1.0|   0|2025-11-14 10:49:...|
|     19|        1|  14.0|   1|2025-11-14 10:49:...|
|     19|        2|  11.0|   0|2025-11-14 10:49:...|
|     19|        3|   6.0|   0|2025-11-14 10:49:...|
|     19|        4|   6.0|   0|2025-11-14 10:49:...|
|     19|        5|  10.0|   0|2025-11-14 10:49:...|
|     19|        6|   3.0|   0|2025-11-14 10:49:...|
|     19|        7|   2.0|   0|2025-11-14 10:49:...|
|     19|        8|  11.0|   1|2025-11-14 10:4