## This notebook ingests the raw "drivers" source file from the volume into the bronze layer

In [1]:
# Job parameters (can be set in a workflow job). The second argument to
# getParameter is presumably the fallback value when the parameter is not
# supplied -- confirm against oidlUtils.
target_type = oidlUtils.parameters.getParameter("TARGET_TYPE", "table")
target_format = oidlUtils.parameters.getParameter("TARGET_FORMAT", "delta")

# Bronze-layer locations and object names.
bronze_catalog = "f1_bronze"
bronze_schema = "bronze"
bronze_folder_path = "/Volumes/f1_bronze/bronze/f1_bronze_volume"
bronze_table_dlt = "f1_drivers_dlt"  # table name used for the delta format
bronze_table_par = "f1_drivers_par"  # table name used for the parquet format

# ----------
##### Step 1 - Read the CSV file from Volume
#  ----------

In [1]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DateType

In [1]:
# Explicit schema for the drivers CSV, built field by field.
# Each add() call takes (column name, Spark data type, nullable flag);
# only driverId is declared non-nullable.
drivers_schema = (
    StructType()
    .add("driverId", IntegerType(), False)
    .add("driverRef", StringType(), True)
    .add("number", IntegerType(), True)
    .add("code", StringType(), True)
    .add("forename", StringType(), True)
    .add("surname", StringType(), True)
    .add("dob", DateType(), True)
    .add("nationality", StringType(), True)
    .add("url", StringType(), True)
)

In [1]:
# Load drivers.csv from the bronze volume folder, treating the first line as a
# header row and applying the explicit drivers_schema.
drivers_df = (
    spark.read
    .option("header", True)
    .schema(drivers_schema)
    .csv(f"{bronze_folder_path}/drivers.csv")
)

# ----------
##### Step 2 - Write the output to storage in the selected format
# ----------

In [None]:
# Persist drivers_df (overwrite mode) according to target_type / target_format.
# Supported combinations:
#   file  + parquet -> Parquet files under {bronze_folder_path}/drivers
#   table + parquet -> table {bronze_catalog}.{bronze_schema}.{bronze_table_par}
#   table + delta   -> table {bronze_catalog}.{bronze_schema}.{bronze_table_dlt}
# Any other combination raises ValueError. Previously such a combination fell
# through every branch, so the cell wrote nothing and still reported success.
if target_type == 'file' and target_format == 'parquet':
    drivers_df.write.mode("overwrite").parquet(f"{bronze_folder_path}/drivers")
elif target_type == 'table' and target_format == 'parquet':
    drivers_df.write.mode("overwrite").format("parquet").saveAsTable(f"{bronze_catalog}.{bronze_schema}.{bronze_table_par}")
elif target_type == 'table' and target_format == 'delta':
    drivers_df.write.mode("overwrite").format("delta").saveAsTable(f"{bronze_catalog}.{bronze_schema}.{bronze_table_dlt}")
else:
    raise ValueError(
        f"Unsupported target: TARGET_TYPE={target_type!r}, TARGET_FORMAT={target_format!r}. "
        "Supported combinations: file/parquet, table/parquet, table/delta."
    )

In [None]:
# Load the drivers data into drivers_read_df from the location that matches
# target_type / target_format. Combinations not listed here leave
# drivers_read_df unassigned.
if target_type == 'file' and target_format == 'parquet':
    drivers_path = f"{bronze_folder_path}/drivers"
    drivers_read_df = spark.read.parquet(drivers_path)
elif target_type == 'table' and target_format == 'parquet':
    parquet_table = f"{bronze_catalog}.{bronze_schema}.{bronze_table_par}"
    drivers_read_df = spark.read.table(parquet_table)
elif target_type == 'table' and target_format == 'delta':
    delta_table = f"{bronze_catalog}.{bronze_schema}.{bronze_table_dlt}"
    drivers_read_df = spark.read.table(delta_table)