## Ingest qualifying.json file
#### Step 1 - Read the JSON file using the Spark DataFrame reader API

In [0]:
# Notebook parameter "p_data_source" (defaults to an empty string).
# Its value is attached to every row further down as the data_source column.
dbutils.widgets.text("p_data_source", "")
v_data_source = dbutils.widgets.get("p_data_source")

In [0]:
# Notebook parameter "p_file_date" (defaults to "2021-03-21").
# Its value selects the dated sub-folder of the raw path that is read below.
dbutils.widgets.text("p_file_date", "2021-03-21")
v_file_date = dbutils.widgets.get("p_file_date")

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

In [0]:
from pyspark.sql.types import StructType, StringType, IntegerType, DoubleType, StructField, TimestampType, DateType, FloatType
from pyspark.sql.functions import col, struct,current_timestamp , concat, lit

In [0]:
# Explicit schema applied when reading the qualifying JSON data.
# Each tuple is (source column name, Spark type, nullable); only qualifyId is
# declared non-nullable, and the q1/q2/q3 values are kept as strings.
qualifying_schema = StructType(
    fields=[
        StructField(column_name, column_type, is_nullable)
        for column_name, column_type, is_nullable in [
            ("qualifyId", IntegerType(), False),
            ("raceId", IntegerType(), True),
            ("driverId", IntegerType(), True),
            ("constructorId", IntegerType(), True),
            ("number", IntegerType(), True),
            ("position", IntegerType(), True),
            ("q1", StringType(), True),
            ("q2", StringType(), True),
            ("q3", StringType(), True),
        ]
    ]
)

In [0]:
# Read the qualifying JSON data for the selected file date with the explicit schema.
# multiLine is enabled so a single JSON record may span several lines.
qualifying_df = (
    spark.read
    .schema(qualifying_schema)
    .option("multiLine", True)
    .json(f"{raw_folder_path}/{v_file_date}/qualifying")
)

In [0]:
# Render the freshly loaded DataFrame to eyeball the parsed columns.
display(qualifying_df)

#### Step 2 - Rename columns and add new columns

In [0]:
# Convert the camelCase source column names to snake_case and tag each row
# with the data source supplied through the p_data_source widget.
qualifying_final_df = (
    qualifying_df
    .withColumnRenamed("qualifyId", "qualify_id")
    .withColumnRenamed("driverId", "driver_id")
    .withColumnRenamed("raceId", "race_id")
    .withColumnRenamed("constructorId", "constructor_id")
    .withColumn("data_source", lit(v_data_source))
)
                                           

In [0]:
# add_ingestion_date is not defined in this notebook (presumably it comes from
# ../includes/common_functions and appends an ingestion timestamp column — TODO confirm).
qualifying_final_df = add_ingestion_date(qualifying_final_df)

In [0]:
# Render the transformed DataFrame to check the renamed and added columns.
display(qualifying_final_df)


#### Step 3 - Write the output to the processed container in parquet format

In [0]:
#qualifying_final_df.write.mode("overwrite").parquet(f"{processed_folder_path}/qualifying")
#qualifying_final_df.write.mode("overwrite").format("parquet").saveAsTable("f1_processed.qualifying")

In [0]:
#overwrite_partition(qualifying_final_df, "f1_processed", "qualifying", "race_id")

In [0]:
# Match target (tgt) and source (src) rows on qualify_id and race_id, then hand
# the DataFrame to merge_delta_data for the f1_processed.qualifying table.
# NOTE(review): merge_delta_data is defined outside this notebook; the argument
# order below (df, database, table, folder path, condition, partition column) is
# inferred from this call — confirm against its definition.
merge_condition = "tgt.qualify_id =src.qualify_id  AND tgt.race_id = src.race_id"
merge_delta_data(
    qualifying_final_df,
    "f1_processed",
    "qualifying",
    processed_folder_path,
    merge_condition,
    "race_id",
)

In [0]:
# List the contents of the qualifying output folder.
# Uses processed_folder_path (the same base path passed to merge_delta_data)
# instead of a hard-coded mount path, so the check follows the configured
# storage location.
# NOTE(review): assumes the table files live at {processed_folder_path}/qualifying — confirm.
display(dbutils.fs.ls(f"{processed_folder_path}/qualifying"))

In [0]:
#display(spark.read.parquet("/mnt/formula1dl2025practice/processed/qualifying"))

In [0]:
# End the notebook run and hand the string "Success" back as its exit value
# (e.g. to a parent notebook that invoked this one).
dbutils.notebook.exit("Success")