## Ingest qualifying.json file
#### Step 1 - Read the JSON file using the Spark DataFrame reader API

In [0]:
# Notebook parameter: declare the text widget "p_data_source" (default: empty string)
# and read its current value. The value is later written into the "data_source" column.
dbutils.widgets.text("p_data_source","")
v_data_source = dbutils.widgets.get("p_data_source")

In [0]:
# Notebook parameter: declare the text widget "p_file_date" (default "2021-03-21")
# and read its current value. The value selects the dated sub-folder read from the raw path.
dbutils.widgets.text("p_file_date","2021-03-21")
v_file_date = dbutils.widgets.get("p_file_date")

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

In [0]:
from pyspark.sql.types import StructType, StringType, IntegerType, DoubleType, StructField, TimestampType, DateType, FloatType
from pyspark.sql.functions import col, struct,current_timestamp , concat, lit

In [0]:
# Explicit schema for the qualifying records.
# Each tuple is (source column name, Spark type, nullable); only qualifyId is declared
# non-nullable. q1/q2/q3 are kept as strings.
qualifying_schema = StructType(
    fields=[
        StructField(column_name, column_type, is_nullable)
        for column_name, column_type, is_nullable in [
            ("qualifyId", IntegerType(), False),
            ("raceId", IntegerType(), True),
            ("driverId", IntegerType(), True),
            ("constructorId", IntegerType(), True),
            ("number", IntegerType(), True),
            ("position", IntegerType(), True),
            ("q1", StringType(), True),
            ("q2", StringType(), True),
            ("q3", StringType(), True),
        ]
    ]
)

In [0]:
# Read the qualifying JSON input for the selected file date.
# The explicit schema is applied instead of inference, and multiLine is enabled on the reader.
qualifying_df = (
    spark.read
    .schema(qualifying_schema)
    .option("multiLine", True)
    .json(f"{raw_folder_path}/{v_file_date}/qualifying")
)

In [0]:
# Visual check of the raw rows as read with the explicit schema.
display(qualifying_df)

qualifyId,raceId,driverId,constructorId,number,position,q1,q2,q3
8755,1053,1,131,44,1,1:14.823,1:14.817,1:14.411
8756,1053,815,9,11,2,1:15.395,1:14.716,1:14.446
8757,1053,830,9,33,3,1:15.109,1:14.884,1:14.498
8758,1053,844,6,16,4,1:15.413,1:14.808,1:14.740
8759,1053,842,213,10,5,1:15.548,1:14.927,1:14.790
8760,1053,817,1,3,6,1:15.669,1:15.033,1:14.826
8761,1053,846,1,4,7,1:15.009,1:14.718,1:14.875
8762,1053,822,131,77,8,1:14.672,1:14.905,1:14.898
8763,1053,839,214,31,9,1:15.385,1:15.117,1:15.210
8764,1053,840,117,18,10,1:15.522,1:15.138,\N


#### Step 2 - Rename columns and add new columns

In [0]:
# Convert the camelCase id columns to snake_case and tag every row with the
# data source passed in through the p_data_source widget.
qualifying_final_df = (
    qualifying_df
    .withColumnRenamed("qualifyId", "qualify_id")
    .withColumnRenamed("raceId", "race_id")
    .withColumnRenamed("driverId", "driver_id")
    .withColumnRenamed("constructorId", "constructor_id")
    .withColumn("data_source", lit(v_data_source))
)
                                           

In [0]:
# add_ingestion_date is not defined in this notebook — presumably it comes from the
# included common_functions notebook (TODO confirm). The output displayed afterwards
# shows an additional "ingestion_date" timestamp column.
qualifying_final_df = add_ingestion_date(qualifying_final_df)

In [0]:
# Visual check of the renamed columns plus the added data_source / ingestion_date columns.
display(qualifying_final_df)

qualify_id,race_id,driver_id,constructor_id,number,position,q1,q2,q3,data_source,ingestion_date
8755,1053,1,131,44,1,1:14.823,1:14.817,1:14.411,,2025-04-10T22:39:59.607+0000
8756,1053,815,9,11,2,1:15.395,1:14.716,1:14.446,,2025-04-10T22:39:59.607+0000
8757,1053,830,9,33,3,1:15.109,1:14.884,1:14.498,,2025-04-10T22:39:59.607+0000
8758,1053,844,6,16,4,1:15.413,1:14.808,1:14.740,,2025-04-10T22:39:59.607+0000
8759,1053,842,213,10,5,1:15.548,1:14.927,1:14.790,,2025-04-10T22:39:59.607+0000
8760,1053,817,1,3,6,1:15.669,1:15.033,1:14.826,,2025-04-10T22:39:59.607+0000
8761,1053,846,1,4,7,1:15.009,1:14.718,1:14.875,,2025-04-10T22:39:59.607+0000
8762,1053,822,131,77,8,1:14.672,1:14.905,1:14.898,,2025-04-10T22:39:59.607+0000
8763,1053,839,214,31,9,1:15.385,1:15.117,1:15.210,,2025-04-10T22:39:59.607+0000
8764,1053,840,117,18,10,1:15.522,1:15.138,\N,,2025-04-10T22:39:59.607+0000



#### Step 3 - Write output to the processed container in Delta format

In [0]:
#qualifying_final_df.write.mode("overwrite").parquet(f"{processed_folder_path}/qualifying")
#qualifying_final_df.write.mode("overwrite").format("parquet").saveAsTable("f1_processed.qualifying")

In [0]:
#overwrite_partition(qualifying_final_df, "f1_processed", "qualifying", "race_id")

In [0]:
# Match target (tgt) and source (src) rows on qualify_id and race_id.
merge_condition = "tgt.qualify_id =src.qualify_id  AND tgt.race_id = src.race_id"

# merge_delta_data is defined outside this notebook (presumably in common_functions —
# TODO confirm its parameter names and order). Arguments are passed positionally,
# exactly as before.
merge_delta_data(
    qualifying_final_df,
    "f1_processed",
    "qualifying",
    processed_folder_path,
    merge_condition,
    "race_id",
)

In [0]:
# List the contents of the processed qualifying table directory.
# The location is built from processed_folder_path (the same variable passed to
# merge_delta_data) instead of a hard-coded absolute mount path, so the check follows
# the configured storage location.
# NOTE(review): assumes the table is written under {processed_folder_path}/qualifying — confirm
# against merge_delta_data.
display(dbutils.fs.ls(f"{processed_folder_path}/qualifying"))

path,name,size,modificationTime
dbfs:/mnt/formula1dl2025practice/processed/qualifying/_delta_log/,_delta_log/,0,1744324727000
dbfs:/mnt/formula1dl2025practice/processed/qualifying/race_id=1/,race_id=1/,0,1744324728000
dbfs:/mnt/formula1dl2025practice/processed/qualifying/race_id=10/,race_id=10/,0,1744324729000
dbfs:/mnt/formula1dl2025practice/processed/qualifying/race_id=100/,race_id=100/,0,1744324737000
dbfs:/mnt/formula1dl2025practice/processed/qualifying/race_id=1000/,race_id=1000/,0,1744324741000
dbfs:/mnt/formula1dl2025practice/processed/qualifying/race_id=1001/,race_id=1001/,0,1744324741000
dbfs:/mnt/formula1dl2025practice/processed/qualifying/race_id=1002/,race_id=1002/,0,1744324741000
dbfs:/mnt/formula1dl2025practice/processed/qualifying/race_id=1003/,race_id=1003/,0,1744324741000
dbfs:/mnt/formula1dl2025practice/processed/qualifying/race_id=1004/,race_id=1004/,0,1744324741000
dbfs:/mnt/formula1dl2025practice/processed/qualifying/race_id=1005/,race_id=1005/,0,1744324741000


In [0]:
#display(spark.read.parquet("/mnt/formula1dl2025practice/processed/qualifying"))

In [0]:
# End the notebook run and hand the string "Success" back as its exit value
# (presumably consumed by a calling notebook or job — verify against the caller).
dbutils.notebook.exit("Success")