#### Step 1 - Ingest the multiple multi-line JSON files

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

In [0]:
#Let's import some packages
from pyspark.sql.types import *
from pyspark.sql.functions import *

# Explicit schema for the qualifying JSON files, applied at read time instead
# of letting Spark infer the column types.
# NOTE: q1/q2/q3 are kept as strings; the displayed sample shows lap times such
# as "1:26.572" and a literal "\N" where a session time is absent.
qualifying_schema = StructType(fields=[
    StructField("qualifyId", IntegerType(), False),
    StructField("raceId", IntegerType(), True),
    StructField("driverId", IntegerType(), True),
    StructField("constructorId", IntegerType(), True),
    StructField("number", IntegerType(), True),
    StructField("position", IntegerType(), True),
    StructField("q1", StringType(), True),
    StructField("q2", StringType(), True),
    StructField("q3", StringType(), True),
])

# multiLine=True lets Spark parse JSON records that span more than one line;
# the glob pattern picks up every qualifying_split_*.json file in the folder.
qualifying_df = (
    spark.read
    .schema(qualifying_schema)
    .option("multiLine", True)
    .json(f"{raw_folder_path}/qualifying/qualifying_split_*.json")
)

In [0]:
# Preview the ingested DataFrame to check the rows were parsed into the expected columns.
display(qualifying_df)

qualifyId,raceId,driverId,constructorId,number,position,q1,q2,q3
1,18,1,1,22,1,1:26.572,1:25.187,1:26.714
2,18,9,2,4,2,1:26.103,1:25.315,1:26.869
3,18,5,1,23,3,1:25.664,1:25.452,1:27.079
4,18,13,6,2,4,1:25.994,1:25.691,1:27.178
5,18,2,2,3,5,1:25.960,1:25.518,1:27.236
6,18,15,7,11,6,1:26.427,1:26.101,1:28.527
7,18,3,3,7,7,1:26.295,1:26.059,1:28.687
8,18,14,9,9,8,1:26.381,1:26.063,1:29.041
9,18,10,7,12,9,1:26.919,1:26.164,1:29.593
10,18,20,5,15,10,1:26.702,1:25.842,\N


#### Step 2 - Rename columns and add ingestion_date

In [0]:
# add_ingestion_date comes from ../includes/common_functions (per this step's
# title it adds the ingestion_date column); afterwards the camelCase source
# columns are renamed to snake_case.
final_df = (
    add_ingestion_date(qualifying_df)
    .withColumnRenamed("qualifyId", "qualify_id")
    .withColumnRenamed("driverId", "driver_id")
    .withColumnRenamed("raceId", "race_id")
    # Must match the schema's "constructorId" exactly: withColumnRenamed is a
    # silent no-op for an unknown column, so a case mismatch would leave the
    # column unrenamed whenever case-sensitive resolution is enabled.
    .withColumnRenamed("constructorId", "constructor_id")
)

#### Step 3 - Write data to datalake

In [0]:
# Persist the result as Parquet files under the processed folder, replacing
# whatever output already exists at that path.
final_df.write.parquet(f"{processed_folder_path}/qualifying", mode="overwrite")

In [0]:
# Save the result as the Parquet-backed table f1_processed.qualifyingSQL,
# overwriting the table if it already exists.
final_df.write.saveAsTable("f1_processed.qualifyingSQL", format="parquet", mode="overwrite")

#### Step 4 - Send exit statement for any dbutils.notebook.run cells

In [0]:
# End the notebook run here and hand the string "Success" back as its exit value.
dbutils.notebook.exit("Success")

Success