In [0]:
%run "../includes/configurations"

In [0]:
%run "../includes/common_functions"

In [0]:
# Declare a text widget named "p_data_source" (empty default) and read its
# current value; the value is later written into the data_source column.
dbutils.widgets.text("p_data_source", "")
v_data_source = dbutils.widgets.get("p_data_source")

In [0]:
from pyspark.sql.types import IntegerType, StringType, StructType, StructField

In [0]:
# Explicit schema for the lap_times CSV input, built from (column name, Spark type)
# pairs; every column is declared nullable.
lap_times_schema = StructType(fields=[
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("raceId", IntegerType()),
        ("driverId", IntegerType()),
        ("lap", IntegerType()),
        ("position", IntegerType()),
        ("time", StringType()),
        ("milliseconds", IntegerType()),
    )
])

In [0]:
# Read everything under the raw lap_times folder as CSV, applying the explicit
# schema instead of letting Spark infer column types.
lap_times_df = spark.read.csv(
    f"{raw_folder_path}/lap_times",
    schema=lap_times_schema,
)

In [0]:
# Inspection only: show the column names of the loaded DataFrame.
lap_times_df.columns

In [0]:
# Inspection only: render the loaded rows to eyeball the parsed values.
display(lap_times_df)

raceId,driverId,lap,position,time,milliseconds
841,20,1,1,1:38.109,98109
841,20,2,1,1:33.006,93006
841,20,3,1,1:32.713,92713
841,20,4,1,1:32.803,92803
841,20,5,1,1:32.342,92342
841,20,6,1,1:32.605,92605
841,20,7,1,1:32.502,92502
841,20,8,1,1:32.537,92537
841,20,9,1,1:33.240,93240
841,20,10,1,1:32.572,92572


In [0]:
# Inspection only: total number of rows read from the raw lap_times folder.
lap_times_df.count()

#### Rename and add required columns

In [0]:
from pyspark.sql.functions import current_timestamp, lit

In [0]:
# Snake-case the two id columns, then tag every row with the data source
# supplied through the notebook widget and the time of ingestion.
final_df = (
    lap_times_df
    .withColumnRenamed("raceId", "race_id")
    .withColumnRenamed("driverId", "driver_id")
    .withColumn("data_source", lit(v_data_source))
    .withColumn("ingestion_date", current_timestamp())
)

#### Write data to the processed container in Parquet format

In [0]:
# Persist the result as Parquet under the processed folder, replacing any
# output already present at that path.
final_df.write.parquet(f"{processed_folder_path}/lap_times", mode="overwrite")

In [0]:
# List the files just written. The path is built from processed_folder_path
# (the same variable the write uses) rather than a hard-coded mount point, so
# this check always inspects the location the notebook actually wrote to.
display(dbutils.fs.ls(f"{processed_folder_path}/lap_times"))

path,name,size
dbfs:/mnt/formula1shashankdl/processed/lap_times/_committed_5906468704622910035,_committed_5906468704622910035,1023
dbfs:/mnt/formula1shashankdl/processed/lap_times/_committed_7511489675748955753,_committed_7511489675748955753,1029
dbfs:/mnt/formula1shashankdl/processed/lap_times/_committed_8444940302728589396,_committed_8444940302728589396,1023
dbfs:/mnt/formula1shashankdl/processed/lap_times/_started_5906468704622910035,_started_5906468704622910035,0
dbfs:/mnt/formula1shashankdl/processed/lap_times/_started_8444940302728589396,_started_8444940302728589396,0
dbfs:/mnt/formula1shashankdl/processed/lap_times/part-00000-tid-5906468704622910035-ba05e80d-3345-4b8f-adf0-ad944f11c277-153-1-c000.snappy.parquet,part-00000-tid-5906468704622910035-ba05e80d-3345-4b8f-adf0-ad944f11c277-153-1-c000.snappy.parquet,850491
dbfs:/mnt/formula1shashankdl/processed/lap_times/part-00001-tid-5906468704622910035-ba05e80d-3345-4b8f-adf0-ad944f11c277-154-1-c000.snappy.parquet,part-00001-tid-5906468704622910035-ba05e80d-3345-4b8f-adf0-ad944f11c277-154-1-c000.snappy.parquet,827887
dbfs:/mnt/formula1shashankdl/processed/lap_times/part-00002-tid-5906468704622910035-ba05e80d-3345-4b8f-adf0-ad944f11c277-155-1-c000.snappy.parquet,part-00002-tid-5906468704622910035-ba05e80d-3345-4b8f-adf0-ad944f11c277-155-1-c000.snappy.parquet,862807
dbfs:/mnt/formula1shashankdl/processed/lap_times/part-00003-tid-5906468704622910035-ba05e80d-3345-4b8f-adf0-ad944f11c277-156-1-c000.snappy.parquet,part-00003-tid-5906468704622910035-ba05e80d-3345-4b8f-adf0-ad944f11c277-156-1-c000.snappy.parquet,780397
dbfs:/mnt/formula1shashankdl/processed/lap_times/part-00004-tid-5906468704622910035-ba05e80d-3345-4b8f-adf0-ad944f11c277-157-1-c000.snappy.parquet,part-00004-tid-5906468704622910035-ba05e80d-3345-4b8f-adf0-ad944f11c277-157-1-c000.snappy.parquet,806852
