### Ingest drivers.json file

In [0]:
# Declare the text widget "p_data_source" with an empty default value,
# then read whatever value the widget currently holds into v_data_source.
dbutils.widgets.text("p_data_source", "")
v_data_source = dbutils.widgets.get("p_data_source")

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

##### Step 1 - Read the JSON file using the Spark DataFrame reader API

In [0]:
from pyspark.sql.types import StructType,StructField,StringType,DateType,IntegerType

In [0]:
# Schema for the nested "name" object: two nullable string fields.
name_schema = StructType([
    StructField("forename", StringType(), nullable=True),
    StructField("surname", StringType(), nullable=True),
])

In [0]:
# Explicit schema for drivers.json. "name" is a nested struct described by
# name_schema; "driverId" is the only field declared non-nullable.
drivers_schema = StructType([
    StructField("driverId", IntegerType(), nullable=False),
    StructField("driverRef", StringType(), nullable=True),
    StructField("number", IntegerType(), nullable=True),
    StructField("code", StringType(), nullable=True),
    StructField("name", name_schema),
    StructField("dob", DateType(), nullable=True),
    StructField("nationality", StringType(), nullable=True),
    StructField("url", StringType(), nullable=True),
])

In [0]:
# Load drivers.json with the explicit schema instead of letting Spark infer one.
# raw_folder_path is not defined in this notebook's visible cells — presumably
# it comes from the "../includes/configuration" notebook run earlier (confirm).
drivers_df = (
    spark.read
    .schema(drivers_schema)
    .json(f"{raw_folder_path}/drivers.json")
)

##### Step 2 - Rename columns and add new columns

In [0]:
from pyspark.sql.functions import current_timestamp,col,lit,concat

In [0]:
# Rename the camelCase id/ref columns to snake_case, stamp each row with the
# ingestion time, flatten the nested name struct into a single
# "<forename> <surname>" string (overwriting the "name" column), and tag each
# row with the data source value read from the widget.
drivers_with_columns_df = (
    drivers_df
    .withColumnRenamed("driverId", "driver_id")
    .withColumnRenamed("driverRef", "driver_ref")
    .withColumn("ingestion_date", current_timestamp())
    .withColumn("name", concat(col("name.forename"), lit(" "), col("name.surname")))
    .withColumn("data_source", lit(v_data_source))
)

##### Step 3 - Drop the unwanted columns

In [0]:
# Remove the "url" column; every other column is carried through unchanged.
drivers_final_df = drivers_with_columns_df.drop("url")

##### Step 4 - Write the output to the processed container in Parquet format

In [0]:
# Persist the final DataFrame as Parquet under "<processed_folder_path>/drivers".
# "overwrite" mode replaces any existing data at that path.
(
    drivers_final_df.write
    .mode("overwrite")
    .parquet(f"{processed_folder_path}/drivers")
)

In [0]:
# End the notebook run and hand the string "Success" back as its exit value.
dbutils.notebook.exit("Success")