#### Ingest constructors.json file

In [0]:
# Notebook parameter: free-text label that is later stamped onto every row
# as the data_source column (empty string when not supplied).
dbutils.widgets.text("p_data_source", "")
v_data_source = dbutils.widgets.get("p_data_source")

In [0]:
# Notebook parameter for incremental loads: names the date sub-folder under
# the raw path that this run ingests (defaults to "2021-03-21").
dbutils.widgets.text("p_file_date", "2021-03-21")
v_file_date = dbutils.widgets.get("p_file_date")

In [0]:
# Echo the resolved data-source parameter as the cell output (sanity check).
v_data_source

'testing'

In [0]:
%run "../../Includes/Configuration"

In [0]:
%run "../../Includes/Common Functions"

##### Step 1 - Read the JSON file using the spark dataframe reader

In [0]:
# DDL-formatted schema handed to the JSON reader, one source field per line.
# Adjacent string literals are concatenated into a single DDL string.
constructors_schema = (
    "constructorId INT, "
    "constructorRef STRING, "
    "name STRING, "
    "nationality STRING, "
    "url STRING"
)

In [0]:
# Load constructors.json from the raw folder for the requested file date,
# applying the explicit schema instead of letting Spark infer one.
constructor_df = (
    spark.read
    .schema(constructors_schema)
    .json(f"{raw_folder_path}/{v_file_date}/constructors.json")
)

In [0]:
# Print the dataframe's schema to confirm it matches the DDL given to the reader.
constructor_df.printSchema()

root
 |-- constructorId: integer (nullable = true)
 |-- constructorRef: string (nullable = true)
 |-- name: string (nullable = true)
 |-- nationality: string (nullable = true)
 |-- url: string (nullable = true)



##### Step 2 - Drop unwanted columns from the dataframe

In [0]:
from pyspark.sql.functions import col

In [0]:
# The url field is not carried forward into the processed data, so drop it by name.
constructor_dropped_df = constructor_df.drop("url")

##### Step 3 - Rename columns and add ingestion date, data source and file date

In [0]:
from pyspark.sql.functions import current_timestamp,lit

In [0]:
# Rename the two camelCase source columns, then append the audit columns:
# load timestamp, the data-source label and the file date being processed.
# NOTE(review): the target names keep a capitalised suffix ("constructor_Id",
# "constructor_Ref") unlike the other snake_case columns — confirm this casing
# is what downstream consumers expect before changing it.
constructor_final_df = (
    constructor_dropped_df
    .withColumnRenamed("constructorId", "constructor_Id")
    .withColumnRenamed("constructorRef", "constructor_Ref")
    .withColumn("ingestion_date", current_timestamp())
    .withColumn("data_source", lit(v_data_source))
    .withColumn("file_date", lit(v_file_date))
)

##### Step 4 - Write the result to the `f1_processed.constructors` Delta table (stored on ADLSg2)

In [0]:
# Save the final dataframe as the Delta table f1_processed.constructors,
# using overwrite mode so existing table contents are replaced by this run.
(
    constructor_final_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("f1_processed.constructors")
)

In [0]:
# End the notebook run and hand the string "Success" back as its exit value.
dbutils.notebook.exit("Success")