### Ingest constructors.json file

In [0]:
# Declare the "p_data_source" text widget. Its default is an empty string, so a
# run that does not supply the parameter gets "" as the data source.
dbutils.widgets.text("p_data_source", "")

# Read the widget's current value; it is written into the data_source column
# of the output further down.
v_data_source = dbutils.widgets.get("p_data_source")

In [0]:
# Declare the "p_file_date" text widget, defaulting to "2021-03-21" when the
# caller does not supply a value.
dbutils.widgets.text("p_file_date", "2021-03-21")

# Read the widget's current value; it selects the raw sub-folder to read from
# and is also written into the file_date column of the output.
v_file_date = dbutils.widgets.get("p_file_date")

In [0]:
# Run the shared configuration notebook so the names it defines are available here.
# NOTE(review): raw_folder_path is used below but not defined in this notebook;
# presumably it comes from ../includes/configuration — confirm.
%run "../includes/configuration"

In [0]:
# Run the shared helper notebook so its functions can be called from this notebook.
# NOTE(review): add_ingestion_date is called below but not defined in this notebook;
# presumably it comes from ../includes/common_functions — confirm.
%run "../includes/common_functions"

##### Step 1 - Read the JSON file using the spark dataframe reader

In [0]:
# DDL-style schema for constructors.json, one column per line for readability.
# The adjacent literals concatenate into a single comma-separated DDL string.
constructors_schema = (
    "constructorId INT, "
    "constructorRef STRING, "
    "name STRING, "
    "nationality STRING, "
    "url STRING"
)

In [0]:
# Load constructors.json from the raw folder for the requested file date,
# applying the explicit schema instead of relying on schema inference.
constructor_df = (
    spark.read
    .schema(constructors_schema)
    .json(f"{raw_folder_path}/{v_file_date}/constructors.json")
)

##### Step 2 - Drop unwanted columns from the dataframe

In [0]:
# Import the col function from pyspark.sql.functions to reference DataFrame columns
from pyspark.sql.functions import col

In [0]:
# Discard the 'url' column; every other column is carried forward unchanged.
constructor_dropped_df = constructor_df.drop(col("url"))

##### Step 3 - Rename columns and add ingestion date

In [0]:
# Import the lit function from pyspark.sql.functions to add constant values to DataFrame columns
from pyspark.sql.functions import lit

In [0]:
# Switch the id/ref columns to snake_case and stamp every row with the
# data source and file date supplied through the notebook widgets.
constructor_renamed_df = (
    constructor_dropped_df
    .withColumnRenamed("constructorId", "constructor_id")
    .withColumnRenamed("constructorRef", "constructor_ref")
    .withColumn("data_source", lit(v_data_source))
    .withColumn("file_date", lit(v_file_date))
)

In [0]:
# Pass the renamed DataFrame through add_ingestion_date and keep the result as the
# final frame to be written.
# NOTE(review): add_ingestion_date is not defined in this notebook; presumably it
# appends an ingestion timestamp column — confirm in ../includes/common_functions.
constructor_final_df = add_ingestion_date(constructor_renamed_df)

##### Step 4 - Write output to a Delta table

In [0]:
# Persist the final DataFrame as the Delta table f1_processed.constructors,
# replacing whatever the table held before (overwrite mode).
(
    constructor_final_df.write
    .mode("overwrite")
    .format("delta")
    .saveAsTable("f1_processed.constructors")
)

In [0]:
%sql
-- Preview the table that was just written to confirm the load
SELECT *
FROM f1_processed.constructors;

In [0]:
# End the notebook run and hand the string "Success" back as its exit value,
# so a calling notebook or job can check the outcome.
dbutils.notebook.exit("Success")