### Ingest constructors.json file

##### Step 1 - Read the JSON file using the Spark DataFrame reader

In [0]:
# DDL-formatted schema for constructors.json: one "name TYPE" entry per field,
# joined into the single comma-separated string passed to the reader's .schema().
constructors_schema = ", ".join(
    [
        "constructorId INT",
        "constructorRef STRING",
        "name STRING",
        "nationality STRING",
        "url STRING",
    ]
)

In [0]:
# Load the constructors JSON file, applying the explicit schema defined in
# constructors_schema instead of relying on schema inference.
constructor_df = (
    spark.read
    .schema(constructors_schema)
    .json("dbfs:/mnt/formula1projectdl/raw/constructors.json")
)

##### Step 2 - Drop unwanted columns from the dataframe

In [0]:
from pyspark.sql.functions import col

In [0]:
# Remove the "url" column; every other column is carried over unchanged.
constructor_dropped_df = constructor_df.drop("url")

##### Step 3 - Rename columns and add ingestion date

In [0]:
from pyspark.sql.functions import current_timestamp

In [0]:
# Rename the camelCase columns to snake_case and append an ingestion_date
# column populated from current_timestamp().
constructor_final_df = (
    constructor_dropped_df
    .withColumnRenamed("constructorId", "constructor_id")
    .withColumnRenamed("constructorRef", "constructor_ref")
    .withColumn("ingestion_date", current_timestamp())
)

In [0]:
# Render the final dataframe in the notebook to check the renamed columns
# and the added ingestion_date column.
display(constructor_final_df)

constructor_id,constructor_ref,name,nationality,ingestion_date
1,mclaren,McLaren,British,2024-12-17T12:34:47.078+0000
2,bmw_sauber,BMW Sauber,German,2024-12-17T12:34:47.078+0000
3,williams,Williams,British,2024-12-17T12:34:47.078+0000
4,renault,Renault,French,2024-12-17T12:34:47.078+0000
5,toro_rosso,Toro Rosso,Italian,2024-12-17T12:34:47.078+0000
6,ferrari,Ferrari,Italian,2024-12-17T12:34:47.078+0000
7,toyota,Toyota,Japanese,2024-12-17T12:34:47.078+0000
8,super_aguri,Super Aguri,Japanese,2024-12-17T12:34:47.078+0000
9,red_bull,Red Bull,Austrian,2024-12-17T12:34:47.078+0000
10,force_india,Force India,Indian,2024-12-17T12:34:47.078+0000


##### Step 4 - Write output to Parquet file

In [0]:
# Persist the final dataframe as Parquet; "overwrite" mode replaces any data
# already present at the target path.
(
    constructor_final_df.write
    .mode("overwrite")
    .parquet("/mnt/formula1projectdl/processed/constructors")
)