### Ingest results.json file

##### Step 1 - Read the JSON file into a pandas DataFrame

In [None]:
from pandas import read_csv,read_json
from lib import configuration
from lib import common_functions

In [None]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType

In [None]:
# Bind the Spark session returned by the project's common_functions helper.
spark = common_functions.get_spark_session()

In [None]:
# Explicit Spark schema for results.json. Only resultId is declared
# non-nullable; every other column may be null.
# NOTE(review): statusId is typed as a string although the other *Id columns
# are integers -- confirm this is intentional.
result_fields = [
    StructField("resultId", IntegerType(), False),
    StructField("raceId", IntegerType(), True),
    StructField("driverId", IntegerType(), True),
    StructField("constructorId", IntegerType(), True),
    StructField("number", IntegerType(), True),
    StructField("grid", IntegerType(), True),
    StructField("position", IntegerType(), True),
    StructField("positionText", StringType(), True),
    StructField("positionOrder", IntegerType(), True),
    StructField("points", FloatType(), True),
    StructField("laps", IntegerType(), True),
    StructField("time", StringType(), True),
    StructField("milliseconds", IntegerType(), True),
    StructField("fastestLap", IntegerType(), True),
    StructField("rank", IntegerType(), True),
    StructField("fastestLapTime", StringType(), True),
    StructField("fastestLapSpeed", FloatType(), True),
    StructField("statusId", StringType(), True),
]
results_schema = StructType(fields=result_fields)

In [None]:
# Load the line-delimited results.json from the bronze folder with pandas.
# A Spark read of the same file used to precede this statement, but its
# result was overwritten immediately and never used; every later cell relies
# on pandas-only DataFrame APIs, so only the pandas load is kept.
# NOTE: results_schema is not applied here -- pandas infers the column dtypes.
results_df = read_json(f'{configuration.bronze_folder_path}/results.json', lines=True)
results_df

##### Step 2 - Drop unwanted columns, add new columns and rename columns

In [None]:
# Discard the statusId column, then add two columns to every row: the value
# returned by common_functions.get_ingestion_date() and the configured
# data source.
results_df = results_df.drop(labels=["statusId"], axis="columns")
results_df["ingestion_date"] = common_functions.get_ingestion_date()
results_df["data_source"] = configuration.v_data_source
results_df

In [None]:
# Source (camelCase) column name -> snake_case name used downstream.
# Columns not listed here keep their current names.
column_renames = {
    "resultId": "result_id",
    "raceId": "race_id",
    "driverId": "driver_id",
    "constructorId": "constructor_id",
    "positionText": "position_text",
    "positionOrder": "position_order",
    "fastestLap": "fastest_lap",
    "fastestLapTime": "fastest_lap_time",
    "fastestLapSpeed": "fastest_lap_speed",
}
results_final_df = results_df.rename(columns=column_renames)
results_final_df

##### Step 3 - Write the output to the silver folder in CSV format

In [None]:
# Persist the renamed results as CSV in the silver folder; index=False keeps
# the pandas row index out of the file.
silver_results_path = f"{configuration.silver_folder_path}/results.csv"
results_final_df.to_csv(silver_results_path, index=False)

In [None]:
# Load results.csv from the silver folder and display it.
# NOTE(review): despite its name, df_parquet holds data read from a CSV file;
# the name is kept in case other cells reference it.
silver_results_csv = f'{configuration.silver_folder_path}/results.csv'
df_parquet = read_csv(silver_results_csv)
df_parquet