In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType


# Explicit schema for results.json, built one column at a time as
# (name, Spark type, nullable). Column order matches the original declaration.
results_schema = (
    StructType()
    .add("resultId", IntegerType(), False)
    .add("raceId", IntegerType(), False)
    .add("driverId", IntegerType(), False)
    .add("constructorId", IntegerType(), False)
    .add("number", IntegerType(), True)
    .add("grid", IntegerType(), False)
    .add("position", IntegerType(), True)
    .add("positionText", StringType(), False)
    .add("positionOrder", IntegerType(), False)
    .add("points", FloatType(), False)
    .add("laps", IntegerType(), False)
    .add("time", StringType(), True)
    .add("milliseconds", IntegerType(), True)
    .add("fastestLap", IntegerType(), True)
    .add("rank", IntegerType(), True)
    .add("fastestLapTime", StringType(), True)
    .add("fastestLapSpeed", StringType(), True)
    .add("statusId", StringType(), False)
)

# Load the JSON file from the bronze container using the declared schema
# instead of relying on schema inference.
results_df = (
    spark.read
    .schema(results_schema)
    .json("abfss://bronze@mjuanworkshopetl.dfs.core.windows.net/results.json")
)

results_df.display()

In [0]:
# Print the column names, types and nullability of results_df so the loaded
# frame can be checked against the schema declared for the read.
results_df.printSchema()

In [0]:
from pyspark.sql.functions import col, lit, current_timestamp

# Source (camelCase) column name -> target (snake_case) column name.
# Insertion order is the order in which the renames are applied.
_results_column_renames = {
    'resultId': 'result_id',
    'raceId': 'race_id',
    'driverId': 'driver_id',
    'constructorId': 'constructor_id',
    'positionText': 'position_text',
    'positionOrder': 'position_order',
    'fastestLap': 'fastest_lap',
    'fastestLapTime': 'fastest_lap_time',
    'fastestLapSpeed': 'fastest_lap_speed',
}

# Start from the raw frame each time so re-running this cell is idempotent.
results_transf_df = results_df
for _source_name, _target_name in _results_column_renames.items():
    results_transf_df = results_transf_df.withColumnRenamed(_source_name, _target_name)

# Stamp each row with the load time, then discard the statusId column.
results_transf_df = (
    results_transf_df
    .withColumn('ingestion_date', current_timestamp())
    .drop('statusId')
)

results_transf_df.display()

In [0]:
# Save the transformed results as a table partitioned by race_id.
# mode('overwrite') replaces any existing contents; the "overwriteSchema"
# option is passed so the overwrite may also replace the stored schema
# (NOTE(review): this is a Delta Lake writer option - confirm the table format).
(
    results_transf_df.write
    .mode('overwrite')
    .option("overwriteSchema", "true")
    .partitionBy('race_id')
    .saveAsTable('formula1_project.silver.results_transformed')
)