In [0]:
%python
from pyspark.sql.functions import (
    col, concat_ws, trim
)


# Bronze inputs, all read from the `f1` schema. Each name below is a DataFrame
# handle on one bronze table.

# Calendar and lookup tables
races_bronze = spark.table("f1.f1_bronze_races")
seasons_bronze = spark.table("f1.f1_bronze_seasons")
status_bronze = spark.table("f1.f1_bronze_status")

# Circuits and constructors
circuits_bronze = spark.table("f1.f1_bronze_circuits")
constructors_bronze = spark.table("f1.f1_bronze_constructors")
constructor_results_bronze = spark.table("f1.f1_bronze_constructor_results")
constructor_standings_bronze = spark.table("f1.f1_bronze_constructor_standings")

# Drivers
drivers_bronze = spark.table("f1.f1_bronze_drivers")
driver_standings_bronze = spark.table("f1.f1_bronze_driver_standings")

# Per-lap / per-session detail
lap_times_bronze = spark.table("f1.f1_bronze_lap_times")
pit_stops_bronze = spark.table("f1.f1_bronze_pit_stops")
qualifying_bronze = spark.table("f1.f1_bronze_qualifying")

# Race and sprint classifications
results_bronze = spark.table("f1.f1_bronze_results")
sprint_results_bronze = spark.table("f1.f1_bronze_sprint_results")

In [0]:
%python
# Cleaned lookup ("dimension") frames: each one is reduced to a single row per
# key value so it can be joined onto the fact tables.

# Drivers: add `driverName` as "<forename> <surname>" (outer whitespace
# trimmed), then de-duplicate on driverId.
drivers_clean = drivers_bronze.withColumn(
    "driverName",
    trim(concat_ws(" ", col("forename"), col("surname"))),
).dropDuplicates(["driverId"])

# Constructors: add `constructorName` as the trimmed `name`, then de-duplicate
# on constructorId.
constructors_clean = constructors_bronze.withColumn(
    "constructorName",
    trim(col("name")),
).dropDuplicates(["constructorId"])

# The remaining lookups only need de-duplication on their key column.
circuits_clean = circuits_bronze.dropDuplicates(["circuitId"])
races_clean = races_bronze.dropDuplicates(["raceId"])
seasons_clean = seasons_bronze.dropDuplicates(["year"])
status_clean = status_bronze.dropDuplicates(["statusId"])

In [0]:
%python
# Silver race results: every bronze result row, enriched with race, driver,
# constructor, circuit and status attributes.
#
# All lookups are LEFT joins, so a result row is kept even when a lookup row
# is missing. Identifier columns are therefore read from the side that drives
# each join (`res` for driverId / constructorId, `r` for circuitId) instead of
# from the looked-up dimension alias: reading them from the dimension would
# turn the id into NULL whenever the lookup finds no match, even though the
# source row carries it. This is the same convention the lap-time and pit-stop
# cells use for `driverId`.
# NOTE(review): assumes `circuitId` reaches this join via the races table
# (results itself has no such column) - confirm against the bronze schema.
race_results_silver = (
    results_bronze.alias("res")
    .join(races_clean.alias("r"), "raceId", "left")
    .join(drivers_clean.alias("d"), "driverId", "left")
    .join(constructors_clean.alias("c"), "constructorId", "left")
    .join(circuits_clean.alias("ci"), "circuitId", "left")
    .join(status_clean.alias("s"), "statusId", "left")
    .select(
        # Result / race identity
        col("res.resultId"),
        col("res.raceId"),
        col("r.year").alias("season"),
        col("r.round"),
        col("r.name").alias("raceName"),
        # Circuit (id from the race row, descriptive fields from the lookup)
        col("r.circuitId"),
        col("ci.name").alias("circuitName"),
        col("ci.location"),
        col("ci.country"),
        # Driver (id from the result row, descriptive fields from the lookup)
        col("res.driverId"),
        col("d.driverName"),
        col("d.nationality").alias("driverNationality"),
        # Constructor (id from the result row, descriptive fields from the lookup)
        col("res.constructorId"),
        col("c.constructorName").alias("teamName"),
        col("c.nationality").alias("teamNationality"),
        # Classification and timing measures
        col("res.number"),
        col("res.grid"),
        col("res.position"),
        col("res.positionText"),
        col("res.positionOrder"),
        col("res.points"),
        col("res.laps"),
        col("res.time").alias("finishTimeStr"),
        col("res.milliseconds").alias("finishTimeMs"),
        col("res.fastestLap"),
        col("res.rank"),
        col("res.fastestLapTime"),
        col("res.fastestLapSpeed"),
        col("s.status").alias("statusDescription")
    )
)

# Replace the silver table with the freshly built frame, partitioned by season.
(
    race_results_silver
    .write
    .mode("overwrite")
    .format("delta")
    .partitionBy("season")
    .saveAsTable("f1.f1_silver_race_results")
)

In [0]:
%python
# Silver sprint results: every bronze sprint-result row, enriched with race,
# driver, constructor, circuit and status attributes.
#
# All lookups are LEFT joins, so a sprint row is kept even when a lookup row
# is missing. Identifier columns are read from the side that drives each join
# (`res` for driverId / constructorId, `r` for circuitId) rather than from the
# dimension alias, so an unmatched lookup cannot blank out an id that the
# source row carries. The lap-time and pit-stop cells follow the same
# convention for `driverId`.
# NOTE(review): assumes `circuitId` reaches this join via the races table
# (sprint results has no such column) - confirm against the bronze schema.
sprint_results_silver = (
    sprint_results_bronze.alias("res")
    .join(races_clean.alias("r"), "raceId", "left")
    .join(drivers_clean.alias("d"), "driverId", "left")
    .join(constructors_clean.alias("c"), "constructorId", "left")
    .join(circuits_clean.alias("ci"), "circuitId", "left")
    .join(status_clean.alias("s"), "statusId", "left")
    .select(
        # Result / race identity
        col("res.resultId"),
        col("res.raceId"),
        col("r.year").alias("season"),
        col("r.round"),
        col("r.name").alias("raceName"),
        # Circuit (id from the race row, descriptive fields from the lookup)
        col("r.circuitId"),
        col("ci.name").alias("circuitName"),
        col("ci.location"),
        col("ci.country"),
        # Driver (id from the result row, descriptive fields from the lookup)
        col("res.driverId"),
        col("d.driverName"),
        col("d.nationality").alias("driverNationality"),
        # Constructor (id from the result row, descriptive fields from the lookup)
        col("res.constructorId"),
        col("c.constructorName").alias("teamName"),
        col("c.nationality").alias("teamNationality"),
        # Classification and timing measures
        col("res.number"),
        col("res.grid"),
        col("res.position"),
        col("res.positionText"),
        col("res.positionOrder"),
        col("res.points"),
        col("res.laps"),
        col("res.time").alias("finishTimeStr"),
        col("res.milliseconds").alias("finishTimeMs"),
        col("res.fastestLap"),
        col("res.fastestLapTime"),
        col("s.status").alias("statusDescription")
    )
)

# Replace the silver table with the freshly built frame, partitioned by season.
(
    sprint_results_silver
    .write
    .mode("overwrite")
    .format("delta")
    .partitionBy("season")
    .saveAsTable("f1.f1_silver_sprint_results")
)

In [0]:
%python
# Silver lap times: each bronze lap-time row with the race's season and the
# driver's display name attached. Both lookups are left joins, so lap rows
# without a matching race or driver are kept.
lap_times_silver = (
    lap_times_bronze.alias("lt")
    .join(races_clean.alias("r"), on="raceId", how="left")
    .join(drivers_clean.alias("d"), on="driverId", how="left")
    .select(
        "lt.raceId",
        col("r.year").alias("season"),
        "lt.driverId",
        "d.driverName",
        "lt.lap",
        col("lt.position").alias("lapPosition"),
        col("lt.time").alias("lapTimeStr"),
        col("lt.milliseconds").alias("lapTimeMs"),
    )
)

# Overwrite the Delta table, partitioned by season and then raceId.
# NOTE(review): this is the finest partitioning used in the notebook - confirm
# that a partition per race is the intended layout.
(
    lap_times_silver.write
    .format("delta")
    .mode("overwrite")
    .partitionBy("season", "raceId")
    .saveAsTable("f1.f1_silver_lap_times")
)

In [0]:
%python
# Silver pit stops: each bronze pit-stop row with the race's season and the
# driver's display name attached. Both lookups are left joins, so pit-stop rows
# without a matching race or driver are kept.
pit_stops_silver = (
    pit_stops_bronze.alias("ps")
    .join(races_clean.alias("r"), on="raceId", how="left")
    .join(drivers_clean.alias("d"), on="driverId", how="left")
    .select(
        "ps.raceId",
        col("r.year").alias("season"),
        "ps.driverId",
        "d.driverName",
        "ps.stop",
        "ps.lap",
        col("ps.time").alias("pitTimeStr"),
        "ps.duration",
        col("ps.milliseconds").alias("pitTimeMs"),
    )
)

# Overwrite the Delta table, partitioned by season and then raceId.
(
    pit_stops_silver.write
    .format("delta")
    .mode("overwrite")
    .partitionBy("season", "raceId")
    .saveAsTable("f1.f1_silver_pit_stops")
)


In [0]:
%python
# Silver qualifying: every bronze qualifying row, enriched with race, driver
# and constructor attributes.
#
# The lookups are LEFT joins, so a qualifying row survives a missing lookup
# row. `driverId` and `constructorId` are read from the qualifying row (`q`)
# rather than from the dimension aliases, so an unmatched lookup cannot turn
# an id the source row carries into NULL. The lap-time and pit-stop cells use
# the same convention.
qualifying_silver = (
    qualifying_bronze.alias("q")
    .join(races_clean.alias("r"), "raceId", "left")
    .join(drivers_clean.alias("d"), "driverId", "left")
    .join(constructors_clean.alias("c"), "constructorId", "left")
    .select(
        col("q.qualifyId"),
        col("q.raceId"),
        col("r.year").alias("season"),
        col("r.round"),
        col("r.name").alias("raceName"),
        # Ids come from the fact row; names come from the lookups.
        col("q.driverId"),
        col("d.driverName"),
        col("q.constructorId"),
        col("c.constructorName").alias("teamName"),
        col("q.number"),
        col("q.position"),
        col("q.q1"),
        col("q.q2"),
        col("q.q3")
    )
)

# Replace the silver table with the freshly built frame, partitioned by season.
(
    qualifying_silver
    .write
    .mode("overwrite")
    .format("delta")
    .partitionBy("season")
    .saveAsTable("f1.f1_silver_qualifying")
)

In [0]:
%python
# Silver driver standings: every bronze driver-standings row, enriched with
# race attributes and the driver's display name.
#
# The lookups are LEFT joins, so a standings row survives a missing lookup
# row. `driverId` is read from the standings row (`ds`) rather than from the
# driver lookup, so an unmatched driver cannot turn the id into NULL. The
# lap-time and pit-stop cells use the same convention.
driver_standings_silver = (
    driver_standings_bronze.alias("ds")
    .join(races_clean.alias("r"), "raceId", "left")
    .join(drivers_clean.alias("d"), "driverId", "left")
    .select(
        col("ds.driverStandingsId"),
        col("ds.raceId"),
        col("r.year").alias("season"),
        col("r.round"),
        col("r.name").alias("raceName"),
        # Id comes from the fact row; the name comes from the lookup.
        col("ds.driverId"),
        col("d.driverName"),
        col("ds.points"),
        col("ds.position"),
        col("ds.positionText"),
        col("ds.wins")
    )
)

# Replace the silver table with the freshly built frame, partitioned by season.
(
    driver_standings_silver
    .write
    .mode("overwrite")
    .format("delta")
    .partitionBy("season")
    .saveAsTable("f1.f1_silver_driver_standings")
)

In [0]:
%python
# Silver constructor standings: every bronze constructor-standings row,
# enriched with race attributes and the constructor's name.
#
# The lookups are LEFT joins, so a standings row survives a missing lookup
# row. `constructorId` is read from the standings row (`cs`) rather than from
# the constructor lookup, so an unmatched constructor cannot turn the id into
# NULL. The lap-time and pit-stop cells use the same fact-side convention for
# their ids.
constructor_standings_silver = (
    constructor_standings_bronze.alias("cs")
    .join(races_clean.alias("r"), "raceId", "left")
    .join(constructors_clean.alias("c"), "constructorId", "left")
    .select(
        col("cs.constructorStandingsId"),
        col("cs.raceId"),
        col("r.year").alias("season"),
        col("r.round"),
        col("r.name").alias("raceName"),
        # Id comes from the fact row; the name comes from the lookup.
        col("cs.constructorId"),
        col("c.constructorName").alias("teamName"),
        col("cs.points"),
        col("cs.position"),
        col("cs.positionText"),
        col("cs.wins")
    )
)

# Replace the silver table with the freshly built frame, partitioned by season.
(
    constructor_standings_silver
    .write
    .mode("overwrite")
    .format("delta")
    .partitionBy("season")
    .saveAsTable("f1.f1_silver_constructor_standings")
)


In [0]:
%python
# Publish the de-duplicated seasons frame as an unpartitioned Delta table,
# replacing any existing contents.
seasons_clean.write.format("delta").mode("overwrite").saveAsTable("f1.f1_silver_seasons")