# Formula 1 DLT Silver Layer
This notebook defines all silver-layer Delta Live Tables (DLT) tables; each one reads from its corresponding bronze table and transforms the data.

In [None]:
import dlt
from pyspark.sql.functions import col, concat, lit, to_timestamp, current_timestamp

## Circuits

In [None]:
@dlt.table(comment="Silver layer: cleaned circuits data with renamed columns")
def silver_circuits():
    """Build the silver circuits table from ``bronze_circuits``.

    Selects eight circuit columns, renaming the camelCase and abbreviated
    ones to snake_case / full names, then appends an ``ingestion_date``
    column populated with ``current_timestamp()``.
    """
    bronze = dlt.read("bronze_circuits")

    # Columns not listed here are left out of the silver table.
    selected_columns = [
        col("circuitId").alias("circuit_id"),
        col("circuitRef").alias("circuit_ref"),
        col("name"),
        col("location"),
        col("country"),
        col("lat").alias("latitude"),
        col("lng").alias("longitude"),
        col("alt").alias("altitude"),
    ]

    cleaned = bronze.select(*selected_columns)
    return cleaned.withColumn("ingestion_date", current_timestamp())

## Races

In [None]:
@dlt.table(comment="Silver layer: cleaned races data with race_timestamp")
def silver_races():
    """Build the silver races table from ``bronze_races``.

    Derives ``race_timestamp`` by joining the ``date`` and ``time`` columns
    with a single space and parsing the result with the
    ``yyyy-MM-dd HH:mm:ss`` pattern. Selects and renames the race columns,
    then appends an ``ingestion_date`` column populated with
    ``current_timestamp()``.
    """
    bronze = dlt.read("bronze_races")

    # "<date> <time>" is the text shape the timestamp pattern below expects.
    date_and_time = concat(col("date"), lit(" "), col("time"))
    with_timestamp = bronze.withColumn(
        "race_timestamp",
        to_timestamp(date_and_time, "yyyy-MM-dd HH:mm:ss"),
    )

    selected_columns = [
        col("raceId").alias("race_id"),
        col("year").alias("race_year"),
        col("round"),
        col("circuitId").alias("circuit_id"),
        col("name"),
        col("race_timestamp"),
    ]

    cleaned = with_timestamp.select(*selected_columns)
    return cleaned.withColumn("ingestion_date", current_timestamp())

## Constructors

In [None]:
@dlt.table(comment="Silver layer: cleaned constructors data")
def silver_constructors():
    """Build the silver constructors table from ``bronze_constructors``.

    Selects four constructor columns, renaming the two camelCase ones to
    snake_case, then appends an ``ingestion_date`` column populated with
    ``current_timestamp()``.
    """
    bronze = dlt.read("bronze_constructors")

    # Columns not listed here are left out of the silver table.
    selected_columns = [
        col("constructorId").alias("constructor_id"),
        col("constructorRef").alias("constructor_ref"),
        col("name"),
        col("nationality"),
    ]

    cleaned = bronze.select(*selected_columns)
    return cleaned.withColumn("ingestion_date", current_timestamp())

## Drivers

In [None]:
@dlt.table(comment="Silver layer: cleaned drivers data with flattened name")
def silver_drivers():
    """Build the silver drivers table from ``bronze_drivers``.

    Replaces the ``name`` column with the concatenation of its ``forename``
    and ``surname`` fields separated by a single space. Selects and renames
    the driver columns, then appends an ``ingestion_date`` column populated
    with ``current_timestamp()``.
    """
    bronze = dlt.read("bronze_drivers")

    # Overwrites "name" with "<forename> <surname>" built from its nested fields.
    full_name = concat(col("name.forename"), lit(" "), col("name.surname"))
    with_flat_name = bronze.withColumn("name", full_name)

    selected_columns = [
        col("driverId").alias("driver_id"),
        col("driverRef").alias("driver_ref"),
        col("number"),
        col("code"),
        col("name"),
        col("dob"),
        col("nationality"),
    ]

    cleaned = with_flat_name.select(*selected_columns)
    return cleaned.withColumn("ingestion_date", current_timestamp())

## Results

In [None]:
@dlt.table(comment="Silver layer: cleaned results data with renamed columns")
def silver_results():
    """Build the silver results table from ``bronze_results``.

    Renames the camelCase columns to snake_case, drops ``statusId``, removes
    duplicate rows keyed on ``(race_id, driver_id)``, and finally sets an
    ``ingestion_date`` column to ``current_timestamp()``. No ordering is
    applied before deduplication, so this code does not control which of
    several duplicate rows is kept.
    """
    # (bronze column name, silver column name), applied in this order.
    renames = (
        ("resultId", "result_id"),
        ("raceId", "race_id"),
        ("driverId", "driver_id"),
        ("constructorId", "constructor_id"),
        ("positionText", "position_text"),
        ("positionOrder", "position_order"),
        ("fastestLap", "fastest_lap"),
        ("fastestLapTime", "fastest_lap_time"),
        ("fastestLapSpeed", "fastest_lap_speed"),
    )

    results = dlt.read("bronze_results")
    for bronze_name, silver_name in renames:
        results = results.withColumnRenamed(bronze_name, silver_name)
    results = results.drop("statusId")

    # Deduplicate by race_id and driver_id
    deduplicated = results.dropDuplicates(["race_id", "driver_id"])
    return deduplicated.withColumn("ingestion_date", current_timestamp())

## Pit Stops

In [None]:
@dlt.table(comment="Silver layer: cleaned pit stops data")
def silver_pit_stops():
    """Build the silver pit stops table from ``bronze_pit_stops``.

    Renames ``raceId`` and ``driverId`` to snake_case, leaves every other
    column as it is, and sets an ``ingestion_date`` column to
    ``current_timestamp()``.
    """
    pit_stops = dlt.read("bronze_pit_stops")
    pit_stops = pit_stops.withColumnRenamed("raceId", "race_id")
    pit_stops = pit_stops.withColumnRenamed("driverId", "driver_id")
    return pit_stops.withColumn("ingestion_date", current_timestamp())

## Lap Times

In [None]:
@dlt.table(comment="Silver layer: cleaned lap times data")
def silver_lap_times():
    """Build the silver lap times table from ``bronze_lap_times``.

    Renames ``raceId`` and ``driverId`` to snake_case, leaves every other
    column as it is, and sets an ``ingestion_date`` column to
    ``current_timestamp()``.
    """
    # (bronze column name, silver column name), applied in this order.
    renames = (
        ("raceId", "race_id"),
        ("driverId", "driver_id"),
    )

    lap_times = dlt.read("bronze_lap_times")
    for bronze_name, silver_name in renames:
        lap_times = lap_times.withColumnRenamed(bronze_name, silver_name)
    return lap_times.withColumn("ingestion_date", current_timestamp())

## Qualifying

In [None]:
@dlt.table(comment="Silver layer: cleaned qualifying data")
def silver_qualifying():
    """Build the silver qualifying table from ``bronze_qualifying``.

    Renames the four camelCase identifier columns to snake_case, leaves
    every other column as it is, and sets an ``ingestion_date`` column to
    ``current_timestamp()``.
    """
    # (bronze column name, silver column name), applied in this order.
    renames = (
        ("qualifyId", "qualify_id"),
        ("raceId", "race_id"),
        ("driverId", "driver_id"),
        ("constructorId", "constructor_id"),
    )

    qualifying = dlt.read("bronze_qualifying")
    for bronze_name, silver_name in renames:
        qualifying = qualifying.withColumnRenamed(bronze_name, silver_name)
    return qualifying.withColumn("ingestion_date", current_timestamp())