## Produce team ranking summary

In [None]:
# Runtime parameters (a workflow job can set these). The second argument to
# getParameter is presumably the fallback value -- confirm against oidlUtils.
target_type = oidlUtils.parameters.getParameter("TARGET_TYPE", "table")
target_format = oidlUtils.parameters.getParameter("TARGET_FORMAT", "delta")

# Catalog / schema pairs, one per layer.
silver_catalog, silver_schema = "f1_silver", "silver"
gold_catalog, gold_schema = "f1_gold", "gold"
adw_catalog, adw_schema = "f1_gold_adw", "f1_gold"

# Gold table names (the suffixes suggest delta and parquet variants --
# confirm against the write step).
gold_table_dlt, gold_table_par = "f1_team_ranking_dlt", "f1_team_ranking_par"


In [None]:
# Step 1: Load the results and races tables from the silver catalog/schema.
results_df, races_df = (
    spark.table(f"{silver_catalog}.{silver_schema}.{table_name}")
    for table_name in ("f1_results_dlt", "f1_races_dlt")
)

In [None]:


from pyspark.sql import functions as F
from pyspark.sql.window import Window

# Attach the season to every result row. Only race_id and race_year are taken
# from races, and race_year is exposed under the name "year".
race_year_lookup_df = races_df.select(
    F.col("race_id"),
    F.col("race_year").alias("year"),
).alias("ra")
results_with_year_df = results_df.alias("res").join(
    race_year_lookup_df, on="race_id", how="inner"
)

# Keep only rows whose year is 1950 or later.
filtered_results_df = results_with_year_df.where(F.col("year") >= 1950)

# team_name is a stand-in for now: the constructor id rendered as a string
# (a join against constructors would be needed for the real name).
team_name_placeholder = F.col("constructor_id").cast("string")
team_results_df = filtered_results_df.withColumn("team_name", team_name_placeholder)

# is_winner is 1 when position_order equals 1, otherwise 0.
finished_first = F.col("position_order") == 1
team_results_with_wins_df = team_results_df.withColumn(
    "is_winner", F.when(finished_first, 1).otherwise(0)
)

# Per-year, per-team metrics. Points are cast to double before summing.
team_group_keys = ("year", "constructor_id", "team_name")
team_metrics = [
    F.countDistinct("race_id").alias("total_races"),
    F.sum("is_winner").alias("total_wins"),
    F.sum(F.col("points").cast("double")).alias("total_points"),
    F.countDistinct("driver_id").alias("unique_drivers"),
]
yearly_team_results_df = team_results_with_wins_df.groupBy(*team_group_keys).agg(
    *team_metrics
)

# Within each year, order by wins first and points second, both descending.
ranking_window = Window.partitionBy("year").orderBy(
    F.col("total_wins").desc(),
    F.col("total_points").desc(),
)

# Add the rank and a load timestamp, then expose constructor_id as team_id.
ranked_teams_df = yearly_team_results_df.withColumn(
    "rank", F.rank().over(ranking_window)
)
stamped_teams_df = ranked_teams_df.withColumn("update_ts", F.current_timestamp())
yearly_team_ranking_df = stamped_teams_df.withColumnRenamed(
    "constructor_id", "team_id"
)


In [None]:
# Preview the ranking: first 20 rows, long cell values truncated
# (these are the defaults of DataFrame.show, spelled out).
yearly_team_ranking_df.show(n=20, truncate=True)

In [None]:
# Persist the team ranking to the destination selected by target_type and
# target_format. Supported combinations:
#   file  + parquet -> parquet files under {gold_folder_path}/team_ranking
#   table + parquet -> {gold_catalog}.{gold_schema}.{gold_table_par}
#   table + delta   -> {gold_catalog}.{gold_schema}.{gold_table_dlt}, partitioned by year
#   table + adw     -> insert into {adw_catalog}.{adw_schema}.f1_team_ranking
# Any other combination raises ValueError instead of silently writing nothing.
if target_type == "file":
    if target_format == "parquet":
        # NOTE(review): gold_folder_path is not defined in the visible cells;
        # it must be set before this branch runs -- confirm where it comes from.
        yearly_team_ranking_df.write.mode("overwrite").parquet(
            f"{gold_folder_path}/team_ranking"
        )
    else:
        raise ValueError(
            f"Unsupported target_format {target_format!r} for target_type 'file'; "
            "expected 'parquet'"
        )
elif target_type == "table":
    if target_format == "parquet":
        # The parquet copy goes to its own table name (gold_table_par) so it
        # does not replace the delta table.
        yearly_team_ranking_df.write.mode("overwrite").format("parquet").saveAsTable(
            f"{gold_catalog}.{gold_schema}.{gold_table_par}"
        )
    elif target_format == "delta":
        yearly_team_ranking_df.write.mode("overwrite").partitionBy("year").format(
            "delta"
        ).saveAsTable(f"{gold_catalog}.{gold_schema}.{gold_table_dlt}")
    elif target_format == "adw":
        # insertInto resolves columns by position, not by name, so the target
        # table's column order must match this DataFrame's.
        # NOTE(review): no write mode is set here, unlike the overwrite
        # branches above, so reruns presumably append -- confirm that is intended.
        yearly_team_ranking_df.write.insertInto(
            f"{adw_catalog}.{adw_schema}.f1_team_ranking"
        )
    else:
        raise ValueError(
            f"Unsupported target_format {target_format!r} for target_type 'table'; "
            "expected 'parquet', 'delta' or 'adw'"
        )
else:
    raise ValueError(
        f"Unsupported target_type {target_type!r}; expected 'file' or 'table'"
    )