This notebook improves the lap data to fix two issues in the position plot. When a driver retired from a race (for example after a crash), their line in the plot did not drop to the back of the field, because the lap data only contains fully completed laps. To fix this, we append one extra lap per retired driver that carries their classified position, so the plot correctly reflects where the driver ended up.

Additionally, the first lap's position was inaccurate, showing the position at the end of the lap rather than the grid position. We replaced the first lap position value with the grid position.

In [None]:
import polars as pl
import pandas as pd

In [2]:
# Directory that holds the parquet files read (and later overwritten) by this notebook.
# NOTE(review): this is an absolute, machine-specific path — adjust it here, in one
# place, before running the notebook on another machine.
DATA_DIR = "/Users/max/Library/Mobile Documents/com~apple~CloudDocs/100 Hochschule/110 TU Wien/SoSe 24/InfoVis/InfoVis2024/E3/static/data"

# Lazily scan both inputs; nothing is materialised until `.collect()` is called.
lap_data = pl.scan_parquet(f"{DATA_DIR}/all_laps.parquet")
driver_data = pl.scan_parquet(f"{DATA_DIR}/all_driver_data.parquet")

In [3]:
# Attach the per-driver columns of `driver_data` to every lap of that driver in
# the same race. No `how` is given, so polars' default join type (inner) applies.
# Non-key columns present in both frames receive the default `_right` suffix on
# the `driver_data` side (e.g. `Position` becomes `Position_right`).
join_keys = ["round_number", "year", "DriverNumber"]
joined_data = lap_data.join(
    driver_data,
    left_on=join_keys,
    right_on=join_keys,
)

In [4]:
# Execute the lazy join and materialise the result as an in-memory DataFrame so
# the following cells can work eagerly.
# NOTE(review): this rebinds `joined_data` from a LazyFrame to a DataFrame, so the
# cell is presumably not re-runnable on its own — re-run the join cell first.
joined_data = joined_data.collect()

In [5]:
# The recorded Position of lap 1 is the position at the END of that lap. Replace
# it with GridPosition so the position plot starts from the starting grid; all
# other laps keep their recorded Position.
is_first_lap = pl.col("LapNumber") == 1
joined_data = joined_data.with_columns(
    Position=pl.when(is_first_lap)
    .then(pl.col("GridPosition"))
    .otherwise(pl.col("Position")),
)


In [6]:
# `Position_right` is the `Position` column that came from `driver_data` in the
# join; give it a descriptive name so it is not confused with the per-lap Position.
column_renames = {"Position_right": "classified_pos"}
joined_data = joined_data.rename(column_renames)

In [8]:
# Laps of drivers who did not finish: Status is neither "Finished" nor contains
# "Lap" (presumably lapped finishers such as "+1 Lap" — verify against the data).
retired_drivers = joined_data.filter(
    pl.col("Status") != "Finished",
    ~pl.col("Status").str.contains("Lap"),
)

# Pick each retired driver's final recorded lap per race.
# `.last()` simply takes the last row of each group in frame order, and the row
# order coming out of the join is not guaranteed, so sort by LapNumber first.
# (group_by keeps the relative order of rows within each group.)
last_row_retired_drivers = (
    retired_drivers
    .sort("LapNumber")
    .group_by(["round_number", "year", "DriverNumber"])
    .last()
)

# Build one synthetic extra lap per retired driver: same row as their last lap,
# but with LapNumber advanced by one and Position set to the classified position.
# NOTE: all other lap columns are copied unchanged from the driver's last lap.
row_to_add = last_row_retired_drivers.with_columns(
    pl.col("LapNumber") + 1,
    pl.col("classified_pos").alias("Position"),
)

In [9]:
# Bring both frames into the same (alphabetical) column order so they line up
# when they are stacked with `pl.concat` in the next cell.
joined_column_order = sorted(joined_data.columns)
joined_data = joined_data.select(joined_column_order)

extra_row_column_order = sorted(row_to_add.columns)
row_to_add = row_to_add.select(extra_row_column_order)

In [11]:
# Stack the original laps and the synthetic retirement laps, then restore a
# stable ordering: by season, race, driver and lap number.
frames_to_stack = [joined_data, row_to_add]
sort_keys = ["year", "round_number", "Driver", "LapNumber"]
lap_data_with_retirements = pl.concat(frames_to_stack).sort(by=sort_keys)

In [12]:
# Where a lap has no recorded Position, fall back to the driver's classified
# position; otherwise keep the recorded value.
position_or_classified = (
    pl.when(pl.col("Position").is_null())
    .then(pl.col("classified_pos"))
    .otherwise(pl.col("Position"))
)

# Afterwards keep only the columns of the original lap data (in their original
# order), dropping the helper columns that were joined in from `driver_data`.
lap_data_with_retirements = lap_data_with_retirements.with_columns(
    position_or_classified.alias("Position")
).select(lap_data.columns)

In [13]:
# WARNING: this writes to the same file the notebook reads its lap data from, so
# the input is overwritten. Running the whole notebook again on the already
# processed file would append yet another synthetic lap per retired driver —
# keep a copy of the raw file if the notebook may need to be re-run.
output_path = "/Users/max/Library/Mobile Documents/com~apple~CloudDocs/100 Hochschule/110 TU Wien/SoSe 24/InfoVis/InfoVis2024/E3/static/data/all_laps.parquet"
lap_data_with_retirements.write_parquet(output_path)