In [9]:
import polars as pl


In [4]:
df=pl.read_csv("tournament_heats.csv")
df

Heat,Red,Green,Blue,Yellow,Orange,White
str,str,str,str,str,str,str
"""Heat 1""","""D1""",,"""D11""","""D10""","""D7""","""D5"""
"""Heat 2""","""D14""","""D1""","""D15""","""D18""","""D17""","""D7"""
"""Heat 3""","""D12""","""D17""","""D1""","""D16""",,"""D4"""
"""Heat 4""",,"""D7""","""D16""","""D14""","""D11""","""D8"""
"""Heat 5""",,"""D18""","""D13""",,,"""D6"""
…,…,…,…,…,…,…
"""Heat 16""","""D4""","""D10""","""D9""","""D2""","""D16""","""D1"""
"""Heat 17""","""D8""","""D9""","""D3""","""D15""","""D1""","""D13"""
"""Heat 18""","""D6""","""D8""","""D2""","""D1""","""D12""","""D11"""
"""Heat 19""","""D3""","""D2""","""D8""","""D17""","""D15""","""D10"""


#Validate outcomes: Is this a viable race schedule per the specs?
1. Check if drivers face eachother more than once
2. Check if a driver uses a car more than once
3. Ensure everyone races the same number of events

In [10]:
#Reorganize into "long" to accommodate analysis
long_df = (
    df
    #.melt(id_vars=["Heat"], variable_name="Car", value_name="Driver")
    .unpivot(
        index=["Heat"],          # columns to keep
        variable_name="Car",     # former column names → Car
        value_name="Driver"      # values → Driver
    )
    .drop_nulls()
)

long_df

Heat,Car,Driver
str,str,str
"""Heat 1""","""Red""","""D1"""
"""Heat 2""","""Red""","""D14"""
"""Heat 3""","""Red""","""D12"""
"""Heat 6""","""Red""","""D17"""
"""Heat 7""","""Red""","""D16"""
…,…,…
"""Heat 16""","""White""","""D1"""
"""Heat 17""","""White""","""D13"""
"""Heat 18""","""White""","""D11"""
"""Heat 19""","""White""","""D10"""


In [14]:
#Times a driver races: anyone race more than six times?
num_heats = (
    long_df
    .group_by(["Heat"])
    .agg(pl.len().alias("Number of Drivers"))
    .sort(pl.col("Heat"))
)

num_heats

Heat,Number of Drivers
str,u32
"""Heat 1""",5
"""Heat 10""",6
"""Heat 11""",5
"""Heat 12""",6
"""Heat 13""",6
…,…
"""Heat 5""",3
"""Heat 6""",5
"""Heat 7""",6
"""Heat 8""",4


In [17]:
#Times a driver races: anyone race more than six times?
driver_races = (
    long_df
    .group_by(["Driver"])
    .agg(pl.len().alias("Times_Faced"))
    .sort(pl.col("Times_Faced"), descending=True)
)

driver_races

Driver,Times_Faced
str,u32
"""D16""",6
"""D9""",6
"""D15""",6
"""D4""",6
"""D18""",6
…,…
"""D10""",6
"""D12""",6
"""D8""",6
"""D2""",6


In [18]:
#check if any driver pairs face each other more than once
pairs = (
    long_df
    .join(long_df, on="Heat", how="inner")
    .filter(pl.col("Driver") < pl.col("Driver_right"))  # remove duplicates + self
    .select([
        pl.col("Driver").alias("Driver1"),
        pl.col("Driver_right").alias("Driver2"),
        "Heat"
    ])
)

pair_counts = (
    pairs
    .group_by(["Driver1", "Driver2"])
    .agg(pl.len().alias("Times_Faced"))
    .filter(pl.col("Times_Faced") > 1)
)

pair_counts
#pair_counts.filter(pl.col("Driver1") == "D17" or pl.col("Driver2") == "D17")



Driver1,Driver2,Times_Faced
str,str,u32
"""D14""","""D8""",2
"""D10""","""D11""",2
"""D15""","""D7""",2
"""D1""","""D2""",2
"""D2""","""D6""",2
…,…,…
"""D14""","""D17""",2
"""D12""","""D3""",2
"""D16""","""D4""",2
"""D1""","""D7""",2


In [19]:
#does any driver use the same car more than once?
driver_car_usage = (
    long_df
    .group_by(["Driver", "Car"])
    .agg(pl.len().alias("Times_Used"))
    .filter(pl.col("Times_Used") > 1)
)

driver_car_usage


Driver,Car,Times_Used
str,str,u32


In [20]:
#Does everyone race the same number of heats?
race_counts = (
    long_df
    .group_by("Driver")
    .agg(pl.len().alias("Heats_Raced"))
)

race_counts





Driver,Heats_Raced
str,u32
"""D7""",6
"""D10""",6
"""D6""",6
"""D9""",6
"""D13""",6
…,…
"""D11""",6
"""D18""",6
"""D15""",6
"""D2""",6


In [21]:
expected_heats = race_counts.select(pl.col("Heats_Raced").mode()).item()
expected_heats


6

In [22]:
uneven_racers = race_counts.filter(pl.col("Heats_Raced") != expected_heats)
uneven_racers


Driver,Heats_Raced
str,u32


#Check/Review Logs
- What is happening in the application?

In [27]:

df_logs = (
    pl.read_ndjson("logs/app.log")
      .with_columns(
          pl.col("timestamp")
            .str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S")
      )
)

df_logs

timestamp,level,message,module,function,line
datetime[μs],str,str,str,str,i64
2026-01-21 11:37:57,"""INFO""","""Attempting to save full race s…","""publish_schedule""","""export_schedule_csv_Heats""",22
2026-01-21 11:37:57,"""INFO""","""Full schedule of heats success…","""publish_schedule""","""export_schedule_csv_Heats""",34
2026-01-21 11:37:57,"""INFO""","""Attempting to save Drivers sch…","""publish_schedule""","""export_schedule_csv_Drivers""",57
2026-01-21 11:37:57,"""INFO""","""Drivers schedule successfully …","""publish_schedule""","""export_schedule_csv_Drivers""",65
2026-01-21 11:39:21,"""INFO""","""Attempting to save full race s…","""publish_schedule""","""export_schedule_csv_Heats""",22
…,…,…,…,…,…
2026-01-21 11:58:21,"""INFO""","""Successfully printed schedule …","""publish_schedule""","""print_schedule_to_console""",80
2026-01-21 11:58:21,"""INFO""","""Attempting to save full race s…","""publish_schedule""","""export_schedule_csv_Heats""",20
2026-01-21 11:58:21,"""INFO""","""Full schedule of heats success…","""publish_schedule""","""export_schedule_csv_Heats""",31
2026-01-21 11:58:21,"""INFO""","""Attempting to save Drivers sch…","""publish_schedule""","""export_schedule_csv_Drivers""",53


In [33]:
#How functions are used by logging level - any errors?
df_logs.group_by(["level", "function"]).len().sort("len", descending=True)

level,function,len
str,str,u32
"""INFO""","""export_schedule_csv_Heats""",6
"""INFO""","""export_schedule_csv_Drivers""",6
"""INFO""","""print_schedule_to_console""",2


In [34]:
df_logs.select("timestamp", "level", "message").sort("timestamp")

timestamp,level,message
datetime[μs],str,str
2026-01-21 11:37:57,"""INFO""","""Attempting to save full race s…"
2026-01-21 11:37:57,"""INFO""","""Full schedule of heats success…"
2026-01-21 11:37:57,"""INFO""","""Attempting to save Drivers sch…"
2026-01-21 11:37:57,"""INFO""","""Drivers schedule successfully …"
2026-01-21 11:39:21,"""INFO""","""Attempting to save full race s…"
…,…,…
2026-01-21 11:58:21,"""INFO""","""Successfully printed schedule …"
2026-01-21 11:58:21,"""INFO""","""Attempting to save full race s…"
2026-01-21 11:58:21,"""INFO""","""Full schedule of heats success…"
2026-01-21 11:58:21,"""INFO""","""Attempting to save Drivers sch…"
