## Imports

In [11]:
import os
import re
import pandas as pd

## Helper

In [12]:
def extract_driver_number(vehicle_id):
    """Return the trailing dash-separated number of a vehicle id as an int.

    The id is converted to ``str`` first. For an id such as 'GR86-005-07'
    the digits following the last '-' are parsed, giving 7. When the text
    does not end in '-<digits>', None is returned.
    """
    match = re.search(r"-(\d+)$", str(vehicle_id))
    if match is None:
        return None
    return int(match.group(1))

## Trimming Time

In [30]:
# ---------------------- Setup ----------------------
# Cleaned per-race folders to scan, and the folder that receives the trimmed outputs.
base_dirs = ["../datasets_clean/cota1", "../datasets_clean/cota2"]
output_dir = "../datasets_trimmed/cota_top10"
os.makedirs(output_dir, exist_ok=True)

# ---------------------- Step 1: Collect all lap times ----------------------
# One frame per driver_<N>.csv, reduced to the columns needed for ranking laps.
all_laps = []
lap_columns = ["NUMBER", "LAP_NUMBER", "LAP_TIME_SEC", "SOURCE_DIR"]

for folder in base_dirs:
    # os.listdir returns entries in arbitrary order; sorting keeps re-runs reproducible.
    for file in sorted(os.listdir(folder)):
        if not (file.startswith("driver_") and file.endswith(".csv")):
            continue

        # Check the file name before reading, so files that will be skipped are never parsed.
        driver_num_match = re.search(r"driver_(\d+)\.csv", file)
        if not driver_num_match:
            continue
        driver_num = int(driver_num_match.group(1))

        path = os.path.join(folder, file)
        df = pd.read_csv(path)
        if "LAP_TIME_SEC" not in df.columns or "LAP_NUMBER" not in df.columns:
            continue

        # assign() builds a new frame, so nothing is written onto the dropna() result in place.
        laps = df.dropna(subset=["LAP_TIME_SEC"]).assign(
            SOURCE_DIR=os.path.basename(folder),
            NUMBER=driver_num,
        )
        all_laps.append(laps[lap_columns])

if not all_laps:
    # pd.concat([]) would fail with an opaque "No objects to concatenate".
    raise ValueError(
        "No driver_<N>.csv files with LAP_NUMBER and LAP_TIME_SEC columns "
        f"were found in {base_dirs}"
    )

# The secondary sort keys break ties between equal lap times deterministically,
# so the top 10 does not depend on the order in which files were read.
lap_df = pd.concat(all_laps, ignore_index=True).sort_values(
    ["LAP_TIME_SEC", "SOURCE_DIR", "NUMBER", "LAP_NUMBER"]
)
top10 = lap_df.head(10).reset_index(drop=True)

print("üèÅ Top 10 fastest laps:")
print(top10)

# Save combined top 10 lap info
top10_path = os.path.join(output_dir, "top10_lap_times.csv")
top10.to_csv(top10_path, index=False)
print(f"‚úÖ Saved: {top10_path}")

# ---------------------- Step 2: Filter CSVs ----------------------
def filter_csv(folder, filename, top10_only=False, top10_laps=None):
    """Load ``filename`` from ``folder`` and optionally keep only top-lap rows.

    Parameters
    ----------
    folder : str
        Directory containing the CSV. Its base name is written to the
        ``SOURCE_DIR`` column of the result.
    filename : str
        Name of the CSV file inside ``folder``.
    top10_only : bool, default False
        When True, keep only rows whose (``NUMBER``, ``lap``) pair, together
        with the folder base name, matches a (``NUMBER``, ``LAP_NUMBER``,
        ``SOURCE_DIR``) row of the top-lap table.
    top10_laps : pandas.DataFrame, optional
        Top-lap table to filter against. Defaults to the notebook-level
        ``top10`` frame. Only consulted when ``top10_only`` is True.

    Returns
    -------
    pandas.DataFrame or None
        The loaded (and possibly filtered) rows with a ``SOURCE_DIR`` column,
        or None when the file does not exist or has neither a ``NUMBER`` nor
        a ``DriverNumber`` column. A message is printed in every case.

    Raises
    ------
    KeyError
        If ``top10_only`` is True and the CSV has no ``lap`` column.
    """
    path = os.path.join(folder, filename)
    if not os.path.exists(path):
        print(f"‚ö†Ô∏è {filename} missing in {folder}")
        return None

    df = pd.read_csv(path)
    folder_name = os.path.basename(folder)

    # Normalise the driver column name: DriverNumber -> NUMBER.
    if "DriverNumber" in df.columns:
        df = df.rename(columns={"DriverNumber": "NUMBER"})

    if "NUMBER" not in df.columns:
        print(f"‚ö†Ô∏è No NUMBER column in {filename}, skipping.")
        return None

    if top10_only:
        if top10_laps is None:
            top10_laps = top10  # notebook-level table of the fastest laps

        # Build the lookup once, not once per row, and keep only the laps
        # that belong to this folder.
        wanted = {
            (number, lap)
            for number, lap, source in zip(
                top10_laps["NUMBER"], top10_laps["LAP_NUMBER"], top10_laps["SOURCE_DIR"]
            )
            if source == folder_name
        }
        # An explicit boolean Series keeps this correct for an empty frame,
        # where a row-wise apply() would not return a usable mask.
        keep = pd.Series(
            [pair in wanted for pair in zip(df["NUMBER"], df["lap"])],
            index=df.index,
            dtype=bool,
        )
        df = df[keep]

    # assign() returns a new frame, so the filtered slice is never written to in place.
    df = df.assign(SOURCE_DIR=folder_name)
    print(f"‚úÖ {filename} ({folder_name}): {len(df)} rows after filtering")
    return df

# ---------------------- Step 3: Process files ----------------------
# Each file is read from every base folder, combined, and written to output_dir.
files_to_process = ["telemetry_per_timestamp.csv", "driver_session_stats.csv", "per_lap_telemetry_summary.csv"]
skipped_files = []  # files for which no folder produced any rows

for fname in files_to_process:
    # Only the per-timestamp telemetry is cut down to the top-10 laps;
    # the other files are combined unfiltered.
    top10_only = (fname == "telemetry_per_timestamp.csv")

    combined = []
    for folder in base_dirs:
        df_filtered = filter_csv(folder, fname, top10_only=top10_only)
        if df_filtered is not None and not df_filtered.empty:
            combined.append(df_filtered)

    if not combined:
        # Record the gap so the final message does not claim a success that did not happen.
        skipped_files.append(fname)
        print(f"No rows available for {fname}; nothing saved.")
        continue

    final_df = pd.concat(combined, ignore_index=True)

    # The filtered telemetry gets a top10_ prefix; the other files keep their original name.
    out_name = f"top10_{fname}" if top10_only else fname
    out_path = os.path.join(output_dir, out_name)

    final_df.to_csv(out_path, index=False)
    print(f"üíæ Saved {fname} ‚Üí {out_path}")

if skipped_files:
    print(f"\nFinished, but {len(skipped_files)} file(s) were not saved: {', '.join(skipped_files)}")
else:
    print("\nüéØ All CSVs processed successfully!")

üèÅ Top 10 fastest laps:
   NUMBER  LAP_NUMBER  LAP_TIME_SEC SOURCE_DIR
0       7           5       148.112      cota2
1      13          16       148.115      cota2
2       7           3       148.185      cota2
3      13           6       148.198      cota2
4       7          11       148.204      cota2
5       7          16       148.266      cota2
6       7           4       148.272      cota2
7      46           6       148.317      cota2
8      13          11       148.320      cota2
9       7           6       148.330      cota2
‚úÖ Saved: ../datasets_trimmed/cota_top10\top10_lap_times.csv
‚úÖ telemetry_per_timestamp.csv (cota1): 0 rows after filtering
‚úÖ telemetry_per_timestamp.csv (cota2): 33039 rows after filtering
üíæ Saved telemetry_per_timestamp.csv ‚Üí ../datasets_trimmed/cota_top10\top10_telemetry_per_timestamp.csv
‚úÖ driver_session_stats.csv (cota1): 28 rows after filtering
‚úÖ driver_session_stats.csv (cota2): 29 rows after filtering
üíæ Saved driver_session_stats