In [5]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

# Raw Barber telemetry directory (relative path, resolved against the
# notebook's working directory).
DATA_ROOT = Path("..") / "data" / "raw" / "barber"

# Full-session telemetry dump for race 2 (R2 rather than R1).
R_BIG = DATA_ROOT.joinpath("R2_barber_telemetry_data.csv")
print("Exists? ->", R_BIG.exists())

Exists? -> True


In [6]:
# Read only the first 200k rows of the big file as a quick sanity check.
df_small = pd.read_csv(R_BIG, nrows=200_000)

# Row count per session label in the sample; should show "R2".
session_counts = df_small["meta_session"].value_counts()
session_counts

meta_session
R2    200000
Name: count, dtype: int64

In [7]:
CAR_ID = "GR86-002-000"
OUT_PATH = DATA_ROOT / f"R2_barber_telemetry_vehicle_{CAR_ID}.csv"
print("Output:", OUT_PATH)

# The extract is cached on disk: delete OUT_PATH if you want to re-run.
if OUT_PATH.exists():
    print("Per-car telemetry already exists.")
else:
    chunksize = 500_000
    # Stream into a sibling temp file and rename it only once the whole input
    # has been read. Otherwise an interrupted run would leave a truncated
    # OUT_PATH that the exists() guard above treats as a finished extract.
    tmp_path = OUT_PATH.with_name(OUT_PATH.name + ".partial")
    first = True
    try:
        for chunk in pd.read_csv(R_BIG, chunksize=chunksize, low_memory=False):
            sub = chunk[chunk["vehicle_id"] == CAR_ID]
            if not sub.empty:
                # The first matching chunk creates the file with a header row;
                # later chunks append rows only.
                sub.to_csv(tmp_path, mode="w" if first else "a",
                           index=False, header=first)
                first = False
        if first:
            # Nothing matched, so no file was written. Fail here with a clear
            # message instead of reporting success and letting the later
            # read of the per-car file fail with FileNotFoundError.
            raise ValueError(f"No rows with vehicle_id == {CAR_ID!r} in {R_BIG}")
        tmp_path.replace(OUT_PATH)
    finally:
        # Remove any leftover partial file (a no-op after a successful rename).
        tmp_path.unlink(missing_ok=True)
    print("Done extracting per-car telemetry.")

Output: ../data/raw/barber/R2_barber_telemetry_vehicle_GR86-002-000.csv
Per-car telemetry already exists.


In [8]:
# Load the single-car extract into memory and preview the first rows.
CAR_PATH = DATA_ROOT.joinpath(f"R2_barber_telemetry_vehicle_{CAR_ID}.csv")
df_car = pd.read_csv(CAR_PATH, low_memory=False)
df_car.head()

Unnamed: 0,expire_at,lap,meta_event,meta_session,meta_source,meta_time,original_vehicle_id,outing,telemetry_name,telemetry_value,timestamp,vehicle_id,vehicle_number
0,,1,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:49.142Z,GR86-002-000,0,accx_can,0.244,2025-09-05T03:38:26.542Z,GR86-002-000,0
1,,1,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:49.142Z,GR86-002-000,0,accy_can,0.1,2025-09-05T03:38:26.542Z,GR86-002-000,0
2,,1,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:49.142Z,GR86-002-000,0,aps,100.0,2025-09-05T03:38:26.542Z,GR86-002-000,0
3,,1,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:49.142Z,GR86-002-000,0,pbrake_r,0.0,2025-09-05T03:38:26.542Z,GR86-002-000,0
4,,1,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:49.142Z,GR86-002-000,0,pbrake_f,0.0,2025-09-05T03:38:26.542Z,GR86-002-000,0


In [11]:
from pathlib import Path
import pandas as pd
import numpy as np

# Same data root, car id and extract path as declared earlier in the notebook,
# repeated here before the per-car file is read again.
CAR_ID = "GR86-002-000"
DATA_ROOT = Path("../data/raw/barber")
CAR_PATH = DATA_ROOT.joinpath(f"R2_barber_telemetry_vehicle_{CAR_ID}.csv")

print("Reading per-car telemetry from:", CAR_PATH)

# Long-format telemetry: one row per signal sample (see the preview below).
df_car = pd.read_csv(CAR_PATH, low_memory=False)
df_car.head()

Reading per-car telemetry from: ../data/raw/barber/R2_barber_telemetry_vehicle_GR86-002-000.csv


Unnamed: 0,expire_at,lap,meta_event,meta_session,meta_source,meta_time,original_vehicle_id,outing,telemetry_name,telemetry_value,timestamp,vehicle_id,vehicle_number
0,,1,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:49.142Z,GR86-002-000,0,accx_can,0.244,2025-09-05T03:38:26.542Z,GR86-002-000,0
1,,1,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:49.142Z,GR86-002-000,0,accy_can,0.1,2025-09-05T03:38:26.542Z,GR86-002-000,0
2,,1,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:49.142Z,GR86-002-000,0,aps,100.0,2025-09-05T03:38:26.542Z,GR86-002-000,0
3,,1,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:49.142Z,GR86-002-000,0,pbrake_r,0.0,2025-09-05T03:38:26.542Z,GR86-002-000,0
4,,1,I_R06_2025-09-07,R2,kafka:gr-raw,2025-09-07T15:05:49.142Z,GR86-002-000,0,pbrake_f,0.0,2025-09-05T03:38:26.542Z,GR86-002-000,0


In [12]:
# Pivot telemetry to wide format: one row per (lap, timestamp), columns = signals
wide = (
    df_car
    .pivot_table(
        index=["lap", "timestamp"],
        columns="telemetry_name",
        values="telemetry_value",
        aggfunc="first",  # one reading per (lap, timestamp, signal)
    )
    .reset_index()
    # pivot_table labels the column axis "telemetry_name"; clear it so it does
    # not appear as a stray header on wide / lap_summary.
    .rename_axis(columns=None)
)

# make a proper datetime column
wide["timestamp_dt"] = pd.to_datetime(wide["timestamp"])

# Aggregate to per-lap features.
# lap_time_s is the span between the earliest and latest sample carrying a
# given lap number.
agg_dict = {
    "timestamp_dt": lambda s: (s.max() - s.min()).total_seconds(),
}
rename_map = {"timestamp_dt": "lap_time_s"}

# Nice-to-have signals, added only if they exist: {signal name: output column}.
optional_signals = {
    "aps": "aps_mean",
    "pbrake_f": "pbrake_f_mean",
    "Speed": "speed_mean",
}

# Lower-cased lookup used as a fallback when the exact name is absent, so a
# signal that differs only in case (e.g. "speed" vs "Speed") is still picked up.
# NOTE(review): the actual casing of the speed signal in this feed is not
# visible here -- confirm against df_car["telemetry_name"].unique().
cols_by_lower = {}
for col in wide.columns:
    cols_by_lower.setdefault(str(col).lower(), col)

skipped = []
for signal, out_name in optional_signals.items():
    # Prefer the exact name; fall back to a case-insensitive match.
    col = signal if signal in wide.columns else cols_by_lower.get(signal.lower())
    if col is None:
        skipped.append(signal)
        continue
    agg_dict[col] = "mean"
    rename_map[col] = out_name

if skipped:
    # Report omissions so a missing feature column is not a silent surprise.
    print("Signals not found in telemetry (skipped):", skipped)

lap_summary = (
    wide.sort_values("timestamp_dt")
        .groupby("lap")
        .agg(agg_dict)
        .rename(columns=rename_map)
        .reset_index()
)

lap_summary.head()

telemetry_name,lap,lap_time_s,aps_mean,pbrake_f_mean
0,1,48.881,23.522221,0.895405
1,2,99.942,72.796897,6.024207
2,3,98.273,73.407242,6.136192
3,4,98.13,73.132636,6.99451
4,5,97.759,74.514511,8.04268


In [13]:
from pathlib import Path

# Destination for processed per-lap features; create the directory if needed.
OUT_DIR = Path("../data/processed/barber")
OUT_DIR.mkdir(parents=True, exist_ok=True)
OUT_FEATS = OUT_DIR.joinpath(f"barber_r2_{CAR_ID}_lap_features.csv")

# Work on a copy so lap_summary itself stays untouched, then write it out
# without the positional index.
lap_features = lap_summary.copy()
lap_features.to_csv(OUT_FEATS, index=False)

OUT_FEATS

PosixPath('../data/processed/barber/barber_r2_GR86-002-000_lap_features.csv')

In [14]:
from pathlib import Path
CAR_ID = "GR86-002-000"

# Read the saved per-lap feature table back from disk and preview it.
features_path = Path("../data/processed/barber").joinpath(
    f"barber_r2_{CAR_ID}_lap_features.csv"
)
lap_feats = pd.read_csv(features_path)
lap_feats.head()

Unnamed: 0,lap,lap_time_s,aps_mean,pbrake_f_mean
0,1,48.881,23.522221,0.895405
1,2,99.942,72.796897,6.024207
2,3,98.273,73.407242,6.136192
3,4,98.13,73.132636,6.99451
4,5,97.759,74.514511,8.04268
