In [None]:
import tempfile
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Load the recording from its parquet file with pandas
PARQUET_PATH = Path("../data") / "5FT0192" / "2025-11-18.parquet"
df = pd.read_parquet(PARQUET_PATH)

In [None]:
# Preview the first five rows of the recording
df.head(n=5)

In [None]:
# Number of rows recorded in each iQC mode value
iqc_mode_counts = df["iQC1.iQCMode"].value_counts()
iqc_mode_counts

In [None]:
# 1. Dimensions as (rows, columns).
# A bare last expression is rendered by the notebook, so print() is not needed.
df.shape

In [None]:
# 2. Column types: one dtype per column.
# Left as the cell's last expression so the notebook renders it.
df.dtypes

In [None]:
# 3. Missing values: count of null entries per column.
# Left as the cell's last expression so the notebook renders it.
df.isnull().sum()

In [None]:
# 4. Basic stats: summary statistics of the numeric columns.
# Returning the DataFrame (instead of print) gives the notebook's rich table rendering.
df.describe()

## Correlations

- 3 to 1 (brake is pressed)
- 5 to 1 (brake is pressed)
- Brake pedal signals:
  - EBC1_Brakes.BrakePedalPos
  - EBC1_Brakes.EBSBrakeSwitch

- 3 to 6 (gas is pressed)
- 2 to 6 (gas is pressed)

Rule of thumb (usually): a lower-to-higher transition means throttle; a higher-to-lower transition means brake.

**Do not include** events where the driver presses a button to disable ACC.

Signals:
- CruiseCtrlEnableSwitch
- CruiseCtrlSetSwitch

Messages:
1. CCVS1_ManagementComputer
2. CCVS1_Engine
3. CCVS1_Cab_Controller

Brake pedal press/pos:
- EBC1_Brakes.BrakePedalPos
- CCVS1_Engine.BrakeSwitch (Binary)

In [None]:
# Row-to-row change in "Timestamp"; large jumps in either direction stand out here
df["Timestamp_diff"] = df["Timestamp"].diff()
print(df["Timestamp_diff"].describe())

# Keep rows whose jump is larger than 1000 in magnitude, forwards or backwards
# (1 second if "Timestamp" is in milliseconds -- TODO confirm the unit)
jump_is_large = df["Timestamp_diff"].abs() > 1000
anomalies = df.loc[jump_is_large]
print(anomalies[["Timestamp", "Timestamp_diff"]])

In [None]:
# Summary statistics of the IQEH5 latitude signal
df.loc[:, "IQEH5.Latitude"].describe()

In [None]:
# Summary statistics of the IQEH5 longitude signal
df.loc[:, "IQEH5.Longitude"].describe()

In [None]:
# Rows whose GPS position is exactly (0, 0)
lat_is_zero = df["IQEH5.Latitude"].eq(0)
lon_is_zero = df["IQEH5.Longitude"].eq(0)
anomalies = df.loc[lat_is_zero & lon_is_zero]
print(anomalies[["IQEH5.Latitude", "IQEH5.Longitude"]])

In [None]:
# iQCMode values treated as "ACC active"
acc_modes = {3, 4, 5}

# iQCMode values treated as "override / not in ACC"
override_modes = {0, 1, 2, 6}

# Flag every row by which of the two groups its mode belongs to
iqc_mode = df["iQC1.iQCMode"]
df["is_acc"] = iqc_mode.isin(acc_modes)
df["is_override"] = iqc_mode.isin(override_modes)

In [None]:
# An ACC-exit event is a row in an override mode whose previous row was in an ACC mode.
# The first row has no predecessor, so it is treated as "not previously in ACC".
was_acc_previous_row = df["is_acc"].shift(1, fill_value=False)
left_acc = was_acc_previous_row & df["is_override"]
df["leave_acc_event"] = left_acc.astype(int)

In [None]:
# How many rows are flagged as ACC-exit events (1) versus not (0)
leave_acc_counts = df["leave_acc_event"].value_counts()
leave_acc_counts

In [None]:
# Keep only the numeric columns for the correlation analysis
numeric_df = df.select_dtypes(include=["number"])

In [None]:
# Pearson correlation of every numeric column with the ACC-exit flag.
# corrwith() correlates each column against the target directly, instead of
# building the full column-by-column matrix only to keep a single column of it.
# The flag's correlation with itself is dropped, as are columns whose
# correlation is undefined (NaN).
correlations = (
    numeric_df
    .corrwith(numeric_df["leave_acc_event"])
    .drop("leave_acc_event")
    .dropna()
)

In [None]:
# Table of signed and absolute correlation per feature, strongest first
corr_df = pd.DataFrame(
    {
        "correlation": correlations,
        "abs_corr": correlations.abs(),
    }
).sort_values("abs_corr", ascending=False)

In [None]:
# Show the N features most strongly correlated (by absolute value) with leaving ACC
N = 20
corr_df.iloc[:N]

## Lagged correlations

~100ms per row

In [None]:
# Look-back distances, in rows (timesteps), tested ahead of each override
lags = [
    1,
    2,
    3,
    5,
    10,
]

In [None]:
# Numeric feature columns, excluding the event flag itself
numeric_cols = (
    df.select_dtypes(include="number")
      .columns
      .drop(["leave_acc_event"], errors="ignore")
)


def _lagged_correlation(lag):
    """Correlate the features as they were `lag` rows earlier with the ACC-exit flag.

    Returns a Series indexed by feature name; features whose correlation is
    undefined (NaN) are left out.
    """
    shifted = df[numeric_cols].shift(lag)
    return shifted.corrwith(df["leave_acc_event"]).dropna()


# One record per (feature, lag) pair
lagged_rows = [
    {
        "feature": feature,
        "lag": lag,
        "correlation": value,
        "abs_corr": abs(value),
    }
    for lag in lags
    for feature, value in _lagged_correlation(lag).items()
]

# Strongest absolute correlation first
lagged_corr_df = pd.DataFrame(lagged_rows).sort_values("abs_corr", ascending=False)

In [None]:
# The 20 strongest (feature, lag) correlations
lagged_corr_df.iloc[:20]

In [None]:
# First five distinct feature names in the order they appear in lagged_corr_df;
# a feature listed at several lags is counted once
top_features = lagged_corr_df["feature"].drop_duplicates().head(5).tolist()

top_features

In [None]:
# Rows to keep before and after each ACC-exit event
window_before, window_after = 20, 10

In [None]:
# Index labels of the rows flagged as ACC-exit events (kept as the event id)
event_indices = df.index[df["leave_acc_event"] == 1]

# Row *positions* of the same events. Windows are cut by position so they are
# correct whatever the index holds; arithmetic on labels is only valid for a
# default 0..n-1 index.
event_positions = (df["leave_acc_event"] == 1).to_numpy().nonzero()[0]

feature_frame = df[top_features]
relative_steps = range(-window_before, window_after + 1)

aligned = []

for idx, pos in zip(event_indices, event_positions):
    start = pos - window_before
    stop = pos + window_after + 1  # exclusive end for iloc

    # skip windows cut short by the start or end of the recording
    if start < 0 or stop > len(feature_frame):
        continue

    snippet = feature_frame.iloc[start:stop].copy()
    snippet["t"] = relative_steps  # timestep relative to the event row (0 = event)
    snippet["event_id"] = idx

    aligned.append(snippet)

if aligned:
    aligned_df = pd.concat(aligned, ignore_index=True)
else:
    # pd.concat([]) raises; keep an empty frame with the expected columns instead
    aligned_df = pd.DataFrame(columns=[*top_features, "t", "event_id"], dtype=float)

In [None]:
import matplotlib.pyplot as plt

# Mean of each top feature at every relative timestep, averaged over all events
mean_traj = aligned_df.groupby("t")[top_features].mean()

fig, ax = plt.subplots(figsize=(10, 6))

for feature_name in top_features:
    ax.plot(mean_traj.index, mean_traj[feature_name], label=feature_name)

# Dashed vertical line at t = 0, the event row
ax.axvline(0, linestyle="--")
ax.set_xlabel("Timesteps relative to ACC override")
ax.set_ylabel("Signal value")
ax.set_title("Average signal behavior around ACC override")
ax.legend()
plt.show()

## Modeling

- finer-grained overrides
- context surrounding why the override occurred

## Data Analyses

In [None]:
# Convert "Timestamp" to datetimes and make it the index.
#
# pd.to_datetime() reads plain numbers as *nanoseconds* unless a unit is given.
# The earlier gap check compares the raw difference against 1000 as "1 second",
# i.e. it treats the column as milliseconds -- TODO confirm against the logger spec.
TIMESTAMP_UNIT = "ms"

# Guard so the cell can be re-run: after the first run "Timestamp" is the index,
# no longer a column.
if "Timestamp" in df.columns:
    raw_timestamp = df["Timestamp"]
    if pd.api.types.is_numeric_dtype(raw_timestamp):
        parsed_timestamp = pd.to_datetime(raw_timestamp, unit=TIMESTAMP_UNIT)
    else:
        # already datetime-like or strings: let pandas parse it as-is
        parsed_timestamp = pd.to_datetime(raw_timestamp)
    df = df.assign(Timestamp=parsed_timestamp).set_index("Timestamp")

# Check frequency: how often each sampling interval occurs
print(df.index.to_series().diff().value_counts())

# Plot one variable over time
df['EEC1_Engine.EngSpeed'].plot(title="Engine Speed Over Time")

In [None]:
# Compute correlation matrix for numeric sensors.
# Restricting to numeric and boolean columns first keeps .corr() from raising on
# text columns (pandas >= 2.0 no longer drops them silently).
corr = df.select_dtypes(include=["number", "bool"]).corr()

fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr, cmap='coolwarm', center=0, ax=ax)
ax.set_title("Sensor Correlation Matrix")
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Longitude against latitude, one small dot per row
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(df['VP_RP_80.Longitude'], df['VP_RP_80.Latitude'], s=1)
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Vehicle GPS Trace')
plt.show()

In [None]:
# GPS trace coloured by wheel-based vehicle speed
# NOTE(review): the colour bar says m/s; confirm the unit of WheelBasedVehicleSpeed
fig, ax = plt.subplots()
speed_points = ax.scatter(
    df['VP_RP_80.Longitude'],
    df['VP_RP_80.Latitude'],
    c=df['CCVS1_Engine.WheelBasedVehicleSpeed'],
    s=2,
    cmap='viridis',
)
fig.colorbar(speed_points, ax=ax, label='Speed (m/s)')
plt.show()

In [None]:
# Columns whose name contains the weather message prefix
weather_cols = list(filter(lambda name: 'MS_Weather_Telematics' in name, df.columns))
print(df[weather_cols].describe())

# Temperature over the recording
temperature = df['MS_Weather_Telematics_Static.Temperature']
temperature.plot(title="Temperature Over Time")

In [None]:
# Treat columns with fewer than 10 distinct (non-null) values as categorical
# and print the value distribution of each
unique_counts = df.nunique()
cat_cols = unique_counts[unique_counts < 10].index.tolist()
for col in cat_cols:
    print(col, df[col].value_counts())

In [None]:
# Columns holding exactly one distinct non-null value.
# nunique() ignores nulls, so an all-null column counts 0 and is not listed here.
unique_counts = df.nunique()
constant_cols = unique_counts[unique_counts == 1].index.tolist()

len(constant_cols), constant_cols[:10]

In [None]:
# Columns with at most 3 distinct non-null values
unique_counts = df.nunique()
low_var_cols = unique_counts[unique_counts <= 3].index.tolist()

len(low_var_cols)

In [None]:
def top_values(col, n=5):
    """Return the `n` most frequent values of column `col` of `df`, with their counts."""
    return df[col].value_counts().head(n)


# Candidate placeholder values to look for (presumably "not available" codes --
# TODO confirm). Kept as strings, converted to numbers for matching.
suspects = ['255', '127', '501.99', '524287']
suspect_numbers = [float(v) for v in suspects]

# Absolute tolerance for the match, so 501.99 is still found in float32 columns
SUSPECT_TOLERANCE = 1e-3


def contains_suspect(values):
    """Return True if any entry of the Index `values` equals one of the suspect numbers.

    Entries are parsed as numbers, so 255, 255.0 and '255' all match; a plain
    string comparison would miss float columns, where 255.0 prints as '255.0'.
    Entries that are not numbers are ignored.
    """
    if values.empty:
        return False
    as_numbers = pd.to_numeric(pd.Series(values.astype(str)), errors="coerce")
    return any(
        ((as_numbers - target).abs() <= SUSPECT_TOLERANCE).any()
        for target in suspect_numbers
    )


# Print the three most common values of every column that contains a suspect value
for c in df.columns:
    vc = df[c].value_counts()
    if contains_suspect(vc.index):
        print(c, vc.head(3))

In [None]:
# TODO: once the placeholder values found by the scan are confirmed, mask them
# per column, e.g. (needs `import numpy as np`, which this notebook does not import yet):
# df[col] = df[col].replace({255: np.nan, 501.99: np.nan})

In [None]:
# Flag rows where the wheel-based speed is above 0.5, then show the share of each state
wheel_speed = df['CCVS1_Engine.WheelBasedVehicleSpeed']
df['moving'] = wheel_speed.gt(0.5)
df['moving'].value_counts(normalize=True)

In [None]:
# Vehicle speed, engine speed and fuel rate, one panel per signal
overview_cols = [
    'CCVS1_Engine.WheelBasedVehicleSpeed',
    'EEC1_Engine.EngSpeed',
    'LFE_Engine.EngFuelRate',
]
df[overview_cols].plot(subplots=True, figsize=(12, 6))

In [None]:
# Accelerator pedal position against demanded and actual engine torque
cols = ['EEC2_Engine.AccelPedalPos1', 'EEC1_Engine.DriversDemandEngPercentTorque', 'EEC1_Engine.ActualEngPercentTorque']

df.loc[:, cols].plot(figsize=(12, 4))

In [None]:
# Share of rows in each radar target-detection state
target_detected = df['ACC1_Radar.TargetDetected']
target_detected.value_counts(normalize=True)

In [None]:
# Distance and speed of the forward vehicle, only for rows where a target is detected
radar_cols = [
    'ACC1_Radar.DistanceToForwardVehicle',
    'ACC1_Radar.SpeedOfForwardVehicle',
]
target_seen = df['ACC1_Radar.TargetDetected'].eq(1)
df.loc[target_seen, radar_cols].describe()

In [None]:
# Radar distance to the forward vehicle across the recording
radar_distance = df['ACC1_Radar.DistanceToForwardVehicle']
radar_distance.plot(title='Radar Distance Over Time', figsize=(12, 4))

In [None]:
# Navigation-based speed against wheel-based speed, one small dot per row
fig, ax = plt.subplots()
ax.scatter(
    df['VDS_FC.NavigationBasedVehicleSpeed'],
    df['CCVS1_Engine.WheelBasedVehicleSpeed'],
    s=1,
)
ax.set_xlabel("GPS speed")
ax.set_ylabel("Wheel speed")
plt.show()

In [None]:
# Rows recorded while the brake switch reads 1
brake_is_on = df['CCVS1_Engine.BrakeSwitch'].eq(1)
braking = df.loc[brake_is_on]

# Speed and brake pedal position during those rows
braking_cols = ['CCVS1_Engine.WheelBasedVehicleSpeed', 'EBC1_Brakes.BrakePedalPos']
braking[braking_cols].plot()

In [None]:
# Rows where the accelerator pedal is above 5 while the brake switch reads 1
accel_pressed = df['EEC2_Engine.AccelPedalPos1'].gt(5)
brake_pressed = df['CCVS1_Engine.BrakeSwitch'].eq(1)
conflict = df.loc[accel_pressed & brake_pressed]

# Number of such rows and their share of the whole recording
n_conflict = len(conflict)
n_conflict, n_conflict / len(df)

In [None]:
# Rows where the brake switch reads 1 and actual engine torque is below 5
brake_pressed = df['CCVS1_Engine.BrakeSwitch'].eq(1)
torque_is_low = df['EEC1_Engine.ActualEngPercentTorque'].lt(5)
override = df.loc[brake_pressed & torque_is_low]

len(override)

In [None]:
# Gap between the torque the driver demands and the torque the engine delivers
demanded_torque = df['EEC1_Engine.DriversDemandEngPercentTorque']
actual_torque = df['EEC1_Engine.ActualEngPercentTorque']
df['torque_delta'] = demanded_torque.sub(actual_torque)

In [None]:
# Index entries of every row where the brake switch reads 1.
# The window arithmetic below assumes df is indexed by datetimes.
brake_events = df.loc[df['CCVS1_Engine.BrakeSwitch'] == 1].index

window = pd.Timedelta(seconds=5)  # ±5 seconds

pedal_torque_cols = [
    'EEC2_Engine.AccelPedalPos1',
    'EBC1_Brakes.BrakePedalPos',
    'EEC1_Engine.DriversDemandEngPercentTorque',
    'EEC1_Engine.ActualEngPercentTorque'
]

if brake_events.empty:
    # brake_events[0] would raise IndexError; say what happened instead
    print("No rows with CCVS1_Engine.BrakeSwitch == 1; nothing to plot.")
else:
    center = brake_events[0]

    # A boolean mask, unlike a label slice, also works when the timestamps are
    # not strictly increasing or the window edges are not exact index entries.
    in_window = (df.index >= center - window) & (df.index <= center + window)
    window_df = df.loc[in_window]

    window_df[pedal_torque_cols].plot(figsize=(12, 4))

In [None]:
# Accelerator pedal position against actual torque, coloured by brake switch state
fig, ax = plt.subplots()
ax.scatter(
    df['EEC2_Engine.AccelPedalPos1'],
    df['EEC1_Engine.ActualEngPercentTorque'],
    c=df['CCVS1_Engine.BrakeSwitch'],
    s=1,
)
ax.set_xlabel('Throttle (%)')
ax.set_ylabel('Actual Engine Torque (%)')
ax.set_title('Brake Override Behavior')
plt.show()

In [None]:
# Detect brake rising edge: rows where BrakeSwitch is exactly 1 higher than in the previous row
brake_on = df['CCVS1_Engine.BrakeSwitch'].diff().fillna(0) == 1

window = pd.Timedelta(seconds=1)  # look 1 second after brake press

# Only the torque signal is needed inside the loop. Sorting it by time once keeps
# the label slices below valid even if the recording has out-of-order timestamps.
torque = df['EEC1_Engine.ActualEngPercentTorque'].sort_index()

latencies = []

for idx in brake_on[brake_on].index:
    after = torque.loc[idx : idx + window]

    # rows in the window where actual torque is below 5
    torque_drop = after[after < 5]

    if not torque_drop.empty:
        # time from the brake press to the first low-torque row
        latencies.append(torque_drop.index[0] - idx)

# Explicit dtype so an empty result is still a timedelta Series
latencies = pd.Series(latencies, dtype="timedelta64[ns]")

latencies.describe()

In [None]:
# Actual engine torque for rows where a radar target is detected and the brake switch reads 1
target_and_brake = (
    df['ACC1_Radar.TargetDetected'].eq(1)
    & df['CCVS1_Engine.BrakeSwitch'].eq(1)
)
df.loc[target_and_brake, ['EEC1_Engine.ActualEngPercentTorque']].describe()