# Load and preprocess data

In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import mixed_precision
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout,Bidirectional,Conv1D,MaxPooling1D,TimeDistributed
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
import time, random,os

seed = 42
np.random.seed(seed)
random.seed(seed)
tf.random.set_seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)

# Enable mixed precision globally
mixed_precision.set_global_policy("mixed_float16")
print("Compute dtype:", tf.keras.mixed_precision.global_policy().compute_dtype)
print("Variable dtype:", tf.keras.mixed_precision.global_policy().variable_dtype)

# Load the trajectory data from the CSV file
df_main = pd.read_csv("/kaggle/input/57-61-tracks-with-multi-hot/57-61_tracks_with_multi_hot.csv")

# Select target columns (25–36 → Python index 25:37 since end is exclusive)
target_cols = df_main.columns[26:38]

# Remove rows where all target values are 0
df = df_main[~df_main[target_cols].eq(0).all(axis=1)].copy()

# Display the first 5 rows of the DataFrame
display(df.head())

# Display the information about the DataFrame
display(df.info())

del df_main

Compute dtype: float16
Variable dtype: float32


Unnamed: 0,frame,document_id,id,x,y,width,height,xVelocity,yVelocity,xAcceleration,...,Ego merging into an occupied lane,Ego vehicle approaching slower lead vehicle,Ego vehicle driving in lane without lead vehicle,Ego vehicle overtaking vehicle in adjacent lane,Ego vehicle performing lane change,Ego vehicle performing lane change with vehicle behind,Lead vehicle accelerating,Lead vehicle cruising,Lead vehicle decelerating,Vehicle overtaking ego vehicle
207,2,57,5,127.83,14.12,4.95,1.92,-27.08,-0.01,0.07,...,0,0,0,0,0,0,0,1,0,0
208,3,57,5,126.8,14.12,4.95,1.92,-27.07,-0.01,0.07,...,0,0,0,0,0,0,0,1,0,0
209,4,57,5,125.75,14.12,4.95,1.92,-27.07,-0.02,0.06,...,0,0,0,0,0,0,0,1,0,0
210,5,57,5,124.67,14.12,4.95,1.92,-27.07,-0.02,0.06,...,0,0,0,0,0,0,0,1,0,0
211,6,57,5,123.6,14.11,4.95,1.92,-27.06,-0.02,0.06,...,0,0,0,0,0,0,0,1,0,0


<class 'pandas.core.frame.DataFrame'>
Index: 1098500 entries, 207 to 1604466
Data columns (total 38 columns):
 #   Column                                                  Non-Null Count    Dtype  
---  ------                                                  --------------    -----  
 0   frame                                                   1098500 non-null  int64  
 1   document_id                                             1098500 non-null  int64  
 2   id                                                      1098500 non-null  int64  
 3   x                                                       1098500 non-null  float64
 4   y                                                       1098500 non-null  float64
 5   width                                                   1098500 non-null  float64
 6   height                                                  1098500 non-null  float64
 7   xVelocity                                               1098500 non-null  float64
 8   yVelocity      

None

In [6]:
# Condition: rows with negative xVelocity
if  (df["xVelocity"] < 0).any():
    mask = df["xVelocity"] < 0

    # Convert xVelocity to positive
    df.loc[mask, "xVelocity"] = -df.loc[mask, "xVelocity"]

    # Flip the sign of yVelocity, xAcceleration, yAcceleration, and precedingXVelocity
    cols_to_flip = ["yVelocity", "xAcceleration", "yAcceleration", "precedingXVelocity"]
    df.loc[mask, cols_to_flip] = -df.loc[mask, cols_to_flip]
else:
    print("No negative xVelocity found.")

# Check unique lane values
unique_lanes = df['laneId'].unique()

# Remap lanes based on number of unique values
if set(unique_lanes) == {2, 3, 5, 6}:
    mapping = {2: 6, 3: 5}  # map 2->6, 3->5
elif set(unique_lanes) == {2, 3, 4, 6, 7, 8}:
    mapping = {2: 8, 3: 7, 4: 6}  # map 2->8, 3->7, 4->6
else:
    mapping = {}  # leave unchanged if not matching expected sets
del unique_lanes
# Apply mapping
df['laneId'] = df['laneId'].replace(mapping)

In [7]:
###### # Define mutually exclusive scenario groups
group1 = [
    "Ego vehicle driving in lane without lead vehicle",
    "Lead vehicle accelerating",
    "Lead vehicle cruising",
    "Lead vehicle decelerating"
]
group2 = [
    "Ego merging into an occupied lane",
    # "Ego vehicle performing lane change",
    "Ego vehicle performing lane change with vehicle behind"
]

# Check if more than one scenario in each group is 1
mask_group1 = df[group1].sum(axis=1) > 1
mask_group2 = df[group2].sum(axis=1) > 1

# Combine masks
mask = mask_group1 | mask_group2
del mask_group1, mask_group2

# Compute transition boundaries (any scenario added or removed)
scenario_cols = df.columns[26:38]  # all scenario columns
y = df[scenario_cols].values
boundary = np.zeros(len(df), dtype=int)

for t in range(1, len(df)):
    if not np.array_equal(y[t], y[t-1]):
        boundary[t] = 1

# Create a new column with 1 if condition satisfied, else 0
df.insert(
    loc=df.columns.get_loc("laneId") + 1,  # insert after laneId
    column="valid_mask",                 # new column name
    value=(~mask).astype(int)                  # convert True/False to 1/0
)
# Add boundary column to DataFrame
df.insert(
    loc=df.columns.get_loc("valid_mask") + 1,  # insert after valid_mask
    column="boundary",                 # new column name
    value=boundary
)

del mask

**Reasoning**:
Based on the `df.info()` output, there are no missing values. Now, select relevant numerical features for the LSTM model and normalize them using StandardScaler.



In [4]:
# Select relevant numerical features for LSTM
features = ['x', 'y', 'xVelocity', 'yVelocity', 'xAcceleration', 'yAcceleration',
            'frontSightDistance', 'backSightDistance', 'dhw', 'thw', 'ttc']

# Extract features
X = df[features].copy()

# Create binary indicator surrounding vehicle indicators
vehicle_cols = ['precedingId','followingId','leftPrecedingId','rightPrecedingId',
                'leftFollowingId','rightFollowingId','leftAlongsideId','rightAlongsideId']
for col in vehicle_cols:
    X['has_'+col.replace('Id','')] = (df[col] > 0).astype(int)

# --- MODIFICATION START ---
# Add the unscaled precedingXVelocity feature to the DataFrame
X['precedingXVelocity'] = df['precedingXVelocity']

# Initialize the scalers, we will fit them later
scaler = StandardScaler()
scaler_prec = StandardScaler()

# Combine all *unscaled* features into X_scaled
# The order of columns is critical and must match the original
other_cols = [c for c in features]
X_scaled_other = X[other_cols].values # These are still unscaled
X_scaled = np.hstack([X_scaled_other, X[['precedingXVelocity'] + [c for c in X.columns if c.startswith('has_')]].values])
X_scaled = pd.DataFrame(X_scaled, index=df.index)
# --- MODIFICATION END ---

print("Shape of unscaled features:", X_scaled.shape)

# Define the sequence length (number of frames per sequence)
sequence_length = 25  # 25 frames per second
# Group the data by vehicle ID
grouped = df.groupby(['document_id','id'])
# grouped = df.groupby('id')
del df

# Create sequences of frames and corresponding labels
X_sequences, y_sequences, b_sequences, valid_sequences,doc_ids, veh_ids = [], [], [], [], [], []

# Define the target labels (scenario categories)
labels = list(scenario_cols)
del scenario_cols
test=0
# Create sequences for each vehicle
for (doc_id,vehicle_id), group in grouped:  # change here
    if len(group) < sequence_length:
        continue
    X_group = X_scaled.loc[group.index].values
    y_group = group[labels].values
    b_group = group['boundary'].values  
    valid_group = group['valid_mask'].values
    for i in range(len(X_group) - sequence_length + 1):
        X_sequences.append(X_group[i:i + sequence_length])
        y_sequences.append(y_group[i:i + sequence_length])
        b_sequences.append(b_group[i:i + sequence_length])
        valid_sequences.append(valid_group[i:i + sequence_length])
        doc_ids.append(doc_id)
        veh_ids.append(vehicle_id)
        
X_sequences = np.array(X_sequences)
y_sequences = np.array(y_sequences)
b_sequences = np.array(b_sequences) 
valid_sequences = np.array(valid_sequences)
doc_ids = np.array(doc_ids)
veh_ids = np.array(veh_ids)

# Display the shapes of the sequences and labels
print("Shape of X_sequences:", X_sequences.shape)
print("Shape of y_sequences:", y_sequences.shape)
print("Shape of valid_sequences:", valid_sequences.shape)

print("Shape of doc_id:", doc_ids.shape)
print("Shape of veh_id:", veh_ids.shape)

X_temp, X_test, y_temp, y_test,b_temp, b_test, valid_temp, valid_test, doc_temp, doc_test, veh_temp, veh_test = train_test_split(
    X_sequences, y_sequences, b_sequences, valid_sequences, doc_ids, veh_ids,  test_size=0.15, random_state=42
)
val_size = 0.15 / 0.85
X_train, X_val, y_train, y_val, b_train, b_val, valid_train, valid_val, doc_train, doc_val, veh_train, veh_val = train_test_split(
    X_temp, y_temp, b_temp, valid_temp, doc_temp, veh_temp, test_size=val_size, random_state=42
)

del X_temp, y_temp, b_temp, valid_temp, veh_temp, X_sequences # Free up memory

# --- NEW SCALING SECTION ---
# Scale *after* splitting to prevent data leakage

print("\nScaling data after split...")

# 1. Scale the main features (indices 0-10)
# Fit scaler *only* on the training data
scaler.fit(X_train[:, :, 0:11].reshape(-1, 11))

# Transform all datasets
X_train[:, :, 0:11] = scaler.transform(X_train[:, :, 0:11].reshape(-1, 11)).reshape(X_train.shape[0], sequence_length, 11)
X_val[:, :, 0:11] = scaler.transform(X_val[:, :, 0:11].reshape(-1, 11)).reshape(X_val.shape[0], sequence_length, 11)
X_test[:, :, 0:11] = scaler.transform(X_test[:, :, 0:11].reshape(-1, 11)).reshape(X_test.shape[0], sequence_length, 11)

# 2. Scale the 'precedingXVelocity' feature (index 11)
scaler_prec.fit(X_train[:, :, 11][X_train[:, :, 12] == 1].reshape(-1, 1))

# Transform all datasets
X_train[:, :, 11] = scaler_prec.transform(X_train[:, :, 11].reshape(-1, 1)).reshape(X_train.shape[0], sequence_length)
X_val[:, :, 11] = scaler_prec.transform(X_val[:, :, 11].reshape(-1, 1)).reshape(X_val.shape[0], sequence_length)
X_test[:, :, 11] = scaler_prec.transform(X_test[:, :, 11].reshape(-1, 1)).reshape(X_test.shape[0], sequence_length)

# 3. Set scaled 'precedingXVelocity' to 0 where no preceding vehicle exists
# This replicates the original np.nan_to_num(..., nan=0) logic
X_train[:, :, 11][X_train[:, :, 12] == 0] = 0
X_val[:, :, 11][X_val[:, :, 12] == 0] = 0
X_test[:, :, 11][X_test[:, :, 12] == 0] = 0

print("Scaling complete.")
print("Shape of X_train after scaling:", X_train.shape)
# print("Shape of X_val after scaling:", X_val.shape)
print("Shape of X_test after scaling:", X_test.shape)

Shape of unscaled features: (511099, 20)
Shape of X_sequences: (459964, 25, 20)
Shape of y_sequences: (459964, 25, 12)
Shape of valid_sequences: (459964, 25)
Shape of doc_id: (459964,)
Shape of veh_id: (459964,)

Scaling data after split...
Scaling complete.
Shape of X_train after scaling: (321974, 25, 20)
Shape of X_test after scaling: (68995, 25, 20)


**Reasoning**:
Define the LSTM model architecture suitable for sequence classification, including layers for feature extraction, LSTM layers for capturing temporal dependencies, and output layers for classification.

# Model defination and training
make sure to connect to GPU T4 if on kaggle.

In [14]:
try:
    del df, scenario_cols
except:
    pass

# y_train shape: (n_samples, n_labels)
n_samples, _, n_labels = y_train.shape

# count positives per label
pos_counts = np.sum(y_train, axis=0)
neg_counts = n_samples - pos_counts

# compute pos_weight for each label
pos_weights = neg_counts / (pos_counts + 1e-7)   # avoid division by zero
neg_weights = np.ones_like(pos_weights)

pos_counts = np.maximum(pos_counts, 1)
neg_counts = np.maximum(neg_counts, 1)# usually set to 1
class_weights = neg_counts / pos_counts

print("Positive weights per label:", pos_weights)

# Define the LSTM model
inputs = Input(shape=(sequence_length, X_train.shape[2]))
x = Bidirectional(LSTM(64, return_sequences=True))(inputs)
x = Bidirectional(LSTM(64,return_sequences=True))(x)
x = Bidirectional(LSTM(64,return_sequences=True))(x)
x = Bidirectional(LSTM(64,return_sequences=True))(x)

scenario_output = TimeDistributed(Dense(len(labels), activation='sigmoid', dtype='float32', name='scenario'))(x)
model = Model(inputs=inputs, outputs=scenario_output)


model.compile(
    optimizer='adamw',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()

Positive weights per label: [[271.39763111 189.29196216 708.19383244  34.82663848   1.85240702
    9.19518065  53.18613261 175.71459933  13.05263617   1.02921806
   14.19246921   8.79567374]
 [271.85932201 189.85595731 703.53829306  34.78681783   1.85291251
    9.17199002  53.00436095 176.10341033  12.98609965   1.03147142
   14.1317793    8.76270467]
 [273.72184298 190.19596198 701.99999985  34.75502499   1.85351933
    9.14410838  52.84180602 175.9087912   12.91838499   1.03393535
   14.07298347   8.73201548]
 [275.84780737 190.42330558 703.53829306  34.73914974   1.85364578
    9.12815351  52.63551557 175.42410958  12.85191877   1.03635375
   14.01604328   8.7021033 ]
 [278.7341442  191.10859187 705.08333318  34.72328858   1.85405051
    9.11479015  52.46629027 175.71459933  12.78490388   1.03864881
   13.95883665   8.67557172]
 [279.46515677 191.33811229 706.63516468  34.72725255   1.85450596
    9.08785287  52.34227965 175.9087912   12.71385978   1.04122077
   13.89723777   8.6486

**Reasoning**:
Train the LSTM model on the prepared data, including splitting the data into training and validation sets, and monitoring the training progress.

In [None]:
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

s = time.time()
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val), #valid_val
    epochs=100,
    batch_size=32,
    sample_weight=valid_train,  # mask ambiguous frames
    callbacks=[early_stop]
)
e = time.time()
print(f"Training finished. Elapsed time: {(e-s)/60:.2f} minutes")

In [None]:
model.save('/kaggle/working/ml_bi_large_v_4layers.keras')

# Evaluation model performance on the test file

### Run the below code block if want to pre-process the test file
Steps:
* Run first four code blocks with training data: '01-03_tracks_with_multi_hot.csv'
* In first code block, change file path to test file ('57-61_tracks_with_multi_hot.csv') 
* Directly run the below code blocks

In [None]:

# --- Step 1. Extract features ---
features = ['x', 'y', 'xVelocity', 'yVelocity', 'xAcceleration', 'yAcceleration',
            'frontSightDistance', 'backSightDistance', 'dhw', 'thw', 'ttc']

X_new = df[features].copy()

# --- Step 2. Add binary vehicle indicators ---
vehicle_cols = ['precedingId','followingId','leftPrecedingId','rightPrecedingId',
                'leftFollowingId','rightFollowingId','leftAlongsideId','rightAlongsideId']

for col in vehicle_cols:
    X_new['has_'+col.replace('Id','')] = (df[col] > 0).astype(int)

# --- Step 3. Add unscaled preceding velocity ---
X_new['precedingXVelocity'] = df['precedingXVelocity']

# --- Step 4. Construct consistent column order ---
other_cols = [c for c in features]
X_scaled_other = X_new[other_cols].values
X_scaled = np.hstack([
    X_scaled_other,
    X_new[['precedingXVelocity'] + [c for c in X_new.columns if c.startswith('has_')]].values
])
X_scaled = pd.DataFrame(X_scaled, index=df.index)

print("Shape of unscaled new features:", X_scaled.shape)

# --- Step 5. Group and create sequences ---
sequence_length = 25
grouped = df.groupby(['document_id', 'id'])

X_sequences_new, y_sequences_new, b_sequences_new, valid_sequences_new, doc_ids_new, veh_ids_new = [], [], [], [], [], []
labels = list(scenario_cols)  # reuse your label columns
gg=[]
for (doc_id, vehicle_id), group in grouped:   
    group = group.sort_values('frame')
    if len(group) < sequence_length:
        continue
    gg.append(group)
    X_group = X_scaled.loc[group.index].values
    y_group = group[labels].values
    b_group = group['boundary'].values
    valid_group = group['valid_mask'].values
    # print(len(X_group))
    for i in range(0,len(X_group) - sequence_length + 1,sequence_length):
        # print(i)
        X_sequences_new.append(X_group[i:i + sequence_length])
        y_sequences_new.append(y_group[i:i + sequence_length])
        b_sequences_new.append(b_group[i:i + sequence_length])
        valid_sequences_new.append(valid_group[i:i + sequence_length])
        doc_ids_new.append(doc_id)
        veh_ids_new.append(vehicle_id)
    # print("------")
    # print(len(X_sequences_new))
    # break

X_sequences_new = np.array(X_sequences_new)
y_sequences_new = np.array(y_sequences_new)
b_sequences_new = np.array(b_sequences_new)
valid_sequences_new = np.array(valid_sequences_new)
print("Shape of X_sequences_new (unscaled):", X_sequences_new.shape)
print("Shape of group (unscaled):", X_sequences_new.shape)


# --- Step 6. Apply previously fitted scalers ---
# Scale features [0:11]
X_sequences_new[:, :, 0:11] = scaler.transform(
    X_sequences_new[:, :, 0:11].reshape(-1, 11)
).reshape(X_sequences_new.shape[0], sequence_length, 11)

# Scale 'precedingXVelocity' [index 11]
X_sequences_new[:, :, 11] = scaler_prec.transform(
    X_sequences_new[:, :, 11].reshape(-1, 1)
).reshape(X_sequences_new.shape[0], sequence_length)

# Set precedingXVelocity=0 when no preceding vehicle
X_sequences_new[:, :, 11][X_sequences_new[:, :, 12] == 0] = 0

print("Scaling complete for new dataset.")
print("Shape of X_sequences_new after scaling:", X_sequences_new.shape)
n_samples, _, n_labels = y_train.shape

Rune below code block if want to load model, otherwise skip

In [6]:
from tensorflow.keras.models import load_model
model = load_model('/kaggle/input/ml-models/ml_bi_large_v_4layers.keras', compile=False)

2026-01-07 19:43:47.908789: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [11]:
from sklearn.metrics import f1_score, hamming_loss, classification_report

y_pred_proba_val = model.predict(X_val)
y_pred_proba_test = model.predict(X_test)

# Flatten for per-frame evaluation
y_val_flat = y_val.reshape(-1, n_labels)
y_pred_val_flat = y_pred_proba_val.reshape(-1, n_labels)
y_test_flat = y_test.reshape(-1, n_labels)
y_pred_test_flat = y_pred_proba_test.reshape(-1, n_labels)
mask_val_flat = valid_val.reshape(-1) == 1
mask_test_flat = valid_test.reshape(-1) == 1

# Clean masked frames
y_val_clean = y_val_flat[mask_val_flat]
y_pred_val_clean = y_pred_val_flat[mask_val_flat]

# Find optimal threshold per class
best_thresholds = []
for i in range(n_labels):
    best_f1, best_t = 0, 0.5
    for t in np.linspace(0.1, 0.9, 17):
        y_bin = (y_pred_val_clean[:, i] > t).astype(int)
        f1 = f1_score(y_val_clean[:, i], y_bin, zero_division=0)
        if f1 > best_f1:
            best_f1, best_t = f1, t
    best_thresholds.append(best_t)

print("Optimal thresholds per class:", best_thresholds)

[1m2157/2157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 51ms/step
[1m2157/2157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 50ms/step
Optimal thresholds per class: [0.65, 0.6, 0.5, 0.5, 0.4, 0.55, 0.5, 0.55, 0.4, 0.5, 0.6, 0.55]


In [12]:
# --- Predict probabilities ---
y_pred_proba_test = model.predict(X_sequences_new)

# --- Apply thresholds per sequence ---

y_pred_bin_seq = np.zeros_like(y_pred_proba_test)
for i, t in enumerate(best_thresholds):
    y_pred_bin_seq[:, :, i] = (y_pred_proba_test[:, :, i] > t).astype(int)

# --- Initialize accumulators ---
y_true_all, y_pred_all = [], []
 
y_true_all = y_sequences_new.reshape(-1, y_sequences_new.shape[-1]).astype(int)
y_pred_all = y_pred_bin_seq.reshape(-1, y_pred_bin_seq.shape[-1]).astype(int)

# --- Evaluate ---
print("Classification Report (sequence-level):")
print(classification_report(y_true_all, y_pred_all, target_names=labels))
print("Hamming Loss:", hamming_loss(y_true_all, y_pred_all))


[1m1303/1303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 49ms/step
Classification Report (sequence-level):


  _warn_prf(average, modifier, msg_start, len(result))


                                                        precision    recall  f1-score   support

                        Cut-in in front of ego vehicle       0.08      0.02      0.03     11033
                       Cut-out in front of ego vehicle       0.06      0.01      0.02     10652
                     Ego merging into an occupied lane       0.01      0.00      0.00      2332
           Ego vehicle approaching slower lead vehicle       0.44      0.50      0.47     30837
      Ego vehicle driving in lane without lead vehicle       0.89      0.80      0.84    423449
       Ego vehicle overtaking vehicle in adjacent lane       0.48      0.30      0.37    105009
                    Ego vehicle performing lane change       0.67      0.63      0.65     26696
Ego vehicle performing lane change with vehicle behind       0.45      0.26      0.33      7621
                             Lead vehicle accelerating       0.46      0.31      0.37    120802
                                 Lead v

In [13]:
from collections import defaultdict

dependent_pairs = {
    "Cut-in in front of ego vehicle": [
        "Lead vehicle accelerating",
        "Lead vehicle cruising",
        "Lead vehicle decelerating"
    ],
    "Cut-out in front of ego vehicle": [
        "Ego vehicle driving in lane without lead vehicle",
        "Lead vehicle cruising",
        "Lead vehicle decelerating",
        "Lead vehicle accelerating"
    ],
    "Ego merging into an occupied lane": [
        "Lead vehicle cruising",
        "Lead vehicle decelerating",
        "Lead vehicle accelerating"
    ],
    "Ego vehicle performing lane change": [
        "Ego vehicle driving in lane without lead vehicle"
    ],
    "Ego vehicle performing lane change with vehicle behind": [
        "Ego vehicle driving in lane without lead vehicle"
    ],
    "Ego vehicle driving in lane without lead vehicle": [
        "Ego vehicle approaching slower lead vehicle",
        "Lead vehicle accelerating",
        "Lead vehicle cruising",
        "Lead vehicle decelerating"
    ]
}

def compute_boundary_metrics_fast(y_true, y_pred, length=None):
    true_boundaries = np.where((y_true[1:] == 1) & (y_true[:-1] == 0))[0] + 1
    pred_boundaries = np.where((y_pred[1:] == 1) & (y_pred[:-1] == 0))[0] + 1
    if len(true_boundaries) == 0 or len(pred_boundaries) == 0:
        return np.nan
    diffs = []
    for tb in true_boundaries:
        idx = np.searchsorted(pred_boundaries, tb)
        cands = []
        if idx > 0: cands.append(abs(tb - pred_boundaries[idx - 1]))
        if idx < len(pred_boundaries): cands.append(abs(tb - pred_boundaries[idx]))
        diffs.append(min(cands))
    mabe = float(np.mean(diffs))
    if length is not None:
        return mabe / length  # <= normalization step
    return mabe

def pretty_transition_report(
    y_sequences,            # (N_seq, seq_len, n_labels) ground truth (0/1)
    y_pred_proba=None,      # (N_seq, seq_len, n_labels) probabilities OR None if y_pred_bin provided
    doc_ids=None,           # length N_seq, mapping sequence -> doc_id
    veh_ids=None,           # length N_seq, mapping sequence -> vehicle id
    labels=None,            # list of n_labels label names
    best_thresholds=None,   # list/array of per-class thresholds (used if y_pred_proba provided)
    y_pred_bin=None,        # optional precomputed binary predictions (N_seq, seq_len, n_labels)
    dependent_pairs=None,   # dictionary of dependent pairs as you provided
    match_tolerance_prev=1, # tolerance in frames for matching predicted prev_end to true prev_end (default ±1)
    allow_curr_offset=1     # allow curr_start == prev_end or prev_end + 1 (default 1)
):
    """Compute boundary MABE, missed scenario-per-vehicle, and transition MABE+missed per dependent pair.

    Returns dict with 'boundary_summary', 'missed_percent_vehicle', 'transition_summary', etc.
    """
    # Basic checks
    assert labels is not None, "Provide labels list"
    n_seq, seq_len, n_labels = y_sequences.shape

    # Build binary predictions if not provided
    if y_pred_bin is None:
        if y_pred_proba is None or best_thresholds is None:
            raise ValueError("Either y_pred_bin OR (y_pred_proba and best_thresholds) must be provided.")
        thr = np.array(best_thresholds).reshape(1, 1, -1)
        y_pred_bin = (y_pred_proba.astype(np.float32) > thr).astype(np.int32)
    else:
        y_pred_bin = y_pred_bin.astype(np.int32)

    # ensure y_sequences ints
    y_sequences = y_sequences.astype(np.int32)

    # 1) Build mapping (doc,veh) -> ordered list of sequence indices
    seqs_by_vehicle = defaultdict(list)
    for idx, (d, v) in enumerate(zip(doc_ids, veh_ids)):
        seqs_by_vehicle[(d, v)].append(idx)

    # 2) Boundary MABE per label averaged across vehicles
    boundary_results = defaultdict(list)
    for (d, v), seq_indices in seqs_by_vehicle.items():
        y_true_vehicle = np.concatenate([y_sequences[i] for i in seq_indices], axis=0)
        y_pred_vehicle = np.concatenate([y_pred_bin[i] for i in seq_indices], axis=0)
        for i, lab in enumerate(labels):
            mabe = compute_boundary_metrics_fast(y_true_vehicle[:, i], y_pred_vehicle[:, i],length=len(y_true_vehicle))
            if not np.isnan(mabe):
                boundary_results[lab].append(mabe)
    boundary_summary = {lab: float(np.mean(vals)) if len(vals) > 0 else np.nan
                        for lab, vals in boundary_results.items() for _ in [0]}
    # preserve order and fill missing labels
    boundary_summary = {lab: (boundary_summary.get(lab, np.nan)) for lab in labels}
    avg_mabe = float(np.nanmean([v for v in boundary_summary.values() if not np.isnan(v)])) if any(
        not np.isnan(v) for v in boundary_summary.values()) else np.nan

    # 3) Missed Scenario Detection Rate per vehicle
    missed_counts_vehicle = defaultdict(int)
    total_support_vehicle = defaultdict(int)
    for (d, v), seq_indices in seqs_by_vehicle.items():
        y_true_vehicle = np.concatenate([y_sequences[i] for i in seq_indices], axis=0)
        y_pred_vehicle = np.concatenate([y_pred_bin[i] for i in seq_indices], axis=0)
        for i, lab in enumerate(labels):
            if y_true_vehicle[:, i].sum() > 0:
                total_support_vehicle[lab] += 1
                if y_pred_vehicle[:, i].sum() == 0:
                    missed_counts_vehicle[lab] += 1
    missed_percent_vehicle = {lab: (100.0 * missed_counts_vehicle.get(lab, 0) / total_support_vehicle.get(lab, 1))
                              if total_support_vehicle.get(lab, 0) > 0 else np.nan for lab in labels}

    # Simplified per-vehicle transition evaluation WITHOUT match_tolerance_prev
    label2idx = {l: i for i, l in enumerate(labels)}
    transition_results = defaultdict(list)   # per-pair list of per-instance distances
    transition_support = defaultdict(int)    # number of true transition instances (actual)
    missed_transitions = defaultdict(int)    # number of true instances missed
    
    allow_curr_offset = 1  # allow predicted curr_start == pred_prev_end or pred_prev_end + 1
    
    def ends(idx_seq):   # 1->0 ends, returns indices of the frame after end
        return np.where((idx_seq[:-1] == 1) & (idx_seq[1:] == 0))[0] + 1
    
    def starts(idx_seq): # 0->1 starts, returns indices
        return np.where((idx_seq[1:] == 1) & (idx_seq[:-1] == 0))[0] + 1
    
    # loop per vehicle
    for (d, v), seq_indices in seqs_by_vehicle.items():
        # reconstruct whole vehicle timeline
        y_true_vehicle = np.concatenate([y_sequences[i] for i in seq_indices], axis=0)
        y_pred_vehicle = np.concatenate([y_pred_bin[i] for i in seq_indices], axis=0)
        vehicle_len = len(y_true_vehicle)
    
        for prev_label, next_labels in (dependent_pairs or {}).items():
            if prev_label not in label2idx:
                continue
            p_idx = label2idx[prev_label]
            pred_prev_end = ends(y_pred_vehicle[:, p_idx])
    
            for curr_label in next_labels:
                if curr_label not in label2idx:
                    continue
                c_idx = label2idx[curr_label]
    
                true_prev_end = ends(y_true_vehicle[:, p_idx])
                true_curr_start = starts(y_true_vehicle[:, c_idx])
                pred_curr_start = starts(y_pred_vehicle[:, c_idx])
    
                # build predicted pairs in vehicle: (pred_prev_end -> pred_curr_start) where curr==prev or prev+1
                pred_pairs_curr = []
                if len(pred_prev_end) > 0 and len(pred_curr_start) > 0:
                    for p in pred_prev_end:
                        matches = pred_curr_start[(pred_curr_start == p) | (pred_curr_start == p + allow_curr_offset)]
                        if len(matches) > 0:
                            pred_pairs_curr.extend(list(matches))  # collect predicted curr_starts
    
                # For each true prev_end that actually has a following true curr_start (0 or +1)
                for te in true_prev_end:
                    if not (np.any(true_curr_start == te) or np.any(true_curr_start == te + 1)):
                        continue  # not a true prev->curr instance
    
                    transition_support[(prev_label, curr_label)] += 1
    
                    if len(pred_pairs_curr) == 0:
                        # no predicted dependent pairs anywhere in this vehicle -> missed
                        missed_transitions[(prev_label, curr_label)] += 1
                        continue
    
                    # choose the true curr_start for this instance (prefer te, else te+1)
                    true_cs_cands = true_curr_start[(true_curr_start == te) | (true_curr_start == te + 1)]
                    if len(true_cs_cands) > 0:
                        true_cs = int(true_cs_cands[0])
                    else:
                        # fallback: nearest true curr start
                        true_cs = int(true_curr_start[np.argmin(np.abs(true_curr_start - te))]) if len(true_curr_start) > 0 else None
    
                    if true_cs is None:
                        missed_transitions[(prev_label, curr_label)] += 1
                        continue
    
                    # compute distance to nearest predicted curr_start among vehicle's predicted pairs
                    pred_cs_arr = np.array(pred_pairs_curr)
                    best_dist = np.min(np.abs(pred_cs_arr - true_cs)) / vehicle_len 
                    transition_results[(prev_label, curr_label)].append(best_dist)
    
    # Final per-pair MABE (mean of per-instance distances)
    transition_summary = {
        pair: float(np.mean(dists)) if len(dists) > 0 else 0
        for pair, dists in transition_results.items()
    }

    # compute missed rates and print results
    print("\nBoundary metrics per class (averaged across vehicles):")
    for lab in labels:
        mabe = boundary_summary.get(lab, np.nan)
        if np.isnan(mabe):
            print(f"{lab}: MABE = n/a")
        else:
            print(f"{lab}: MABE = {mabe:.2f}")
    print(f"\nAverage MABE across all classes: {avg_mabe:.4f}\n")

    print("Missed Scenario Detection Rates (% of vehicles where class was present but never predicted):")
    for lab in labels:
        pct = missed_percent_vehicle.get(lab, np.nan)
        if np.isnan(pct):
            print(f"{lab}: n/a")
        else:
            print(f"{lab}: {pct:.2f}% missed (support: {total_support_vehicle.get(lab,0)})")
    avg_miss_rate = float(np.nanmean([v for v in missed_percent_vehicle.values() if not np.isnan(v)])) if any(not np.isnan(v) for v in missed_percent_vehicle.values()) else np.nan
    print(f"\nAverage Miss Rate Across All Classes (per vehicle): {avg_miss_rate:.2f}%\n")

    # overall stats
    avg_transition_mabe = float(np.nanmean([v for v in transition_summary.values() if not np.isnan(v)])) if len(transition_summary) > 0 else np.nan
    total_transitions = sum(transition_support.values())
    total_missed = sum(missed_transitions.values())
    overall_miss_rate = (total_missed / total_transitions * 100) if total_transitions > 0 else 0.0

    # --- 5) Summarize transition metrics ---
    print("\nTransition-based MABE (evaluated per vehicle):")
    for prev_label, next_labels in dependent_pairs.items():
        for curr_label in next_labels:
            pair = (prev_label, curr_label)
            total = transition_support.get(pair, 0)
            if total == 0:
                continue  # skip transitions that never occurred in ground truth
    
            missed = missed_transitions.get(pair, 0)
            mabe_vals = transition_results.get(pair, [])
            mean_mabe = np.mean(mabe_vals) if len(mabe_vals) > 0 else np.nan
            missed_rate = (missed / total * 100) if total > 0 else 0.0
    
            print(f"{prev_label} -> {curr_label}: "
                  f"MABE = {mean_mabe:.2f} | Support = {total} | "
                  f"Missed = {missed} ({missed_rate:.1f}%)")
    if any(v > 0 for v in transition_support.values()):
        avg_mabe = np.nanmean([np.mean(v) for v in transition_results.values() if len(v) > 0])
        total_transitions = sum(transition_support.values())
        total_missed = sum(missed_transitions.values())
        avg_miss_rate = (total_missed / total_transitions * 100) if total_transitions > 0 else 0
        print(f"\nAverage Transition MABE Across All Pairs: {avg_mabe:.4f}")
        print(f"Overall Missed Transition Detection Rate (per vehicle): {avg_miss_rate:.2f}%")
    else:
        print("\n⚠️ No valid transitions detected in the test set.")

    # Return structured results
    return {
        "boundary_summary": boundary_summary,
        "avg_mabe": avg_mabe,
        "missed_percent_vehicle": missed_percent_vehicle,
        "avg_miss_rate_vehicle": avg_miss_rate if 'avg_miss_rate' in locals() else avg_miss_rate,
        "support_counts_vehicle": {lab: total_support_vehicle.get(lab, 0) for lab in labels},
        "missed_counts_vehicle": {lab: missed_counts_vehicle.get(lab, 0) for lab in labels},
        "transition_summary": transition_summary,
        "transition_support": dict(transition_support),
        "missed_transitions": dict(missed_transitions),
        "avg_transition_mabe": avg_transition_mabe,
        "overall_miss_rate": overall_miss_rate
    }

# -------------------------------
# Usage (plug your variables):
# -------------------------------
result = pretty_transition_report(
    y_sequences=y_sequences_new,
    y_pred_proba=y_pred_proba_test,
    doc_ids=doc_ids_new,
    veh_ids=veh_ids_new,
    labels=labels,
    best_thresholds=best_thresholds,
    y_pred_bin=y_pred_bin_seq,            # if you already have binary preds; else set None and function will threshold
    dependent_pairs=dependent_pairs
)



Boundary metrics per class (averaged across vehicles):
Cut-in in front of ego vehicle: MABE = 0.22
Cut-out in front of ego vehicle: MABE = 0.14
Ego merging into an occupied lane: MABE = 0.44
Ego vehicle approaching slower lead vehicle: MABE = 0.19
Ego vehicle driving in lane without lead vehicle: MABE = 0.17
Ego vehicle overtaking vehicle in adjacent lane: MABE = 0.12
Ego vehicle performing lane change: MABE = 0.07
Ego vehicle performing lane change with vehicle behind: MABE = 0.10
Lead vehicle accelerating: MABE = 0.21
Lead vehicle cruising: MABE = 0.18
Lead vehicle decelerating: MABE = 0.21
Vehicle overtaking ego vehicle: MABE = 0.09

Average MABE across all classes: 0.1787

Missed Scenario Detection Rates (% of vehicles where class was present but never predicted):
Cut-in in front of ego vehicle: 90.00% missed (support: 130)
Cut-out in front of ego vehicle: 91.94% missed (support: 124)
Ego merging into an occupied lane: 95.00% missed (support: 20)
Ego vehicle approaching slower lea