# Step 1: Parse input data into readable datasets

In [3]:
import os
import pandas as pd
import torch

def load_vehicle_annotations(folder_path, save_encoded=False, filepath=None):
    """
    Read KITTI-style label text files into a DataFrame and PyTorch tensors.

    Every ``.txt`` file directly inside ``folder_path`` is parsed line by
    line. Blank lines and rows labelled ``DontCare`` are skipped. From each
    remaining row the label (field 0), the bounding box (fields 4-7) and the
    z camera coordinate (field 13, used as the distance) are extracted.

    Parameters
    ----------
    folder_path : str
        Directory containing the label ``.txt`` files.
    save_encoded : bool, default False
        When True, the tensors and the label map are written with
        ``torch.save``.
    filepath : str or None, default None
        Destination used when ``save_encoded`` is True. When None, the file
        ``vehicle_dataset.pt`` is written inside ``folder_path``.

    Returns
    -------
    df : pandas.DataFrame
        One row per kept annotation with columns ``file_id``, ``label``,
        ``xmin``, ``ymin``, ``xmax``, ``ymax``, ``distance`` and ``label_id``.
    features : torch.Tensor
        float32 tensor of shape (n, 5). NOTE: the column order is
        ``xmin, xmax, ymin, ymax, distance``, which differs from the
        DataFrame column order.
    labels : torch.Tensor
        int64 tensor of shape (n,) holding ``label_id``.
    label_to_idx : dict
        Maps each label string to its integer id (labels sorted
        alphabetically, ids assigned from 0).

    Raises
    ------
    IndexError
        If a non-blank, non-``DontCare`` row has fewer than 14 fields.
    ValueError
        If one of the extracted numeric fields cannot be parsed as a float.

    A folder without any usable annotation yields an empty DataFrame, empty
    tensors and an empty label map instead of failing.
    """
    columns = ['file_id', 'label', 'xmin', 'ymin', 'xmax', 'ymax', 'distance']
    records = []

    # Lexicographic sort; this equals numerical order only for zero-padded
    # file names such as "000000.txt".
    files = sorted(f for f in os.listdir(folder_path) if f.endswith('.txt'))

    for file in files:
        file_id = os.path.splitext(file)[0]  # e.g. "000000"
        file_path = os.path.join(folder_path, file)

        with open(file_path, 'r') as f:
            for line in f:
                parts = line.split()
                if not parts:
                    continue

                label = parts[0]
                if label == 'DontCare':
                    continue  # skip unlabeled/ignored regions

                records.append({
                    'file_id': file_id,
                    'label': label,
                    # bounding box coords
                    'xmin': float(parts[4]),
                    'ymin': float(parts[5]),
                    'xmax': float(parts[6]),
                    'ymax': float(parts[7]),
                    # z-coordinate = distance from camera
                    'distance': float(parts[13]),
                })

    # Passing the columns explicitly keeps the schema stable even when no
    # record was collected (otherwise df['label'] below raises KeyError).
    df = pd.DataFrame(records, columns=columns)

    # --- Encode labels as integers ---
    label_to_idx = {label: idx for idx, label in enumerate(sorted(df['label'].unique()))}
    df['label_id'] = df['label'].map(label_to_idx).astype('int64')

    # --- Convert to PyTorch tensors ---
    # Explicit numpy dtypes so that an empty (object-dtype) frame converts too.
    features = torch.tensor(
        df[['xmin', 'xmax', 'ymin', 'ymax', 'distance']].to_numpy(dtype='float64'),
        dtype=torch.float32,
    )
    labels = torch.tensor(df['label_id'].to_numpy(dtype='int64'), dtype=torch.long)

    if save_encoded:
        # Default to a file next to the label files when no path is given.
        target = os.path.join(folder_path, "vehicle_dataset.pt") if filepath is None else filepath
        torch.save({
            "features": features,
            "labels": labels,
            "label_map": label_to_idx
        }, target)
        print(f"Saved encoded dataset to {target}")

    return df, features, labels, label_to_idx


In [5]:

# Parse the training labels and store the encoded tensors at the given path.
df, X, y, label_map = load_vehicle_annotations(
    folder_path="dataset/training/label_2",
    save_encoded=True,
    filepath="dataset/vehicle_dataset.pt",
)

# Sanity check: first rows, then the label encoding, then the tensor shapes.
print(df.head(), label_map, sep="\n")
print(X.shape, y.shape)


Saved encoded dataset to dataset/vehicle_dataset.pt
  file_id       label    xmin    ymin    xmax    ymax  distance  label_id
0  000000  Pedestrian  712.40  143.00  810.73  307.92      8.41         3
1  000001       Truck  599.41  156.40  629.75  189.25     69.44         6
2  000001         Car  387.63  181.54  423.81  203.12     58.49         0
3  000001     Cyclist  676.60  163.95  688.98  193.93     45.84         1
4  000002        Misc  804.79  167.34  995.43  327.94      8.55         2
{'Car': 0, 'Cyclist': 1, 'Misc': 2, 'Pedestrian': 3, 'Person_sitting': 4, 'Tram': 5, 'Truck': 6, 'Van': 7}
torch.Size([40570, 5]) torch.Size([40570])
