# Step 1: Parse input data into readable datasets

In [None]:
import os
import pandas as pd
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import TensorDataset, DataLoader, random_split

In [None]:

def load_vehicle_annotations(folder_path, save_encoded=False, filepath=None):
    """
    Reads KITTI-style label text files and extracts:
    xmin, xmax, ymin, ymax, label, and distance (z in camera coordinates).

    folder_path: directory containing one '.txt' label file per image
    save_encoded: when True, also write the encoded tensors to disk with torch.save
    filepath: destination of the saved file; defaults to
              '<folder_path>/vehicle_dataset.pt' when None

    Returns (df, features, labels_onehot, label_to_idx):
      df: DataFrame with columns file_id, label, xmin, ymin, xmax, ymax,
          distance, label_id (one row per kept annotation)
      features: float32 tensor [N, 5] with columns
          xmin, xmax, ymin, ymax, distance.
          NOTE: the last column is the distance itself, i.e. the regression
          target; split it off before using the rest as model input.
      labels_onehot: float32 tensor [N, num_classes]
      label_to_idx: dict mapping label name -> class index (labels sorted
          alphabetically)

    Raises ValueError when no usable annotation is found in folder_path.
    """
    records = []

    # sorted() is lexicographic; that equals numerical order only for
    # zero-padded names such as "000000.txt".
    files = sorted(f for f in os.listdir(folder_path) if f.endswith('.txt'))

    for file in files:
        file_id = os.path.splitext(file)[0]  # e.g. "000000"
        file_path = os.path.join(folder_path, file)

        with open(file_path, 'r') as f:
            for line in f:
                parts = line.split()
                if not parts:
                    continue  # blank line

                label = parts[0]
                if label == 'DontCare':
                    continue  # skip unlabeled/ignored regions

                records.append({
                    'file_id': file_id,
                    'label': label,
                    # bounding box coords
                    'xmin': float(parts[4]),
                    'ymin': float(parts[5]),
                    'xmax': float(parts[6]),
                    'ymax': float(parts[7]),
                    # z-coordinate = distance from camera
                    'distance': float(parts[13])
                })

    if not records:
        # Without this guard the label lookup below fails with an opaque
        # KeyError because an empty DataFrame has no columns at all.
        raise ValueError(f"No usable annotations found in {folder_path!r}")

    df = pd.DataFrame(records)

    # --- One-hot encode labels ---
    unique_labels = sorted(df['label'].unique())
    label_to_idx = {label: idx for idx, label in enumerate(unique_labels)}
    df['label_id'] = df['label'].map(label_to_idx)

    # Convert to integer indices first
    label_indices = torch.tensor(df['label_id'].values, dtype=torch.long)
    # Convert to one-hot encoding
    num_classes = len(unique_labels)
    labels_onehot = F.one_hot(label_indices, num_classes=num_classes).float()

    # --- Convert features to PyTorch tensors ---
    features = torch.tensor(
        df[['xmin', 'xmax', 'ymin', 'ymax', 'distance']].values, dtype=torch.float32
    )

    if save_encoded:
        # Resolve the destination once so the saved path and the reported
        # path can never disagree.
        if filepath is None:
            save_path = os.path.join(folder_path, "vehicle_dataset.pt")
        else:
            save_path = filepath
        torch.save({
            "features": features,
            "labels": labels_onehot,
            "label_map": label_to_idx
        }, save_path)
        print(f"Saved encoded dataset to {save_path}")

    return df, features, labels_onehot, label_to_idx


In [7]:

# Parse the label folder and cache the encoded tensors next to the dataset.
df, X, y, label_map = load_vehicle_annotations(
    "dataset/training/label_2",
    save_encoded=True,
    filepath="dataset/vehicle_dataset.pt",
)

# Quick sanity check: first rows, the label -> index mapping, tensor shapes.
print(df.head())
print(label_map)
print(X.shape, y.shape)


Saved encoded dataset to dataset/vehicle_dataset.pt
  file_id       label    xmin    ymin    xmax    ymax  distance  label_id
0  000000  Pedestrian  712.40  143.00  810.73  307.92      8.41         3
1  000001       Truck  599.41  156.40  629.75  189.25     69.44         6
2  000001         Car  387.63  181.54  423.81  203.12     58.49         0
3  000001     Cyclist  676.60  163.95  688.98  193.93     45.84         1
4  000002        Misc  804.79  167.34  995.43  327.94      8.55         2
{'Car': 0, 'Cyclist': 1, 'Misc': 2, 'Pedestrian': 3, 'Person_sitting': 4, 'Tram': 5, 'Truck': 6, 'Van': 7}
torch.Size([40570, 5]) torch.Size([40570, 8])


## Basic NN implementation

In [None]:

class DistanceRegressorOneHot(nn.Module):
    """
    Fully connected network: two ReLU-activated hidden layers of equal width
    followed by a single linear output unit (the predicted distance).
    """

    def __init__(self, input_dim, hidden_dim=64):
        """
        input_dim: total number of features including one-hot label
        hidden_dim: width of both hidden layers
        """
        super().__init__()
        # Layers are created in forward order and stored under `self.model`
        # so parameter names (model.0.*, model.2.*, model.4.*) stay stable.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),  # output: distance
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run x through the stacked layers; the last dimension of the result is 1."""
        prediction = self.model(x)
        return prediction

# --- Helper function to one-hot encode labels ---
def one_hot_encode(labels, num_classes):
    """
    Turn integer class IDs into float one-hot rows.

    labels: torch tensor of shape [N] with integer label IDs
    num_classes: width of the encoding
    returns: float tensor of shape [N, num_classes]
    """
    encoded = F.one_hot(labels, num_classes=num_classes)
    # F.one_hot yields an integer tensor; cast so it can be concatenated with
    # float features.
    return encoded.float()

# --- Training function ---
def train_distance_model_onehot(features, labels, num_classes, epochs=50, batch_size=64, lr=1e-3, distances=None):
    """
    Train a DistanceRegressorOneHot to predict distance from a 2-D bounding
    box plus the one-hot encoded class, using MSE loss and Adam.

    features: tensor of shape [N, 4] -> xmin, xmax, ymin, ymax.
              If `distances` is None, features must instead have shape [N, 5]
              with the distance as the last column (the layout returned by
              load_vehicle_annotations); that column is split off and used as
              the target so it is never fed to the model as an input.
    labels: tensor of shape [N] -> label_id as integer
    num_classes: total number of vehicle types
    epochs: number of passes over the training split
    batch_size: mini-batch size for both loaders
    lr: Adam learning rate
    distances: optional tensor (or array-like) of N distance targets

    Returns the trained model. An 80/20 random train/validation split is used
    and both losses are printed after every epoch.

    Raises ValueError if no distance target can be determined, if the number
    of targets does not match the number of samples, or if there are no
    samples.
    """
    if features.dim() != 2:
        raise ValueError("features must be a 2-D tensor of shape [N, num_features]")

    if distances is None:
        # The target must come from somewhere: previously the class IDs were
        # used as the regression target, so the model never saw a distance.
        if features.shape[1] != 5:
            raise ValueError(
                "distances was not given, so features must have 5 columns "
                "(xmin, xmax, ymin, ymax, distance)"
            )
        distances = features[:, 4]
        features = features[:, :4]

    # One-hot encode labels
    labels_onehot = one_hot_encode(labels, num_classes)

    # Concatenate bbox features + one-hot label
    X = torch.cat([features.float(), labels_onehot], dim=1)
    # MSELoss needs a float target with the same shape as the [N, 1] output.
    y = torch.as_tensor(distances, dtype=torch.float32).reshape(-1, 1)
    if y.shape[0] != X.shape[0]:
        raise ValueError(
            f"Got {y.shape[0]} distance targets for {X.shape[0]} samples"
        )

    # Dataset and split
    dataset = TensorDataset(X, y)
    if len(dataset) == 0:
        raise ValueError("Cannot train on an empty dataset")
    # Keep at least one training sample so the loss average is well defined.
    n_train = max(1, int(0.8 * len(dataset)))
    n_val = len(dataset) - n_train
    train_ds, val_ds = random_split(dataset, [n_train, n_val])

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=batch_size)

    # Initialize model
    model = DistanceRegressorOneHot(input_dim=X.shape[1])
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
            # criterion returns the batch mean; weight by batch size so the
            # epoch figure is a true per-sample average.
            train_loss += loss.item() * X_batch.size(0)
        train_loss /= n_train

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                preds = model(X_batch)
                val_loss += criterion(preds, y_batch).item() * X_batch.size(0)
        # Tiny datasets can leave the validation split empty.
        val_loss = val_loss / n_val if n_val else float("nan")

        print(f"Epoch {epoch+1:03d} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

    return model
