In [1]:
import pickle
import cv2
import os
import pandas as pd
import numpy as np
import torch
from PIL import Image
from tqdm import tqdm
from collections import defaultdict

from utils.hazard import Hazard, get_area, select_most_common

# Load the public annotations into `anns`.
# NOTE: pickle can execute arbitrary code while loading, so this file must
# come from a trusted source.
with open("resources/annotations_public.pkl", mode="rb") as annotations_file:
    anns = pickle.load(annotations_file)

In [8]:
# Convert to Video -> Track -> Frame hierarchy.
# Result: data[video][track_id][frame] -> record holding the object's bbox,
# its area (via get_area), and the frame/video keys it was read from.
# Every video in `anns` gets an entry, even if none of its frames lists an
# object under "challenge_object".
data = {}
for video, frames_ann in anns.items():
    tracks = {}
    for frame, frame_ann in frames_ann.items():
        for det in frame_ann["challenge_object"]:
            # setdefault creates the per-track frame dict on first sight.
            tracks.setdefault(det["track_id"], {})[frame] = {
                "bbox": det["bbox"],
                "area": get_area(det["bbox"]),
                "frame": frame,
                "video": video,
            }
    data[video] = tracks


# Create hazard objects
# One Hazard per (video, track) pair, keyed by that tuple and constructed from
# the track's per-frame records in `data`.
# NOTE(review): the defaultdict(dict) container is kept as-is; looking up an
# unknown (video, track) key therefore yields {} instead of raising KeyError.
hazards = defaultdict(dict)
hazards.update(
    ((video, track), Hazard(video, track, track_frames))
    for video, tracks in data.items()
    for track, track_frames in tracks.items()
)


# Add captions to hazards
# NOTE: weights_only=False lets torch.load unpickle arbitrary Python objects,
# so the caption file must come from a trusted source.
cap_largest = torch.load(
    "resources/molmo-captions/molmo-obj-cap-largest.pkl", weights_only=False
)
for (video, track), hazard in hazards.items():
    # Whitespace-tokenise every caption stored for this (video, track).
    token_rows = [caption.split() for caption in cap_largest[video, track]]
    # Column-wise flatten: all first words, then all second words, and so on.
    # zip() stops at the shortest caption, so longer captions are truncated.
    interleaved = [word.lower() for column in zip(*token_rows) for word in column]
    # select_most_common comes from utils.hazard; presumably it keeps the k
    # most frequent tokens - TODO confirm against its implementation.
    hazard.caption_list = select_most_common(np.array(interleaved), k=5)

# Regroup the flat (video, track) -> Hazard mapping as video -> track -> Hazard.
hazards_remap = {}
for (video, track), hazard in hazards.items():
    hazards_remap.setdefault(video, {})[track] = hazard

# Parse CIFAR Data
# Re-key the loaded video -> frame -> [detections] structure as
# cls_data[(video, track_id)][frame] -> {"top10_probs", "top10_class"}.
# NOTE: weights_only=False lets torch.load unpickle arbitrary Python objects,
# so the file must come from a trusted source.
obj_cls = torch.load("resources/cifar-classification/all-dense.pkl", weights_only=False)
cls_data = {}
for video, frames_cls in obj_cls.items():
    for frame, detections in frames_cls.items():
        for det in detections:
            cls_data.setdefault((video, det["track_id"]), {})[frame] = {
                "top10_probs": det["top10_probs"],
                "top10_class": det["top10_class"],
            }


# Add CIFAR class data to hazards
# Copies the per-frame classifier output from `cls_data` onto each hazard's
# frame records under the keys "probs10" and "class10".
for (video, track), hazard in hazards.items():
    track_cls = cls_data[video, track]
    # Every frame of the track must have a classification entry, and vice versa.
    assert track_cls.keys() == hazard.frames.keys(), (
        f"frame mismatch between cls_data and hazard frames for {(video, track)}"
    )
    for frame, frame_data in hazard.frames.items():
        # Fix: these two fields were previously cross-assigned ("probs10" got
        # top10_class and "class10" got top10_probs).
        # NOTE(review): confirm that consumers in utils.hazard did not
        # compensate for the old swapped assignment.
        frame_data["probs10"] = track_cls[frame]["top10_probs"]
        frame_data["class10"] = track_cls[frame]["top10_class"]

### Submission

In [9]:
# Collect, per "<video>_<frame>" ID, one {"track", "name"} entry for every
# hazard whose `dangerous` attribute is truthy.
data_sub = {}
for video, video_hazard in tqdm(hazards_remap.items()):
    for track, hazard in video_hazard.items():
        frame_ids = [*hazard.frames.keys()]
        if not hazard.dangerous:
            continue
        for frame in frame_ids:
            data_sub.setdefault(f"{video}_{frame}", []).append(
                {"track": int(track), "name": hazard.caption}
            )


# Create submission
# One output row per template row. Hazards recorded in `data_sub` for that ID
# are spread across numbered Hazard_Track_<n> / Hazard_Name_<n> column pairs.
df_template = pd.read_csv("./submissions/results_driverchange_ensemble.csv")
sub_rows = []
# total= gives tqdm a real progress bar; iterrows() has no length of its own.
for row_idx, row in tqdm(df_template.iterrows(), total=len(df_template)):
    row_dict = {"ID": row["ID"], "Driver_State_Changed": row["Driver_State_Changed"]}
    # Distinct loop names on purpose: the previous inner loop rebound `i` and
    # the notebook-level `data` hierarchy, leaving stale state behind after
    # this cell ran.
    for slot, hazard_entry in enumerate(data_sub.get(row["ID"], ())):
        row_dict[f"Hazard_Track_{slot}"] = str(hazard_entry["track"])
        row_dict[f"Hazard_Name_{slot}"] = hazard_entry["name"]
    sub_rows.append(row_dict)

# fillna(" ") replaces every missing cell with a single space; this covers the
# hazard columns of rows that have fewer hazards than the widest row.
df_sub = pd.DataFrame(sub_rows).fillna(" ")
df_sub.to_csv("./submissions/final_submission-v0.1.csv", index=False)

100%|██████████| 200/200 [00:17<00:00, 11.40it/s]
55770it [00:02, 25410.88it/s]
