# Convert VidHOI baseline output to our format

In [None]:
import json
from pathlib import Path
from tqdm import tqdm

# JSON file with the VidHOI baseline results; this is the input that gets
# loaded and converted below. The path is relative to the working directory.
vidhoi_output_path = Path("../../VidHOI/output/SLOWFAST_32x2_R50_SHORT_SCRATCH_EVAL_GT_trajectory-toipool-spa_conf/all_results_vidor_checkpoint_epoch_00020.pyth_proposal_less-168-examples.json")

# Destination file for the converted results (written as JSON further down).
output_path = Path("../../runs/sttran_gaze_vidhoi/vidhoi_baseline/eval/all_results.json")


In [None]:
# Read the complete VidHOI result file and parse it as JSON.
vidhoi_results = json.loads(vidhoi_output_path.read_text())

# Show the first record so its structure can be inspected.
print(vidhoi_results[0])


In [None]:
def _convert_vidhoi_result(vidhoi_result: dict) -> dict:
    """Map one VidHOI result record onto our evaluation result schema.

    Args:
        vidhoi_result: One entry of the loaded VidHOI result list. It must
            provide the keys ``proposal_boxes``, ``proposal_classes``,
            ``proposal_scores``, ``preds_bbox_pair_ids``, ``preds_score``,
            ``gt_boxes``, ``gt_obj_classes``, ``gt_bbox_pair_ids`` and
            ``gt_action_labels``.

    Returns:
        A dict in our result format (see the inline key comments below).

    Raises:
        KeyError: If one of the expected keys is missing from the record.
    """
    # Boxes drop their first element and classes/scores keep only element 1;
    # presumably element 0 is a batch/sample index -- TODO confirm against
    # the VidHOI output format.
    bboxes = [box[1:] for box in vidhoi_result["proposal_boxes"]]
    pred_labels = [int(cls[1]) for cls in vidhoi_result["proposal_classes"]]
    confidences = [score[1] for score in vidhoi_result["proposal_scores"]]
    pair_idxes = vidhoi_result["preds_bbox_pair_ids"]
    interaction_distribution = vidhoi_result["preds_score"]
    bboxes_gt = [gt_box[1:] for gt_box in vidhoi_result["gt_boxes"]]
    labels_gt = [int(gt_cls[1]) for gt_cls in vidhoi_result["gt_obj_classes"]]
    # The ground-truth ids are simply the positions of the ground-truth boxes.
    ids_gt = list(range(len(bboxes_gt)))
    pair_idxes_gt = vidhoi_result["gt_bbox_pair_ids"]
    interactions_gt = vidhoi_result["gt_action_labels"]

    return {
        "bboxes": bboxes,  # detected bboxes, [x1, y1, x2, y2]
        "pred_labels": pred_labels,  # detected labels
        "confidences": confidences,  # detection confidences
        "pair_idxes": pair_idxes,  # all detected pairs
        "interaction_distribution": interaction_distribution,
        "bboxes_gt": bboxes_gt,  # ground-truth object bboxes
        "labels_gt": labels_gt,  # ground-truth object labels
        "ids_gt": ids_gt,  # ground-truth ids, important for anticipation
        "pair_idxes_gt": pair_idxes_gt,  # gt pair idxes
        "interactions_gt": interactions_gt,  # gt interactions
    }


# Convert every VidHOI record; tqdm only adds a progress bar to the iteration.
all_results = [_convert_vidhoi_result(record) for record in tqdm(vidhoi_results)]

In [None]:
# Serialize the converted results to the output file as JSON.
with open(output_path, mode="w") as results_file:
    json.dump(all_results, fp=results_file)

# Examine which 168 frames to delete

In [None]:
import json
import torch
from pathlib import Path

# Input files of the VidHOI annotation directory used in this section.
vidhoi_168_path = Path("/mnt/DATA/datasets/VidOR/VidHOI_annotation/val_instances_predictions_train_small_vidor_with_pseudo_labels.pth")
vidhoi_det_168_path = Path("/mnt/DATA/datasets/VidOR/VidHOI_annotation/det_val_frame_annots.json")
vidhoi_val_annotation_path = Path("/mnt/DATA/datasets/VidOR/VidHOI_annotation/val_frame_annots.json")

# NOTE(review): torch.load unpickles the file, so it should only ever be
# pointed at a trusted file.
pseudo_detections = torch.load(str(vidhoi_168_path))
# Both JSON files are read completely and then parsed.
all_detections = json.loads(vidhoi_det_168_path.read_text())
annotations = json.loads(vidhoi_val_annotation_path.read_text())



In [None]:
# Report the number of entries in each loaded collection, one per line.
for loaded_collection in (all_detections, pseudo_detections, annotations):
    print(len(loaded_collection))


In [None]:
# Unique image ids that occur in the pseudo-label detections.
set1 = {detection["image_id"] for detection in pseudo_detections}
# Unique keys of the detection annotation mapping.
set2 = set(all_detections.keys())

print(len(set1), len(set2), sep="\n")

# Ids found in the pseudo-label detections but missing from the detection
# annotation keys.
frames_to_removed = set1 - set2
print(len(frames_to_removed))
print(frames_to_removed)


In [None]:
# Collect the image id of every annotation whose middle frame is one of the
# frames identified for removal.
to_delete = set()
for anno in annotations:
    # The middle-frame timestamp is shifted by one before it is compared with
    # the detection image ids; presumably the timestamps are 0-based while the
    # image ids are 1-based -- TODO confirm against the annotation format.
    middle_frame_timestamp = anno['middle_frame_timestamp'] + 1
    video_prefix = f"{anno['video_folder']}/{anno['video_id']}"
    if f"{video_prefix}_{middle_frame_timestamp:06d}" not in frames_to_removed:
        continue
    to_delete.add(f"{video_prefix}_{anno['frame_id']}")

print(len(to_delete))
print(to_delete)

output_path = Path("../vidhoi_related/168_frames_to_remove.json")
with output_path.open("w") as f:
    # Set iteration order for strings varies between interpreter runs (hash
    # randomization), so the ids are sorted to keep the written file
    # reproducible.
    json.dump(sorted(to_delete), f)
    