In [1]:
import pandas as pd

## Explore COCO HRNet person detections

COCO bounding box annotations are not normalized and are given in the form:

```json
[x_min, y_min, width, height]
```

In [2]:
# Reference detections file whose layout the converted YOLO output should mimic.
val_path = "data/coco2017/COCO_val2017_detections_AP_H_56_person.json"
val_boxes = pd.read_json(val_path)
display(val_boxes.head(5))

# Pull the bbox of the very first detection to show the raw list layout.
first_detection = val_boxes.iloc[0]
sample_bbox = first_detection["bbox"]
print(f"Sample bbox: {sample_bbox}")

Unnamed: 0,bbox,category_id,image_id,score
0,"[249.8199079291458, 175.21093805640606, 74.004...",1,532481,0.999274
1,"[284.07836701140275, 370.43747059580454, 13.84...",1,532481,0.983319
2,"[244.05099093550018, 170.61324112294565, 74.56...",1,532481,0.471152
3,"[285.7852415208732, 370.21400193144893, 10.915...",1,532481,0.427866
4,"[254.66838783379924, 177.22587387050734, 65.52...",1,532481,0.212189


Sample bbox: [249.8199079291458, 175.21093805640606, 74.00419360691592, 55.626325589288854]


## Transform YOLO detections to the same COCO HRNet format

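YOLO writes a separate .txt file for each image, where each line contains one detection. Note that the loader used below reads these files as space-separated values and only names the first five fields (no score column). The format is the following: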

```txt
class_id, x_centre, y_centre, width, height, score
```

Following the class id assignment of COCO, class 0 corresponds to 'person', which are the detections we are interested in. Bounding box coordinates are additionally normalized with respect to the image size.

In [3]:
from src.utils import load_img_info

# Per-image metadata for each split (the rendered sample shows file_name,
# height, width, image_id and split columns).
# NOTE(review): rm_extension=True presumably strips the file extension from
# file_name so it can be matched against detection file names - confirm in src.utils.
test_anns = load_img_info(
    "data/thermal_pose/coco_fmt/dataset/annotations/thermalPose_test.json",
    "test",
    rm_extension=True,
)
val_anns = load_img_info(
    "data/thermal_pose/coco_fmt/dataset/annotations/thermalPose_val.json",
    "val",
    rm_extension=True,
)

# Stack both splits (validation first) into a single lookup table.
merged_anns = pd.concat([val_anns, test_anns])
merged_anns.sample(5)

Unnamed: 0,file_name,height,width,image_id,split
36,thermal_11_23_40,480,640,36,test
18,thermal_10_9_1,480,640,18,test
70,FLIR_06170,146,157,70,val
193,FLIR_01825,259,395,193,val
101,thermal_11_9_29,480,640,101,test


Load the YOLOv7 detections into a DataFrame. This data will then be joined with the previous annotations.

In [11]:
import glob
import os

# Tag selecting which detection run to process: it is interpolated into the
# "{PRETRAIN}_pretrain" directory names used both for reading the YOLO label
# files and for writing the converted JSON files.
PRETRAIN = "gray"

def load_yolo_det(det_dir, split):
    """Load the YOLO person detections found in ``det_dir`` into one DataFrame.

    Every ``*.txt`` file directly inside ``det_dir`` is parsed as a
    space-separated, header-less table with the columns ``class_id``,
    ``x_centre``, ``y_centre``, ``width`` and ``height``. Each parsed row is
    tagged with ``file_name`` (the file's base name up to the first ".") and
    with the given ``split`` label.

    Parameters
    ----------
    det_dir : str
        Directory containing one detection ``.txt`` file per image.
    split : str
        Label stored in the ``split`` column of every returned row.

    Returns
    -------
    pandas.DataFrame
        Rows whose ``class_id`` equals 0 (person). When the directory holds no
        usable detection files, an empty frame with the same columns is
        returned instead of raising.
    """
    columns = ["class_id", "x_centre", "y_centre", "width", "height"]
    # glob returns files in a filesystem-dependent order; sorting keeps the
    # resulting row order reproducible between runs and machines.
    det_files = sorted(glob.glob(os.path.join(det_dir, "*.txt")))
    frames = []
    for file in det_files:
        # basename copes with the platform's path separator, unlike split("/").
        filename = os.path.basename(file).split(".")[0]
        try:
            det_df = pd.read_csv(file, sep=" ", header=None, names=columns)
        except pd.errors.EmptyDataError:
            # Some pandas versions may raise on a zero-byte file; a file
            # without rows simply carries no detections.
            continue
        if det_df.empty:
            continue
        det_df["file_name"] = filename
        frames.append(det_df)
    if not frames:
        # pd.concat([]) raises ValueError, so build the empty result explicitly.
        return pd.DataFrame(columns=columns + ["file_name", "split"])
    data = pd.concat(frames)
    data["split"] = split
    # we filter to person detections only
    return data[data["class_id"] == 0]

# Each split keeps its label files in its own directory.
val_labels_dir = f"data/thermal_pose/yolo_fmt/detections/{PRETRAIN}_pretrain/val/labels"
test_labels_dir = f"data/thermal_pose/yolo_fmt/detections/{PRETRAIN}_pretrain/test/labels"

val_det = load_yolo_det(val_labels_dir, "val")
test_det = load_yolo_det(test_labels_dir, "test")

# Stack both splits (validation first) into a single detections table.
merged_det = pd.concat([val_det, test_det])
merged_det.sample(5)

Unnamed: 0,class_id,x_centre,y_centre,width,height,file_name,split
2,0,0.814062,0.367188,0.3625,0.734375,IMG_0413,val
3,0,0.583984,0.525,0.353906,0.751042,IMG_2694,val
0,0,0.691016,0.518359,0.202344,0.534115,thermal_10_55_55,test
3,0,0.54918,0.51845,0.197001,0.314576,FLIR_01868,val
6,0,0.226416,0.650651,0.360449,0.621615,thermal_10_56_32,test


We now join both tables to associate an image id with each detection.

In [12]:
# Inner join on (file_name, split): only detections whose image appears in the
# annotations are kept. Both tables have "width" and "height" columns, so they
# are disambiguated with the "_ann" (image size) and "_det" (box size) suffixes.
crossed_data = merged_anns.merge(
    merged_det,
    how="inner",
    on=["file_name", "split"],
    suffixes=("_ann", "_det"),
)
crossed_data.sample(5)

Unnamed: 0,file_name,height_ann,width_ann,image_id,split,class_id,x_centre,y_centre,width_det,height_det
1324,FLIR_08491,179,210,173,val,0,0.418761,0.26445,0.42228,0.502294
652,00101,480,640,78,val,0,0.603906,0.550651,0.370313,0.686198
649,00101,480,640,78,val,0,0.653906,0.551172,0.225,0.614323
737,FLIR_03130,354,499,91,val,0,0.497687,0.073376,0.04219,0.118117
1892,thermal_11_37_30,480,640,39,test,0,0.743359,0.5,0.513281,1.0


In [13]:
def yolo_to_coco(data_row):
    """Convert one normalized centre-based box into a COCO-style box.

    ``data_row`` must provide, by key, the normalized detection values
    ``x_centre``, ``y_centre``, ``width_det`` and ``height_det`` together with
    the image size ``width_ann`` and ``height_ann``.

    Returns ``[x_min, y_min, width, height]`` scaled by the image size, with
    every value rounded to 6 decimals.
    """
    img_w = data_row["width_ann"]
    img_h = data_row["height_ann"]

    # Scale the box size first; the rounded size is reused for the corner so
    # the corner stays consistent with the reported width/height.
    box_w = round(data_row["width_det"] * img_w, 6)
    box_h = round(data_row["height_det"] * img_h, 6)

    # Move from the box centre to its top-left corner.
    left = round(data_row["x_centre"] * img_w - box_w / 2, 6)
    top = round(data_row["y_centre"] * img_h - box_h / 2, 6)

    return [left, top, box_w, box_h]

relevant_cols = ["bbox", "category_id", "image_id", "file_name", "split"]

# assign() works on a copy, so crossed_data itself is left untouched.
detections_with_bbox = crossed_data.assign(
    bbox=crossed_data.apply(yolo_to_coco, axis=1),
    # map from 0 to 1 class id
    category_id=1,
)
clean_det_data = detections_with_bbox[relevant_cols]
clean_det_data.head()

Unnamed: 0,bbox,category_id,image_id,file_name,split
0,"[116.900096, 58.099968, 130.699776, 393.099776]",1,0,frame_04126,val
1,"[378.4, 101.800192, 122.400256, 367.799808]",1,0,frame_04126,val
2,"[90.500096, 55.699712, 63.399936, 227.10016]",1,0,frame_04126,val
3,"[85.74976, 52.09984, 112.65024, 245.100032]",1,0,frame_04126,val
4,"[90.400256, 57.099776, 62.199808, 142.900224]",1,0,frame_04126,val


Finally, save the converted detections to disk

In [None]:
import os
import json

def save_clean_data(clean_data: pd.DataFrame, split, output_dir=None):
    """Write the detections of one split to a JSON file.

    Rows of ``clean_data`` whose ``split`` column equals ``split`` are
    selected, the ``split`` column is dropped, and the remaining rows are
    dumped as a JSON list of records to
    ``<output_dir>/thermalPose_yolov7_person_detections_<split>.json``.

    Parameters
    ----------
    clean_data : pandas.DataFrame
        Detections table; must contain a ``split`` column.
    split : str
        Split to export; also used in the output file name.
    output_dir : str, optional
        Destination directory, created (with parents) when missing. Defaults
        to ``data/thermal_pose/coco_fmt/detections/{PRETRAIN}_pretrain``,
        resolved from the module-level ``PRETRAIN`` at call time.
    """
    if output_dir is None:
        output_dir = f"data/thermal_pose/coco_fmt/detections/{PRETRAIN}_pretrain"
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(output_dir, exist_ok=True)
    subset_data = clean_data[clean_data["split"] == split].drop(columns="split")
    output_file = os.path.join(output_dir, f"thermalPose_yolov7_person_detections_{split}.json")
    records = subset_data.to_dict('records')
    with open(output_file, 'w') as out:
        json.dump(records, out, indent=4)

# Export one JSON file per split, validation first.
for split_name in ("val", "test"):
    save_clean_data(clean_det_data, split_name)