In [1]:
import pandas as pd

## Explore COCO HRNet person detections

COCO bounding-box annotations are not normalized (they are expressed in absolute pixel coordinates) and have the form:

```json
[x_min, y_min, width, height]
```

In [6]:
# Load the COCO val2017 person detections and preview the first rows.
val_path = "data/coco2017/COCO_val2017_detections_AP_H_56_person.json"
val_boxes = pd.read_json(val_path)
display(val_boxes.head(5))

# Print one raw bounding box in full, since the table preview truncates it.
sample_bbox = val_boxes["bbox"].iloc[0]
print(f"Sample bbox: {sample_bbox}")

Unnamed: 0,bbox,category_id,image_id,score
0,"[249.8199079291458, 175.21093805640606, 74.004...",1,532481,0.999274
1,"[284.07836701140275, 370.43747059580454, 13.84...",1,532481,0.983319
2,"[244.05099093550018, 170.61324112294565, 74.56...",1,532481,0.471152
3,"[285.7852415208732, 370.21400193144893, 10.915...",1,532481,0.427866
4,"[254.66838783379924, 177.22587387050734, 65.52...",1,532481,0.212189


Sample bbox: [249.8199079291458, 175.21093805640606, 74.00419360691592, 55.626325589288854]


## Transform YOLO Detections to same COCO HRNet format

YOLO assigns an independent .txt file for each image, where each line contains a different detection. The format is the following:

```txt
class_id x_centre y_centre width height score
```

Following the COCO class ordering used by YOLO, class 0 corresponds to 'person', which is the only class we are interested in. Bounding box coordinates are also normalized with respect to the image size, i.e. expressed as fractions of the image width and height.

In [47]:
import json

def load_ann_file(ann_path):
    """Read a JSON annotation file and return its parsed content.

    Args:
        ann_path: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for that file.
    """
    with open(ann_path) as ann_file:
        return json.load(ann_file)

def load_anns(ann_path, split):
    """Build a per-image metadata table from an annotation JSON file.

    Args:
        ann_path: Path to a JSON file containing an "images" list whose
            entries provide "id", "file_name", "height" and "width".
        split: Label stored in the "split" column of every returned row.

    Returns:
        DataFrame with the columns file_name, height, width, image_id, split.
        "file_name" keeps only the text before the first "." of the original
        name, and "image_id" is the original "id" field.
    """
    image_table = (
        pd.DataFrame(load_ann_file(ann_path)["images"])
        .rename(columns={"id": "image_id"})
        .assign(split=split)
    )
    # Drop everything from the first dot onwards (extension included).
    image_table["file_name"] = image_table["file_name"].str.split(".").str[0]
    output_columns = ["file_name", "height", "width", "image_id", "split"]
    return image_table[output_columns]

# Image metadata for each split; image ids come from each split's own file.
train_anns = load_anns(
    "data/thermal_pose/dataset/annotations/thermalPose_train.json", "train"
)
val_anns = load_anns(
    "data/thermal_pose/dataset/annotations/thermalPose_val.json", "val"
)
test_anns = load_anns(
    "data/thermal_pose/dataset/annotations/thermalPose_test.json", "test"
)

# Stack the three splits into a single table (train, val, test order).
merged_anns = pd.concat([train_anns, val_anns, test_anns])
merged_anns.sample(5)

Unnamed: 0,file_name,height,width,image_id,split
243,FLIR_02103,359,343,243,train
98,thermal_10_3_29,480,640,98,test
108,02_00202,387,492,108,train
322,FLIR_04857,340,413,322,train
20,FLIR_02893,512,640,20,val


Load the YOLOv7 detections into a dataframe. These data will then be joined with the annotations loaded above.

In [57]:
import glob
import os

def load_yolo_det(det_dir, split):
    """Load the YOLO person detections stored as .txt files in a directory.

    Every ``*.txt`` file in ``det_dir`` is parsed as a space-separated table
    with the columns class_id, x_centre, y_centre, width, height, score.

    Args:
        det_dir: Directory containing one detection .txt file per image.
        split: Label stored in the "split" column of every returned row.

    Returns:
        DataFrame with the six detection columns plus "file_name" (the file's
        base name up to its first ".") and "split", restricted to rows whose
        class_id is 0 (person). Each file's own row index is preserved.

    Raises:
        ValueError: If ``det_dir`` contains no ``*.txt`` file.
    """
    det_columns = ["class_id", "x_centre", "y_centre", "width", "height", "score"]
    # Sorted so the row order does not depend on the filesystem listing order.
    det_files = sorted(glob.glob(os.path.join(det_dir, "*.txt")))
    if not det_files:
        # Fail with an actionable message instead of pandas' generic
        # "No objects to concatenate" (typically a mistyped directory).
        raise ValueError(f"No YOLO detection files (*.txt) found in '{det_dir}'")

    frames = []
    for det_path in det_files:
        # os.path.basename handles both "/" and the platform separator, so the
        # name is also correct for the backslash paths glob returns on Windows.
        # The stem is cut at the first "." to stay consistent with the
        # file_name key built by load_anns.
        filename = os.path.basename(det_path).split(".")[0]
        det_df = pd.read_csv(det_path, sep=" ", header=None, names=det_columns)
        det_df["file_name"] = filename
        frames.append(det_df)

    data = pd.concat(frames)
    data["split"] = split
    # we filter to person detections only
    relevant_data = data[data["class_id"] == 0]
    return relevant_data

# Person detections for each split, read from the per-image YOLO label files.
train_det = load_yolo_det(
    "data/thermal_pose/yolo-detections/train/labels", "train"
)
val_det = load_yolo_det(
    "data/thermal_pose/yolo-detections/val/labels", "val"
)
test_det = load_yolo_det(
    "data/thermal_pose/yolo-detections/test/labels", "test"
)

# Stack the three splits into a single table (train, val, test order).
merged_det = pd.concat([train_det, val_det, test_det])
merged_det.sample(5)

Unnamed: 0,class_id,x_centre,y_centre,width,height,score,file_name,split
1,0,0.4625,0.446875,0.128125,0.535417,0.388417,IMG_2629,train
0,0,0.3875,0.554237,0.253571,0.416949,0.886983,FLIR_05887,train
2,0,0.391406,0.505208,0.064062,0.310417,0.839487,thermal_10_6_44,test
5,0,0.635156,0.24707,0.064062,0.177734,0.429736,FLIR_03181,train
1,0,0.535156,0.577083,0.248437,0.675,0.500166,IMG_6804,train


We now join both tables to associate an image id with each detection.

In [58]:
# Inner join on (file_name, split): keeps only detections whose image appears
# in the annotations of the same split. Both tables have "width"/"height"
# columns, so they are disambiguated with the _ann / _det suffixes.
crossed_data = merged_anns.merge(
    merged_det,
    how="inner",
    on=["file_name", "split"],
    suffixes=("_ann", "_det"),
)
crossed_data.sample(5)

Unnamed: 0,file_name,height_ann,width_ann,image_id,split,class_id,x_centre,y_centre,width_det,height_det,score
982,IMG_2177,480,640,172,val,0,0.842188,0.497917,0.309375,0.991667,0.364075
187,FLIR_03450,512,640,151,train,0,0.941406,0.535156,0.098437,0.328125,0.891741
1171,thermal_11_38_31,480,640,82,test,0,0.850781,0.620833,0.129688,0.3,0.877833
375,frame_00660,512,512,307,train,0,0.329102,0.34375,0.205078,0.503906,0.937869
766,FLIR_video_00004,340,508,5,val,0,0.952756,0.470588,0.043307,0.247059,0.864013


In [60]:
def yolo_to_coco(data_row):
    """Convert one normalized YOLO box into a COCO-style pixel box.

    Args:
        data_row: Mapping-like row providing "x_centre", "y_centre",
            "width_det" and "height_det" (normalized box) together with
            "width_ann" and "height_ann" (the image size used for scaling).

    Returns:
        ``[x_min, y_min, width, height]`` with every value rounded to six
        decimals.
    """
    img_w = data_row["width_ann"]
    img_h = data_row["height_ann"]

    # Scale the normalized box size by the image size.
    box_w = round(data_row["width_det"] * img_w, 6)
    box_h = round(data_row["height_det"] * img_h, 6)

    # Move from the box centre to its top-left corner.
    left = round(data_row["x_centre"] * img_w - box_w / 2, 6)
    top = round(data_row["y_centre"] * img_h - box_h / 2, 6)
    return [left, top, box_w, box_h]

# Columns kept in the final detection table.
relevant_cols = ["bbox", "category_id", "image_id", "score", "split"]

# assign() returns a new frame, so crossed_data itself is left untouched.
# Every row is a person detection (YOLO class 0), stored here as category 1.
clean_det_data = crossed_data.assign(
    bbox=crossed_data.apply(yolo_to_coco, axis=1),
    category_id=1,
)[relevant_cols]
clean_det_data.head()

Unnamed: 0,bbox,category_id,image_id,score,split
0,"[18.00006, 18.0, 60.99996, 106.0]",1,1,0.468904,train
1,"[82.000116, 55.000064, 87.00003, 296.999936]",1,2,0.926724,train
2,"[25.000047, 38.000128, 59.00013, 202.999808]",1,2,0.940262,train
3,"[219.00024, 52.0, 77.999994, 242.999808]",1,2,0.9407,train
4,"[201.000105, 90.000012, 29.00001, 93.00004]",1,3,0.363022,train


Finally, save annotations to disk

In [66]:
import os
import json

def save_clean_data(clean_data: pd.DataFrame, split,
                    output_dir="data/thermal_pose/dataset/annotations"):
    """Write the detections of one split to a JSON file.

    The rows of ``clean_data`` whose "split" column equals ``split`` are
    written, without the "split" column, as a JSON list of records to
    ``<output_dir>/thermalPose_yolov7_person_detections_<split>.json``.

    Args:
        clean_data: Detection table containing a "split" column.
        split: Split to export; also used in the output file name.
        output_dir: Destination directory. Defaults to the dataset's
            annotation directory and is created if it does not exist.
    """
    subset_data = clean_data[clean_data["split"] == split].drop(columns="split")
    # Create the destination up front so a fresh checkout does not fail with
    # FileNotFoundError when the file is opened for writing.
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, f"thermalPose_yolov7_person_detections_{split}.json")
    records = subset_data.to_dict('records')
    with open(output_file, 'w') as out:
        json.dump(records, out, indent=4)

# Write one detection file per split, in train / val / test order.
for split_name in ("train", "val", "test"):
    save_clean_data(clean_det_data, split_name)