In [None]:
import pandas as pd
import numpy as np
from ensemble_boxes import *


def coor_normalize(x,y):
    """Return a new list holding every element of ``x`` divided by ``y``."""
    scaled = []
    for value in x:
        scaled.append(value / y)
    return scaled
    
def get_all(sub):
    """Parse ``PredictionString`` into per-image lists of classes, scores and boxes.

    Each ``PredictionString`` is read as whitespace-separated tokens in
    repeating groups of six: ``class score x_min y_min x_max y_max``.
    Classes are converted with ``int``, everything else with ``float``, and
    the four coordinates are additionally divided by 10000.

    Blank strings and missing values (``NaN``/``None``) are treated as
    "no detections" and yield empty lists in every column.

    Parameters
    ----------
    sub : pandas.DataFrame
        Frame with a ``PredictionString`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``sub`` object, with list-valued columns ``class``,
        ``scores``, ``x_min``, ``y_min``, ``x_max`` and ``y_max`` added
        (or overwritten).
    """
    coord_scale = 10000  # fixed factor every coordinate is divided by

    def _tokens(prediction):
        # Non-string cells (NaN/None) carry no detections. split() without an
        # argument also tolerates repeated or trailing spaces, which
        # split(" ") would turn into empty tokens that int()/float() reject.
        return prediction.split() if isinstance(prediction, str) else []

    # Tokenise every row once instead of re-splitting the string per column.
    tokens = sub["PredictionString"].apply(_tokens)

    sub["class"] = tokens.apply(lambda t: [int(v) for v in t[0::6]])
    sub["scores"] = tokens.apply(lambda t: [float(v) for v in t[1::6]])

    # Coordinates sit at offsets 2..5 inside each group of six tokens.
    for offset, column in enumerate(("x_min", "y_min", "x_max", "y_max"), start=2):
        sub[column] = tokens.apply(
            lambda t, offset=offset: [float(v) / coord_scale for v in t[offset::6]]
        )

    return sub

def get_boxes(sub):
    """Build one ``[x_min, y_min, x_max, y_max]`` box list per row of ``sub``.

    Parameters
    ----------
    sub : pandas.DataFrame
        Frame whose ``x_min``, ``y_min``, ``x_max`` and ``y_max`` cells each
        hold a list of coordinates (as produced by ``get_all``). Within a row
        the four lists must have equal length, otherwise ``np.stack`` raises.

    Returns
    -------
    list
        One entry per row, in the row order of ``sub`` (positional, not keyed
        by index label). Each entry is a list of 4-element lists; a row
        without detections yields an empty list.
    """
    # The progress bar is cosmetic. The file's visible imports never bring
    # tqdm into scope, so use it when installed and iterate plainly otherwise.
    try:
        from tqdm.auto import tqdm as progress
    except ImportError:
        def progress(iterable, **kwargs):
            return iterable

    rows = zip(sub["x_min"], sub["y_min"], sub["x_max"], sub["y_max"])
    box_list = []
    for x_min, y_min, x_max, y_max in progress(rows, total=len(sub)):
        # Stack the four coordinate lists column-wise: shape (n_boxes, 4).
        corners = np.stack((x_min, y_min, x_max, y_max), axis=1)
        box_list.append(corners.tolist())
    return box_list

def lets_ensemble(val_det,val_det2,
                  check_classes=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],
                  is_nms=True,
                  is_wbf=True,
                  iou_thr = 0.6,
                  skip_box_thr = 0.000,
                  sigma = 0.1,
                  weights = [2,1],
                  nms_iou=0.35
                  ):
    """Blend the detections of two prediction frames image by image.

    For every row of ``val_det`` the boxes whose class is in ``check_classes``
    are taken from both frames and merged with ``weighted_boxes_fusion``
    (``is_wbf=True``) or ``nms`` (``is_wbf=False``), optionally followed by a
    second ``nms`` pass (``is_nms=True``). Boxes of ``val_det`` whose class is
    NOT in ``check_classes`` are kept as they are; boxes of ``val_det2`` with
    such classes are dropped. The result is sorted by descending score and
    written back in the ``class score x_min y_min x_max y_max`` string format.

    Parameters
    ----------
    val_det, val_det2 : pandas.DataFrame
        Frames with ``image_id`` and ``PredictionString`` columns. Both are
        modified in place by ``get_all`` (parsed columns are added). Rows are
        paired by index label, so the labels are assumed to be unique and the
        paired rows must carry the same ``image_id``.
    check_classes : sequence of int
        Classes that take part in the ensembling.
    is_nms : bool
        Run a final ``nms`` pass with ``nms_iou`` over the merged boxes.
    is_wbf : bool
        Merge with ``weighted_boxes_fusion`` when true, with ``nms`` otherwise.
    iou_thr : float
        ``iou_thr`` passed to the merge step.
    skip_box_thr : float
        ``skip_box_thr`` passed to ``weighted_boxes_fusion``.
    sigma : float
        Accepted for backward compatibility; not used by this function.
    weights : sequence of number
        Per-frame weights passed to the merge step (``val_det`` first).
    nms_iou : float
        ``iou_thr`` of the final ``nms`` pass.

    Returns
    -------
    pandas.DataFrame
        Copy of ``val_det[["image_id"]]`` with the blended ``PredictionString``.

    Raises
    ------
    ValueError
        If the rows paired by index label have different ``image_id`` values.
    """
    # The progress bar is cosmetic. The file's visible imports never bring
    # tqdm into scope, so use it when installed and iterate plainly otherwise.
    try:
        from tqdm.auto import tqdm as progress
    except ImportError:
        def progress(iterable, **kwargs):
            return iterable

    coord_scale = 10000  # undoes the division applied by get_all

    val_det = get_all(val_det)
    val_box = get_boxes(val_det)

    val_det2 = get_all(val_det2)
    val_box2 = get_boxes(val_det2)

    # get_boxes returns plain lists in row order. Key them by index label so
    # the lookups below stay correct when a frame does not use a 0..n-1 index.
    boxes_by_label = dict(zip(val_det.index, val_box))
    boxes2_by_label = dict(zip(val_det2.index, val_box2))

    def _row_arrays(frame, boxes_lookup, label):
        classes = np.asarray(frame.loc[label, "class"])
        scores = np.asarray(frame.loc[label, "scores"], dtype=float)
        # reshape keeps the (n, 4) layout even when the image has no boxes.
        boxes = np.asarray(boxes_lookup[label], dtype=float).reshape(-1, 4)
        return classes, scores, boxes

    prediction_strings = []
    for i in progress(val_det.index):
        # Blending boxes of two different images would be silently wrong.
        if val_det2.loc[i, "image_id"] != val_det.loc[i, "image_id"]:
            raise ValueError(
                "image_id mismatch at index {!r}: {!r} vs {!r}".format(
                    i, val_det.loc[i, "image_id"], val_det2.loc[i, "image_id"]
                )
            )

        classes1, scores1, boxes1 = _row_arrays(val_det, boxes_by_label, i)
        classes2, scores2, boxes2 = _row_arrays(val_det2, boxes2_by_label, i)

        filter_ = np.isin(classes1, check_classes)
        filter2_ = np.isin(classes2, check_classes)

        labels_list = [classes1[filter_], classes2[filter2_]]
        scores_list = [scores1[filter_], scores2[filter2_]]
        boxes_list = [boxes1[filter_], boxes2[filter2_]]

        if is_wbf:
            boxes, scores, labels = weighted_boxes_fusion(boxes_list, scores_list, labels_list, weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
        else:
            boxes, scores, labels = nms(boxes_list, scores_list, labels_list, weights=weights, iou_thr=iou_thr)
        if is_nms:
            boxes, scores, labels = nms([boxes], [scores], [labels], weights=None, iou_thr=nms_iou)

        # Put back the first frame's boxes that were excluded from ensembling.
        labels = np.concatenate((classes1[~filter_], labels))
        scores = np.concatenate((scores1[~filter_], scores))
        boxes = np.concatenate(
            (boxes1[~filter_], np.asarray(boxes, dtype=float).reshape(-1, 4))
        )

        # Highest score first; one argsort drives all three arrays.
        order = np.argsort(scores)[::-1]
        labels = labels[order]
        boxes = boxes[order] * coord_scale  # back to the input coordinate scale
        scores = np.clip(scores[order], 0, 1)

        parts = []
        for label, score, box in zip(labels, scores, boxes):
            parts.append(str(int(label)))
            parts.append(str(score))
            parts.extend(str(coordinate) for coordinate in box)
        prediction_strings.append(" ".join(parts))

    blend_df = val_det[["image_id"]].copy()
    # Assign positionally in one go; avoids per-row .loc writes.
    blend_df["PredictionString"] = pd.Series(
        prediction_strings, index=blend_df.index, dtype=object
    )
    return blend_df




In [None]:
# Example
# blend_df = lets_ensemble(file_1, file_2, check_classes=[1, 4, 5], iou_thr=0.4, is_nms=False)
# This ensembles only the boxes whose class is 1, 4 or 5. All other boxes are kept
# as they are in file_1; file_2's boxes for the other classes are not used.
