# Notebooks used to build the Ensemble

### Our Models:

- File: `"../input/submission-vbg/ds_tst_F1_noTH_noClean.csv"`
 - Sergio's Effdet_d2 Fold1: [notebook_v1](https://www.kaggle.com/socom20/effdet-inference)
 [ckpts](https://www.kaggle.com/socom20/vinbigdata-effdet-d2-f0f2-ckpts)
 - CSV file, Old version: [dataset](https://www.kaggle.com/socom20/effdet-inference-old)
 - CSV file, New version: [dataset](https://www.kaggle.com/socom20/effdet-inference)


- File: `"../input/submission-vbg/ds_tst_F2_noTH_noClean.csv"`
 - Sergio's first Effdet_d2 Fold2: [notebook_v1](https://www.kaggle.com/socom20/effdet-inference)
 [ckpts](https://www.kaggle.com/socom20/vinbigdata-effdet-d2-f0f2-ckpts)
 - CSV file, Old version: [dataset](https://www.kaggle.com/socom20/effdet-inference-old)
 - CSV file, New version: [dataset](https://www.kaggle.com/socom20/effdet-inference)


- File: `"../input/vbg-yolo-submission/Fold 1.csv"`
 - Mohammed's YOLOv5: [notebook](https://www.kaggle.com/morizin/vinbigdata-yolo-inference)
 [ckpt](https://www.kaggle.com/morizin/resnet50d-096-stage-2)
 - CSV file, Old version: [dataset_v2](https://www.kaggle.com/morizin/vbg-yolo-submission)
 - CSV file, New version: [Fold_2_cleaned](https://www.kaggle.com/morizin/vinbigdata-yolo-inference/output?select=Fold_2_cleaned.csv)


- File: `"../input/vbg-yolo-submission/Fold 4.csv"`
 - Mohammed's YOLOv5: [notebook](https://www.kaggle.com/morizin/vinbigdata-yolo-inference)
 [ckpt](https://www.kaggle.com/morizin/resnet50d-096-stage-2)
 - CSV file, Old version: [dataset_v2](https://www.kaggle.com/morizin/vbg-yolo-submission)
 - CSV file, New version: [Fold_5_cleaned](https://www.kaggle.com/morizin/vinbigdata-yolo-inference/output?select=Fold_5_cleaned.csv)
 

- File: `"../input/vbg-yolo-submission/submission (2).csv"`
 - Mohammed's YOLOv5 TTA: [notebook](https://www.kaggle.com/morizin/vinbigdata-yolo-inference)
 [ckpt](https://www.kaggle.com/morizin/resnet50d-096-stage-2)
 - CSV file, Old version: [dataset_v2](https://www.kaggle.com/morizin/vbg-yolo-submission)
 - CSV file, New version: [Fold_5_TTA](https://www.kaggle.com/morizin/vinbigdata-yolo-inference/output?select=Fold_5_TTA.csv)


- File: `"../input/mohammedyolov5/submission.csv"`
 - Mohammed's YOLOv5: [notebook_v2](https://www.kaggle.com/morizin/vinbigdata-cxr-ad-yolov5-14-class-infer-184dd1/data?scriptVersionId=57010094&select=submission.csv)
 [ckpt](https://www.kaggle.com/morizin/vinbigdata-cxr-ad-yolov5-14-class-train-4de71f?scriptVersionId=56346979)
 - CSV file: [dataset_v1](https://www.kaggle.com/socom20/mohammedyolov5?select=submission.csv)
 
 

### Public Kernels:

- File: `"../input/yolov5-chest-512/submission.csv"`
 - nxhong93: [notebook v1](https://www.kaggle.com/nxhong93/yolov5-chest-512/data)


- File: `"../input/publickernel-vinbigdata-yolov5-16-class-version-1/submission.csv"`
 - duythanhng: [notebook v1](https://www.kaggle.com/duythanhng/vinbigdata-yolov5-16-class/output?scriptVersionId=56314815)


- File: `"../input/public-kernel-vinbigdata-detectron2-prediction-v9/results/20210110_train_all_500k_512/submission_det.csv"`
 - corochann: [notebook v9](https://www.kaggle.com/corochann/vinbigdata-detectron2-prediction?scriptVersionId=52564402)
 

- File: `"../input/vinbigdata-2-class-classifier-complete-pipeline/results/tmp_debug/test_pred.csv"`
 - corochann: [notebook v13](https://www.kaggle.com/corochann/vinbigdata-2-class-classifier-complete-pipeline/output)



# Imports

In [None]:
!pip install -U ensemble-boxes

In [None]:
import os, sys
from glob import glob
import copy
import shutil

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
from ensemble_boxes import *

# Cleaning Functions

In [None]:
def solve_bbox_problems(bbox_v, scores_v, labels_v):
    """
    Solves problems in the "ensemble-boxes" way.

    Every row of ``bbox_v`` is read as ``(x1, y1, x2, y2)`` and is repaired
    in place:

    - swapped corners (``x2 < x1`` / ``y2 < y1``) are put back in order;
    - every coordinate is clamped to the ``[0, 1]`` range;
    - boxes whose area is exactly zero after clamping are neutralised: the
      box becomes ``[0, 0, 1, 1]`` and its score becomes ``0.0``.

    Args:
        bbox_v: array of shape (N, 4), modified in place.
        scores_v: array of N scores, modified in place for removed boxes.
        labels_v: array of N labels, returned untouched.

    Returns:
        The same three objects ``(bbox_v, scores_v, labels_v)``.
    """
    # NOTE: `np.bool` was removed from NumPy (1.24); the builtin `bool` is
    # the supported spelling of the same dtype.
    to_remove = np.zeros(bbox_v.shape[0], dtype=bool)

    for i in range(bbox_v.shape[0]):
        x1, y1, x2, y2 = bbox_v[i]

        # Put the corners back in (min, max) order.
        if x2 < x1:
            x1, x2 = x2, x1
        if y2 < y1:
            y1, y2 = y2, y1

        # Clamp every coordinate to [0, 1].
        x1 = min(max(x1, 0), 1)
        x2 = min(max(x2, 0), 1)
        y1 = min(max(y1, 0), 1)
        y2 = min(max(y2, 0), 1)

        if (x2 - x1) * (y2 - y1) == 0.0:
            # Zero-area box: flag it for neutralisation below.
            to_remove[i] = True

        bbox_v[i] = x1, y1, x2, y2

    if to_remove.sum() > 0:
        # Hack to remove bboxes using min confidence th: the array lengths
        # are kept, the flagged boxes simply get a score of 0.
        bbox_v[to_remove] = np.array([0.0, 0.0, 1.0, 1.0])
        scores_v[to_remove] = 0.0

    return bbox_v, scores_v, labels_v


def calc_iou(bb0, bb1):
    """
    Intersection-over-union between boxes given as ``(x0, y0, x1, y1)``.

    Each argument is either a single box (1-D array of 4 values) or a stack
    of boxes (2-D array with one box per row); the usual NumPy broadcasting
    between the two decides the shape of the result.

    Every box must satisfy ``x0 < x1`` and ``y0 < y1`` (checked with
    ``assert``). Pairs that do not overlap get an IoU of 0.
    """
    # Work on the transposed view so that unpacking yields coordinate vectors.
    if bb0.ndim == 2:
        bb0 = bb0.T
    if bb1.ndim == 2:
        bb1 = bb1.T

    a_x0, a_y0, a_x1, a_y1 = bb0
    b_x0, b_y0, b_x1, b_y1 = bb1

    assert (a_x0 < a_x1).all()
    assert (a_y0 < a_y1).all()
    assert (b_x0 < b_x1).all()
    assert (b_y0 < b_y1).all()

    # Corners of the (possibly degenerate) intersection rectangle.
    inter_x0 = np.maximum(a_x0, b_x0)
    inter_y0 = np.maximum(a_y0, b_y0)
    inter_x1 = np.minimum(a_x1, b_x1)
    inter_y1 = np.minimum(a_y1, b_y1)

    # False wherever the two boxes are disjoint along x or along y.
    overlaps = np.logical_not(
        np.logical_or(inter_x1 < inter_x0, inter_y1 < inter_y0)
    )

    inter_area = (inter_x1 - inter_x0) * (inter_y1 - inter_y0)
    area_a = (a_x1 - a_x0) * (a_y1 - a_y0)
    area_b = (b_x1 - b_x0) * (b_y1 - b_y0)

    iou = inter_area / (area_a + area_b - inter_area)

    # The mask zeroes the meaningless value computed for disjoint pairs.
    return iou * overlaps


def merge_preds(bbox_v, p_det_v=None, mode='p_det_weight'):
    """
    Collapse a group of boxes (one per row of ``bbox_v``) into a single box
    and a single confidence.

    Modes:
        - 'p_det_weight' / 'p_det_weight_pmean': confidence-weighted average
          box, mean confidence.
        - 'p_det_weight_psum': confidence-weighted average box, summed
          confidence.
        - 'median' / 'median_pmean': per-coordinate median box, mean
          confidence.
        - 'p_det_max': the box with the highest confidence.
        - 'random': one box picked with ``np.random.randint``.

    When ``p_det_v`` is None every box gets a confidence of 1.

    Returns:
        ``(bbox, p)``

    Raises:
        Exception: if ``mode`` is not one of the names above.
    """
    if p_det_v is None:
        p_det_v = np.ones(bbox_v.shape[0])

    weighted_modes = ('p_det_weight', 'p_det_weight_pmean', 'p_det_weight_psum')

    if mode in weighted_modes:
        # Normalise the confidences (in the boxes' dtype) into weights.
        conf = p_det_v.astype(bbox_v.dtype)
        weights = (conf / conf.sum())[:, None]

        bbox = (bbox_v * weights).sum(axis=0)
        if mode == 'p_det_weight_psum':
            p = p_det_v.sum()
        else:
            p = p_det_v.mean()

    elif mode in ('median', 'median_pmean'):
        bbox = np.median(bbox_v, axis=0)
        p = p_det_v.mean()

    elif mode in ('p_det_max', 'random'):
        if mode == 'p_det_max':
            chosen = p_det_v.argmax()
        else:
            chosen = np.random.randint(0, p_det_v.shape[0])

        bbox = bbox_v[chosen]
        p = p_det_v[chosen]

    else:
        raise Exception(f'Unknown mode "{mode}"')

    return bbox, p
    

def _group_overlapping_boxes(bbox_v, iou_th):
    """Partition the row indices of ``bbox_v`` into disjoint groups of boxes
    that are linked, directly or through other boxes, by an IoU > ``iou_th``."""
    groups = []
    for i_box in range(bbox_v.shape[0]):
        linked = set(np.argwhere(calc_iou(bbox_v[i_box], bbox_v) > iou_th).T[0])

        # Positions of every existing group sharing at least one index.
        hits = [k for k, group in enumerate(groups) if group & linked]
        if not hits:
            groups.append(linked)
            continue

        # Fold ALL the touching groups into the first one. Merging only into
        # the first match would leave overlapping groups behind and the same
        # box would then be emitted twice.
        for k in hits:
            linked |= groups[k]
        groups[hits[0]] = linked
        for k in reversed(hits[1:]):
            del groups[k]

    return groups


def clean_predictions(preds_v, iou_th=0.1, mode='p_det_weight', consensus_level=1, n_models2ensemble=1):
    """
    Merge overlapping same-class boxes inside every prediction dict.

    For each dict of ``preds_v`` the boxes are processed class by class.
    Boxes of one class whose IoU exceeds ``iou_th`` are grouped and each
    group is collapsed into one box with ``merge_preds``.

    Args:
        preds_v: list of dicts holding 'cls', 'bbox' (or 'bboxes') and,
            optionally, 'p_det', 'rad_id' and 'model_id' arrays. Any other
            key is copied to the output unchanged.
        iou_th: IoU above which two boxes are put in the same group.
        mode: merge strategy forwarded to ``merge_preds``.
        consensus_level: groups with fewer boxes than this are dropped.
        n_models2ensemble: when > 1, the merged confidence is scaled by the
            fraction of distinct 'model_id' values present in the group.

    Returns:
        A new list of dicts with the same keys as the inputs.
    """
    ret_preds_v = []
    for pred_d in preds_v:
        cls_v = pred_d['cls']

        bbox_key = 'bbox' if 'bbox' in pred_d else 'bboxes'
        bbox_v = pred_d[bbox_key]

        ret_p_det = 'p_det' in pred_d
        if ret_p_det:
            p_det_v = pred_d['p_det']
        else:
            p_det_v = np.ones(cls_v.shape)

        ret_rad_id = 'rad_id' in pred_d
        if ret_rad_id:
            rad_id_v = pred_d['rad_id']

        if 'model_id' in pred_d:
            model_id_v = pred_d['model_id']
        else:
            # `np.int` was removed from NumPy (1.24); builtin `int` is the
            # same dtype.
            model_id_v = np.zeros(cls_v.shape, dtype=int)

        new_cls_v = []
        new_bbox_v = []
        new_p_det_v = []
        new_rad_id_v = []
        for i_c in np.unique(cls_v):
            f_c = (cls_v == i_c)
            n_c = f_c.sum()

            if n_c == 1:
                # A lone box can never reach a consensus > 1. Class -1 is
                # exempt from this rule.
                # NOTE(review): -1 is presumably the "no finding" label of
                # the radiologist annotations — confirm.
                if consensus_level > 1 and i_c != -1:
                    continue

                if ret_rad_id:
                    if i_c == -1:
                        n_rads = rad_id_v.size

                        if n_rads < consensus_level:
                            continue

                        if n_rads > 1:
                            new_rad_id_v.append(np.concatenate(rad_id_v, axis=-1))
                        else:
                            new_rad_id_v.append(rad_id_v[f_c][0])
                    else:
                        new_rad_id_v.append(rad_id_v[f_c][0])

                new_cls_v.append(i_c)
                new_bbox_v.append(bbox_v[f_c][0])
                new_p_det_v.append(p_det_v[f_c][0])
                continue

            # Several boxes of this class: group them by overlap and merge.
            f_bbox_v = bbox_v[f_c]
            f_p_det_v = p_det_v[f_c]
            f_model_id_v = model_id_v[f_c]

            if ret_rad_id:
                f_rad_id_v = rad_id_v[f_c]

            for to_join_idxs in _group_overlapping_boxes(f_bbox_v, iou_th):
                # Sorted so that the merge order is deterministic.
                to_join_idxs = sorted(to_join_idxs)

                if len(to_join_idxs) < consensus_level:
                    continue

                bbox, p_det = merge_preds(
                    f_bbox_v[to_join_idxs],
                    f_p_det_v[to_join_idxs],
                    mode=mode,
                )

                if n_models2ensemble > 1:
                    ens_prop = len(np.unique(f_model_id_v[to_join_idxs])) / n_models2ensemble
                    p_det = p_det * ens_prop

                new_cls_v.append(i_c)
                new_bbox_v.append(bbox)
                new_p_det_v.append(p_det)

                if ret_rad_id:
                    new_rad_id_v.append(np.concatenate(f_rad_id_v[to_join_idxs], axis=-1))

        ret_preds_d = {
            'cls': np.array(new_cls_v),
            bbox_key: np.array(new_bbox_v),
        }

        if ret_p_det:
            ret_preds_d['p_det'] = np.array(new_p_det_v)

        if ret_rad_id:
            ret_preds_d['rad_id'] = np.array(new_rad_id_v, dtype=object)

        # Carry over every key this function does not rewrite.
        for k in pred_d.keys():
            if k not in ['cls', bbox_key, 'p_det', 'rad_id']:
                ret_preds_d[k] = pred_d[k]

        ret_preds_v.append(ret_preds_d)

    return ret_preds_v


    
def pred_to_str(pred_d):
    """
    Encode one prediction dict as a submission "PredictionString".

    The string is a space-separated sequence of
    ``class score x_min y_min x_max y_max`` groups. Box coordinates are
    rounded to integers. Class 14 is always written with the box
    ``0 0 1 1`` and classes above 14 are skipped. When no class <= 14 is
    present the fixed string ``'14 1 0 0 1 1'`` is returned.

    Args:
        pred_d: dict with 'cls', 'bbox' and 'p_det' arrays.

    Returns:
        The encoded string.
    """
    cls_v = pred_d['cls']
    bbox_v = pred_d['bbox']
    p_det_v = pred_d['p_det']

    if not any(c <= 14 for c in cls_v):
        return '14 1 0 0 1 1'

    # `np.int` was removed from NumPy (1.24); builtin `int` is the same dtype.
    int_cls_v = cls_v.astype(int)
    int_bbox_v = np.round(bbox_v).astype(int)

    s_v = []
    for cls, p_det, bbox in zip(int_cls_v, p_det_v, int_bbox_v):
        if cls > 14:
            continue

        if cls == 14:
            bbox = np.array([0, 0, 1, 1])

        s_v.append(
            '{} {:0.05} {} {} {} {}'.format(
                int(cls),
                p_det,
                *bbox
            )
        )

    return ' '.join(s_v)


def read_prediction_csv(
    filename='./ds_tst_F0_V6_JustCLS0_1.25x.csv',
    skip_cls14=False,
):
    """
    Load a two-column submission CSV (id, prediction string) into a list of
    prediction dicts.

    The prediction string is read as whitespace-separated groups of six
    numbers: ``class score x_min y_min x_max y_max``. Class 14 entries are
    stored with the box ``[0, 0, 1, 1]`` regardless of the coordinates in
    the file, or left out entirely when ``skip_cls14`` is True.

    Returns:
        One dict per CSV row with the keys 'sample_id', 'cls', 'bbox' and
        'p_det' (the last three as NumPy arrays).
    """
    table = pd.read_csv(filename)

    preds_v = []
    for sample_id, pred_string in table.values:
        tokens = pred_string.split()

        classes, boxes, confidences = [], [], []
        for start in range(0, len(tokens), 6):
            cls, p_det, x_min, y_min, x_max, y_max = map(float, tokens[start:start + 6])
            cls = int(cls)

            if skip_cls14 and cls == 14:
                continue

            if cls == 14:
                box = np.array([0, 0, 1, 1])
            else:
                box = np.array([x_min, y_min, x_max, y_max])

            classes.append(cls)
            boxes.append(box)
            confidences.append(p_det)

        preds_v.append({
            'sample_id': sample_id,
            'cls': np.array(classes),
            'bbox': np.array(boxes),
            'p_det': np.array(confidences),
        })

    return preds_v


def predictions_to_df(
    preds_v,
    save_path=None,
):
    """
    Build the submission table ('image_id', 'PredictionString') from a list
    of prediction dicts, encoding each dict with ``pred_to_str``.

    When ``save_path`` is given the table is also written there as CSV and a
    confirmation line is printed.

    Returns:
        The submission DataFrame.
    """
    image_ids = []
    pred_strings = []
    for pred_d in preds_v:
        pred_strings.append(pred_to_str(pred_d))
        image_ids.append(pred_d['sample_id'])

    pred_summary_df = pd.DataFrame({
        'image_id': image_ids,
        'PredictionString': pred_strings,
    })

    if save_path is None:
        return pred_summary_df

    pred_summary_df.to_csv(save_path, index=None)
    print(f' Saved submission: "{save_path}"')

    return pred_summary_df


def norm_p_det(pred_v):
    """
    Rescale every 'p_det' so that the largest confidence over all the
    predictions becomes 1.0.

    Nothing is rescaled when the largest confidence is already <= 1.0, or
    when no prediction holds any confidence at all; in both cases ``pred_v``
    itself is returned.

    Args:
        pred_v: list of prediction dicts with a 'p_det' array.

    Returns:
        A rescaled deep copy of ``pred_v``, or ``pred_v`` itself when no
        rescaling is needed. The input is never modified.
    """
    non_empty_v = [pred_d['p_det'] for pred_d in pred_v if len(pred_d['p_det']) > 0]

    if not non_empty_v:
        # np.concatenate([]) raises ValueError; there is nothing to rescale.
        print('skipping norm_p_det')
        return pred_v

    p_det_max = np.concatenate(non_empty_v).max()
    print('p_det_max =', p_det_max)

    if not p_det_max > 1.0:
        print('skipping norm_p_det')
        return pred_v

    ret_pred_v = copy.deepcopy(pred_v)
    for pred_d in ret_pred_v:
        if len(pred_d['p_det']) > 0:
            pred_d['p_det'] = pred_d['p_det'] / p_det_max

    return ret_pred_v


def fix_boxes(preds_v):
    """
    Drop invalid detections, in place, from every prediction dict.

    A detection is dropped when:
    - its width or its height (xmax - xmin, ymax - ymin) is <= 1, or
    - its p_det is <= 0 or > 1.0.

    Only the 'p_det', 'bbox' and 'cls' arrays are filtered. A '.' is printed
    for every dict that lost at least one detection.

    Returns:
        None (the dicts of ``preds_v`` are modified in place).
    """
    for preds_d in preds_v:
        if len(preds_d['cls']) == 0:
            continue

        boxes = preds_d['bbox']
        scores = preds_d['p_det']

        # (width, height) of every box.
        extent = boxes[:, 2:] - boxes[:, :2]

        too_small = (extent <= 1).any(axis=-1)
        bad_score = (scores <= 0) | (scores > 1.0)

        if too_small.any() or bad_score.any():
            print('.', end='')
            keep = ~(too_small | bad_score)
            for key in ('p_det', 'bbox', 'cls'):
                preds_d[key] = preds_d[key][keep]

    return None


def add_class_14(
    preds_v,
    pred_clf_c14_filename='2-cls test pred.csv',
    low_threshold=0.00,
    high_threshold=0.99,
    rm_preds_high_th=True,
    inv_p_cls=True,
    ):
    """
    Add a class-14 prediction (box ``[0, 0, 1, 1]``) to every image, driven
    by the per-image probability stored in a classifier CSV.

    The first two columns of the CSV are read as (image id, probability).
    The class-14 probability ``p_14`` is that value, or ``1 - value`` when
    ``inv_p_cls`` is True. Then, per image:

    - ``p_14 < low_threshold``: the predictions are left untouched;
    - ``p_14 >= high_threshold``: with ``rm_preds_high_th`` every prediction
      is replaced by a single class-14 entry of confidence 1.0, otherwise a
      class-14 entry of confidence 1.0 is added;
    - in between: a class-14 entry of confidence ``p_14`` is added.

    Args:
        preds_v: list of prediction dicts ('sample_id', 'cls', 'bbox',
            'p_det'); never modified.
        pred_clf_c14_filename: path of the classifier CSV.
        low_threshold: see above.
        high_threshold: see above.
        rm_preds_high_th: see above.
        inv_p_cls: whether the CSV probability must be inverted.

    Returns:
        A modified deep copy of ``preds_v``.

    Raises:
        KeyError: if a 'sample_id' of ``preds_v`` is missing from the CSV.
    """
    cls_df = pd.read_csv(pred_clf_c14_filename)

    # sample_id -> 0.0 (keep as is), 1.0 (certain class 14) or p_14 itself.
    class_14_d = {}
    for sample_id, p_cls in cls_df.values[:, :2]:
        p_14 = 1.0 - p_cls if inv_p_cls else p_cls

        if p_14 < low_threshold:
            # Keep, do nothing.
            class_14_d[sample_id] = 0.0
        elif p_14 >= high_threshold:
            # Replace, remove all "det" preds.
            class_14_d[sample_id] = 1.0
        else:
            # Add, keep "det" preds and add normal pred.
            class_14_d[sample_id] = p_14

    ret_preds_v = copy.deepcopy(preds_v)

    for pred_d in tqdm(ret_preds_v):
        p_14 = class_14_d[pred_d['sample_id']]

        if p_14 == 0.0:
            continue

        if p_14 == 1 and rm_preds_high_th:
            pred_d['bbox']  = np.array([[0.0, 0.0, 1.0, 1.0]])
            pred_d['cls']   = np.array([14])
            pred_d['p_det'] = np.array([1.0])
            continue

        if len(pred_d['bbox']) > 0 and 14 not in pred_d['cls']:
            pred_d['bbox'] = np.append(pred_d['bbox'], np.array([[0.0, 0.0, 1.0, 1.0]]), axis=0)
            pred_d['cls']  = np.append(pred_d['cls'], 14)
            pred_d['p_det'] = np.append(pred_d['p_det'], p_14)
        else:
            # NOTE(review): this branch is also taken when class 14 is
            # already present next to other detections, in which case those
            # detections are discarded — confirm this is intended.
            pred_d['bbox'] = np.array([[0, 0, 1, 1]])
            # `np.int` was removed from NumPy (1.24); builtin `int` is the
            # same dtype.
            pred_d['cls']  = np.array([14], dtype=int)
            pred_d['p_det'] = np.array([p_14])

    return ret_preds_v

# Reading Inputs

In [None]:
# Reading original image shapes
height_dict = pd.read_csv('../input/vinbigdata-original-image-dataset/vinbigdata/test.csv').to_dict('records')

# image_id -> [width, height, width, height], i.e. the per-coordinate scale
# of a (x_min, y_min, x_max, y_max) box.
fnl_dict = {}
for record in height_dict:
    img_w, img_h = record['width'], record['height']
    fnl_dict[record['image_id']] = [img_w, img_h, img_w, img_h]

In [None]:
# Reading all the predictions
# The order matters: the ensemble weights defined later are positional.
# NOTE(review): '../input/yolov5-chest-512/submission.csv' is listed twice —
# confirm this is a deliberate extra weight and not a copy/paste slip.
submission_paths = [
    '../input/effdet-inference/ds_tst_F2_noTH_noClean.csv',
    '../input/vbg-yolo-submission/Fold 1.csv',
    '../input/public-kernel-vinbigdata-detectron2-prediction-v9/results/20210110_train_all_500k_512/submission_det.csv',
    '../input/effdet-inference/ds_tst_F1_noTH_noClean.csv',
    '../input/vbg-yolo-submission/Fold 4.csv',
    '../input/vbg-yolo-submission/submission (2).csv',
    '../input/yolov5-chest-512/submission.csv',
    '../input/yolov5-chest-512/submission.csv',
    '../input/publickernel-vinbigdata-yolov5-16-class-version-1/submission.csv',
    '../input/mohammedyolov5/submission.csv',
]

subs = [pd.read_csv(path) for path in submission_paths]


In [None]:
def submission_decoder(df: pd.DataFrame, size_lookup=None) -> pd.DataFrame:
    """
    Expand a submission table into one row per detection.

    The first column of ``df`` is read as the image id and the second one as
    the prediction string: whitespace-separated groups of
    ``class score x_min y_min x_max y_max``.

    Args:
        df: submission table (image id, prediction string).
        size_lookup: mapping ``image_id -> [width, height, ...]``. Defaults
            to the module-level ``fnl_dict``.

    Returns:
        DataFrame with the columns 'image_id', 'class_id', 'score', 'x_min',
        'y_min', 'x_max', 'y_max', 'width', 'height'.

    Raises:
        KeyError: if an image id is missing from ``size_lookup``.
        ValueError: if a token cannot be parsed as a number.
    """
    if size_lookup is None:
        size_lookup = fnl_dict

    df_lst = []
    for row in df.values:
        image_id = row[0]
        img_size = size_lookup[image_id]
        width, height = img_size[0], img_size[1]

        # split() (not split(' ')) so that repeated or trailing blanks do not
        # yield empty tokens.
        pre_lst = row[1].split()
        for j in range(0, len(pre_lst), 6):
            df_lst.append([
                image_id,
                int(pre_lst[j]),
                float(pre_lst[j + 1]),
                int(pre_lst[j + 2]),
                int(pre_lst[j + 3]),
                int(pre_lst[j + 4]),
                int(pre_lst[j + 5]),
                width,
                height,
            ])

    return pd.DataFrame(
        df_lst,
        columns=['image_id', 'class_id', 'score', 'x_min', 'y_min', 'x_max', 'y_max', 'width', 'height'],
    )

In [None]:
# Replace every raw submission table by its one-row-per-detection form.
subs = [submission_decoder(sub_df) for sub_df in subs]

# Ensembling

In [None]:
# Per-image inputs of the ensembling functions: for every image id, one list
# entry per model (same order as `subs`).
boxes_dict = {}
scores_dict = {}
labels_dict = {}
whwh_dict = {}

box_columns = ['x_min', 'y_min', 'x_max', 'y_max']

# Only the image ids present in the first submission are processed.
for image_id in tqdm(subs[0].image_id.unique()):
    for per_image in (boxes_dict, scores_dict, labels_dict, whwh_dict):
        per_image.setdefault(image_id, [])

    # [width, height, width, height] of the original image.
    size_ratio = fnl_dict.get(image_id)
    whwh_dict[image_id].append(size_ratio)

    for sub_df in subs:
        image_rows = sub_df[sub_df['image_id'] == image_id]

        # Pixel coordinates -> [0, 1] range.
        bbox_v = image_rows[box_columns].values / size_ratio
        scores_v = image_rows['score'].values
        labels_v = image_rows['class_id'].values

        bbox_v, scores_v, labels_v = solve_bbox_problems(bbox_v, scores_v, labels_v)

        boxes_dict[image_id].append(bbox_v.tolist())
        scores_dict[image_id].append(scores_v.tolist())
        labels_dict[image_id].append(labels_v.tolist())
            

In [None]:
# One weight per entry of `subs` (first stage) ...
weights  = [3,2,3,3,2,4,4,4,4,3]
# ... and one per first-stage result, in the order SoftNMS, NMS, NMW, WBF.
weights1 = [3,2,4,5]

iou_thr = 0.25
skip_box_thr = 0.01
sigma = 0.1

fnl = {}

for image_id in tqdm(boxes_dict):
    model_boxes = boxes_dict[image_id]
    model_scores = scores_dict[image_id]
    model_labels = labels_dict[image_id]

    # First Ensemble using: WBF, NMS, SoftNMS, NMW (10 models)
    wbf_boxes, wbf_scores, wbf_labels = weighted_boxes_fusion(
        model_boxes, model_scores, model_labels,
        weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr,
    )

    nms_boxes, nms_scores, nms_labels = nms(
        model_boxes, model_scores, model_labels,
        weights=weights, iou_thr=iou_thr,
    )

    soft_boxes, soft_scores, soft_labels = soft_nms(
        model_boxes, model_scores, model_labels,
        weights=weights, iou_thr=iou_thr, sigma=sigma, thresh=skip_box_thr,
    )

    nmw_boxes, nmw_scores, nmw_labels = non_maximum_weighted(
        model_boxes, model_scores, model_labels,
        weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr,
    )

    # Final Ensemble using: WBF (4 previous ensembles)
    boxes, scores, labels = weighted_boxes_fusion(
        [soft_boxes, nms_boxes, nmw_boxes, wbf_boxes],
        [soft_scores, nms_scores, nmw_scores, wbf_scores],
        [soft_labels, nms_labels, nmw_labels, wbf_labels],
        weights=weights1, iou_thr=iou_thr, skip_box_thr=skip_box_thr,
    )

    # Scale the fused boxes by the [width, height, width, height] stored for
    # this image.
    fnl[image_id] = {
        'boxes': boxes*whwh_dict[image_id],
        'scores': scores,
        'labels': labels,
    }

In [None]:
# Flatten the fused detections into one row per box; coordinates are
# truncated to integers and scores rounded to two decimals.
pd_form = []
for image_id, fused in fnl.items():
    for k, box in enumerate(fused['boxes']):
        pd_form.append([
            image_id,
            int(fused['labels'][k]),
            round(fused['scores'][k], 2),
            int(box[0]),
            int(box[1]),
            int(box[2]),
            int(box[3]),
        ])

final_df = pd.DataFrame(
    pd_form,
    columns=['image_id', 'class_id', 'score', 'x_min', 'y_min', 'x_max', 'y_max'],
)
# Rows that became identical after the rounding above are kept only once.
final_df = final_df.drop_duplicates(keep='first')

In [None]:
def submission_encoder(df: pd.DataFrame) -> pd.DataFrame:
    """
    Collapse a one-row-per-detection table into a submission table.

    The columns of ``df`` are read by position: image id, class, score,
    x_min, y_min, x_max, y_max. Class and coordinates are written as
    integers and the score as a float, all joined by single spaces.

    Args:
        df: detections table whose first column is named 'image_id'.

    Returns:
        DataFrame with the columns 'image_id' and 'PredictionString', one
        row per distinct image id, in order of first appearance.
    """
    rows = []
    for image_id in tqdm(df['image_id'].unique()):
        tokens = []
        for det in df[df['image_id'] == image_id].values:
            tokens.extend((
                int(det[1]),
                float(det[2]),
                int(det[3]),
                int(det[4]),
                int(det[5]),
                int(det[6]),
            ))

        rows.append([image_id, ' '.join(map(str, tokens))])

    return pd.DataFrame(rows, columns=['image_id', 'PredictionString']).reset_index(drop=True)

# Re-encode the fused detections as one PredictionString row per image.
df = submission_encoder(final_df)


In [None]:
# Save the ensemble output; this file is read back by the final cleaning step.
df.to_csv('submission_wo_cls2f.csv', index=False)

# Final cleaning + 2 cls filtering

In [None]:
# Using filter from:
# https://www.kaggle.com/corochann/vinbigdata-2-class-classifier-complete-pipeline/output#VinBigData-2-class-classifier-complete-pipeline
# notebook Version 13

ensemble_csv = "submission_wo_cls2f.csv"
two_cls_pred_csv = '../input/vinbigdata-2-class-classifier-complete-pipeline/results/tmp_debug/test_pred.csv'
final_submission_csv = 'ds_tst_Ens_pmax_w_2clsf_corochann.csv'

preds_v = read_prediction_csv(ensemble_csv, skip_cls14=False)

# Dropping, in place, the detections with:
#     - p_det > 1.0 or p_det <= 0.0
#     - xmax - xmin <= 1
#     - ymax - ymin <= 1
fix_boxes(preds_v)

# Merging the overlapping boxes of a same class.
# Available modes: p_det_max , p_det_weight, p_det_weight_psum
clean_pred_v = clean_predictions(preds_v, iou_th=0.25, mode='p_det_weight_psum')

# Fixing p_det > 1.0
pred_v = norm_p_det(clean_pred_v)

# Adding class 14
pred_v = add_class_14(
    pred_v,
    pred_clf_c14_filename=two_cls_pred_csv,
    low_threshold=0.00,
    high_threshold=0.999,
    rm_preds_high_th=False,
    inv_p_cls=False,
)

# Saving submission
preds_df = predictions_to_df(pred_v, final_submission_csv)

preds_df