# Inference Kernel on Test Data

# 1. Prepare ResNest and Best Weights Data
Jung has already packaged the ResNest repo as a Kaggle dataset. We copy it to Kaggle's working directory so that we can modify it (`/kaggle/input/` is read-only).

In [None]:
# Recursively copy the repo from the read-only input dataset into the
# writable working directory, then list the working directory.
!cp -rf /kaggle/input/wheatdetection-resnest-develop-branch-july9/wheatdetection/wheatdetection /kaggle/working/
!ls /kaggle/working

In [None]:
# Recursively copy the `mydata` input dataset into the working directory.
# NOTE(review): later cells still read from /kaggle/input/mydata directly.
!cp -rf /kaggle/input/mydata /kaggle/working

In [None]:
# Location of the copied repo; list it and make it the current directory so
# the relative paths used by later cells (e.g. ./data/build.py) resolve.
CODE_PATH = '/kaggle/working/wheatdetection/'
!ls {CODE_PATH}
%cd {CODE_PATH}

Next, we define the path to our best checkpoint. Please modify the following cell to point to the dataset that contains your own trained weights.

In [None]:
import torch

# Path of the best checkpoint; change it to your own weights, e.g.
# ../input/gdrive-dataset-downloader/F0_86_finetune_epoch_best.bin
BEST_PATH = "/kaggle/input/mydata/FRCNN_F0_best.bin"

# Print the checkpoint's keys, epoch and best validation loss.
# The tensors are mapped to the CPU: this cell only reads metadata, and `ckp`
# stays alive as a notebook global, so loading it onto the GPU would hold that
# memory for the rest of the session (and fail outright without a GPU).
ckp = torch.load(BEST_PATH, map_location="cpu")
print(ckp.keys())
print(ckp['epoch'], ckp['best_valid_loss'])

# 2. Inference

Define the hyperparameters here. If we don't use NMS, we simply filter out boxes using `SCORE_THRESHOLD`; if we use NMS, we combine `SCORE_THRESHOLD` and `NMS_IOU_THRESHOLD` to filter boxes according to the NMS logic.

In [None]:
# Detections whose score is not above this value are dropped.
SCORE_THRESHOLD = 0.8

# Optional NMS applied to each TTA view before fusion, and its IoU threshold.
USE_NMS = False
NMS_IOU_THRESHOLD = 0.5

# Image side length used by the TTA box transforms and by WBF normalisation.
IMG_SIZE = 1024

# Weighted Boxes Fusion settings (passed as iou_thr / skip_box_thr).
WBF_IOU = 0.8
WBF_SKIP_BOX = 0.8

In the repo, the `WheatDetector` class always needs an internet connection to fetch the `pretrained` weights, so the cells below rewrite the relevant repo files, including `modeling/wheat_detector.py` with `pretrained=False`.

In [None]:
%%writefile ./data/build.py
# encoding: utf-8
"""
@author:  wuxin.wang
@contact: wuxin.wang@whu.edu.cn
"""

from torch.utils import data
from torch.utils.data.sampler import SequentialSampler, RandomSampler

import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold

from .datasets.train_wheat import train_wheat
from .datasets.test_wheat import test_wheat
from .transforms import build_transforms
from .transforms import get_test_transform
from .collate_batch import collate_batch

def split_dataset(cfg):
    """Load the annotation CSV and list every image id it contains.

    Inference-only variant: no fold split is performed, so every image in the
    CSV ends up in the validation set. The training-time box cleaning and the
    StratifiedKFold split of the original repo are intentionally not applied
    here.

    Args:
        cfg: config node; ``cfg.DATASETS.DIR`` is the path of the CSV file.

    Returns:
        ``(marking, valid_ids)``: the annotation DataFrame exactly as read,
        and an array of the distinct ``image_id`` values (the group keys).
    """
    annotations = pd.read_csv(str(cfg.DATASETS.DIR))
    per_image = annotations.groupby('image_id').count()
    return annotations, per_image.index.values

def build_dataset(cfg):
    """Build the validation dataset over every image listed in the CSV.

    Only a validation dataset is produced in this inference-only rewrite; it
    uses the non-training transforms and is constructed with ``test=True``.

    Args:
        cfg: config node providing ``DATASETS.ROOT_DIR`` plus whatever
            ``split_dataset`` and ``build_transforms`` read.

    Returns:
        The ``train_wheat`` dataset instance.
    """
    annotations, image_ids = split_dataset(cfg)
    return train_wheat(
        root=cfg.DATASETS.ROOT_DIR,
        image_ids=image_ids,
        marking=annotations,
        transforms=build_transforms(cfg, is_train=False),
        test=True,
    )

def make_data_loader(cfg, is_train=True):
    """Create the sequential validation DataLoader.

    Args:
        cfg: config node.
        is_train: only selects the batch size source
            (``cfg.SOLVER.IMS_PER_BATCH`` when True, ``cfg.TEST.IMS_PER_BATCH``
            otherwise); the dataset is the validation dataset either way.

    Returns:
        A ``DataLoader`` that walks the validation dataset in order.
    """
    batch_source = cfg.SOLVER if is_train else cfg.TEST
    batch_size = batch_source.IMS_PER_BATCH

    dataset = build_dataset(cfg)

    loader_options = dict(
        batch_size=batch_size,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        shuffle=False,
        sampler=SequentialSampler(dataset),
        pin_memory=False,
        collate_fn=collate_batch,
    )
    return data.DataLoader(dataset, **loader_options)

def build_test_dataset(cfg):
    """Build the test dataset from ``sample_submission.csv``.

    Reads ``<ROOT_DIR>/sample_submission.csv``, prints its shape, and wraps it
    with the images under ``<ROOT_DIR>/test`` using the test transform.

    Args:
        cfg: config node providing ``DATASETS.ROOT_DIR``.

    Returns:
        The ``test_wheat`` dataset instance.
    """
    root_dir = cfg.DATASETS.ROOT_DIR
    submission_frame = pd.read_csv(f'{root_dir}/sample_submission.csv')
    print(submission_frame.shape)
    return test_wheat(submission_frame, f'{root_dir}/test', get_test_transform())

def make_test_data_loader(cfg):
    """Create the DataLoader over the test dataset.

    The loader does not shuffle and keeps the last, possibly smaller, batch.

    Args:
        cfg: config node providing ``TEST.IMS_PER_BATCH`` and
            ``DATALOADER.NUM_WORKERS``.

    Returns:
        The test ``DataLoader``.
    """
    batch_size = cfg.TEST.IMS_PER_BATCH
    dataset = build_test_dataset(cfg)
    workers = cfg.DATALOADER.NUM_WORKERS

    return data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=workers,
        drop_last=False,
        collate_fn=collate_batch,
    )

In [None]:
%%writefile ./modeling/wheat_detector.py

import torch
from torch import nn
from layers import FasterRCNN
from layers.backbone_utils import resnest_fpn_backbone

class WheatDetector(nn.Module):
    """Faster R-CNN detector on a ResNeSt-FPN backbone.

    Args:
        cfg: config node; ``cfg.MODEL.NUM_CLASSES`` sets the number of classes.
        **kwargs: forwarded unchanged to ``FasterRCNN``.
    """

    def __init__(self, cfg, **kwargs):
        super().__init__()
        # pretrained=False: the notebook runs without internet access, so no
        # weights can be fetched; they come from the checkpoint instead.
        backbone = resnest_fpn_backbone(pretrained=False)
        # The backbone stays registered on this module as well as being handed
        # to FasterRCNN; keep both so existing checkpoints still match.
        self.backbone = backbone
        self.base = FasterRCNN(backbone, num_classes=cfg.MODEL.NUM_CLASSES, **kwargs)

    def forward(self, images, targets=None):
        """Delegate to the wrapped ``FasterRCNN`` and return its output."""
        return self.base(images, targets)

In [None]:
%%writefile ./config/defaults.py
# encoding: utf-8
"""
@author:  wuxin.wang
@contact: wuxin.wang@whu.edu.cn
"""
import sys
sys.path.insert(0, "./external/yacs")
from yacs.config import CfgNode as CN

# -----------------------------------------------------------------------------
# Convention about Training / Test specific parameters
# -----------------------------------------------------------------------------
# Whenever an argument can be either used for training or for testing, the
# corresponding name will be post-fixed by a _TRAIN for a training parameter,
# or _TEST for a test-specific parameter.
# For example, the number of images during training will be
# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be
# IMAGES_PER_BATCH_TEST

# -----------------------------------------------------------------------------
# Config definition
# -----------------------------------------------------------------------------

# Root config node; every option below hangs off `_C`.
_C = CN()

_C.DEBUG = False
_C.SEED = 42
# When True, the notebook's Tester.log() prints each message as well as
# appending it to the log file.
_C.VERBOSE = True

_C.MODEL = CN()
# Device the model (and checkpoint consumer) is moved to.
_C.MODEL.DEVICE = "cuda"
# Passed to FasterRCNN as `num_classes`.
_C.MODEL.NUM_CLASSES = 2

# -----------------------------------------------------------------------------
# INPUT
# -----------------------------------------------------------------------------
# NOTE(review): several key names below are misspelled (TOFGRAY_PROB,
# COTOUT_*). They are left untouched because code outside this file
# presumably reads them by these exact names - confirm before renaming.
_C.INPUT = CN()
# RandomSizedCrop parameters
_C.INPUT.RSC_MIN_MAX_HEIGHT = (800, 800)
_C.INPUT.RSC_HEIGHT = 1024
_C.INPUT.RSC_WIDTH = 1024
_C.INPUT.RSC_PROB = 0.5
# HueSaturationValue parameters
_C.INPUT.HSV_H = 0.2
_C.INPUT.HSV_S = 0.2
_C.INPUT.HSV_V = 0.2
_C.INPUT.HSV_PROB = 0.9
# RandomBrightnessContrast parameters
_C.INPUT.BC_B = 0.2
_C.INPUT.BC_C = 0.2
_C.INPUT.BC_PROB = 0.9
# Color parameters
_C.INPUT.COLOR_PROB = 0.9
# Random probability for ToGray
_C.INPUT.TOFGRAY_PROB = 0.01
# Random probability for HorizontalFlip
_C.INPUT.HFLIP_PROB = 0.5
# Random probability for VerticalFlip
_C.INPUT.VFLIP_PROB = 0.5
# Cutout parameters
_C.INPUT.COTOUT_NUM_HOLES = 8
_C.INPUT.COTOUT_MAX_H_SIZE = 64
_C.INPUT.COTOUT_MAX_W_SIZE = 64
_C.INPUT.COTOUT_FILL_VALUE = 0
_C.INPUT.COTOUT_PROB = 0.5

# -----------------------------------------------------------------------------
# Dataset
# -----------------------------------------------------------------------------
_C.DATASETS = CN()
# Root dir of dataset (images, sample_submission.csv and the test/ folder
# are resolved relative to it)
_C.DATASETS.ROOT_DIR = "/content/global-wheat-detection"
# Path handed to pd.read_csv() by split_dataset(), i.e. the annotation CSV.
# NOTE(review): this default looks like a directory; the notebook overrides
# it with a CSV path before use.
_C.DATASETS.DIR = "/content/global-wheat-detection"
# Fold to validate
# NOTE(review): not read by the inference-only data/build.py, where the fold
# split is disabled.
_C.DATASETS.VALID_FOLD = 0
# # List of the dataset names for training, as present in paths_catalog.py
# _C.DATASETS.TRAIN = ()
# # List of the dataset names for testing, as present in paths_catalog.py
# _C.DATASETS.TEST = ()

# -----------------------------------------------------------------------------
# DataLoader
# -----------------------------------------------------------------------------
_C.DATALOADER = CN()
# Passed to torch DataLoader as `num_workers`
_C.DATALOADER.NUM_WORKERS = 2

# ---------------------------------------------------------------------------- #
# Solver
# ---------------------------------------------------------------------------- #
# Training-time options. None of them is read by the inference code in this
# notebook except IMS_PER_BATCH (used when make_data_loader(is_train=True)).
_C.SOLVER = CN()
_C.SOLVER.OPTIMIZER_NAME = "SGD"
_C.SOLVER.SCHEDULER_NAME = "CosineAnnealingWarmRestarts"
# NOTE(review): "CPOCH" is presumably a typo for "EPOCH"; the key is kept
# as-is because code outside this file may read it by this name.
_C.SOLVER.COS_CPOCH = 2
_C.SOLVER.T_MUL = 2

_C.SOLVER.MAX_EPOCHS = 72

_C.SOLVER.BASE_LR = 0.005
_C.SOLVER.BIAS_LR_FACTOR = 1

_C.SOLVER.MOMENTUM = 0.9

_C.SOLVER.WEIGHT_DECAY = 0.0005
_C.SOLVER.WEIGHT_DECAY_BIAS = 0
_C.SOLVER.WEIGHT_DECAY_BN = 0

_C.SOLVER.WARMUP_EPOCHS = 10

_C.SOLVER.EARLY_STOP_PATIENCE = 20

_C.SOLVER.TRAIN_CHECKPOINT = False
_C.SOLVER.CLEAR_OUTPUT = True

# Number of images per batch
# This is global, so if we have 8 GPUs and IMS_PER_BATCH = 16, each GPU will
# see 2 images per batch
_C.SOLVER.IMS_PER_BATCH = 4

# Test / validation options.
# IMS_PER_BATCH is the batch size used by the validation and test loaders.
_C.TEST = CN()
_C.TEST.IMS_PER_BATCH = 4
# Checkpoint file loaded for inference.
_C.TEST.WEIGHT = "/content/output/best-checkpoint.bin"

# ---------------------------------------------------------------------------- #
# Misc options
# ---------------------------------------------------------------------------- #
# Directory receiving log.txt and submission.csv; created if missing.
_C.OUTPUT_DIR = "/content/drive/My Drive/Global_Wheat_Detection/experiments/baseline"

Define TTA (Hide cell)

In [None]:
import matplotlib.pyplot as plt
import cv2

import os
import warnings
import torch
import numpy as np
from tqdm import tqdm
import pandas as pd
from itertools import product
import sys
sys.path.insert(0, "./external/wbf")
import ensemble_boxes
warnings.filterwarnings("ignore")


class BaseWheatTTA:
    """ author: @shonenkov

    Interface of a test-time augmentation: a way to transform a single image
    or a batch, plus the inverse mapping for boxes predicted on the
    transformed input. Subclasses must override all three methods.
    """
    # Side length used when mapping box coordinates back.
    image_size = IMG_SIZE

    def augment(self, image):
        """Transform one image."""
        raise NotImplementedError()

    def batch_augment(self, images):
        """Transform a batch of images."""
        raise NotImplementedError()

    def deaugment_boxes(self, boxes):
        """Map boxes predicted on the transformed input back to the original."""
        raise NotImplementedError()


class TTAHorizontalFlip(BaseWheatTTA):
    """ author: @shonenkov

    Flips axis 1 of a single image / axis 2 of a batch and mirrors box
    columns 1 and 3 accordingly.
    NOTE(review): for a (C, H, W) tensor that axis is the height, i.e. the
    flip is top-to-bottom despite the class name - confirm the tensor layout.
    """

    def augment(self, image):
        """Flip a single image along axis 1."""
        return image.flip(1)

    def batch_augment(self, images):
        """Flip a batch along axis 2."""
        return images.flip(2)

    def deaugment_boxes(self, boxes):
        """Mirror columns 1 and 3 back. Modifies ``boxes`` in place and returns it."""
        # Columns are swapped so the smaller coordinate stays in column 1.
        mirrored = self.image_size - boxes[:, [3, 1]]
        boxes[:, [1, 3]] = mirrored
        return boxes


class TTAVerticalFlip(BaseWheatTTA):
    """ author: @shonenkov

    Flips axis 2 of a single image / axis 3 of a batch and mirrors box
    columns 0 and 2 accordingly.
    NOTE(review): for a (C, H, W) tensor that axis is the width, i.e. the
    flip is left-to-right despite the class name - confirm the tensor layout.
    """

    def augment(self, image):
        """Flip a single image along axis 2."""
        return image.flip(2)

    def batch_augment(self, images):
        """Flip a batch along axis 3."""
        return images.flip(3)

    def deaugment_boxes(self, boxes):
        """Mirror columns 0 and 2 back. Modifies ``boxes`` in place and returns it."""
        # Columns are swapped so the smaller coordinate stays in column 0.
        mirrored = self.image_size - boxes[:, [2, 0]]
        boxes[:, [0, 2]] = mirrored
        return boxes


class TTARotate90(BaseWheatTTA):
    """ author: @shonenkov

    Rotates by one quarter turn (``torch.rot90`` with k=1) over the last two
    axes and maps predicted boxes back.
    """

    def augment(self, image):
        """Rotate a single image over axes (1, 2)."""
        return torch.rot90(image, 1, (1, 2))

    def batch_augment(self, images):
        """Rotate a batch over axes (2, 3)."""
        return torch.rot90(images, 1, (2, 3))

    def deaugment_boxes(self, boxes):
        """Return a new array with the rotation undone; ``boxes`` is not modified."""
        restored = boxes.copy()
        size = self.image_size
        # New columns 0/2 come from the mirrored old columns 3/1 ...
        restored[:, 0] = size - boxes[:, 3]
        restored[:, 2] = size - boxes[:, 1]
        # ... and new columns 1/3 are the old columns 0/2.
        restored[:, 1] = boxes[:, 0]
        restored[:, 3] = boxes[:, 2]
        return restored


class TTACompose(BaseWheatTTA):
    """ author: @shonenkov

    Chains several TTA transforms. Augmentation applies them in list order;
    de-augmentation undoes them in reverse order. An empty list is the
    identity transform.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def augment(self, image):
        """Apply every transform, in order, to a single image."""
        augmented = image
        for step in self.transforms:
            augmented = step.augment(augmented)
        return augmented

    def batch_augment(self, images):
        """Apply every transform, in order, to a batch."""
        augmented = images
        for step in self.transforms:
            augmented = step.batch_augment(augmented)
        return augmented

    def prepare_boxes(self, boxes):
        """Return a copy with columns ordered as (min, min, max, max) per axis pair."""
        ordered = boxes.copy()
        first_pair = boxes[:, [0, 2]]
        second_pair = boxes[:, [1, 3]]
        ordered[:, 0] = np.min(first_pair, axis=1)
        ordered[:, 1] = np.min(second_pair, axis=1)
        ordered[:, 2] = np.max(first_pair, axis=1)
        ordered[:, 3] = np.max(second_pair, axis=1)
        return ordered

    def deaugment_boxes(self, boxes):
        """Undo the transforms last-to-first, then normalise the corner order."""
        restored = boxes
        for step in reversed(self.transforms):
            restored = step.deaugment_boxes(restored)
        return self.prepare_boxes(restored)

In [None]:
from evaluate.evaluate import evaluate

class Tester:
    """Run TTA + WBF inference over a labelled loader and collect predictions.

    The class reads the notebook-level settings ``SCORE_THRESHOLD``,
    ``NMS_IOU_THRESHOLD``, ``USE_NMS``, ``IMG_SIZE``, ``WBF_IOU`` and
    ``WBF_SKIP_BOX``.

    Args:
        model: detection model; put in eval mode and moved to ``device``.
        device: device the model and the input batches are moved to.
        cfg: config node; ``OUTPUT_DIR`` and ``VERBOSE`` are read.
        test_loader: loader yielding ``(images, targets, image_ids)`` batches.
    """

    # Number of images drawn with their fused boxes at the start of infer().
    _NUM_PREVIEWS = 10

    def __init__(self, model, device, cfg, test_loader):
        self.config = cfg
        self.test_loader = test_loader

        self.base_dir = f'{self.config.OUTPUT_DIR}'
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.base_dir, exist_ok=True)

        self.log_path = f'{self.base_dir}/log.txt'
        self.score_threshold = SCORE_THRESHOLD
        self.iou_threshold = NMS_IOU_THRESHOLD
        self.use_nms = USE_NMS

        self.model = model
        self.model.eval()

        self.device = device
        self.model.to(self.device)

        self.log(f'Tester prepared. Device is {self.device}')

    def test(self):
        """Run inference and return the list of per-image prediction dicts."""
        return self.infer(pp_shrink=-1)

    def process_det(self, index, outputs):
        """Return the clipped integer boxes and scores above the score threshold.

        Args:
            index: position of the image inside ``outputs``.
            outputs: per-image model outputs with ``'boxes'`` and ``'scores'``.
        """
        boxes = outputs[index]['boxes'].detach().cpu().numpy()
        scores = outputs[index]['scores'].detach().cpu().numpy()
        boxes = boxes.clip(min=0, max=IMG_SIZE - 1).astype(int)
        keep = scores > self.score_threshold
        return boxes[keep], scores[keep]

    def make_tta_predictions(self, tta_transforms, images):
        """Predict on every TTA view of a batch.

        Args:
            tta_transforms: TTA objects exposing ``batch_augment`` and
                ``deaugment_boxes``.
            images: sequence of image tensors of identical shape.

        Returns:
            One list per TTA transform, each holding one
            ``{'boxes', 'scores'}`` dict per image, with the boxes mapped back
            to the un-augmented frame.
        """
        with torch.no_grad():
            # Follow the configured device instead of assuming CUDA.
            images = torch.stack(images).float().to(self.device)
            predictions = []
            for tta_transform in tta_transforms:
                outputs = self.model(tta_transform.batch_augment(images.clone()))

                result = []
                for i in range(len(images)):
                    boxes = outputs[i]['boxes'].detach().cpu().numpy()
                    scores = outputs[i]['scores'].detach().cpu().numpy()
                    boxes = tta_transform.deaugment_boxes(boxes.copy())

                    if self.use_nms:
                        # NOTE(review): run_wbf() needs numpy arrays back from
                        # nms_method - confirm against the bundled library.
                        labels = np.ones(scores.shape[0]).astype(int).tolist()
                        boxes, scores, labels = ensemble_boxes.ensemble_boxes_nms.nms_method([boxes], [scores], [labels], method=3,
                                                                                        weights=None, iou_thr=self.iou_threshold,
                                                                                        thresh=self.score_threshold)
                    else:  # not use NMS, just filter by confidence score
                        keep = scores > self.score_threshold
                        boxes = boxes[keep]
                        scores = scores[keep]
                    result.append({
                        'boxes': boxes,
                        'scores': scores,
                    })
                predictions.append(result)
        return predictions

    def run_wbf(self, predictions, image_index, image_size=IMG_SIZE, iou_thr=WBF_IOU, skip_box_thr=WBF_SKIP_BOX, weights=None):
        """Fuse the TTA predictions of one image with Weighted Boxes Fusion.

        Args:
            predictions: output of ``make_tta_predictions``.
            image_index: index of the image inside the batch.
            image_size: side length used to normalise boxes for WBF.
            iou_thr: WBF IoU threshold.
            skip_box_thr: WBF score threshold below which boxes are skipped.
            weights: optional per-view weights, forwarded to WBF.

        Returns:
            ``(boxes, scores, labels)`` with boxes scaled back to pixels.
        """
        scale = image_size - 1
        per_view = [prediction[image_index] for prediction in predictions]
        boxes = [(view['boxes'] / scale).tolist() for view in per_view]
        scores = [view['scores'].tolist() for view in per_view]
        labels = [np.ones(view['scores'].shape[0]).astype(int).tolist() for view in per_view]
        # `weights` used to be accepted but silently replaced by None.
        boxes, scores, labels = ensemble_boxes.ensemble_boxes_wbf.weighted_boxes_fusion(boxes, scores, labels,
                                                                                        weights=weights, iou_thr=iou_thr,
                                                                                        skip_box_thr=skip_box_thr)
        boxes = boxes * scale
        return boxes, scores, labels

    def _show_preview(self, image, boxes, scores):
        """Draw the fused boxes and scores on one image and display it."""
        # permute() returns a strided view that shares memory with the tensor;
        # copy it into a contiguous array so that drawing neither depends on
        # how OpenCV handles strided input nor writes into the loader's image.
        sample = np.ascontiguousarray(image.permute(1, 2, 0).cpu().numpy())

        fig, ax = plt.subplots(1, 1, figsize=(16, 8))
        for box, score in zip(boxes, scores):
            # Plain Python ints are accepted by every OpenCV version.
            x1, y1, x2, y2 = (int(v) for v in box)
            cv2.rectangle(sample, (x1, y1), (x2, y2), (0, 0, 1), 4)
            cv2.putText(sample, '%.2f' % (score), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX,
                        1, (255, 255, 255), 3, cv2.LINE_AA)

        ax.set_axis_off()
        ax.imshow(sample)
        plt.show()
        # Release the figure so previews do not accumulate in memory.
        plt.close(fig)

    def infer(self, pp_shrink=-1):
        """Predict every image of the loader and pair the result with its ground truth.

        Args:
            pp_shrink: post-processing offset applied after conversion to
                ``x, y, w, h``: it is added to ``x``/``y`` and subtracted from
                ``w``/``h``. A negative value therefore moves the corner up and
                left and enlarges the box.

        Returns:
            A list with one dict per image holding ``'pred_boxes'`` and
            ``'gt_boxes'`` (both ``x, y, w, h``), ``'scores'`` and
            ``'image_id'``.
        """
        self.model.eval()
        torch.cuda.empty_cache()

        max_coord = IMG_SIZE - 1

        # One TTACompose per on/off combination of the listed transforms.
        tta_transforms = [
            TTACompose([tta_transform for tta_transform in tta_combination if tta_transform])
            for tta_combination in product(
                [TTAHorizontalFlip(), None],
                # [TTAVerticalFlip(), None], [TTARotate90(), None],
            )
        ]
        test_loader = tqdm(self.test_loader, total=len(self.test_loader), desc="Testing")

        previews_shown = 0
        all_predictions = []

        # The loader must also yield targets (ground truth) for validation.
        for images, targets, image_ids in test_loader:
            predictions = self.make_tta_predictions(tta_transforms, images)
            for i, image in enumerate(images):
                boxes, scores, _ = self.run_wbf(predictions, image_index=i)
                boxes = boxes.round().astype(np.int32).clip(min=0, max=max_coord)
                image_id = image_ids[i]

                if previews_shown < self._NUM_PREVIEWS:
                    print('writing ... ', i, image_id)
                    self._show_preview(image, boxes, scores)
                    previews_shown += 1

                # Corner format (x1, y1, x2, y2) -> (x, y, w, h).
                boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
                boxes[:, 3] = boxes[:, 3] - boxes[:, 1]

                # Post-processing offset; see the docstring for the sign.
                boxes[:, :2] = boxes[:, :2] + pp_shrink
                boxes[:, 2:] = boxes[:, 2:] - pp_shrink

                # Keep every value inside the valid range after the offset.
                np.clip(boxes, 0, max_coord, out=boxes)

                gt = targets[i]['boxes'].cpu().numpy().astype(int)
                gt[:, 2] = gt[:, 2] - gt[:, 0]
                gt[:, 3] = gt[:, 3] - gt[:, 1]

                all_predictions.append({
                    'pred_boxes': boxes,
                    'scores': scores,
                    'gt_boxes': gt,
                    'image_id': image_id,
                })

        return all_predictions

    def format_prediction_string(self, boxes, scores):
        """Join the detections as ``"score v0 v1 v2 v3"`` groups separated by spaces."""
        return " ".join(
            "{0:.4f} {1} {2} {3} {4}".format(score, box[0], box[1], box[2], box[3])
            for score, box in zip(scores, boxes)
        )

    def save_predictions(self, results):
        """Write ``results`` to ``<OUTPUT_DIR>/submission.csv``.

        Args:
            results: rows providing ``image_id`` and ``PredictionString``.
        """
        test_df = pd.DataFrame(results, columns=['image_id', 'PredictionString'])
        test_df.to_csv(f'{self.config.OUTPUT_DIR}/submission.csv', index=False)

    def load(self, path):
        """Load ``checkpoint['model_state_dict']`` from ``path`` into the model."""
        # Map to CPU first; load_state_dict copies the values into the model's
        # own parameters, so no second copy has to live on the model's device.
        checkpoint = torch.load(path, map_location="cpu")
        self.model.load_state_dict(checkpoint['model_state_dict'])

    def log(self, message):
        """Append ``message`` to the log file; also print it when ``VERBOSE`` is set."""
        if self.config.VERBOSE:
            print(message)
        with open(self.log_path, 'a+') as logger:
            logger.write(f'{message}\n')

## Split fold and make correct CV with TTA/WBF/NMS
Select the validation fold by setting `cfg.DATASETS.VALID_FOLD`.

In [None]:
from config import cfg

#IMPORTANT
# NOTE(review): the rewritten data/build.py does not read VALID_FOLD; every
# image listed in the CSV below is used for validation.
cfg.DATASETS.VALID_FOLD = 0

# Point the repo defaults at the Kaggle paths.
cfg['OUTPUT_DIR'] = "/kaggle/working/"
cfg['DATASETS']['ROOT_DIR'] = "/kaggle/input/global-wheat-detection"
# CSV read by split_dataset(); its image ids form the validation set.
cfg['DATASETS']['DIR'] = "/kaggle/input/mydata/test.csv"
cfg['TEST']['WEIGHT'] = BEST_PATH
# Display the resulting config as the cell output.
cfg

In [None]:
from data import make_data_loader
# is_train=False selects cfg.TEST.IMS_PER_BATCH as the batch size.
val_loader = make_data_loader(cfg, is_train=False)
# Uncomment to inspect the batches:
# for x,y in enumerate(val_loader):
#     print(x,y)

In [None]:
import os
import sys

from os import mkdir
sys.path.append('.')
from config import cfg
from data import make_test_data_loader
from modeling import build_model
from utils.logger import setup_logger

# start here!!
# Freeze the config so that nothing below can modify it.
cfg.freeze()

output_dir = cfg.OUTPUT_DIR
if output_dir and not os.path.exists(output_dir):
    print('creating ',cfg.OUTPUT_DIR)
    # makedirs also creates missing parent directories, and exist_ok
    # tolerates the directory appearing between the check and the call.
    os.makedirs(output_dir, exist_ok=True)

model = build_model(cfg)
device = cfg.MODEL.DEVICE
# Kept as a notebook global for inspection; mapped to the CPU so that it does
# not hold a second copy of the weights on the GPU (tester.load() reads the
# file again for the model itself).
checkpoint = torch.load(cfg.TEST.WEIGHT, map_location="cpu")

tester = Tester(model=model, device=device, cfg=cfg, test_loader=val_loader)
tester.load(cfg['TEST']['WEIGHT'])
print('*** success load weights! ***')

# One dict per image: pred_boxes, scores, gt_boxes, image_id.
all_predictions = tester.test()

In [None]:
import numba
from evaluate.calculate_score import *
def calculate_individual_scores(all_predictions, score_threshold):
    """Score every image at one confidence threshold.

    Predictions whose score is not above ``score_threshold`` are discarded,
    then ``calculate_image_precision`` is evaluated over the IoU thresholds
    0.5 to 0.75 in steps of 0.05 with ``form='coco'``.

    Args:
        all_predictions: dicts with ``'gt_boxes'``, ``'pred_boxes'``,
            ``'scores'`` and ``'image_id'``.
        score_threshold: minimum (exclusive) confidence to keep a box.

    Returns:
        ``(per-image precisions, image ids, mean precision)``.
    """
    # Numba typed list!
    iou_thresholds = numba.typed.List()
    for iou in (0.5, 0.55, 0.6, 0.65, 0.7, 0.75):
        iou_thresholds.append(iou)

    precisions = []
    image_ids = []
    for entry in all_predictions:
        # Copy the ground truth so the stored predictions stay untouched.
        truth = entry['gt_boxes'].copy()
        confident = np.where(entry['scores'] > score_threshold)
        kept_boxes = entry['pred_boxes'][confident]

        precisions.append(
            calculate_image_precision(truth, kept_boxes, thresholds=iou_thresholds,
                                      form='coco')  # alternative: 'pascal_voc'
        )
        image_ids.append(entry['image_id'])

    return precisions, image_ids, np.mean(precisions)

def evaluate(all_predictions):
    """Find the confidence threshold with the highest mean precision.

    Thresholds 0.00, 0.05, ..., 0.95 are tried. As a side effect every entry
    of ``all_predictions`` gains ``'final_missed_boxes_nums'``: the number of
    ground-truth boxes minus the number of predictions kept at the best
    threshold.

    Args:
        all_predictions: dicts with ``'gt_boxes'``, ``'pred_boxes'`` and
            ``'scores'``; modified in place.

    Returns:
        ``(best_score_threshold, best_final_score)``.
    """
    candidates = np.arange(0, 1, 0.05)
    best_final_score, best_score_threshold = 0, 0
    for score_threshold in tqdm(candidates, total=candidates.shape[0], desc="OOF"):
        final_score = calculate_individual_scores(all_predictions, score_threshold)[2]
        if final_score > best_final_score:
            best_final_score = final_score
            best_score_threshold = score_threshold

    for entry in all_predictions:
        confident = np.where(entry['scores'] > best_score_threshold)
        kept_boxes = entry['pred_boxes'][confident]
        entry['final_missed_boxes_nums'] = len(entry['gt_boxes']) - len(kept_boxes)

    return best_score_threshold, best_final_score

## Correct CV is printed out here!
This CV score takes TTA and WBF into account.

In [None]:
# Search the confidence thresholds and report the best mean precision.
# evaluate() also adds 'final_missed_boxes_nums' to each prediction dict.
best_score_threshold, best_final_score = evaluate(all_predictions)

print('CV -- mAP .5:.75 for valid inference with best threshold = %.2f, TTA/WBF/NMS = %.4f' % (best_score_threshold, best_final_score))

###### See the 20 worst predicted images
Note that performance is normally good; these are just the worst images.