# Global Wheat Head Detection 

# Introduction

Object detection is one of the most important topics of computer vision since it has many applications in several fields. One application of it is this amazing challenge.

Object detection models can be improved thanks to ensemble techniques.

However, ensembling object detectors poses several challenges, including the selection of models and, most importantly, the ensembling method itself: different models produce different output formats with different confidence ranges, so their predictions have to be brought into a common form before they can be fused.

Here we ensembled EfficientDet models.

# EfficientDet5

## EfficientDet Architecture 

![image.png](attachment:image.png)

## EfficientDet Model FLOPs vs COCO Accuracy

![image.png](attachment:image.png)

## Ensemble different EfficientDet Models
### Different architectures (Different Input Size, Different Backbone Networks, Layers)
### Different folds & using different augmentations


![image.png](attachment:image.png)

## EfficientDet 512x512

In [None]:
# Dependencies

!pip install --no-deps '../input/timm-package/timm-0.1.26-py3-none-any.whl' > /dev/null
!pip install --no-deps '../input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl' > /dev/null





import sys
sys.path.insert(0, "../input/timm-efficientdet-pytorch")
sys.path.insert(0, "../input/omegaconf")
sys.path.insert(0, "../input/weightedboxesfusion")



import gc
import random
from glob import glob

import albumentations as A
import cv2
import ensemble_boxes
import numpy as np
import pandas as pd
import torch
from albumentations.pytorch.transforms import ToTensorV2
from effdet import get_efficientdet_config, EfficientDet, DetBenchEval
from effdet.efficientdet import HeadNet
from matplotlib import pyplot as plt
from torch.utils.data import Dataset,DataLoader




def get_valid_transforms():
    """Build the inference-time albumentations pipeline for 512x512 input.

    Returns:
        An ``A.Compose`` that resizes the image to 512x512 and then applies
        ``ToTensorV2``.
    """
    steps = [
        A.Resize(height=512, width=512, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.0)




# Directory containing the test JPEGs; DatasetRetriever reads
# f'{DATA_ROOT_PATH}/{image_id}.jpg' from it.
DATA_ROOT_PATH = '../input/global-wheat-detection/test'




class DatasetRetriever(Dataset):
    """Test-time dataset that loads one JPEG per image id.

    Args:
        image_ids: Sequence of image ids (file names without the ``.jpg``
            suffix) located under ``DATA_ROOT_PATH``.
        transforms: Optional callable invoked as ``transforms(image=...)``
            that returns a dict with an ``'image'`` entry.
    """

    def __init__(self, image_ids, transforms=None):
        super().__init__()
        self.image_ids = image_ids
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Return ``(image, image_id, height)`` for the item at ``index``.

        The image is converted to RGB float32 scaled to [0, 1] before the
        optional transforms run. ``height`` is the number of rows of the image
        as read from disk (i.e. before any resize transform).

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_id = self.image_ids[index]
        image_path = f'{DATA_ROOT_PATH}/{image_id}.jpg'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals an unreadable file by returning None; fail with
            # the offending path instead of an opaque error inside cvtColor.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        height = image.shape[0]
        if self.transforms:
            image = self.transforms(image=image)['image']
        return image, image_id, height

    def __len__(self) -> int:
        # len() works for numpy arrays as well as plain lists/tuples of ids.
        return len(self.image_ids)
    
    
    
    
    
# Inference dataset over every *.jpg under DATA_ROOT_PATH. The image id is the
# file name with its last four characters (the ".jpg" suffix) removed.
dataset = DatasetRetriever(
    image_ids=np.array([path.split('/')[-1][:-4] for path in glob(f'{DATA_ROOT_PATH}/*.jpg')]),
    transforms=get_valid_transforms()
)




def collate_fn(batch):
    """Transpose a batch of per-sample tuples into a tuple of per-field tuples.

    Given ``[(a1, b1, c1), (a2, b2, c2)]`` this returns
    ``((a1, a2), (b1, b2), (c1, c2))``; no tensors are stacked here.
    """
    per_field = zip(*batch)
    return tuple(per_field)





# Loader for the 512x512 pipeline: fixed order (shuffle=False), no dropped tail
# batch, and collate_fn so each batch is a tuple of per-field tuples.
data_loader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
    drop_last=False,
    collate_fn=collate_fn
)


class BaseWheatTTA:
    """ author: @shonenkov

    Interface for test-time augmentations on 512-pixel inputs. Subclasses
    implement the three hooks below; every hook here raises
    ``NotImplementedError``.
    """
    # Side length subclasses use when mapping box coordinates back.
    image_size = 512

    def augment(self, image):
        """Augment a single image."""
        raise NotImplementedError

    def batch_augment(self, images):
        """Augment a batch of images."""
        raise NotImplementedError

    def deaugment_boxes(self, boxes):
        """Map boxes predicted on an augmented image back to the original frame."""
        raise NotImplementedError

class TTAContrastBrightness(BaseWheatTTA):
    """ author: @ffares

    Random contrast/brightness jitter: ``out = clamp(alpha * x + beta, 0, 1)``.

    ``alpha`` (contrast gain) is drawn from [1.0, 1.3] and ``beta`` (brightness
    offset) from [0.0, 0.2], both rounded to two decimals. Inputs are expected
    to be scaled to [0, 1] (the dataset divides pixel values by 255), so the
    result is clamped to that same range for single images and batches alike.
    Boxes are unaffected because the geometry does not change.
    """

    def _jitter(self, tensor):
        # One (alpha, beta) pair per call, shared by every pixel of the input.
        alpha = round(random.uniform(1, 1.3), 2)  # contrast gain
        beta = round(random.uniform(0, 0.2), 2)   # brightness offset
        return torch.clamp(torch.add(torch.mul(tensor, alpha), beta), 0, 1)

    def augment(self, image):
        """Apply the jitter to a single image tensor."""
        return self._jitter(image)

    def batch_augment(self, images):
        """Apply the jitter (one alpha/beta for the whole batch) to a batch."""
        return self._jitter(images)

    def deaugment_boxes(self, boxes):
        """Return ``boxes`` unchanged; a photometric change moves no box."""
        return boxes
        
class TTAHorizontalFlip(BaseWheatTTA):
    """ author: @shonenkov

    Flips dim 1 of a single image / dim 2 of a batch and mirrors box columns
    1 and 3 accordingly.
    NOTE(review): for CHW / NCHW tensors these dims are the height axis, i.e. a
    top-bottom flip despite the class name; augmentation and box mapping agree
    with each other, so results are consistent.
    """

    def augment(self, image):
        return torch.flip(image, [1])

    def batch_augment(self, images):
        return torch.flip(images, [2])

    def deaugment_boxes(self, boxes):
        # Mirror columns 1 and 3 and swap them so min/max order is kept.
        # Modifies ``boxes`` in place and returns the same array.
        mirrored = self.image_size - boxes[:, [3, 1]]
        boxes[:, [1, 3]] = mirrored
        return boxes

class TTAVerticalFlip(BaseWheatTTA):
    """ author: @shonenkov

    Flips dim 2 of a single image / dim 3 of a batch and mirrors box columns
    0 and 2 accordingly.
    NOTE(review): for CHW / NCHW tensors these dims are the width axis, i.e. a
    left-right flip despite the class name; augmentation and box mapping agree
    with each other, so results are consistent.
    """

    def augment(self, image):
        return torch.flip(image, [2])

    def batch_augment(self, images):
        return torch.flip(images, [3])

    def deaugment_boxes(self, boxes):
        # Mirror columns 0 and 2 and swap them so min/max order is kept.
        # Modifies ``boxes`` in place and returns the same array.
        mirrored = self.image_size - boxes[:, [2, 0]]
        boxes[:, [0, 2]] = mirrored
        return boxes
    
class TTARotate90(BaseWheatTTA):
    """ author: @shonenkov

    One ``torch.rot90`` step over the last two dims, plus the matching box
    coordinate mapping back to the unrotated frame.
    """

    def augment(self, image):
        return torch.rot90(image, k=1, dims=(1, 2))

    def batch_augment(self, images):
        return torch.rot90(images, k=1, dims=(2, 3))

    def deaugment_boxes(self, boxes):
        # Works on a copy; the input array is left untouched.
        size = self.image_size
        restored = boxes.copy()
        restored[:, 0] = size - boxes[:, 1]
        restored[:, 2] = size - boxes[:, 3]
        restored[:, 1] = boxes[:, 2]
        restored[:, 3] = boxes[:, 0]
        return restored
    
class TTARotate180(BaseWheatTTA):
    """ author: @shonenkov

    Two ``torch.rot90`` steps over the last two dims; boxes are mapped back by
    mirroring all four coordinates.
    """

    def augment(self, image):
        return torch.rot90(image, k=2, dims=(1, 2))

    def batch_augment(self, images):
        return torch.rot90(images, k=2, dims=(2, 3))

    def deaugment_boxes(self, boxes):
        # Mirror every coordinate and swap the two corners.
        # Modifies ``boxes`` in place and returns the same array.
        mirrored = self.image_size - boxes[:, [2, 3, 0, 1]]
        boxes[:, [0, 1, 2, 3]] = mirrored
        return boxes
    
class TTARotate270(BaseWheatTTA):
    """ author: @shonenkov

    Three ``torch.rot90`` steps over the last two dims, plus the matching box
    coordinate mapping back to the unrotated frame.
    """

    def augment(self, image):
        return torch.rot90(image, k=3, dims=(1, 2))

    def batch_augment(self, images):
        return torch.rot90(images, k=3, dims=(2, 3))

    def deaugment_boxes(self, boxes):
        # Works on a copy; the input array is left untouched.
        size = self.image_size
        restored = boxes.copy()
        restored[:, 0] = boxes[:, 1]
        restored[:, 2] = boxes[:, 3]
        restored[:, 1] = size - boxes[:, 2]
        restored[:, 3] = size - boxes[:, 0]
        return restored
    
class TTACompose(BaseWheatTTA):
    """ author: @shonenkov

    Applies a list of TTA transforms in order and undoes them on boxes in
    reverse order.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def augment(self, image):
        """Run every transform's ``augment`` on a single image, in order."""
        augmented = image
        for step in self.transforms:
            augmented = step.augment(augmented)
        return augmented

    def batch_augment(self, images):
        """Run every transform's ``batch_augment`` on a batch, in order."""
        augmented = images
        for step in self.transforms:
            augmented = step.batch_augment(augmented)
        return augmented

    def prepare_boxes(self, boxes):
        """Return a copy of ``boxes`` with columns ordered as (min, min, max, max).

        Columns 0/2 and 1/3 are each sorted so that the smaller value ends up in
        the first corner; de-augmentation may leave them swapped.
        """
        col_02 = boxes[:, [0, 2]]
        col_13 = boxes[:, [1, 3]]
        ordered = boxes.copy()
        ordered[:, 0] = col_02.min(axis=1)
        ordered[:, 2] = col_02.max(axis=1)
        ordered[:, 1] = col_13.min(axis=1)
        ordered[:, 3] = col_13.max(axis=1)
        return ordered

    def deaugment_boxes(self, boxes):
        """Undo all transforms (last applied first) and normalise corner order."""
        restored = boxes
        for step in reversed(self.transforms):
            restored = step.deaugment_boxes(restored)
        return self.prepare_boxes(restored)
    
    
    
from itertools import product

# All 2*2*2 on/off combinations of the two flips and the 90-degree rotation;
# the all-off combination yields an empty (identity) TTACompose.
tta_transforms = []
for tta_combination in product(
    [TTAHorizontalFlip(), None],
    [TTAVerticalFlip(), None],
    [TTARotate90(), None],
):
    # filter(None, ...) drops the None placeholders of disabled transforms.
    tta_transforms.append(TTACompose(list(filter(None, tta_combination))))
    

def load_net5(checkpoint_path):
    """Load an EfficientDet-D5 checkpoint as a CUDA eval-mode ``DetBenchEval``.

    Args:
        checkpoint_path: Path to a checkpoint whose ``'model_state_dict'`` entry
            holds the network weights.

    Returns:
        The ``DetBenchEval`` wrapper, in eval mode, moved to the GPU.
    """
    cfg = get_efficientdet_config('tf_efficientdet_d5')
    model = EfficientDet(cfg, pretrained_backbone=False)

    # The config is adjusted after the network is built, then the class head is
    # replaced by a single-class head at 512x512 input size.
    cfg.num_classes = 1
    cfg.image_size = 512
    model.class_net = HeadNet(
        cfg,
        num_outputs=cfg.num_classes,
        norm_kwargs={'eps': .001, 'momentum': .01},
    )

    state = torch.load(checkpoint_path)
    model.load_state_dict(state['model_state_dict'])

    # Release the checkpoint before wrapping/moving the model.
    del state
    gc.collect()

    bench = DetBenchEval(model, cfg)
    bench.eval()
    return bench.cuda()

def load_net7(checkpoint_path):
    """Load an EfficientDet-D7 checkpoint as a CUDA eval-mode ``DetBenchEval``.

    Args:
        checkpoint_path: Path to a checkpoint whose ``'model_state_dict'`` entry
            holds the network weights.

    Returns:
        The ``DetBenchEval`` wrapper, in eval mode, moved to the GPU.
    """
    cfg = get_efficientdet_config('tf_efficientdet_d7')
    model = EfficientDet(cfg, pretrained_backbone=False)

    # The config is adjusted after the network is built, then the class head is
    # replaced by a single-class head at 512x512 input size.
    cfg.num_classes = 1
    cfg.image_size = 512
    model.class_net = HeadNet(
        cfg,
        num_outputs=cfg.num_classes,
        norm_kwargs={'eps': .001, 'momentum': .01},
    )

    state = torch.load(checkpoint_path)
    model.load_state_dict(state['model_state_dict'])

    # Release the checkpoint before wrapping/moving the model.
    del state
    gc.collect()

    bench = DetBenchEval(model, cfg)
    bench.eval()
    return bench.cuda()


# Models
# 512x512 ensemble members. Entries that are commented out are alternative
# checkpoints that are currently excluded from the ensemble.
# NOTE(review): the load_net7 entries point at directories named
# "effdet5-cutmix-augmix" -- confirm these checkpoints really hold D7 weights,
# otherwise load_state_dict will reject them.
models =[    
    #With Cleaning/ Old Version fold 0
    #load_net5('../input/efficientdetearlierversionf0/best-checkpoint-047epoch.bin'), 
    #With Cleaning/ New Version fold 0
    load_net5('../input/efficientdet5f0/best-checkpoint-028epoch.bin'), 
    #With CLEANING fold 1 fine tuned on half arvalis
    #load_net5('../input/efficientdet5f1finetunearv2/best-checkpoint-001epoch.bin'), 
    #Without Cleaning fold 3
    load_net7('../input/training-efficientdet-f3/effdet5-cutmix-augmix/best-checkpoint-033epoch.bin'),
    #Without Cleaning fold 4
    #load_net7('../input/training-efficientdet-f4/effdet5-cutmix-augmix/best-checkpoint-037epoch.bin'), 
]
    

def make_predictions(images, score_threshold=0.1, net=None):
    """Run a single model on a batch without TTA.

    Args:
        images: Sequence of image tensors of identical shape; they are stacked
            into one batch and moved to the GPU.
        score_threshold: Detections with a score not above this are dropped.
        net: Model to run. When omitted, a module-level ``net`` is used if one
            exists (the original lookup), otherwise the first entry of
            ``models``.

    Returns:
        A one-element list ``[per_image]`` (same nesting as the other
        ``make_*_predictions`` helpers), where ``per_image[i]`` is a dict with
        ``'boxes'`` (corner format, after adding columns 0/1 to columns 2/3)
        and ``'scores'`` for image ``i``.
    """
    if net is None:
        # No module-level ``net`` is defined in this notebook, so the original
        # bare reference raised NameError; fall back to the first ensemble model.
        net = globals().get('net')
        if net is None:
            net = models[0]

    images = torch.stack(images).cuda().float()
    predictions = []
    with torch.no_grad():
        det = net(images, torch.tensor([1]*images.shape[0]).float().cuda())
        for i in range(images.shape[0]):
            detections = det[i].detach().cpu().numpy()
            scores = detections[:, 4]
            indexes = np.where(scores > score_threshold)[0]
            # Filter exactly once; indexing the already-filtered array with the
            # same indexes again (as before) is wrong whenever the kept rows are
            # not a leading prefix.
            boxes = detections[:, :4][indexes]
            boxes[:, 2] = boxes[:, 2] + boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] + boxes[:, 1]
            predictions.append({
                'boxes': boxes,
                'scores': scores[indexes],
            })
    return [predictions]
  
    
def make_tta_predictions(images, score_threshold=0.1, net=None):
    """Run a single model on a batch under every transform in ``tta_transforms``.

    Args:
        images: Sequence of image tensors of identical shape; they are stacked
            into one batch and moved to the GPU.
        score_threshold: Detections with a score not above this are dropped.
        net: Model to run. When omitted, a module-level ``net`` is used if one
            exists (the original lookup), otherwise the first entry of
            ``models``.

    Returns:
        One list per TTA transform; each holds one dict per image with
        ``'boxes'`` (corner format, mapped back through the transform's
        ``deaugment_boxes``) and ``'scores'``.
    """
    if net is None:
        # No module-level ``net`` is defined in this notebook, so the original
        # bare reference raised NameError; fall back to the first ensemble model.
        net = globals().get('net')
        if net is None:
            net = models[0]

    with torch.no_grad():
        images = torch.stack(images).float().cuda()
        predictions = []
        for tta_transform in tta_transforms:
            result = []
            det = net(tta_transform.batch_augment(images.clone()), torch.tensor([1]*images.shape[0]).float().cuda())

            for i in range(images.shape[0]):
                detections = det[i].detach().cpu().numpy()
                scores = detections[:, 4]
                indexes = np.where(scores > score_threshold)[0]
                boxes = detections[:, :4][indexes]
                # Columns 2/3 become the second corner (first corner + extent).
                boxes[:, 2] = boxes[:, 2] + boxes[:, 0]
                boxes[:, 3] = boxes[:, 3] + boxes[:, 1]
                # Copy because some transforms de-augment in place.
                boxes = tta_transform.deaugment_boxes(boxes.copy())
                result.append({
                    'boxes': boxes,
                    'scores': scores[indexes],
                })
            predictions.append(result)
    return predictions


def make_tta_models_predictions(images, score_threshold=0.1):
    """Run every model in ``models`` under every transform in ``tta_transforms``.

    Args:
        images: Sequence of image tensors of identical shape.
        score_threshold: Detections with a score not above this are dropped.

    Returns:
        One list per (transform, model) pair, transforms in the outer loop; each
        holds one dict per image with ``'boxes'`` (corner format, de-augmented)
        and ``'scores'``.
    """
    batch = torch.stack(images).float().cuda()
    batch_size = batch.shape[0]
    with torch.no_grad():
        predictions = []
        for tta in tta_transforms:
            for model in models:
                det = model(tta.batch_augment(batch.clone()), torch.tensor([1]*batch_size).float().cuda())
                per_image = []
                for idx in range(batch_size):
                    raw = det[idx].detach().cpu().numpy()
                    scores = raw[:, 4]
                    keep = np.where(scores > score_threshold)[0]
                    boxes = raw[:, :4][keep]
                    # Columns 2/3 become the second corner (first corner + extent).
                    boxes[:, 2] += boxes[:, 0]
                    boxes[:, 3] += boxes[:, 1]
                    per_image.append({
                        # Copy because some transforms de-augment in place.
                        'boxes': tta.deaugment_boxes(boxes.copy()),
                        'scores': scores[keep],
                    })
                predictions.append(per_image)
    return predictions

def make_models_predictions(images, score_threshold=0.1):
    """Run every model in ``models`` on a batch without TTA.

    Args:
        images: Sequence of image tensors of identical shape.
        score_threshold: Detections with a score not above this are dropped.

    Returns:
        One list per model; each holds one dict per image with ``'boxes'``
        (corner format) and ``'scores'``.
    """
    images = torch.stack(images).cuda().float()
    predictions = []
    for net in models:
        with torch.no_grad():
            det = net(images, torch.tensor([1]*images.shape[0]).float().cuda())
            result = []
            for i in range(images.shape[0]):
                detections = det[i].detach().cpu().numpy()
                scores = detections[:, 4]
                indexes = np.where(scores > score_threshold)[0]
                # Filter exactly once; the previous code indexed the filtered
                # array with the same indexes a second time, which is only
                # harmless when the kept rows form a leading prefix.
                boxes = detections[:, :4][indexes]
                boxes[:, 2] = boxes[:, 2] + boxes[:, 0]
                boxes[:, 3] = boxes[:, 3] + boxes[:, 1]
                result.append({
                    'boxes': boxes,
                    'scores': scores[indexes],
                })
            predictions.append(result)
    return predictions


# Inference 
def format_prediction_string(boxes, scores):
    """Serialise detections as ``"score c0 c1 c2 c3"`` groups joined by spaces.

    Scores are written with four decimals; the four box values are written
    with their default string formatting. Pairs are taken with ``zip`` so any
    surplus boxes or scores are ignored.
    """
    return " ".join(
        f"{score:.4f} {box[0]} {box[1]} {box[2]} {box[3]}"
        for score, box in zip(scores, boxes)
    )



def resize_predicitions(predictions, scale=2):
    """Rescale the boxes of nested predictions in place.

    The 512-pixel models predict boxes in a 512-pixel frame; the default
    ``scale=2`` brings them to the 1024-pixel frame.

    Args:
        predictions: List of per-pass lists of dicts, each with a ``'boxes'``
            array.
        scale: Multiplier applied to every box coordinate (default 2).

    Returns:
        The same ``predictions`` object; each dict's ``'boxes'`` entry is
        replaced by the scaled array.
    """
    for per_pass in predictions:
        for prediction in per_pass:
            prediction['boxes'] = prediction['boxes'] * scale

    return predictions


def effdet_organize(predictions, image_index):
    """Collect one image's boxes, scores and labels from every prediction pass.

    Args:
        predictions: List of passes; ``predictions[p][image_index]`` is a dict
            with ``'boxes'`` and ``'scores'`` arrays.
        image_index: Index of the image inside each pass.

    Returns:
        ``(boxes, scores, labels)`` as plain nested lists with one entry per
        pass; every label is the integer 1.
    """
    boxes, scores, labels = [], [], []
    for per_pass in predictions:
        entry = per_pass[image_index]
        boxes.append(entry['boxes'].tolist())
        scores.append(entry['scores'].tolist())
        n_detections = entry['scores'].shape[0]
        labels.append(np.ones(n_detections).astype(int).tolist())

    return boxes, scores, labels


def run_wbf_ensemble(boxes_effdet, boxes_yolo, scores_effdet, scores_yolo, labels_effdet, labels_yolo, iou_thr=0.6, skip_box_thr=0.5, weights=None):    
    """Fuse two groups of predictions with weighted boxes fusion.

    The two groups (named ``*_effdet`` and ``*_yolo``) are concatenated, first
    group first, and passed to ``weighted_boxes_fusion`` as one list of passes.

    Returns:
        The ``(boxes, scores, labels)`` triple returned by the fusion.
    """
    fused = ensemble_boxes.ensemble_boxes_wbf.weighted_boxes_fusion(
        boxes_effdet + boxes_yolo,
        scores_effdet + scores_yolo,
        labels_effdet + labels_yolo,
        weights=weights,
        iou_thr=iou_thr,
        skip_box_thr=skip_box_thr,
    )
    boxes, scores, labels = fused
    return boxes, scores, labels



def run_wbf(boxes, scores, labels, iou_thr=0.5, skip_box_thr=0.3, weights=None):    
    """Run weighted boxes fusion over per-pass boxes, scores and labels.

    Returns:
        The ``(boxes, scores, labels)`` triple returned by the fusion.
    """
    fused_boxes, fused_scores, fused_labels = ensemble_boxes.ensemble_boxes_wbf.weighted_boxes_fusion(
        boxes,
        scores,
        labels,
        weights=weights,
        iou_thr=iou_thr,
        skip_box_thr=skip_box_thr,
    )
    return fused_boxes, fused_scores, fused_labels


def run_wbf_initial(boxes,scores, image_size=1024, iou_thr=0.41, skip_box_thr=0.4, weights=None):
    """Run weighted boxes fusion with an all-ones label for every detection.

    Args:
        boxes: Per-pass lists of boxes.
        scores: Per-pass lists of scores; one label is generated per score.
        image_size: Unused; kept so existing callers keep working.
        iou_thr: IoU threshold forwarded to the fusion.
        skip_box_thr: Score threshold forwarded to the fusion.
        weights: Optional per-pass weights, forwarded to the fusion
            (previously this argument was ignored and ``None`` was always
            passed).

    Returns:
        The ``(boxes, scores, labels)`` triple returned by the fusion.
    """
    labels0 = [np.ones(len(pass_scores)) for pass_scores in scores]
    boxes, scores, labels = ensemble_boxes.ensemble_boxes_wbf.weighted_boxes_fusion(boxes, scores, labels0, weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    return boxes, scores, labels

In [None]:
# Visual sanity check of the 512x512 ensemble: predict the first batch, fuse the
# passes for its first image, draw the result, and stop (both loops break).
for images, image_ids, image_size in data_loader:
    
    #predictions = make_tta_predictions(images)
    predictions = make_tta_models_predictions(images)
    #predictions = make_predictions(images)
    # Boxes are doubled here and halved again after fusion (see boxes/2 below).
    predictions=resize_predicitions(predictions)

    for i, image in enumerate(images):
        
        image_id=image_ids[i]

        
        # Per-pass boxes/scores/labels of image i from the 512x512 models
        boxes_effdet, scores_effdet, labels_effdet = effdet_organize(predictions, image_index=i)
        
        boxes, scores, labels = run_wbf(boxes_effdet, scores_effdet, labels_effdet, iou_thr=0.4, skip_box_thr=0.43, weights=None)

        # Back to the 512-pixel frame of the displayed image.
        # NOTE(review): the upper clip bound is 512 while the 1024 cell uses
        # 1023 (size - 1) -- confirm whether 511 was intended.
        boxes = (boxes/2).round().astype(np.int32).clip(min=0, max=512)

    
    
        fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    
    
        # Channel-first tensor -> channel-last numpy array for drawing/plotting
        sample = images[i].permute(1,2,0).cpu().numpy()
    
        font = cv2.FONT_HERSHEY_SIMPLEX 
    
        # fontScale 
        fontScale = 1

        # Blue color in RGB 
        color = (0, 0, 1) 

        # Line thickness of 2 px 
        thickness = 2

        for box,score in zip(boxes,scores):
            cv2.rectangle(sample, (box[0], box[1]), (box[2], box[3]), (0, 0, 1), 1)
            # Random horizontal offset (0-19 px) for the score label
            cv2.putText(sample, '{:.2}'.format(score), (box[0]+np.random.randint(20),box[1]), font, fontScale, color, thickness, cv2.LINE_AA)
    
        ax.set_axis_off()
        ax.imshow(sample);
    
        # Only the first image of the first batch is shown
        break
    break

## EfficientDet5 1024x1024

In [None]:
import torch.nn.functional as F


def get_valid_transforms_1024():
    """Build the inference-time albumentations pipeline for 1024x1024 input.

    Returns:
        An ``A.Compose`` that resizes the image to 1024x1024 and then applies
        ``ToTensorV2``.
    """
    steps = [
        A.Resize(height=1024, width=1024, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.0)

    
# Same test images as the 512 pipeline, but resized to 1024x1024. The image id
# is the file name with its last four characters (the ".jpg" suffix) removed.
dataset_1024 = DatasetRetriever(
    image_ids=np.array([path.split('/')[-1][:-4] for path in glob(f'{DATA_ROOT_PATH}/*.jpg')]),
    transforms=get_valid_transforms_1024()
)



# Loader for the 1024x1024 pipeline: one image per batch, loaded in the main
# process (num_workers=0), fixed order.
data_loader_1024 = DataLoader(
    dataset_1024,
    batch_size=1,
    shuffle=False,
    num_workers=0,
    drop_last=False,
    collate_fn=collate_fn
)


class BaseWheatTTA_1024:
    """ author: @shonenkov

    Interface for test-time augmentations on 1024-pixel inputs. Subclasses
    implement the three hooks below; every hook here raises
    ``NotImplementedError``.
    """
    # Side length subclasses use when mapping box coordinates back.
    image_size = 1024

    def augment(self, image):
        """Augment a single image."""
        raise NotImplementedError

    def batch_augment(self, images):
        """Augment a batch of images."""
        raise NotImplementedError

    def deaugment_boxes(self, boxes):
        """Map boxes predicted on an augmented image back to the original frame."""
        raise NotImplementedError

        
class TTAHorizontalFlip_1024(BaseWheatTTA_1024):
    """ author: @shonenkov

    Flips dim 1 of a single image / dim 2 of a batch and mirrors box columns
    1 and 3 accordingly (1024-pixel frame).
    NOTE(review): for CHW / NCHW tensors these dims are the height axis, i.e. a
    top-bottom flip despite the class name; augmentation and box mapping agree
    with each other, so results are consistent.
    """

    def augment(self, image):
        return torch.flip(image, [1])

    def batch_augment(self, images):
        return torch.flip(images, [2])

    def deaugment_boxes(self, boxes):
        # Mirror columns 1 and 3 and swap them so min/max order is kept.
        # Modifies ``boxes`` in place and returns the same array.
        mirrored = self.image_size - boxes[:, [3, 1]]
        boxes[:, [1, 3]] = mirrored
        return boxes

class TTAVerticalFlip_1024(BaseWheatTTA_1024):
    """ author: @shonenkov

    Flips dim 2 of a single image / dim 3 of a batch and mirrors box columns
    0 and 2 accordingly (1024-pixel frame).
    NOTE(review): for CHW / NCHW tensors these dims are the width axis, i.e. a
    left-right flip despite the class name; augmentation and box mapping agree
    with each other, so results are consistent.
    """

    def augment(self, image):
        return torch.flip(image, [2])

    def batch_augment(self, images):
        return torch.flip(images, [3])

    def deaugment_boxes(self, boxes):
        # Mirror columns 0 and 2 and swap them so min/max order is kept.
        # Modifies ``boxes`` in place and returns the same array.
        mirrored = self.image_size - boxes[:, [2, 0]]
        boxes[:, [0, 2]] = mirrored
        return boxes
    
class TTARotate90_1024(BaseWheatTTA_1024):
    """ author: @shonenkov

    One ``torch.rot90`` step over the last two dims, plus the matching box
    coordinate mapping back to the unrotated 1024-pixel frame.
    """

    def augment(self, image):
        return torch.rot90(image, k=1, dims=(1, 2))

    def batch_augment(self, images):
        return torch.rot90(images, k=1, dims=(2, 3))

    def deaugment_boxes(self, boxes):
        # Works on a copy; the input array is left untouched.
        size = self.image_size
        restored = boxes.copy()
        restored[:, 0] = size - boxes[:, 1]
        restored[:, 2] = size - boxes[:, 3]
        restored[:, 1] = boxes[:, 2]
        restored[:, 3] = boxes[:, 0]
        return restored
    
class resize1024to512(BaseWheatTTA_1024):
    """ author: @ffares

    Downsamples images to ``target_size`` x ``target_size`` with
    ``F.interpolate`` so that 1024-pixel batches can be fed to the 512-pixel
    models. Boxes are returned unchanged by ``deaugment_boxes``; rescaling them
    is left to the caller.
    """
    # Output side length in pixels.
    target_size = 512

    def augment(self, image):
        """Resize a single image of shape (C, H, W).

        ``F.interpolate`` with a two-element ``size`` needs a 4-D
        (N, C, H, W) input, so a batch dimension is added for the call and
        removed again afterwards.
        """
        resized = F.interpolate(image.unsqueeze(0), size=(self.target_size, self.target_size))
        return resized.squeeze(0)

    def batch_augment(self, images):
        """Resize a batch of images of shape (N, C, H, W)."""
        return F.interpolate(images, size=(self.target_size, self.target_size))

    def deaugment_boxes(self, boxes):
        """Return ``boxes`` unchanged."""
        return boxes
    
    
class TTACompose_1024(BaseWheatTTA_1024):
    """ author: @shonenkov

    Applies a list of TTA transforms in order and undoes them on boxes in
    reverse order (1024-pixel variant).
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def augment(self, image):
        """Run every transform's ``augment`` on a single image, in order."""
        augmented = image
        for step in self.transforms:
            augmented = step.augment(augmented)
        return augmented

    def batch_augment(self, images):
        """Run every transform's ``batch_augment`` on a batch, in order."""
        augmented = images
        for step in self.transforms:
            augmented = step.batch_augment(augmented)
        return augmented

    def prepare_boxes(self, boxes):
        """Return a copy of ``boxes`` with columns ordered as (min, min, max, max).

        Columns 0/2 and 1/3 are each sorted so that the smaller value ends up in
        the first corner; de-augmentation may leave them swapped.
        """
        col_02 = boxes[:, [0, 2]]
        col_13 = boxes[:, [1, 3]]
        ordered = boxes.copy()
        ordered[:, 0] = col_02.min(axis=1)
        ordered[:, 2] = col_02.max(axis=1)
        ordered[:, 1] = col_13.min(axis=1)
        ordered[:, 3] = col_13.max(axis=1)
        return ordered

    def deaugment_boxes(self, boxes):
        """Undo all transforms (last applied first) and normalise corner order."""
        restored = boxes
        for step in reversed(self.transforms):
            restored = step.deaugment_boxes(restored)
        return self.prepare_boxes(restored)
    
    
    
# All 2*2*2 on/off combinations of the two flips and the 90-degree rotation for
# the 1024 pipeline; the all-off combination yields an empty (identity) compose.
tta_transforms_1024 = []
for tta_combination in product(
    [TTAHorizontalFlip_1024(), None],
    [TTAVerticalFlip_1024(), None],
    [TTARotate90_1024(), None],
):
    # filter(None, ...) drops the None placeholders of disabled transforms.
    tta_transforms_1024.append(TTACompose_1024(list(filter(None, tta_combination))))
    

def load_net5_1024(checkpoint_path):
    """Load an EfficientDet-D5 checkpoint for 1024x1024 input.

    Args:
        checkpoint_path: Path to a checkpoint whose ``'model_state_dict'`` entry
            holds the network weights.

    Returns:
        The ``DetBenchEval`` wrapper, in eval mode, moved to the GPU.
    """
    cfg = get_efficientdet_config('tf_efficientdet_d5')
    model = EfficientDet(cfg, pretrained_backbone=False)

    # The config is adjusted after the network is built, then the class head is
    # replaced by a single-class head at 1024x1024 input size.
    cfg.num_classes = 1
    cfg.image_size = 1024
    model.class_net = HeadNet(
        cfg,
        num_outputs=cfg.num_classes,
        norm_kwargs={'eps': .001, 'momentum': .01},
    )

    state = torch.load(checkpoint_path)
    model.load_state_dict(state['model_state_dict'])

    # Release the checkpoint before wrapping/moving the model.
    del state
    gc.collect()

    bench = DetBenchEval(model, cfg)
    bench.eval()
    return bench.cuda()



# Models
# Ensemble members for the 1024x1024 pipeline (currently a single D5 checkpoint).
models_1024 =[load_net5_1024('../input/wheatweightsefficientdet51024v4/best-checkpoint-006epoch.bin')]
    


def make_tta_models_predictions_1024(images, score_threshold=0.1):
    """Run every model in ``models_1024`` under every ``tta_transforms_1024`` entry.

    Args:
        images: Sequence of image tensors of identical shape.
        score_threshold: Detections with a score not above this are dropped.

    Returns:
        One list per (transform, model) pair, transforms in the outer loop; each
        holds one dict per image with ``'boxes'`` (corner format, de-augmented)
        and ``'scores'``.
    """
    batch = torch.stack(images).float().cuda()
    batch_size = batch.shape[0]
    with torch.no_grad():
        predictions = []
        for tta in tta_transforms_1024:
            for model in models_1024:
                det = model(tta.batch_augment(batch.clone()), torch.tensor([1]*batch_size).float().cuda())
                per_image = []
                for idx in range(batch_size):
                    raw = det[idx].detach().cpu().numpy()
                    scores = raw[:, 4]
                    keep = np.where(scores > score_threshold)[0]
                    boxes = raw[:, :4][keep]
                    # Columns 2/3 become the second corner (first corner + extent).
                    boxes[:, 2] += boxes[:, 0]
                    boxes[:, 3] += boxes[:, 1]
                    per_image.append({
                        # Copy because some transforms de-augment in place.
                        'boxes': tta.deaugment_boxes(boxes.copy()),
                        'scores': scores[keep],
                    })
                predictions.append(per_image)
    return predictions



In [None]:
# Visual sanity check of the 1024x1024 model: predict the first batch, fuse the
# TTA passes for its first image, draw the result, and stop (both loops break).
for images, image_ids, image_size in data_loader_1024:
    
    predictions_1024 = make_tta_models_predictions_1024(images)


    for i, image in enumerate(images):
        
        image_id=image_ids[i]

        
        # Per-pass boxes/scores/labels of image i from the 1024x1024 model
        boxes_effdet, scores_effdet, labels_effdet = effdet_organize(predictions_1024, image_index=i)
        
        boxes, scores, labels = run_wbf(boxes_effdet, scores_effdet, labels_effdet, iou_thr=0.4, skip_box_thr=0.43, weights=None)

        # Integer pixel coordinates clipped to the 1024-pixel frame
        boxes = (boxes).round().astype(np.int32).clip(min=0, max=1023)

    
    
        fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    
    
        # Channel-first tensor -> channel-last numpy array for drawing/plotting
        sample = images[i].permute(1,2,0).cpu().numpy()
    
        font = cv2.FONT_HERSHEY_SIMPLEX 
    
        # fontScale 
        fontScale = 1

        # Blue color in RGB 
        color = (0, 0, 1) 

        # Line thickness of 2 px 
        thickness = 2

        for box,score in zip(boxes,scores):
            cv2.rectangle(sample, (box[0], box[1]), (box[2], box[3]), (0, 0, 1), 1)
            # Random horizontal offset (0-19 px) for the score label
            cv2.putText(sample, '{:.2}'.format(score), (box[0]+np.random.randint(20),box[1]), font, fontScale, color, thickness, cv2.LINE_AA)
    
        ax.set_axis_off()
        ax.imshow(sample);
    
        # Only the first image of the first batch is shown
        break
    break

## Ensemble ALL models

![image.png](attachment:image.png)

In [None]:
# TTA set for running the 512-pixel models on 1024-pixel batches: every
# combination starts with the 1024->512 resize, followed by the on/off
# combinations of the 512-pixel flips and rotation.
tta_transforms_512 = []
for tta_combination in product(
    [resize1024to512()],
    [TTAHorizontalFlip(), None],
    [TTAVerticalFlip(), None],
    [TTARotate90(), None],
):
    # filter(None, ...) drops the None placeholders of disabled transforms.
    tta_transforms_512.append(TTACompose(list(filter(None, tta_combination))))
    
def make_tta_models_predictions_512(images, score_threshold=0.1):
    """Run every model in ``models`` under every ``tta_transforms_512`` entry.

    Each transform in ``tta_transforms_512`` starts with the 1024->512 resize,
    so ``images`` are the 1024-pixel tensors. Returned boxes are not rescaled
    here.

    Args:
        images: Sequence of image tensors of identical shape.
        score_threshold: Detections with a score not above this are dropped.

    Returns:
        One list per (transform, model) pair, transforms in the outer loop; each
        holds one dict per image with ``'boxes'`` (corner format, de-augmented)
        and ``'scores'``.
    """
    batch = torch.stack(images).float().cuda()
    batch_size = batch.shape[0]
    with torch.no_grad():
        predictions = []
        for tta in tta_transforms_512:
            for model in models:
                det = model(tta.batch_augment(batch.clone()), torch.tensor([1]*batch_size).float().cuda())
                per_image = []
                for idx in range(batch_size):
                    raw = det[idx].detach().cpu().numpy()
                    scores = raw[:, 4]
                    keep = np.where(scores > score_threshold)[0]
                    boxes = raw[:, :4][keep]
                    # Columns 2/3 become the second corner (first corner + extent).
                    boxes[:, 2] += boxes[:, 0]
                    boxes[:, 3] += boxes[:, 1]
                    per_image.append({
                        # Copy because some transforms de-augment in place.
                        'boxes': tta.deaugment_boxes(boxes.copy()),
                        'scores': scores[keep],
                    })
                predictions.append(per_image)
    return predictions


In [None]:
# Full inference pass: for every test image, fuse the 1024-pixel predictions
# with the (doubled) 512-pixel predictions and collect one submission row.
results = []

for images, image_ids, image_sizes in data_loader_1024:
    
    predictions_1024 = make_tta_models_predictions_1024(images)
    
    predictions_512 = make_tta_models_predictions_512(images)
    # Double the 512-frame boxes so both sets share the 1024-pixel frame
    predictions_512= resize_predicitions(predictions_512)
    
    for i, image in enumerate(images):
        
        image_id=image_ids[i]
        # Third dataset field: shape[0] of the image as read from disk
        image_size= image_sizes[i]
        
        
        # EfficientDet predictions (1024x1024 models)
        boxes_effdet_1024, scores_effdet_1024, labels_effdet_1024 = effdet_organize(predictions_1024, image_index=i)
                
        
        # EfficientDet predictions (512x512 models)
        boxes_effdet_512, scores_effdet_512, labels_effdet_512 = effdet_organize(predictions_512, image_index=i)
        
        
        # Fusion of both predictions
        boxes, scores, labels = run_wbf_ensemble(boxes_effdet_1024, boxes_effdet_512, scores_effdet_1024, scores_effdet_512, labels_effdet_1024, 
                                                 labels_effdet_512, iou_thr=0.4, skip_box_thr=0.43, weights=None)
        
        # Rescale from the 1024-pixel frame to the original image size and clip.
        # NOTE(review): the same factor (taken from shape[0]) is applied to both
        # axes, which presumes square source images -- confirm.
        boxes = (boxes*(image_size/1024)).round().astype(np.int32).clip(min=0, max=image_size-1)
        
        
        
        # Corner format -> (first corner, extent): columns 2/3 become
        # differences to columns 0/1
        boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
        boxes[:, 3] = boxes[:, 3] - boxes[:, 1]

        result = {
            'image_id': image_id,
            'PredictionString': format_prediction_string(boxes, scores)
        }
        results.append(result)
        

In [None]:
# Write the collected rows to submission.csv (no index column) and preview the
# first ten rows in the notebook.
test_df = pd.DataFrame(results, columns=['image_id', 'PredictionString'])
test_df.to_csv('submission.csv', index=False)
test_df.head(10)

In [None]:
# Visual sanity check of the full ensemble: predict the first batch, fuse the
# 1024 and 512 predictions for its first image, draw the result, and stop.
for images, image_ids, image_sizes in data_loader_1024:
    
    predictions_1024 = make_tta_models_predictions_1024(images)
    
    predictions_512 = make_tta_models_predictions_512(images)
    # Double the 512-frame boxes so both sets share the 1024-pixel frame
    predictions_512= resize_predicitions(predictions_512)
    
    for i, image in enumerate(images):
        
        image_id=image_ids[i]
        # Third dataset field: shape[0] of the image as read from disk
        image_size= image_sizes[i]
        
        
        # EfficientDet predictions (1024x1024 models)
        boxes_effdet_1024, scores_effdet_1024, labels_effdet_1024 = effdet_organize(predictions_1024, image_index=i)
                
        
        # EfficientDet predictions (512x512 models)
        boxes_effdet_512, scores_effdet_512, labels_effdet_512 = effdet_organize(predictions_512, image_index=i)
        

        # Fusion of both predictions
        boxes, scores, labels = run_wbf_ensemble(boxes_effdet_1024, boxes_effdet_512, scores_effdet_1024, scores_effdet_512, labels_effdet_1024, 
                                                 labels_effdet_512, iou_thr=0.4, skip_box_thr=0.43, weights=None)
        
        # Rescale from the 1024-pixel frame to the original image size and clip.
        # NOTE(review): boxes are then drawn on the 1024x1024 tensor, so this
        # only lines up when the source image is itself 1024 pixels -- confirm.
        boxes = (boxes*(image_size/1024)).round().astype(np.int32).clip(min=0, max=image_size-1)

    
    
        fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    
    
        # Channel-first tensor -> channel-last numpy array for drawing/plotting
        sample = images[i].permute(1,2,0).cpu().numpy()
    
        font = cv2.FONT_HERSHEY_SIMPLEX 
    
        # fontScale 
        fontScale = 1

        # Blue color in RGB 
        color = (0, 0, 1) 

        # Line thickness of 2 px 
        thickness = 2

        for box,score in zip(boxes,scores):
            cv2.rectangle(sample, (box[0], box[1]), (box[2], box[3]), (0, 0, 1), 1)
            # Random horizontal offset (0-19 px) for the score label
            cv2.putText(sample, '{:.2}'.format(score), (box[0]+np.random.randint(20),box[1]), font, fontScale, color, thickness, cv2.LINE_AA)
    
        ax.set_axis_off()
        ax.imshow(sample);
    
        # Only the first image of the first batch is shown
        break
    break

## Thank you for reading my kernel!