# final_project


In [None]:
import sys
sys.path.insert(0, "../input/timm-efficientdet-pytorch")
sys.path.insert(0, "../input/omegaconf")
sys.path.insert(0, "../input/weightedboxesfusion")
sys.path.insert(0, "../input/yetanotherefficientdetpytorch")

import ensemble_boxes
import torch
import numpy as np
import pandas as pd
from glob import glob
from torch.utils.data import Dataset,DataLoader
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import cv2
import gc
from matplotlib import pyplot as plt
from matplotlib import patches as patches
from backbone import EfficientDetBackbone
from torchvision import transforms
from efficientdet.utils import BBoxTransform, ClipBoxes
from typing import Union
from torchvision.ops.boxes import batched_nms

Functions that will be used later

In [None]:
def invert_affine(metas: Union[float, list, tuple], preds):
    """Rescale predicted boxes from the resized frame back to the source image.

    Args:
        metas: Either one numeric scale factor that is applied to every
            prediction, or a per-image sequence of
            ``(new_w, new_h, old_w, old_h, padding_w, padding_h)`` tuples.
        preds: List of prediction dicts. Each ``'rois'`` entry is a 2-D array
            whose columns 0 and 2 are divided by the horizontal scale and
            columns 1 and 3 by the vertical scale; empty entries are skipped.

    Returns:
        The same ``preds`` list; the ``'rois'`` arrays are modified in place.
    """
    # ``metas is float`` compared the value with the type object itself and
    # was therefore never true; an isinstance check is what was intended.
    uniform_scale = isinstance(metas, (int, float))

    for i, pred in enumerate(preds):
        rois = pred['rois']
        if len(rois) == 0:
            continue

        if uniform_scale:
            scale_x = scale_y = metas
        else:
            new_w, new_h, old_w, old_h, padding_w, padding_h = metas[i]
            scale_x = new_w / old_w
            scale_y = new_h / old_h

        rois[:, [0, 2]] = rois[:, [0, 2]] / scale_x
        rois[:, [1, 3]] = rois[:, [1, 3]] / scale_y
    return preds


def aspectaware_resize_padding(image, width, height, interpolation=None, means=None):
    """Resize an image keeping its aspect ratio and place it on a padded canvas.

    The longer side of the image is scaled to the matching target dimension,
    and the result is copied into the top-left corner of a ``float32`` canvas
    of shape ``(height, width, channels)``.

    Args:
        image: Array of shape ``(H, W, C)``.
        width: Target canvas width.
        height: Target canvas height.
        interpolation: Optional OpenCV interpolation flag passed to
            ``cv2.resize``; OpenCV's default is used when ``None``.
        means: Optional fill value for the canvas (broadcast over it);
            the canvas is zero-filled when ``None``.

    Returns:
        Tuple ``(canvas, new_w, new_h, old_w, old_h, padding_w, padding_h)``.
    """
    old_h, old_w, c = image.shape
    if old_w > old_h:
        new_w = width
        new_h = int(width / old_w * old_h)
    else:
        new_w = int(height / old_h * old_w)
        new_h = height

    # The canvas is (rows, cols) == (height, width). Allocating it as
    # (height, height) only worked when the target happened to be square.
    canvas = np.zeros((height, width, c), np.float32)
    if means is not None:
        canvas[...] = means

    if new_w != old_w or new_h != old_h:
        if interpolation is None:
            image = cv2.resize(image, (new_w, new_h))
        else:
            image = cv2.resize(image, (new_w, new_h), interpolation=interpolation)

    padding_h = height - new_h
    padding_w = width - new_w

    if image.ndim == 2:
        # A 2-D array here means the channel axis was lost during resizing;
        # write it into the first (only) channel of the canvas.
        canvas[:new_h, :new_w, 0] = image
    else:
        canvas[:new_h, :new_w] = image

    return canvas, new_w, new_h, old_w, old_h, padding_w, padding_h


def preprocess(*image_path, max_size=512, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Read images from disk, normalize them and frame them for the network.

    Args:
        *image_path: One or more image file paths.
        max_size: Side length passed as both width and height to
            ``aspectaware_resize_padding``.
        mean: Per-channel value subtracted after dividing pixels by 255.
        std: Per-channel value the centered pixels are divided by.

    Returns:
        Tuple ``(ori_imgs, framed_imgs, framed_metas)``: the images exactly as
        read by ``cv2.imread``, the normalized and padded images, and for each
        image the tuple of resize metadata that follows the canvas in the
        result of ``aspectaware_resize_padding``.
    """
    # Phase 1: load every file before doing any processing.
    ori_imgs = []
    for path in image_path:
        ori_imgs.append(cv2.imread(path))

    # Phase 2: reverse the channel axis, divide by 255, then standardize.
    normalized_imgs = []
    for raw in ori_imgs:
        reversed_unit = raw[..., ::-1] / 255
        normalized_imgs.append((reversed_unit - mean) / std)

    # Phase 3: resize/pad and split each result into canvas and metadata.
    framed_imgs = []
    framed_metas = []
    for normalized in normalized_imgs:
        framed, *meta = aspectaware_resize_padding(normalized, max_size, max_size,
                                                   means=None)
        framed_imgs.append(framed)
        framed_metas.append(tuple(meta))

    return ori_imgs, framed_imgs, framed_metas


def postprocess(x, anchors, regression, classification, regressBoxes, clipBoxes, threshold, iou_threshold):
    """Turn raw network outputs into per-image detections.

    Args:
        x: Input batch tensor; its first dimension is the number of images and
            it is forwarded to ``clipBoxes``.
        anchors: Anchor tensor passed to ``regressBoxes``.
        regression: Box regression output passed to ``regressBoxes``.
        classification: Class score tensor indexed as
            ``[image, anchor, class]``.
        regressBoxes: Callable invoked as ``regressBoxes(anchors, regression)``.
        clipBoxes: Callable invoked as ``clipBoxes(boxes, x)``.
        threshold: Anchors whose best class score is not above this value are
            discarded.
        iou_threshold: IoU threshold handed to ``batched_nms``.

    Returns:
        A list with one dict per image holding numpy arrays under the keys
        ``'rois'``, ``'class_ids'`` and ``'scores'``. All three are empty
        arrays when no anchor survives thresholding or NMS.
    """
    def _no_detections():
        # A fresh dict each time so callers can mutate results independently.
        return {
            'rois': np.array(()),
            'class_ids': np.array(()),
            'scores': np.array(()),
        }

    decoded_boxes = clipBoxes(regressBoxes(anchors, regression), x)
    best_scores = torch.max(classification, dim=2, keepdim=True)[0]
    keep_mask = (best_scores > threshold)[:, :, 0]

    detections = []
    for idx in range(x.shape[0]):
        mask = keep_mask[idx]
        if mask.sum() == 0:
            detections.append(_no_detections())
            continue

        class_scores = classification[idx, mask, ...].permute(1, 0)
        candidate_boxes = decoded_boxes[idx, mask, ...]
        candidate_scores = best_scores[idx, mask, ...]
        top_scores, top_classes = class_scores.max(dim=0)

        # NMS is run per class: boxes of different classes never suppress
        # each other.
        kept = batched_nms(candidate_boxes, candidate_scores[:, 0], top_classes,
                           iou_threshold=iou_threshold)

        if kept.shape[0] == 0:
            detections.append(_no_detections())
            continue

        detections.append({
            'rois': candidate_boxes[kept, :].cpu().numpy(),
            'class_ids': top_classes[kept].cpu().numpy(),
            'scores': top_scores[kept].cpu().numpy(),
        })

    return detections

In [None]:
def get_valid_transforms():
    """Build the inference-time augmentation pipeline.

    Returns:
        An ``A.Compose`` pipeline whose only step is ``ToTensorV2``; both the
        step and the pipeline are applied with probability 1.0.
    """
    steps = [ToTensorV2(p=1.0)]
    return A.Compose(steps, p=1.0)

In [None]:
DATA_ROOT_PATH = '../input/global-wheat-detection/test'


class DatasetRetriever(Dataset):
    """Dataset yielding ``(image, image_id)`` pairs for the test images.

    Images are read from ``DATA_ROOT_PATH/<image_id>.jpg``, converted from
    BGR to RGB, cast to ``float32`` and divided by 255 before the optional
    transform pipeline is applied.
    """

    def __init__(self, image_ids, transforms=None):
        """Store the ids and the optional transform pipeline.

        Args:
            image_ids: Indexable, sized collection of image ids (file names
                without the ``.jpg`` extension).
            transforms: Optional callable invoked as ``transforms(image=...)``
                that returns a mapping with an ``'image'`` entry.
        """
        super().__init__()
        self.image_ids = image_ids
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Load, normalize and transform the image at ``index``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        image_id = self.image_ids[index]
        image_path = f'{DATA_ROOT_PATH}/{image_id}.jpg'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None; fail here with a
            # message that names the file instead of failing inside cvtColor.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        if self.transforms:
            image = self.transforms(image=image)['image']
        return image, image_id

    def __len__(self) -> int:
        """Return the number of image ids."""
        # len() works for numpy arrays as well as lists and tuples, whereas
        # ``.shape[0]`` required a numpy array.
        return len(self.image_ids)

In [None]:
# Image ids are the file names without the '.jpg' suffix. glob returns paths
# in arbitrary order, so they are sorted to make indexing (e.g. dataset[5])
# and the row order of the submission reproducible between runs.
dataset = DatasetRetriever(
    image_ids=np.array([path.split('/')[-1][:-4] for path in sorted(glob(f'{DATA_ROOT_PATH}/*.jpg'))]),
    transforms=get_valid_transforms()
)

def collate_fn(batch):
    """Transpose a batch of samples into a tuple of per-field tuples.

    For a batch of ``(image, image_id)`` pairs this yields
    ``((image, ...), (image_id, ...))``; an empty batch yields ``()``.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Sequential loader over the test set: four samples per batch, two worker
# processes, original order, and the final partial batch is kept.
data_loader = DataLoader(
    dataset=dataset,
    batch_size=4,
    collate_fn=collate_fn,
    num_workers=2,
    shuffle=False,
    drop_last=False,
)

In [None]:
weights_path = '../input/selftrained/efficientdet-d7_79_29040.pth'

# The anchor settings are plain literals; there is no need to eval() constant
# strings to build them.
# NOTE(review): the upstream EfficientDetBackbone reads the aspect ratios from
# the `ratios` keyword, so the previous `ratio=` spelling appears to have been
# ignored (the values equal the upstream defaults, so results are unchanged)
# -- confirm against the bundled backbone.py.
net = EfficientDetBackbone(num_classes=1, compound_coef=7,
                           ratios=[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)],
                           scales=[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])
# strict=False tolerates missing/unexpected keys in the checkpoint, so a
# mismatched weights file will not raise here.
net.load_state_dict(torch.load(weights_path), strict=False)
net.eval()
net.cuda()

Sanity check: run the model on a single test image and plot the predicted boxes

In [None]:
# Run the detector on one test image and draw the predicted boxes on it.
image, image_id = dataset[5]

# Channel-first tensor -> channel-last numpy array for matplotlib.
numpy_image = image.permute(1, 2, 0).cpu().numpy().copy()

regressBoxes = BBoxTransform()
clipBoxes = ClipBoxes()
threshold = 0.2
nms_threshold = 0.5

with torch.no_grad():
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    image_path = f'{DATA_ROOT_PATH}/{image_id}.jpg'
    ori_imgs, framed_imgs, framed_metas = preprocess(image_path, max_size=512, mean=mean, std=std)
    # Add a batch axis and move channels first: (H, W, C) -> (1, C, H, W).
    x = torch.from_numpy(framed_imgs[0])
    x = x.unsqueeze(0).permute(0, 3, 1, 2)
    x = x.cuda()

    features, regression, classification, anchors = net(x)

    preds = postprocess(x,
                        anchors, regression, classification,
                        regressBoxes, clipBoxes,
                        threshold, nms_threshold)

    preds = invert_affine(framed_metas, preds)[0]

scores = preds['scores']
class_ids = preds['class_ids']
rois = preds['rois']

# postprocess returns a 1-D empty array when nothing is detected; column
# indexing would fail on it, so only convert when there are boxes.
if rois.ndim == 2:
    # Convert columns 2 and 3 from far-corner coordinates to width/height.
    rois[:, 2] -= rois[:, 0]
    rois[:, 3] -= rois[:, 1]

fig, ax = plt.subplots(1, figsize=(10, 10))
ax.imshow(numpy_image)
for bbox in rois:
    rect = patches.Rectangle((bbox[0], bbox[1]), bbox[2], bbox[3], linewidth=1, edgecolor='r', facecolor='none')
    ax.add_patch(rect)

# Inference

In [None]:
def format_prediction_string(boxes, scores):
    """Serialize detections into one space-separated string.

    Each detection contributes ``"<score> <b0> <b1> <b2> <b3>"`` where the
    score is printed with four decimals and ``b0..b3`` are the first four
    entries of the box. Boxes and scores are paired positionally.
    """
    return " ".join(
        f"{score:.4f} {box[0]} {box[1]} {box[2]} {box[3]}"
        for score, box in zip(scores, boxes)
    )

In [None]:
# Run the detector over every test image and collect one submission row each.
results = []
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
score_threshold = 0.2

with torch.no_grad():
    # Iterate the ids directly: preprocess() reads the image from disk itself,
    # so going through dataset[i] would decode and transform every image a
    # second time only to obtain its id.
    for image_id in dataset.image_ids:
        image_path = f'{DATA_ROOT_PATH}/{image_id}.jpg'
        ori_imgs, framed_imgs, framed_metas = preprocess(image_path, max_size=1024, mean=mean, std=std)
        # Add a batch axis and move channels first: (H, W, C) -> (1, C, H, W).
        x = torch.from_numpy(framed_imgs[0])
        x = x.unsqueeze(0).permute(0, 3, 1, 2)
        x = x.cuda()

        features, regression, classification, anchors = net(x)

        preds = postprocess(x,
                            anchors, regression, classification,
                            regressBoxes, clipBoxes,
                            score_threshold, nms_threshold)

        preds = invert_affine(framed_metas, preds)[0]

        scores = preds['scores']
        class_ids = preds['class_ids']
        rois = preds['rois']

        if rois.ndim == 2:
            # Convert columns 2 and 3 from far-corner coordinates to
            # width/height, then truncate to integers for the submission.
            rois[:, 2] -= rois[:, 0]
            rois[:, 3] -= rois[:, 1]
            rois = rois.astype(np.int64)
            prediction_string = format_prediction_string(rois, scores)
        else:
            # postprocess returns 1-D empty arrays when nothing was detected.
            prediction_string = ''

        results.append({
            'image_id': image_id,
            'PredictionString': prediction_string,
        })
        

In [None]:
# Assemble the collected rows into the submission table, write it without the
# index column, and show the first rows as the cell output.
test_df = pd.DataFrame(data=results, columns=['image_id', 'PredictionString'])
test_df.to_csv(path_or_buf='submission.csv', index=False)
test_df.head(n=5)

# Thank you for reading my kernel!