In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np 
import cv2
import glob
import os
import sys

from tqdm import tqdm
from itertools import product
import torch
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from IPython.display import clear_output

# Install mmdetection and dependencies

In [None]:
# Copy the offline source trees shipped as Kaggle input datasets to short paths
# at the filesystem root; the next cell pip-installs most of them from there.
!cp -r /kaggle/input/mmdetection/mmdetection-master/ /mmdetection
# NOTE(review): /detectors is copied but not installed or referenced anywhere
# else in the visible notebook - confirm whether it is still needed.
!cp -r /kaggle/input/detectors/DetectoRS-mmdetv2/ /detectors
!cp -r /kaggle/input/cocoapi/cocoapi-master/ /cocoapi
!cp -r /kaggle/input/mmcv-lib/mmcv-0.6.2/ /mmcv
!cp -r /kaggle/input/terminaltables/terminaltables-master/ /terminaltables
!cp -r /kaggle/input/addict/addict-master/ /addict
!cp -r /kaggle/input/weightedbf/Weighted-Boxes-Fusion-master/ /wbf

In [None]:
# Install the copied source trees with pip (no network access is needed because
# every path is local).
!pip install /wbf
# cocoapi is installed in editable mode from its PythonAPI sub-directory and
# that directory is also put on sys.path for the running kernel.
!cd /cocoapi/PythonAPI/ && pip install -e .
sys.path.append('/cocoapi/PythonAPI/')
!pip install /addict
!pip install /mmcv
!pip install /terminaltables
# mmdetection is installed verbosely in editable mode and added to sys.path so
# the `mmdet` imports further down resolve in this kernel session.
!pip install -v -e /mmdetection
sys.path.append('/mmdetection')

In [None]:
!cp -r ../input/pytorchyolov4/pytorch-YOLOv4-master/tool .
from tool.utils import *
from tool.torch_utils import *
from tool.darknet2pytorch import Darknet
from ensemble_boxes import weighted_boxes_fusion

# Main pipeline

## YOLO part

In [None]:
# Thresholds forwarded to make_tta_predictions() as `iou_th` and
# `score_threshold` by the inference code below.
yolo_iou_threshold = 0.6
yolo_score_threshold = 0.25

# Darknet network definition and trained weights for the wheat detector.
yolov4_cfgfile = '../input/yolov4weights/yolov4-wheat.cfg'
yolov4_weightfile = '../input/yolov4weights/yolov4-wheat_40000.weights'

# Build the network from the cfg file, load the weights, then move it to the GPU.
yolo_m = Darknet(yolov4_cfgfile)
yolo_m.load_weights(yolov4_weightfile)
yolo_m.cuda()
# NOTE(review): yolo_num_classes is not read anywhere else in the visible notebook.
yolo_num_classes = yolo_m.num_classes

# predict() iterates over this list, so further YOLO models can be appended here.
yolo_models = [yolo_m]

# Albumentations pipeline used by convert_image_to_yolo(): resize to 1024x1024
# and convert the array to a torch tensor.
yolo_transform = A.Compose([
            A.Resize(height=1024, width=1024, p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)

def convert_image_to_yolo(img):
    """Prepare an OpenCV image for the YOLO TTA pipeline.

    The input is copied, resized to 1024x1024, converted from BGR to RGB and
    passed through ``yolo_transform``.

    Args:
        img: image array as returned by ``cv2.imread``; it is not modified.

    Returns:
        A one-element tuple holding the transformed image, ready to be
        combined with ``torch.stack`` in ``make_tta_predictions``.
    """
    resized = cv2.resize(img.copy(), (1024, 1024))
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    # yolo_transform resizes to 1024x1024 again (a no-op at this point) and
    # turns the array into a tensor.
    transformed = yolo_transform(image=rgb)
    return (transformed['image'],)

class BaseWheatTTA:
    """ author: @shonenkov

    Interface shared by the test-time-augmentation transforms below.
    Subclasses override all three methods; the base versions only raise.
    """
    # Side length (in pixels) that subclasses use when mapping boxes back.
    image_size = 1024

    def augment(self, image):
        """Apply the transform to a single image (must be overridden)."""
        raise NotImplementedError

    def batch_augment(self, images):
        """Apply the transform to a batch of images (must be overridden)."""
        raise NotImplementedError

    def deaugment_boxes(self, boxes):
        """Map boxes from the augmented frame back (must be overridden)."""
        raise NotImplementedError

class TTAHorizontalFlip(BaseWheatTTA):
    """ author: @shonenkov

    Flip TTA that reverses dimension 1 of a single image (dimension 2 of a
    batch) and mirrors box columns 1 and 3 to undo it.
    NOTE(review): presumably images are (C, H, W) tensors and boxes are
    [x1, y1, x2, y2] rows - confirm against the caller.
    """

    def augment(self, image):
        """Return ``image`` reversed along dimension 1."""
        flipped = image.flip(1)
        return flipped

    def batch_augment(self, images):
        """Return the batch ``images`` reversed along dimension 2."""
        flipped = images.flip(2)
        return flipped

    def deaugment_boxes(self, boxes):
        """Mirror columns 1 and 3 of ``boxes`` around ``image_size``.

        The array is modified in place and also returned. Columns are swapped
        while mirroring so that column 1 stays the smaller coordinate.
        """
        mirrored = self.image_size - boxes[:, [3, 1]]
        boxes[:, [1, 3]] = mirrored
        return boxes

class TTAVerticalFlip(BaseWheatTTA):
    """ author: @shonenkov

    Flip TTA that reverses dimension 2 of a single image (dimension 3 of a
    batch) and mirrors box columns 0 and 2 to undo it.
    NOTE(review): presumably images are (C, H, W) tensors and boxes are
    [x1, y1, x2, y2] rows - confirm against the caller.
    """

    def augment(self, image):
        """Return ``image`` reversed along dimension 2."""
        flipped = image.flip(2)
        return flipped

    def batch_augment(self, images):
        """Return the batch ``images`` reversed along dimension 3."""
        flipped = images.flip(3)
        return flipped

    def deaugment_boxes(self, boxes):
        """Mirror columns 0 and 2 of ``boxes`` around ``image_size``.

        The array is modified in place and also returned. Columns are swapped
        while mirroring so that column 0 stays the smaller coordinate.
        """
        mirrored = self.image_size - boxes[:, [2, 0]]
        boxes[:, [0, 2]] = mirrored
        return boxes
    
class TTARotate90(BaseWheatTTA):
    """ author: @shonenkov

    Quarter-turn TTA built on ``torch.rot90`` with ``k=1``.
    """

    def augment(self, image):
        """Rotate a single image once in the plane of dimensions 1 and 2."""
        return torch.rot90(image, k=1, dims=(1, 2))

    def batch_augment(self, images):
        """Rotate a batch once in the plane of dimensions 2 and 3."""
        return torch.rot90(images, k=1, dims=(2, 3))

    def deaugment_boxes(self, boxes):
        """Return a copy of ``boxes`` mapped back from the rotated frame.

        Columns 0 and 2 become ``image_size`` minus the old columns 1 and 3;
        columns 1 and 3 take the old columns 2 and 0. The input array is left
        untouched. Column order is not re-sorted here; ``TTACompose`` does
        that in ``prepare_boxes``.
        """
        restored = boxes.copy()
        restored[:, 0] = self.image_size - boxes[:, 1]
        restored[:, 2] = self.image_size - boxes[:, 3]
        restored[:, 1] = boxes[:, 2]
        restored[:, 3] = boxes[:, 0]
        return restored

    
class TTACompose(BaseWheatTTA):
    """ author: @shonenkov

    Chains several TTA transforms: augmentation runs them in order, box
    de-augmentation runs them in reverse. An empty list acts as identity.
    """
    def __init__(self, transforms):
        """Store the ordered list of transforms to apply."""
        self.transforms = transforms

    def augment(self, image):
        """Apply every transform's ``augment`` to ``image`` in order."""
        result = image
        for step in self.transforms:
            result = step.augment(result)
        return result

    def batch_augment(self, images):
        """Apply every transform's ``batch_augment`` to ``images`` in order."""
        result = images
        for step in self.transforms:
            result = step.batch_augment(result)
        return result

    def prepare_boxes(self, boxes):
        """Return a copy of ``boxes`` with each coordinate pair sorted.

        After this call column 0 <= column 2 and column 1 <= column 3 for
        every row; the input array is not modified.
        """
        ordered = boxes.copy()
        for low_col, high_col in ((0, 2), (1, 3)):
            pair = boxes[:, [low_col, high_col]]
            ordered[:, low_col] = np.min(pair, axis=1)
            ordered[:, high_col] = np.max(pair, axis=1)
        return ordered

    def deaugment_boxes(self, boxes):
        """Undo all transforms (last applied first) and normalise the boxes."""
        restored = boxes
        for step in reversed(self.transforms):
            restored = step.deaugment_boxes(restored)
        return self.prepare_boxes(restored)

def process_det(index, det, score_threshold=0.25):
    """Convert one image's raw detections into integer pixel boxes and scores.

    Args:
        index: position of the image inside ``det``.
        det: sequence of 2-D arrays, one per image; columns 0-3 hold the box
            and column 5 holds the score that is thresholded.
        score_threshold: only detections with a score strictly greater than
            this value are returned.

    Returns:
        ``(bboxes, scores)`` - an integer array of shape (k, 4) clipped to
        [0, 1023] and the matching score array.
    """
    detections = det[index]
    confidences = detections[:, 5].copy()
    coords = detections[:, :4].copy()

    scale = 1024
    pixel_boxes = np.zeros(coords.shape)
    pixel_boxes[:, :2] = (coords[:, :2] * scale).astype(int)
    # NOTE(review): the unscaled columns 0/1 are added to the scaled columns
    # 2/3 here. If the detector emits normalised corner boxes this only adds a
    # sub-pixel offset before truncation - confirm the intended box format.
    pixel_boxes[:, 2:] = (coords[:, :2] + coords[:, 2:] * scale).astype(int)
    pixel_boxes = pixel_boxes.clip(min=0, max=scale - 1).astype(int)

    keep = confidences > score_threshold
    return pixel_boxes[keep], confidences[keep]


def make_tta_predictions(yolo_model, image, score_threshold=0.25, iou_th=0.6):
    """Run a YOLO model over every TTA variant of one image and pool the boxes.

    Args:
        yolo_model: model handed to ``do_detect``.
        image: sequence of image tensors (as returned by
            ``convert_image_to_yolo``); it is stacked into a batch, but only
            the detections of the first batch element are used.
        score_threshold: minimum score forwarded to ``process_det``.
        iou_th: NMS threshold forwarded to ``do_detect``.

    Returns:
        A float array of shape (n, 5) with rows ``[x1, y1, x2, y2, score]`` in
        the un-augmented 1024x1024 frame, or an empty 1-D array when no TTA
        variant produced a detection above the threshold.
    """
    collected = []
    with torch.no_grad():
        images = torch.stack(image).float().cuda()

        for tta_transform in yolo_tta_transforms:
            # Augment on the GPU, then hand a channels-last numpy batch to do_detect.
            input_img = tta_transform.batch_augment(images.clone()).permute(0, 2, 3, 1).cpu().numpy()
            det = do_detect(yolo_model, input_img, 0.0001, iou_th)
            det = [np.array(d) if d is not None else np.array([]) for d in det]
            if det[0].size == 0:
                continue

            # Forward the caller's threshold; previously the default of
            # process_det was always used and this argument was ignored.
            boxes, scores = process_det(0, det, score_threshold=score_threshold)
            if len(boxes) == 0:
                continue

            boxes = tta_transform.deaugment_boxes(boxes.copy())
            boxes = boxes.round().astype(np.int32).clip(min=0, max=1023)

            predictions_curr = np.zeros((len(boxes), 5))
            predictions_curr[:, :4] = boxes
            predictions_curr[:, 4] = scores
            collected.append(predictions_curr)

    if not collected:
        return np.array([])
    return np.concatenate(collected, axis=0)

# Every on/off combination of the three basic transforms (2**3 = 8 variants);
# `None` entries are dropped, so one variant is the identity (empty compose).
yolo_tta_transforms = [
    TTACompose([step for step in combination if step])
    for combination in product([TTAHorizontalFlip(), None],
                               [TTAVerticalFlip(), None],
                               [TTARotate90(), None])
]

## MMDetection part

In [None]:
def format_prediction_string(boxes, scores):
    """Build the space-separated submission string for one image.

    Each detection contributes ``"<score> <b0> <b1> <b2> <b3>"`` with the score
    printed to four decimals; detections are joined by single spaces. Returns
    an empty string when there are no detections.
    """
    return " ".join(
        "{0:.4f} {1} {2} {3} {4}".format(score, box[0], box[1], box[2], box[3])
        for score, box in zip(scores, boxes)
    )

def predict(models, 
            test_files, 
            confidence_thresh=0.5, 
            nms_thresh = 0.5, 
            pseudolabelling_thresh = 0.8, 
            with_tta=False, 
            multiscale=False, 
            with_wbf=False, 
            wbf_weights=None, 
            clip_val=None,
            with_yolo=True):
    """Run the detector ensemble over a list of image files.

    Args:
        models: MMDetection models passed to ``detect_tta`` / ``inference_detector``.
        test_files: iterable of image paths.
        confidence_thresh: minimum fused score for a box to be reported; also
            used as ``skip_box_thr`` for WBF.
        nms_thresh: IoU threshold for WBF (or for ``nms`` when WBF is off).
        pseudolabelling_thresh: minimum score for a box to be emitted as a
            pseudo label.
        with_tta: use ``detect_tta`` instead of a single forward pass.
        multiscale: with TTA, additionally run on down/up-sampled copies of
            the image (scales 0.8 and 0.7).
        with_wbf: fuse per-model results with weighted boxes fusion.
        wbf_weights: optional per-result weights for WBF. They are used only
            when their length matches the number of results collected for the
            image (a YOLO model without detections contributes no result);
            otherwise uniform weights are used.
        clip_val: optional upper bound applied to reported scores.
        with_yolo: also run every model in the global ``yolo_models``.

    Returns:
        ``(results, pseudo_labels)`` - a list of ``{'image_id',
        'PredictionString'}`` dicts and a list of pseudo-label rows
        ``[image_id, 1024, 1024, '[x, y, w, h]', 'nvnn']``.
    """
    results = []
    pseudo_labels = []

    for img_path in test_files:
        models_results = []
        img = cv2.imread(img_path)

        # ndarray.shape is (rows, cols, ...), i.e. (height, width).
        im_h, im_w = img.shape[:2]
        resized_imgs = []
        if multiscale:
            for resize_scale in (0.8, 0.7):
                # The interpolation flag must be passed by keyword: the third
                # positional parameter of cv2.resize is `dst`.
                img_small = cv2.resize(img, (int(resize_scale*im_w), int(resize_scale*im_h)), interpolation=cv2.INTER_AREA)
                img_resized = cv2.resize(img_small, (im_w, im_h), interpolation=cv2.INTER_CUBIC)
                resized_imgs.append(img_resized)

        if with_yolo:
            image_yolo = convert_image_to_yolo(img)
            for yolo_model in yolo_models:
                yolo_predictions = make_tta_predictions(yolo_model, image_yolo, score_threshold=yolo_score_threshold, iou_th=yolo_iou_threshold)
                if yolo_predictions.size!=0:
                    models_results.append(yolo_predictions)
                clear_output(wait=True)

        # Results of the MMDetection models only; used by the non-WBF branch.
        mmdet_results = []
        for model in models:
            if with_tta:
                wheat_result_i = detect_tta(model, img)
                if multiscale:
                    for resized_img in resized_imgs:
                        wheat_result_j = detect_tta(model, resized_img)
                        wheat_result_i = np.concatenate((wheat_result_i, wheat_result_j), axis=0)
            else:
                res_values = inference_detector(model, img) 
                wheat_result_i = res_values[0]
            mmdet_results.append(wheat_result_i)
            models_results.append(wheat_result_i)

        if with_wbf:
            from ensemble_boxes import weighted_boxes_fusion
            # Honour caller-supplied weights when they line up with the results
            # actually collected for this image; fall back to uniform weights.
            if wbf_weights is not None and len(wbf_weights) == len(models_results):
                fusion_weights = wbf_weights
            else:
                fusion_weights = [1] * len(models_results)
            # WBF works on coordinates normalised to [0, 1].
            bboxes_wbf = [model_result[:,:4]/1024 for model_result in models_results]
            scores_wbf = [model_result[:,4] for model_result in models_results]
            labels_wbf = [np.zeros_like(score_wbf) for score_wbf in scores_wbf]
            wheat_bboxes, wheat_scores, _ = weighted_boxes_fusion(bboxes_wbf, scores_wbf, labels_wbf, weights=fusion_weights, iou_thr=nms_thresh, skip_box_thr=confidence_thresh, allows_overflow=False)    
            wheat_bboxes *= 1024
        else:
            # NOTE(review): `nms` is not defined in the visible notebook
            # (possibly provided by `from tool.utils import *`) and the YOLO
            # results are not part of this branch - confirm before using it.
            wheat_result = np.concatenate(mmdet_results, axis=0)
            wheat_bboxes, wheat_scores = nms(wheat_result[:,:4], wheat_result[:,4], nms_thresh)

        res_scores = []
        res_bboxes = []

        image_id = img_path.split('/')[-1].replace('.jpg','')

        for bbox, score in zip(wheat_bboxes, wheat_scores):
            if score < confidence_thresh:
                continue
            if clip_val and score > clip_val:
                score = clip_val
            res_scores.append(score)
            # Convert [x1, y1, x2, y2] to the [x, y, w, h] submission format.
            bbx_int = [int(bbx) for bbx in bbox]
            bbx_int[2] = bbx_int[2] - bbx_int[0]
            bbx_int[3] = bbx_int[3] - bbx_int[1]
            res_bboxes.append(bbx_int)

            if score>=pseudolabelling_thresh:
                pseudo_labels.append([image_id, 1024, 1024, f'[{bbx_int[0]}, {bbx_int[1]}, {bbx_int[2]}, {bbx_int[3]}]', 'nvnn'])

        results.append({
            'image_id': image_id,
            'PredictionString': format_prediction_string(res_bboxes, res_scores)
        })

    return results, pseudo_labels

def rot_bboxes_90(boxes, im_w, im_h):
    """Rotate ``[x1, y1, x2, y2]`` boxes by a quarter turn about the image centre.

    Each corner is moved to centre-origin coordinates with y pointing up,
    mapped through ``(x, y) -> (y, -x)``, moved back and truncated with
    ``int``. For a square image of side ``s`` (even) a corner ``(x, y)`` ends
    up at ``(s - y, x)``. Corners are re-sorted so the result is
    ``[xmin, ymin, xmax, ymax]``.

    Returns:
        ``np.array`` of the rotated boxes (an empty 1-D array for no boxes).
    """
    half_w, half_h = im_w // 2, im_h // 2
    rotated = []
    for left, top, right, bottom in boxes:
        # Image coordinates -> centre-origin, y-up coordinates.
        cx1, cy1 = left - half_w, half_h - top
        cx2, cy2 = right - half_w, half_h - bottom
        # Quarter turn in that frame.
        rx1, ry1, rx2, ry2 = cy1, -cx1, cy2, -cx2
        # Back to image coordinates, truncated to integers.
        px1, py1 = int(rx1 + half_w), int(half_h - ry1)
        px2, py2 = int(rx2 + half_w), int(half_h - ry2)
        rotated.append([min(px1, px2), min(py1, py2), max(px1, px2), max(py1, py2)])
    return np.array(rotated)

def rot_bboxes_90_n(boxes, im_w, im_h, n=1):
    """Apply ``rot_bboxes_90`` to ``boxes`` ``n`` times in a row.

    With ``n <= 0`` the input is returned unchanged (same object).
    """
    rotated = boxes
    for _turn in range(n):
        rotated = rot_bboxes_90(rotated, im_w, im_h)
    return rotated
    
def flip_bboxes_lr(boxes, im_w, im_h):
    """Mirror ``[x1, y1, x2, y2]`` boxes left-right inside width ``im_w``.

    The x coordinates become ``im_w - x2`` and ``im_w - x1`` (so x1 <= x2 is
    kept); y coordinates are untouched. ``im_h`` is accepted for signature
    symmetry with the other helpers and is not used.
    """
    return np.array([
        [im_w - x2, y1, im_w - x1, y2]
        for x1, y1, x2, y2 in boxes
    ])

def flip_bboxes_ud(boxes, im_w, im_h):
    """Mirror ``[x1, y1, x2, y2]`` boxes top-bottom inside height ``im_h``.

    The y coordinates become ``im_h - y2`` and ``im_h - y1`` (so y1 <= y2 is
    kept); x coordinates are untouched. ``im_w`` is accepted for signature
    symmetry with the other helpers and is not used.
    """
    return np.array([
        [x1, im_h - y2, x2, im_h - y1]
        for x1, y1, x2, y2 in boxes
    ])

def detect_tta(model, image):
    """Run an MMDetection model on an image and on seven augmented copies.

    For each augmentation the image is transformed with OpenCV, passed to
    ``inference_detector``, and the first element of the result is mapped back
    to the original frame with the box helpers before being appended.

    Args:
        model: detector accepted by ``inference_detector``.
        image: image array; it is copied before every augmentation.

    Returns:
        Array with the detections of all variants concatenated row-wise; the
        first four columns are box coordinates.
    """
    # Un-augmented pass first.
    merged = inference_detector(model, image)[0]

    # NOTE(review): shape[:2] is (rows, cols), so `im_w` actually receives the
    # height and `im_h` the width. Harmless for square inputs - confirm that
    # non-square images never reach this function.
    im_w, im_h = image.shape[:2]

    # (augment image, map boxes back) pairs, in the order they are evaluated.
    tta_variants = [
        # Flip lr
        (lambda im: cv2.flip(im, 1),
         lambda b: flip_bboxes_lr(b, im_w, im_h)),
        # Flip ud
        (lambda im: cv2.flip(im, 0),
         lambda b: flip_bboxes_ud(b, im_w, im_h)),
        # Flip lr + ud
        (lambda im: cv2.flip(cv2.flip(im, 1), 0),
         lambda b: flip_bboxes_lr(flip_bboxes_ud(b, im_w, im_h), im_w, im_h)),
        # Rotate 90 (undone by three further quarter turns)
        (lambda im: cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE),
         lambda b: rot_bboxes_90_n(b, im_w, im_h, n=3)),
        # Rotate -90
        (lambda im: cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE),
         lambda b: rot_bboxes_90_n(b, im_w, im_h, n=1)),
        # Rotate 90 + Flip ud
        (lambda im: cv2.flip(cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE), 0),
         lambda b: rot_bboxes_90_n(flip_bboxes_ud(b, im_w, im_h), im_w, im_h, n=3)),
        # Rotate -90 + Flip ud
        (lambda im: cv2.flip(cv2.rotate(im, cv2.ROTATE_90_COUNTERCLOCKWISE), 0),
         lambda b: rot_bboxes_90_n(flip_bboxes_ud(b, im_w, im_h), im_w, im_h, n=1)),
    ]

    for augment, restore_boxes in tta_variants:
        detections = inference_detector(model, augment(image.copy()))[0]
        if detections.size != 0:
            detections[:, :4] = restore_boxes(detections[:, :4])
            merged = np.concatenate((merged, detections), axis=0)

    return merged

## YOLOv4 part

# Inference

In [None]:
from mmdet.apis import init_detector, inference_detector
import mmcv 

# NOTE(review): MMDETECTION_PATH is not read anywhere else in the visible notebook.
MMDETECTION_PATH = '/mmdetection/'
TEST_IMAGES_DIR = '/kaggle/input/global-wheat-detection/test/'
# TEST_IMAGES_DIR = '/kaggle/input/global-wheat-detection/train/'
# glob returns paths in arbitrary, filesystem-dependent order; sort them so the
# image picked by index below and the row order of the submission are reproducible.
test_files = sorted(glob.glob(f'{TEST_IMAGES_DIR}*.jpg'))

In [None]:
# Four parallel lists describing the ensemble members: entry i of each list
# belongs to model i. The commented-out entries are a disabled fourth member.
CONFIGS_PATHS = ['/kaggle/input/glf-configs/',
                 '/kaggle/input/glf-configs/',
#                  '/kaggle/input/detectors-configs/',
                 '/kaggle/input/glf-configs/']

WEIGHTS_PATHS = ['/kaggle/input/glf-weights/',
                 '/kaggle/input/glf-weights/',
#                  '/kaggle/input/detectors-weights/',
                 '/kaggle/input/glf-weights/']

WEIGHTS_NAMES = ['m1_s2_epoch_24.pth',
                 'm2_s7_epoch_30_multiscale.pth',
#                  'epoch_24.pth',
                 'm2_s3_epoch_24.pth']

MODEL_NAMES = ['gfl_x101',
               'gfl_x101',
#                'detecto_rs_model',
               'gfl_x101']


models = []

# Config file is <CONFIGS_PATH>/<MODEL_NAME>.py, checkpoint is
# <WEIGHTS_PATH>/<WEIGHTS_NAME>; every model is placed on cuda:0.
for CONFIGS_PATH, WEIGHTS_PATH, WEIGHTS_NAME, MODEL_NAME in zip(CONFIGS_PATHS, WEIGHTS_PATHS, WEIGHTS_NAMES, MODEL_NAMES):
    config_file = os.path.join(CONFIGS_PATH, MODEL_NAME + '.py')
    checkpoint_file = os.path.join(WEIGHTS_PATH,WEIGHTS_NAME)
    models.append(init_detector(config_file, checkpoint_file, device='cuda:0'))

# Uniform WBF weights. NOTE(review): the "+1" presumably accounts for the YOLO
# result that is fused alongside the MMDetection models - confirm, since it
# does not depend on `with_yolo`.
if len(models)==1:
    wbf_weights = None
else:
    wbf_weights = [1] * (len(models)+1)

#### Params

In [None]:
# Minimum fused score for a box to be kept (also used as WBF skip_box_thr).
confidence_thresh = 0.4
# IoU threshold used when fusing boxes.
nms_thresh = 0.5
# Minimum score for a box to be exported as a pseudo label.
pseudolabelling_thresh = 0.4
# Run the MMDetection models with test-time augmentation (detect_tta).
with_tta = True
# Include the YOLO model(s) in the ensemble.
with_yolo = True

#### Visualize single image

In [None]:
# Visual sanity check: run the whole ensemble on one test image and draw the
# fused boxes. The index is capped so the cell also works with fewer than ten images.
file_name = test_files[min(9, len(test_files) - 1)]
image = cv2.imread(file_name)

models_results = []
if with_yolo:
    image_yolo = convert_image_to_yolo(image)
    for yolo_model in yolo_models:
        # make_tta_predictions expects the model as its first argument.
        yolo_predictions = make_tta_predictions(yolo_model, image_yolo, score_threshold=yolo_score_threshold, iou_th=yolo_iou_threshold)
        # An empty result is a 1-D array and cannot be sliced as [:, :4] below.
        if yolo_predictions.size != 0:
            models_results.append(yolo_predictions)

for model in models:
    if with_tta:
        wheat_result_i = detect_tta(model, image)
    else:
        res_values = inference_detector(model, image) 
        wheat_result_i = res_values[0]
    models_results.append(wheat_result_i)

# WBF works on coordinates normalised to [0, 1].
bboxes_wbf = [model_result[:,:4]/1024 for model_result in models_results]
scores_wbf = [model_result[:,4] for model_result in models_results]
labels_wbf = [np.zeros_like(score_wbf) for score_wbf in scores_wbf]

# Use the configured weights only when they match the number of results that
# were actually collected; otherwise weight every result equally. The global
# `wbf_weights` is left untouched because later cells pass it to predict().
if wbf_weights is not None and len(wbf_weights) == len(models_results):
    viz_wbf_weights = wbf_weights
else:
    viz_wbf_weights = [1] * len(models_results)
wheat_bboxes, wheat_scores, _ = weighted_boxes_fusion(bboxes_wbf, scores_wbf, labels_wbf, weights=viz_wbf_weights, iou_thr=nms_thresh, skip_box_thr=confidence_thresh)

print('Before WBF')
print(f'scores: {scores_wbf}')
print('After WBF')
print(f'scores: {wheat_scores}')

wheat_bboxes *= 1024
            
print(f'Before wbf {sum([len(model_result) for model_result in models_results])}')
print(f'After wbf {len(wheat_bboxes)}')

# cv2.imread returns BGR while matplotlib expects RGB: draw on an RGB copy so
# the photo keeps its true colours and (255, 0, 0) renders as red.
canvas = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

num_bboxes = 0
print(f'Before confidence thresholding {len(wheat_bboxes)}')
for bbox, score in zip(wheat_bboxes, wheat_scores):
    if score>=confidence_thresh:
        bbx_int = [int(bbx) for bbx in bbox]
        cv2.rectangle(canvas, (bbx_int[0],bbx_int[1]), (bbx_int[2],bbx_int[3]), (255,0,0), 2)
        num_bboxes+=1
print(f'After confidence thresholding {num_bboxes}')

fig=plt.figure(figsize=(18, 16))
plt.imshow(canvas)
plt.show()


#### Inference on all test data

In [None]:
results, pseudo_labels = predict(models, 
                                 test_files, 
                                 confidence_thresh=confidence_thresh, 
                                 nms_thresh = nms_thresh, 
                                 pseudolabelling_thresh = pseudolabelling_thresh, 
                                 with_tta=with_tta, 
                                 with_wbf=True, 
                                 wbf_weights=wbf_weights,
                                 with_yolo=with_yolo)

# Pseudolabelling

In [None]:
# Master switch for the pseudo-labelling cells below; when False they are skipped.
PSEUDOLABEL = False
# Disabled heuristic: enable pseudo-labelling only when the test directory
# holds more than 11 entries.
# if len(os.listdir('../input/global-wheat-detection/test/'))>11:
#      PSEUDOLABEL = True

In [None]:
if PSEUDOLABEL:
    !mkdir -p /pseudo/images
    !cp /kaggle/input/global-wheat-detection/train/* /pseudo/images
    !cp /kaggle/input/global-wheat-detection/test/* /pseudo/images
    !ls -l /pseudo/images | wc -l
    
    train_df = pd.read_csv('/kaggle/input/global-wheat-detection/train.csv')
    pseudolabels_file_name = '/pseudo/train_with_pseudo.csv'
    pseudolabels_df = pd.DataFrame(pseudo_labels, columns =['image_id', 'width', 'height', 'bbox', 'source'])

    total_df = pd.concat([pseudolabels_df,train_df], ignore_index=True)
    total_df.to_csv(pseudolabels_file_name, index=False)
    total_df.head()

#### Train with pseudolabels

In [None]:
if PSEUDOLABEL:
    # tools/train.py is invoked with a relative path, hence the temporary
    # change of working directory.
    os.chdir('/mmdetection')
    # Continue training the GFL model from an existing checkpoint, without
    # validation, writing checkpoints to /pseudo/model1/.
    !python tools/train.py /kaggle/input/glf-configs/gfl_x101.py --work-dir /pseudo/model1/ --no-validate --resume-from /kaggle/input/glf-weights/m2_s7_epoch_30_multiscale.pth
#     !python tools/train.py /kaggle/input/glf-configs/gfl_x101.py --work-dir /pseudo/model1/ --no-validate --resume-from /kaggle/input/glf-weights/m1_s2_epoch_24.pth
#     !python tools/train.py /kaggle/input/glf-configs/gfl_x101.py --work-dir /pseudo/model2/ --no-validate --resume-from /kaggle/input/glf-weights/m2_s3_epoch_24.pth
    os.chdir('/kaggle/working')
    
#     CONFIGS_PATHS = ['/kaggle/input/glf-configs/',
#                      '/kaggle/input/glf-configs/']
#     MODEL_NAMES = ['gfl_x101',
#                    'gfl_x101']
#     WEIGHTS_PATHS = ['/pseudo/model1/epoch_26.pth',
#                      '/pseudo/model2/epoch_26.pth']
    # NOTE: these assignments overwrite the globals of the model-loading cell,
    # and WEIGHTS_PATHS now holds full checkpoint file paths, not directories.
    CONFIGS_PATHS = ['/kaggle/input/glf-configs/']
    MODEL_NAMES = ['gfl_x101']
    # NOTE(review): epoch_32.pth presumably is the last checkpoint written by
    # the run above - confirm against the training config.
    WEIGHTS_PATHS = ['/pseudo/model1/epoch_32.pth']
    models_with_pseudo = []
    for CONFIGS_PATH, WEIGHTS_PATH, MODEL_NAME in zip(CONFIGS_PATHS, WEIGHTS_PATHS, MODEL_NAMES):
        config_file = os.path.join(CONFIGS_PATH, MODEL_NAME + '.py')
        models_with_pseudo.append(init_detector(config_file, WEIGHTS_PATH, device='cuda:0'))
    
    # Re-run inference with the fine-tuned model; `results` replaces the
    # predictions of the first pass. `with_yolo` is not passed, so predict()
    # uses its default (True) and the YOLO models are fused in as well.
    confidence_thresh = 0.4
    nms_thresh = 0.5
    results, pseudo_labels = predict(models_with_pseudo, 
                                     test_files, 
                                     confidence_thresh=confidence_thresh, 
                                     nms_thresh = nms_thresh, 
                                     pseudolabelling_thresh = 0.4, 
                                     with_tta=True, 
                                     with_wbf=True)

In [None]:
# List the pseudo-labelling work directory; it is only created when
# PSEUDOLABEL is True, so `ls` reports an error otherwise.
!ls /pseudo/

In [None]:
# Turn the list of per-image result dicts returned by predict() into the
# two-column submission table and preview it.
test_df = pd.DataFrame(results, columns=['image_id', 'PredictionString'])
test_df.head()

In [None]:
# Write the submission file without the DataFrame index column.
test_df.to_csv('/kaggle/working/submission.csv', index=False)

In [None]:
# !rm -r /pseudo