In [None]:
# Save Name: 703 + eca-nfnet + nfnetf0-haiku x2 + effnetb3

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
from tqdm import tqdm
import sys
import gc
import warnings
warnings.filterwarnings("ignore")
import torch
from PIL import Image
import ast

import cv2

from albumentations import Normalize, Resize, Compose
from albumentations.pytorch import ToTensorV2

sys.path.append('../input/tensorflow-great-barrier-reef')
sys.path.append('/kaggle/input/weightedboxesfusion/')

In [None]:
from ensemble_boxes import *


In [None]:
# Copy a bundled Arial.ttf into the Ultralytics config directory.
# NOTE(review): presumably this stops yolov5 from trying to download the font
# in the offline submission environment -- confirm.
!mkdir -p /root/.config/Ultralytics
!cp /kaggle/input/yolov5-font/Arial.ttf /root/.config/Ultralytics/

In [None]:
# Competition API: `iter_test` is consumed frame by frame in the main inference
# loop further down, and `env.predict` is called once per frame there.
import greatbarrierreef
env = greatbarrierreef.make_env()# initialize the environment
iter_test = env.iter_test()      # an iterator which loops over the test set and sample submission

In [None]:
# Detector checkpoints that make up the ensemble. Order matters: predict_wbf
# picks the inference size from a model's position in this list.
YOLO_MODEL_PTHS = [
#     "/kaggle/input/bestf2/fold0_aug_2560.pt",  # best fold 0
#     "/kaggle/input/bestf2/best_f1.pt",  # best fold 1
    "/kaggle/input/yolov5cots/s6_3600_bs3_fit_hflip_ezaug_v1-LDM.pt",  # best fold 2
#     "/kaggle/input/l63600uflipvm5f1all/weights/best.pt",
    "/kaggle/input/yolov5s6/f2_sub2.pt",
]

In [None]:
# Load every detector checkpoint from the local yolov5 repo copy and apply the
# shared NMS settings.
models = []
for index, model_pth in enumerate(YOLO_MODEL_PTHS):
    model = torch.hub.load(
        '../input/yolov5-lib-ds',
        'custom',
        path=model_pth,
        source='local',      # local repo
        force_reload=True,
    )

    # The first model and the remaining ones were configured in separate
    # branches, but with the same confidence threshold.
    model.conf = 0.28
    model.iou = 0.7

    models.append(model)

    # Drops only the loop-local name; the model itself stays alive in `models`.
    del model
    gc.collect()

In [None]:
# Checkpoints of the crop classifiers; each one is loaded with load_classifier
# and their scores are averaged in classifier_predict.
CLASSIFIERS_PATHS = [
    '../input/cotsclassifiers/effnet-b3-spret-classification-model.pth',
#     '../input/cotsclassifiers/resnet152-padbbox-e27-sgd-v1.pth',
    '../input/cotsclassifiers/effnet-b3-padbbox-e19-sgd-v1.pth',
#     '../input/cotsclassifiers/eca-nfnet-f0-padbbox-e19-adam-v1.pth',
#     '../input/cotsclassifiers/nfnet-f0-haiku-padbbox-e11-sgd-agc-v3.pth',
]

In [None]:
def load_classifier(path):
    """Load a serialized classification model and prepare it for inference.

    Args:
        path: Path of a checkpoint readable by ``torch.load``. The loaded
            object is used directly as a model (``.to`` / ``.eval``).

    Returns:
        The loaded model, moved to the module-level ``device`` and switched
        to evaluation mode.
    """
    # map_location remaps the stored tensors onto the active device, so a
    # checkpoint saved from a CUDA session also loads on a CPU-only machine.
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    classification_model = torch.load(path, map_location=device)
    classification_model.to(device)
    classification_model.eval()
    print(f'loaded - {path}')

    return classification_model

In [None]:
# Device used by load_classifier and classifier_predict: CUDA when torch can see a GPU, else CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

sys.path.append("../input/efficientnet-pytorch")
from efficientnet_pytorch import EfficientNet

%cd /kaggle/working

In [None]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
import timm

In [None]:
sys.path.append('../input/nfnets-pytorch/content/nfnets_pytorch')
from nfnets import pretrained_nfnet

In [None]:
# One loaded classifier per configured checkpoint, in the order of CLASSIFIERS_PATHS.
cls_models = [load_classifier(cls_path) for cls_path in CLASSIFIERS_PATHS]

In [None]:
def show_img(img, bboxes, bbox_format='yolo'):
    """Render all boxes as 'starfish' detections and return a preview image.

    Args:
        img: Image array handed to ``draw_bboxes``.
        bboxes: Boxes in the layout named by ``bbox_format``.
        bbox_format: Box layout understood by ``draw_bboxes``.

    Returns:
        A PIL image of the annotated frame, resized to 800x400.
    """
    n_boxes = len(bboxes)
    annotated = draw_bboxes(
        img=img,
        bboxes=bboxes,
        classes=['starfish'] * n_boxes,   # single-class problem: every box gets the same name
        class_ids=[0] * n_boxes,
        class_name=True,
        colors=colors,
        bbox_format=bbox_format,
        line_thickness=2,
    )
    preview = Image.fromarray(annotated)
    return preview.resize((800, 400))

In [None]:
def voc2yolo(bboxes, image_height=720, image_width=1280):
    """
    voc  => [x1, y1, x2, y2]
    yolo => [xmid, ymid, w, h] (normalized)

    Args:
        bboxes: Array-like of pixel boxes whose last axis is [x1, y1, x2, y2].
        image_height: Height used to normalize the y coordinates.
        image_width: Width used to normalize the x coordinates.

    Returns:
        A new float array of the same shape; the input is left untouched.
    """
    # np.array copies and converts in one step. The float cast matters:
    # normalizing an integer array in place would truncate the values to 0.
    bboxes = np.array(bboxes, dtype=float)

    bboxes[..., [0, 2]] = bboxes[..., [0, 2]] / image_width
    bboxes[..., [1, 3]] = bboxes[..., [1, 3]] / image_height

    w = bboxes[..., 2] - bboxes[..., 0]
    h = bboxes[..., 3] - bboxes[..., 1]

    bboxes[..., 0] = bboxes[..., 0] + w / 2
    bboxes[..., 1] = bboxes[..., 1] + h / 2
    bboxes[..., 2] = w
    bboxes[..., 3] = h

    return bboxes


def yolo2coco(bboxes, image_height=720, image_width=1280):
    """
    yolo => [xmid, ymid, w, h] (normalized)
    coco => [xmin, ymin, w, h]

    Returns a new float array; the input array is not modified.
    """
    # astype copies by default, so the caller's array stays untouched
    coco = bboxes.astype(float)

    # scale the normalized values back to pixels
    coco[..., 0] *= image_width
    coco[..., 2] *= image_width
    coco[..., 1] *= image_height
    coco[..., 3] *= image_height

    # move the anchor from the box centre to its top-left corner
    coco[..., 0] -= coco[..., 2] / 2
    coco[..., 1] -= coco[..., 3] / 2

    return coco


def voc2coco(bboxes, image_height=720, image_width=1280):
    """Convert [x1, y1, x2, y2] boxes to [xmin, ymin, w, h] by chaining voc2yolo and yolo2coco."""
    normalized = voc2yolo(bboxes, image_height, image_width)
    return yolo2coco(normalized, image_height, image_width)

In [None]:
def load_image(image_path):
    """Read an image file and return it with channels converted from BGR to RGB.

    Args:
        image_path: Path of the image file.

    Returns:
        The image array after ``cv2.COLOR_BGR2RGB`` conversion.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None;
        # fail here with the path instead of an opaque cvtColor assertion.
        raise FileNotFoundError(f'could not read image: {image_path}')
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """Plot one bounding box, with an optional label, on image ``img``.

    Args:
        x: Box as [x1, y1, x2, y2]; each value is passed through ``int``.
        img: Image array handed to the cv2 drawing calls.
        color: Color of the box; a random color is picked when falsy.
        label: Optional text drawn on a filled background at the top-left corner.
        line_thickness: Line thickness; derived from the image size when falsy.
    """
    # `random` is not imported at notebook level, so the random-color fallback
    # used to raise NameError. Import it where it is needed.
    import random

    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        # background rectangle sized to the text, sitting just above the box corner
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

def draw_bboxes(img, bboxes, classes, class_ids, colors = None, show_classes = None, bbox_format = 'yolo', class_name = False, line_thickness = 2):
    """Return a copy of ``img`` with one rectangle drawn per bounding box.

    Args:
        img: Image array; it is copied, the input is not drawn on.
        bboxes: Boxes, interpreted according to ``bbox_format``.
        classes: Class name of each box.
        class_ids: Class id of each box.
        colors: Either one color for all boxes or a list indexed by class id.
            Defaults to (0, 255, 0).
        show_classes: Only boxes whose class name is in this collection are
            drawn. Defaults to ``classes`` (i.e. everything).
        bbox_format: 'yolo' ([xmid, ymid, w, h], relative to the image size),
            'coco' ([xmin, ymin, w, h]) or 'voc_pascal' ([x1, y1, x2, y2]).
        class_name: Label boxes with the class name when True, otherwise with
            the class id.
        line_thickness: Forwarded to ``plot_one_box``.

    Returns:
        The annotated copy of the image.

    Raises:
        ValueError: If ``bbox_format`` is not one of the supported formats.
    """
    image = img.copy()
    if bbox_format not in ('yolo', 'coco', 'voc_pascal'):
        raise ValueError('wrong bbox format')

    show_classes = classes if show_classes is None else show_classes
    colors = (0, 255 ,0) if colors is None else colors

    for idx in range(len(bboxes)):
        bbox   = bboxes[idx]
        cls    = classes[idx]
        cls_id = class_ids[idx]
        color  = colors[cls_id] if type(colors) is list else colors

        if cls not in show_classes:
            continue

        # Bring every supported layout to corner form (x1, y1, x2, y2).
        if bbox_format == 'yolo':
            xc     = round(float(bbox[0]) * image.shape[1])
            yc     = round(float(bbox[1]) * image.shape[0])
            half_w = round(float(bbox[2]) * image.shape[1] / 2)
            half_h = round(float(bbox[3]) * image.shape[0] / 2)
            voc_bbox = (xc - half_w, yc - half_h, xc + half_w, yc + half_h)
        elif bbox_format == 'coco':
            x1 = int(round(bbox[0]))
            y1 = int(round(bbox[1]))
            w  = int(round(bbox[2]))
            h  = int(round(bbox[3]))
            voc_bbox = (x1, y1, x1 + w, y1 + h)
        else:  # 'voc_pascal'
            voc_bbox = (int(round(bbox[0])), int(round(bbox[1])),
                        int(round(bbox[2])), int(round(bbox[3])))

        # The yolo branch used to call an undefined `get_label` helper when
        # class_name was False; it now falls back to the class id like the
        # other two formats.
        plot_one_box(voc_bbox,
                     image,
                     color = color,
                     label = cls if class_name else str(cls_id),
                     line_thickness = line_thickness)

    return image

def get_bbox(annots):
    """Turn a sequence of annotation dicts into a list of their value lists.

    Each dict contributes ``list(annot.values())``, i.e. its values in the
    dict's own order.
    """
    bboxes = []
    for annot in annots:
        bboxes.append(list(annot.values()))
    return bboxes

def get_imgsize(row):
    """Store the size of the image at ``row['image_path']`` in the row.

    Args:
        row: Mapping-like record with an 'image_path' entry.

    Returns:
        The same ``row`` with 'width' and 'height' set.
    """
    # The `imagesize` package this helper relied on is never imported in the
    # notebook, so use PIL (already imported as `Image`). The context manager
    # closes the file handle again.
    with Image.open(row['image_path']) as image:
        row['width'], row['height'] = image.size  # PIL reports (width, height)
    return row


# Fixed seed so the box color is the same on every run.
np.random.seed(32)
# One random 3-component color; there is a single class, so a single entry.
colors = [tuple(np.random.randint(255) for _ in range(3)) for _ in range(1)]

In [None]:
def format_prediction(bboxes, confs):
    """Build a space-separated annotation string from boxes and confidences.

    Every box contributes ``conf xmin ymin w h``. An empty ``bboxes`` yields
    an empty string.
    """
    if len(bboxes) == 0:
        return ''

    chunks = []
    for idx, (xmin, ymin, w, h) in enumerate(bboxes):
        chunks.append(f'{confs[idx]} {xmin} {ymin} {w} {h}')

    return ' '.join(chunks).strip(' ')

In [None]:
def predict_wbf(models, img, size=3600, augment=False):
    """Run every detector on ``img`` and merge their boxes with Weighted Boxes Fusion.

    Args:
        models: Detectors, each called as ``model(img, size=..., augment=...)``.
        img: Image array; ``img.shape[:2]`` is read as (height, width).
        size: Kept for backward compatibility but NOT used. The inference
            size is fixed per model position: 9600 for the first model and
            6400 for every other one.
        augment: Forwarded to every model call.

    Returns:
        Tuple ``(bboxes, scores, bbclasses)`` from ``weighted_boxes_fusion``,
        with ``bboxes`` as [xmin, ymin, xmax, ymax] in pixel coordinates.
    """
    height, width = img.shape[:2]
    # weighted_boxes_fusion documents its box coordinates as normalized to
    # [0, 1], so boxes are divided by the image size before fusion and scaled
    # back to pixels afterwards.
    pixel_scale = np.array([width, height, width, height], dtype=float)

    boxes_list = []
    scores_list = []
    labels_list = []

    weights = [1]*len(models)
    iou_thr = 0.50

    for index, model in enumerate(models):
        inference_size = 9600 if index == 0 else 6400
        results = model(img, size=inference_size, augment=augment)

        preds   = results.pandas().xyxy[0]
        bboxes  = preds[['xmin','ymin','xmax','ymax']].values.astype(float) / pixel_scale
        scores  = preds.confidence.values
        boxes_list.append(bboxes)
        scores_list.append(scores)
        labels_list.append([0]*len(scores))  # single class

    bboxes, scores, bbclasses = weighted_boxes_fusion(boxes_list, scores_list, labels_list, weights=weights, iou_thr=iou_thr, skip_box_thr=0.0)

    # back to pixel coordinates; reshape keeps an empty result 2-D
    bboxes = np.asarray(bboxes, dtype=float).reshape(-1, 4) * pixel_scale

    return bboxes, scores, bbclasses
    
    
#     if len(bboxes):
#         bboxes  = voc2coco(bboxes,height,width).astype(int)
#         confs = scores
#         return bboxes, confs
#     else:
#         return [],[]

In [None]:
# model = torch.hub.load('../input/yolov5-lib-ds', 
#                        'custom', 
#                        path='../input/yolov5cots/s6_3600_bs3_fit_hflip_ezaug_v1-LDM.pt',
#                        source='local',
#                        force_reload=True)  # local repo
# model.conf = 0.20

In [None]:
# for idx, (img, pred_df) in enumerate(tqdm(iter_test)):
#     anno = ''
#     r = model(img, size=3600, augment=True)
#     if r.pandas().xyxy[0].shape[0] == 0:
#         anno = ''
#     else:
#         for idx, row in r.pandas().xyxy[0].iterrows():
#             if row.confidence > 0.15:
#                 anno += '{} {} {} {} {} '.format(row.confidence, int(row.xmin), int(row.ymin), int(row.xmax-row.xmin), int(row.ymax-row.ymin))
# #                 pred.append([row.confidence, row.xmin, row.ymin, row.xmax-row.xmin, row.ymax-row.ymin])
#     pred_df['annotations'] = anno.strip(' ')
#     env.predict(pred_df)

In [None]:
# Classifier preprocessing: per-channel normalization followed by conversion
# to a torch tensor. No resize is applied (Resize(256, 256) is disabled).
_normalize = Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), p=1)
transforms = Compose([_normalize, ToTensorV2()])

def crop_img_2_bbox(img, box):
    """Cut the region of a [xmin, ymin, w, h] box out of an image array.

    Args:
        img: Array indexed as ``img[row, column]``.
        box: Box as [xmin, ymin, w, h]; values are truncated with ``int``.

    Returns:
        The sliced region of ``img``. Parts of the box beyond the array
        bounds are dropped.
    """
    x_start, y_start = int(box[0]), int(box[1])
    x_stop, y_stop = int(box[0] + box[2]), int(box[1] + box[3])
    # numpy reads a negative slice bound as "offset from the end", which would
    # return the wrong region for a box starting left of / above the image.
    # Clamp the bounds at 0 instead.
    return img[max(y_start, 0):max(y_stop, 0), max(x_start, 0):max(x_stop, 0)]

# def crop_img_2_bbox(img, box, crop_size):
    
#     origin = [box[0] + box[2]//2, box[1] + box[3]//2]
#     x_new = int(origin[0] - crop_size//2)
#     y_new = int(origin[1] - crop_size//2)
    
#     if x_new < 0:
#         x_new = 0
#     if x_new > 1280:
#         x_new = 1280
        
#     if y_new < 0:
#         y_new = 0
#     if y_new > 720:
#         y_new = 720
        
#     print(img.shape)
    
#     return img[
#         y_new:y_new + crop_size if y_new + crop_size < 720 else 720,
#         x_new:x_new + crop_size if x_new + crop_size < 1280 else 1280
#     ]


def classifier_predict(crop_sample, cls_models):
    """Score a crop with every classifier on two views and average the scores.

    The two views are the crop itself and the crop flipped upside down
    (``np.flipud``). For each model and view, the sigmoid of the output at
    column index 1 is collected; callers compare the mean against a starfish
    threshold.

    Args:
        crop_sample: Image crop passed to the module-level ``transforms``.
        cls_models: Classifiers to evaluate.

    Returns:
        Mean of all collected scores (``np.mean``).
    """
    def _to_batch(image):
        # preprocess, add the batch dimension, move to the inference device
        return transforms(image=image)['image'].unsqueeze(0).to(device)

    views = (_to_batch(crop_sample), _to_batch(np.flipud(crop_sample)))

    scores = []
    with torch.no_grad():
        for cls_model in cls_models:
            for view in views:
                logits = cls_model(view)
                scores.append(logits.sigmoid()[:, 1].item())

    return np.mean(scores)

In [None]:
# Smoke test of the detector + classifier pipeline on one training frame.
img = cv2.imread("../input/tensorflow-great-barrier-reef/train_images/video_0/100.jpg")[..., ::-1]

bboxes, confis, _ = predict_wbf(models, img, size=9000, augment=True)
# reshape keeps the column indexing below valid when nothing was detected
bboxes, confis = np.array(bboxes).reshape(-1, 4), np.array(confis)

# Filter boxes AND scores with the same mask. Filtering only the boxes left the
# zip() below dependent on the detections arriving sorted by confidence.
keep = confis > 0.25
bboxes, confis = bboxes[keep], confis[keep]

# [x1, y1, x2, y2] -> [xmin, ymin, w, h]
bboxes[..., 2] = bboxes[..., 2] - bboxes[..., 0]
bboxes[..., 3] = bboxes[..., 3] - bboxes[..., 1]

display(show_img(img, bboxes, bbox_format='coco'))

padding = 0
temp_boxes = []
STARFISH_CONF = 0.3
for conf, box in zip(confis, bboxes):
    print(box)
    print(conf)

    # simple scale tta from classifier
#     if box[1] > 0.2 * 720: # 1/5 and lower
    crop_sample = crop_img_2_bbox(img, box)
    display(Image.fromarray(crop_sample).resize((256, 256)))
    result = classifier_predict(crop_sample, cls_models)

    print(f'[[  Result: {result}  ]]')
    if result < STARFISH_CONF: 
        print("\t-- not cots found -- ")
        continue
    print("\t-- cots found !!! -- ")

    temp_boxes.append(box)

print(temp_boxes)
# [343 464  81  64]


# small boxes shrink 5%
# t_boxes = []
# for box in temp_boxes:
#     box = resize_bbox(box, 500, -10, 'pad')
#     t_boxes.append(box)
# temp_boxes = np.array(t_boxes)

# print(bboxes, confis)
# display(show_img(img, temp_boxes, bbox_format='coco'))

In [None]:
# norfair dependencies
# Install norfair and its dependencies from wheels/sources bundled in the
# norfair031py3 input dataset.
%cd /kaggle/input/norfair031py3/
!pip install commonmark-0.9.1-py2.py3-none-any.whl -f ./ --no-index
!pip install rich-9.13.0-py3-none-any.whl

# filterpy is installed from source: copy it to the working directory first,
# install from there, then remove the copy.
# NOTE(review): presumably because the input directory is read-only -- confirm.
!mkdir /kaggle/working/tmp
!cp -r /kaggle/input/norfair031py3/filterpy-1.4.5/filterpy-1.4.5/ /kaggle/working/tmp/
%cd /kaggle/working/tmp/filterpy-1.4.5/
!pip install .
!rm -rf /kaggle/working/tmp

# norfair
%cd /kaggle/input/norfair031py3/
!pip install norfair-0.3.1-py3-none-any.whl -f ./ --no-index

In [None]:
%cd /kaggle/working/

In [None]:
def resize_bbox(bbox, area_thr, shape_percent, tp='shrink', image_width=1280, image_height=720):
    """Conditionally resize a [x, y, w, h] box around its centre.

    Args:
        bbox: Box as [x, y, w, h].
        area_thr: Area threshold (``w * h``) that decides whether the box is touched.
        shape_percent: For 'shrink', the factor the width and height are
            multiplied by. For 'pad', the amount added to the width and
            height (may be negative).
        tp: 'shrink' resizes boxes with area below ``area_thr``; 'pad'
            resizes boxes with area above it.
        image_width: Right image border used to limit the box width.
        image_height: Bottom image border used to limit the box height.

    Returns:
        A new list [x, y, w, h] when the box was resized. Otherwise the
        original ``bbox`` object is returned unchanged (also for an unknown ``tp``).
    """
    area = bbox[2] * bbox[3]

    if tp == 'shrink' and area < area_thr:
        new_w = shape_percent * bbox[2]
        new_h = shape_percent * bbox[3]
    elif tp == 'pad' and area > area_thr:
        new_w = shape_percent + bbox[2]
        new_h = shape_percent + bbox[3]
    else:
        return bbox

    # Re-centre on the old box centre (floor division as in the original code).
    new_x = bbox[0] + bbox[2]//2 - new_w//2
    new_y = bbox[1] + bbox[3]//2 - new_h//2

    # Keep the box inside the image: pin the origin at 0 and cut width/height
    # at the right/bottom border.
    if new_x < 0:
        new_x = 0
    if new_x + new_w > image_width:
        new_w = image_width - new_x

    if new_y < 0:
        new_y = 0
    if new_y + new_h > image_height:
        new_h = image_height - new_y

    return [new_x, new_y, new_w, new_h]

In [None]:
##############################################################
#                      Tracking helpers                      #
##############################################################

import numpy as np
from norfair import Detection, Tracker

# Helper to convert bbox in format [x_min, y_min, x_max, y_max, score] to norfair.Detection class
def to_norfair(detects, frame_id):
    """Wrap [x_min, y_min, x_max, y_max, score] detections in norfair Detection objects.

    Each Detection gets the box centre as its point, the score as its only
    score, and ``[width, height, frame_id]`` as its data payload.
    """
    def _as_detection(x_min, y_min, x_max, y_max, score):
        centre = np.array([(x_min + x_max) / 2, (y_min + y_max) / 2])
        size_and_frame = np.array([x_max - x_min, y_max - y_min, frame_id])
        return Detection(points=centre, scores=np.array([score]), data=size_and_frame)

    return [
        _as_detection(x_min, y_min, x_max, y_max, score)
        for x_min, y_min, x_max, y_max, score in detects
    ]

# Euclidean distance function to match detections on this frame with tracked_objects from previous frames
def euclidean_distance(detection, tracked_object):
    """Return the norm of the offset between a detection's points and a track's estimate."""
    offset = detection.points - tracked_object.estimate
    return np.linalg.norm(offset)
        

In [None]:
#######################################################
#                      Tracking                       #
#######################################################

# Tracker will update tracks based on detections from current frame
# Matching based on euclidean distance between bbox centers of detections 
# from current frame and tracked_objects based on previous frames
# You can check its parameters in the norfair docs
# https://github.com/tryolabs/norfair/blob/master/docs/README.md
tracker = Tracker(
    distance_function=euclidean_distance, 
    distance_threshold=30,
    hit_inertia_min=3,
    hit_inertia_max=6,
    initialization_delay=1,
)

# Save frame_id into detection to know which tracks have no detections on current frame
frame_id = 0
#######################################################

BBOX_THR = 0.2             # NOTE(review): not referenced below; the cut-off actually applied is DETECTION_CONF_THR
DETECTION_CONF_THR = 0.25  # fused detections at or below this confidence are dropped
STARFISH_CONF = 0.32       # minimum mean classifier score for a crop to be kept
SMALL_BOX_AREA = 300       # boxes with a smaller area (w * h) are shrunk
SMALL_BOX_SCALE = 0.95     # shrink factor applied to those boxes
CLASSIFIER_TOP_FRACTION = 0.2  # boxes starting within this top share of the frame skip the classifier

for idx, (img, sample_prediction_df) in enumerate(tqdm(iter_test)):
    predictions = []
    detects = []

    height, width = img.shape[:2]
    bboxes, confs, bbclasses = predict_wbf(models, img, size=9000, augment=True)

    # small boxes shrink 5%
    # predict_wbf returns [x1, y1, x2, y2] while resize_bbox works on
    # [x, y, w, h]. The boxes used to be passed through unconverted, so the
    # area test compared x2 * y2 instead of w * h. Convert there and back.
    t_boxes = []
    for box in bboxes:
        x, y, w, h = resize_bbox(
            [box[0], box[1], box[2] - box[0], box[3] - box[1]],
            SMALL_BOX_AREA, SMALL_BOX_SCALE, 'shrink')
        t_boxes.append([x, y, x + w, y + h])
    bboxes = np.array(t_boxes)

    tmp_bboxes = []
    for box, conf in zip(bboxes, confs):
        if conf <= DETECTION_CONF_THR:
            continue

        x_min = int(box[0])
        y_min = int(box[1])
        x_max = int(box[2])
        y_max = int(box[3])
        score = conf

        bbox_width = x_max - x_min
        bbox_height = y_max - y_min

        if bbox_width <= 0 or bbox_height <= 0:
            # Integer truncation collapsed the box; its crop would be empty
            # and cannot be fed to the classifier.
            continue

        # CLASSIFIER
        # classify the box crop if it is starfish
        _box = [x_min, y_min, bbox_width, bbox_height]

        # Predict on lower part of the video | remove FP
        # (threshold taken from the actual frame height instead of a fixed 720)
        if _box[1] > CLASSIFIER_TOP_FRACTION * height:
            crop_sample = crop_img_2_bbox(img, _box)
            result = classifier_predict(crop_sample, cls_models)
            if result < STARFISH_CONF:
                continue  # skip not cots

        detects.append([x_min, y_min, x_max, y_max, score])

        tmp_bboxes.append([x_min, y_min, bbox_width, bbox_height])
        predictions.append('{:.2f} {} {} {} {}'.format(score, x_min, y_min, bbox_width, bbox_height))

    # visual sanity check on the first few frames only
    if idx<3:
        print(confs)
        display(show_img(img, tmp_bboxes, bbox_format='coco'))

    #######################################################
    #                      Tracking                       #
    #######################################################

    # Update tracks using detects from current frame
    tracked_objects = tracker.update(detections=to_norfair(detects, frame_id))
    for tobj in tracked_objects:
        bbox_width, bbox_height, last_detected_frame_id = tobj.last_detection.data

        if last_detected_frame_id == frame_id:  # Skip objects that were detected on current frame
            continue

        # Add objects that have no detections on current frame to predictions
        xc, yc = tobj.estimate[0]
        x_min, y_min = int(round(xc - bbox_width / 2)), int(round(yc - bbox_height / 2))
        score = tobj.last_detection.scores[0]

        predictions.append('{:.2f} {} {} {} {}'.format(score, x_min, y_min, bbox_width, bbox_height))
    #######################################################

    prediction_str = ' '.join(predictions)
    sample_prediction_df['annotations'] = prediction_str
    env.predict(sample_prediction_df)

#     print('Prediction:', prediction_str)
    frame_id += 1

In [None]:
# Preview the first rows of submission.csv.
# NOTE(review): presumably this file is written by the competition environment during env.predict -- confirm.
sub_df = pd.read_csv('submission.csv')
sub_df.head()