# Prepare environment

In [None]:
import greatbarrierreef

# Build the environment object; `env.iter_test()` and `env.predict()` are
# called on it further down in this notebook.
env = greatbarrierreef.make_env()   # initialize the environment

In [None]:
# Iterable consumed by the prediction loop below; each item is unpacked there
# as a (pixel_array, sample_prediction_df) pair.
iter_test = env.iter_test()  

# Slices data

In [None]:
import os
import sys
import re
import random
import numpy as np
import pandas as pd
import copy

import cv2
import matplotlib.pyplot as plt

import torch
import torchvision

In [None]:
# Tiling helper used by both the debug cell and the prediction loop.
def image_tiler(img, s_h=180, s_w=320):
    """Cut ``img`` into overlapping windows of ``s_h`` x ``s_w`` pixels.

    Windows advance by half a tile (``int(s_h / 2)`` rows, ``int(s_w / 2)``
    columns).  They are emitted row by row: every column offset for the first
    row offset, then every column offset for the next row offset, and so on.

    Args:
        img: array indexed as ``img[row, col, ...]``.
        s_h: tile height in pixels.
        s_w: tile width in pixels.

    Returns:
        ``np.array`` built from the list of windows.  A window that runs past
        the image border is truncated by the slicing, so all windows share
        one shape only when the image size lines up with the tile grid.
    """
    half_h = int(s_h / 2)
    half_w = int(s_w / 2)
    row_starts = range(0, img.shape[0] - half_h, half_h)
    col_starts = range(0, img.shape[1] - half_w, half_w)

    windows = []
    for top in row_starts:
        for left in col_starts:
            windows.append(img[top:top + s_h, left:left + s_w])
    return np.array(windows)

# EfficientNet classification

In [None]:
# Add the bundled pytorch-image-models checkout to the import path
# (presumably so the `import timm` in the next cell resolves to this copy).
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import timm
from timm.models.efficientnet import *

In [None]:
class Net(nn.Module):
    """Two-output network built from a ``tf_efficientnetv2_s`` backbone.

    ``forward`` returns ``(logit, mask)``:

    * ``logit`` - 2-way classification output computed from the globally
      pooled output of the head block ``b7``;
    * ``mask``  - 1-channel map computed from the output of ``b3``.

    The attribute names (``b0`` .. ``b7``, ``logit``, ``mask``) and the order
    of the layers inside each ``nn.Sequential`` determine the ``state_dict``
    keys, so they must not change if existing checkpoints are to load.
    """

    def __init__(self):
        super().__init__()

        # Backbone is instantiated without pretrained weights.
        backbone = tf_efficientnetv2_s(pretrained=False, drop_path_rate=0.2)

        # Stem: convolution -> batch norm -> activation.
        self.b0 = nn.Sequential(backbone.conv_stem, backbone.bn1, backbone.act1)

        # First six entries of the backbone's block list, one attribute each.
        (self.b1, self.b2, self.b3,
         self.b4, self.b5, self.b6) = (backbone.blocks[i] for i in range(6))

        # Head: convolution -> batch norm -> activation.
        self.b7 = nn.Sequential(backbone.conv_head, backbone.bn2, backbone.act2)

        # Classifier over the pooled head features (1280 inputs, 2 outputs).
        self.logit = nn.Linear(1280, 2)

        # Mask branch: two 3x3 conv/BN/ReLU groups, then a 1x1 conv down to a
        # single channel.  It consumes 64-channel features.
        mask_layers = []
        for _ in range(2):
            mask_layers.append(nn.Conv2d(64, 64, kernel_size=3, padding=1))
            mask_layers.append(nn.BatchNorm2d(64))
            mask_layers.append(nn.ReLU(inplace=True))
        mask_layers.append(nn.Conv2d(64, 1, kernel_size=1, padding=0))
        self.mask = nn.Sequential(*mask_layers)

    def forward(self, image):
        """Return ``(logit, mask)`` for a batch of images.

        Args:
            image: batched input tensor; the callers in this file pass values
                scaled to [0, 1], which the first step maps to [-1, 1].
        """
        n_items = len(image)
        feat = 2 * image - 1

        # Stem plus the first three stages feed the mask branch.
        for stage in (self.b0, self.b1, self.b2, self.b3):
            feat = stage(feat)
        mask = self.mask(feat)

        # Remaining stages and the head feed the classifier.
        for stage in (self.b4, self.b5, self.b6, self.b7):
            feat = stage(feat)

        pooled = F.adaptive_avg_pool2d(feat, 1).reshape(n_items, -1)
        # Dropout is active only in training mode.
        pooled = F.dropout(pooled, 0.5, training=self.training)
        return self.logit(pooled), mask

class AmpNet(Net):
    """``Net`` whose forward pass runs under ``torch.cuda.amp.autocast``."""

    @torch.cuda.amp.autocast()
    def forward(self, *args):
        # Arguments are forwarded untouched; the decorator supplies the
        # autocast context around the parent implementation.
        return super().forward(*args)

In [None]:
import torch.cuda.amp as amp
    
# Gradient scaler object; it is not referenced again in the visible code.
scaler = amp.GradScaler()
# Instantiate the classifier and move it to the GPU.
net = AmpNet().cuda()

# todo check point
initial_checkpoint = '../input/yolov5data/00037020_model.pth'
# The checkpoint is a dict whose 'state_dict' entry holds the weights;
# strict=True requires its keys to match the model's keys exactly.
net.load_state_dict(torch.load(initial_checkpoint)['state_dict'], strict=True)

In [None]:
def efficinetPredict(batch, threshold=0.7):
    """Flag the items of ``batch`` that the classifier scores as positive.

    Runs the module-level ``net`` in eval mode with gradients disabled,
    applies a softmax over the last axis of the returned logits and compares
    the probability at class index 1 with ``threshold``.

    Args:
        batch: input handed to ``net`` unchanged.
        threshold: probability that class index 1 must exceed for an item to
            be flagged.  Defaults to 0.7.

    Returns:
        Boolean tensor with one entry per batch item.
    """
    net.eval()
    with torch.no_grad():
        # The mask output of the network is not needed here.
        logit, _ = net(batch)
        positive_prob = F.softmax(logit, -1)[:, 1]
    return positive_prob > threshold

# Yolo Detection

In [None]:
def load_model(ckpt_path, conf=0.25, iou=0.40):
    """Load a custom YOLOv5 checkpoint from the local repository copy.

    Args:
        ckpt_path: path of the checkpoint passed to ``torch.hub.load``.
        conf: value stored on the model as its NMS confidence threshold.
        iou: value stored on the model as its NMS IoU threshold.

    Returns:
        The loaded model with ``conf`` and ``iou`` set.  Other optional
        attributes (``classes``, ``multi_label``, ``max_det``) are left
        untouched.
    """
    detector = torch.hub.load(
        '/kaggle/input/yolov5-code',  # local repo
        'custom',
        path=ckpt_path,
        source='local',
        force_reload=False,
    )
    detector.conf, detector.iou = conf, iou
    return detector

In [None]:
def predict(model, img, size=640, augment=False):
    """Run ``model`` on one image and return its boxes and confidences.

    Args:
        model: callable invoked as ``model(img, size=..., augment=...)``; the
            result must provide ``.pandas().xyxy[0]`` as a DataFrame with
            ``xmin``, ``ymin``, ``xmax``, ``ymax`` and ``confidence`` columns.
        img: image handed to ``model`` unchanged.
        size: inference size forwarded to ``model``.
        augment: augmentation flag forwarded to ``model``.

    Returns:
        ``(bboxes, confs)``: the ``xmin, ymin, xmax, ymax`` values and the
        confidence values of the first result frame, or two empty lists when
        that frame has no rows.
    """
    frame = model(img, size=size, augment=augment).pandas().xyxy[0]
    boxes = frame[['xmin', 'ymin', 'xmax', 'ymax']].values
    if len(boxes) == 0:
        return [], []
    return boxes, frame.confidence.values

In [None]:
!mkdir -p /root/.config/Ultralytics
!cp /kaggle/input/yolov5-font/Arial.ttf /root/.config/Ultralytics/

In [None]:
# Checkpoint path and NMS settings handed to load_model below.
CKPT_PATH = '../input/yolov5data/best.pt'
CONF = 0.25
IOU = 0.40
# Module-level detector; the yolov5() helper below reads this global.
model = load_model(CKPT_PATH, conf=CONF, iou=IOU)

def yolov5(slice):
    """Run the module-level ``model`` on a single tile via ``predict``.

    Args:
        slice: one image tile, passed to ``predict`` unchanged.

    Returns:
        The ``(bboxes, confs)`` pair produced by ``predict``: boxes in
        ``xmin, ymin, xmax, ymax`` order with their confidences, or two empty
        lists when nothing was detected.
    """
    return predict(model, slice)

In [None]:
def yoloPredict(slicesBatch, indexBatch, tiles_per_row=7, stride_w=160, stride_h=90):
    """Detect objects on selected tiles and merge them in full-image coordinates.

    Each tile is run through ``yolov5``.  The returned boxes
    (``xmin, ymin, xmax, ymax`` relative to the tile) are shifted by the
    tile's position in the grid, then all boxes and scores are handed to
    ``postProcess``.

    Args:
        slicesBatch: sequence of tiles; iterated in order.
        indexBatch: tensor holding the flat grid index of every tile in
            ``slicesBatch``; shape ``(N,)`` or ``(N, 1)`` (e.g. the output of
            ``nonzero()``).
        tiles_per_row: number of tiles per grid row; the column is
            ``index % tiles_per_row`` and the row ``index // tiles_per_row``.
        stride_w: horizontal pixel step between neighbouring tiles.
        stride_h: vertical pixel step between neighbouring tiles.

    Returns:
        Whatever ``postProcess`` returns for the merged boxes and scores.
    """
    # Bring all tile indices to the host in one transfer instead of
    # converting a tensor element to int for every single box.
    tile_ids = torch.as_tensor(indexBatch).reshape(-1).tolist()

    box_chunks = []
    score_chunks = []
    for tile, tile_id in zip(slicesBatch, tile_ids):
        bboxs, scores = yolov5(tile)
        #bboxs, scores = yolov5(tile[:, :, ::-1])
        if len(bboxs) == 0:
            continue

        offset_w = stride_w * (tile_id % tiles_per_row)
        offset_h = stride_h * (tile_id // tiles_per_row)

        # Work on a copy so the detector's output array is left untouched;
        # the shift is done in the array's own dtype before the float32 cast.
        shifted = np.array(bboxs)
        shifted[:, [0, 2]] += offset_w
        shifted[:, [1, 3]] += offset_h

        box_chunks.append(torch.tensor(shifted, dtype=torch.float32))
        score_chunks.append(torch.tensor(scores, dtype=torch.float32))

    # Concatenate once at the end rather than growing a tensor per box.
    if box_chunks:
        predictions_list = torch.cat(box_chunks, 0)
        scores_list = torch.cat(score_chunks, 0)
    else:
        predictions_list = torch.empty((0, 4), dtype=torch.float32)
        scores_list = torch.empty((0), dtype=torch.float32)

    # post process
    return postProcess(predictions_list, scores_list)

In [None]:
def postProcess(bboxes, scores):
    """Apply NMS and convert the surviving boxes to ``[score, x, y, w, h]``.

    Args:
        bboxes: ``(N, 4)`` tensor of ``xmin, ymin, xmax, ymax`` boxes.
        scores: ``(N,)`` tensor of box scores.

    Returns:
        List with one ``[score, x, y, w, h]`` entry per box kept by
        ``torchvision.ops.nms`` (IoU threshold 0.4), in the order NMS returns
        them.  ``score`` is a float; ``x``, ``y``, ``w`` and ``h`` are ints
        obtained by truncating the corner and the corner difference.
    """
    keep = torchvision.ops.nms(bboxes, scores, 0.4)
    kept_boxes = bboxes[keep]

    corners = kept_boxes[:, :2].tolist()
    # Width/height are subtracted on the tensor, i.e. in its own dtype,
    # before being converted to Python numbers.
    sizes = (kept_boxes[:, 2:4] - kept_boxes[:, :2]).tolist()
    kept_scores = scores[keep].tolist()

    return [
        [float(score), int(x), int(y), int(w), int(h)]
        for score, (x, y), (w, h) in zip(kept_scores, corners, sizes)
    ]

In [None]:
%matplotlib inline

In [None]:
# quick test
# detect = yoloPredict(_activeSlice, (batch_active == True).nonzero())

# Set to True to run the visual check below on a single frame.
DEBUG = False

if DEBUG:
    #test_img = "../input/tensorflow-great-barrier-reef/train_images/video_2/5742.jpg"
    #img = cv2.imread(test_img)
    # NOTE(review): pixel_array is not assigned anywhere above this cell in
    # the visible file (the prediction loop below assigns it) - confirm it
    # exists before enabling DEBUG.
    img = copy.deepcopy(pixel_array)
    # img_show is the same array as img, so the rectangles drawn below also
    # modify img; `tiles` is built by np.array inside image_tiler.
    img_show = img
    #img_show = img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    #img_show = img[:, :, ::-1]
    plt.figure(figsize=(20, 16))
   

    tiles = image_tiler(img)
    
    # Reverse the last (channel) axis, move channels in front of height/width
    # and scale to [0, 1] in float16 - same preprocessing as the prediction loop.
    batch = np.stack(copy.deepcopy(tiles)[:, :, :, ::-1])
    batch = batch.transpose((0, 3, 1, 2))
    batch = np.ascontiguousarray(batch)
    batch = batch.astype(np.float16) / 255
    
    # predict batch tile 
    batch = torch.from_numpy(batch)
    batch = batch.cuda()
    
    # Boolean flag per tile from the classifier.
    batch_active = efficinetPredict(batch)
    #print((batch_active == True).nonzero())
    # [TODO] need rewrite to use tensor
    _activeSlice = tiles[batch_active.cpu()]
    # Flat indices of the flagged tiles; nonzero() yields one row per index.
    _list = (batch_active == True).nonzero().cpu().numpy()
    #print(_list)
    for s in _list:
        # Grid position of the tile: 7 tiles per row, stepping 160 px
        # horizontally and 90 px vertically.
        x = s%7
        y = s// 7
        _offsetW = 160 * x
        _offsetH = 90 * y
        
        #print(_offsetW, _offsetH)
        # s is a length-1 row, so take the scalar out of each offset.
        _offsetW = _offsetW[0]
        _offsetH = _offsetH[0]
        # Outline the flagged 320x180 tile with colour (0, 255, 0).
        cv2.rectangle(img_show, (_offsetW, _offsetH), (_offsetW+320, _offsetH+180), (0, 255, 0), 2)
            
    
    if len(_activeSlice) > 0:
        detect = yoloPredict(_activeSlice, (batch_active == True).nonzero())
        print(detect)
        # Each entry is [score, x, y, w, h]; draw it with colour (255, 0, 0).
        for d in detect:
            cv2.rectangle(img_show, (d[1], d[2]), (d[1]+d[3], d[2] + d[4]), (255, 0, 0), 2)
        # Same string format as the one written to the submission below.
        prediction_str = ' '.join(['{:.2f} {} {} {} {}'.format(*i) for i in detect])
        print(prediction_str)
    
    plt.imshow(img_show)

# Prediction

In [None]:
# Per the original notes: the EfficientNet classifier was trained on BGR
# input while YOLO uses RGB, and pixel_array is RGB.
for (pixel_array, sample_prediction_df) in iter_test:
    tiles = image_tiler(pixel_array)

    # Classifier input: reverse the channel axis (RGB -> BGR), move channels
    # in front of height/width, and make one contiguous copy.  The reversed
    # and transposed array is only a view, so no earlier copy is needed.
    # [TODO] keep efficientnet using the same rgb rather than bgr
    batch = np.ascontiguousarray(tiles[:, :, :, ::-1].transpose((0, 3, 1, 2)))
    batch = batch.astype(np.float16) / 255

    # predict batch tile
    batch = torch.from_numpy(batch).cuda()
    batch_active = efficinetPredict(batch)

    # [TODO] need rewrite to use tensor
    _activeSlice = tiles[batch_active.cpu()]

    if len(_activeSlice) > 0:
        # batch_active is already boolean, so nonzero() directly yields the
        # flat indices of the flagged tiles.
        detect = yoloPredict(_activeSlice, batch_active.nonzero())
    else:
        detect = []

    # One "score x y w h" group per detection, space separated,
    # e.g. '0.3 0 0 50 50 0.5 10 10 30 30'.
    prediction_str = ' '.join(['{:.2f} {} {} {} {}'.format(*i) for i in detect])
    sample_prediction_df['annotations'] = prediction_str
    env.predict(sample_prediction_df)

# Read submission.csv back and show its first rows as a sanity check.
# (The file is not written by any visible code - presumably it is produced
# by the env.predict calls above; confirm.)
sub_df = pd.read_csv('submission.csv')
sub_df.head()