In [None]:
import sys
sys.path.append('../input/tfcots-ext')

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from yolox.models import *
from yolox.my_yolox_head import *
from yolox.models.boxes import postprocess

#https://www.kaggle.com/remekkinas/yolox-training-pipeline-cots-dataset-lb-0-507
import greatbarrierreef

# When True, the model and every input tensor are moved to the GPU with .cuda();
# set to False to keep everything on the CPU.
is_cuda = True

In [None]:
# model definition
# Per-architecture scaling settings: 'depth' and 'width' are forwarded to the
# YOLOPAFPN backbone, 'width' and 'in_channels' also to the detection head.
configure={
    'yolo-s': {
        'depth' : 0.33,
        'width' : 0.50,
        'in_channels': [256, 512, 1024],
        #'pretrain_file': root_dir + '/code/reference-00/YOLOX-main/weights/yolox_s.pth'
    },
    'yolo-x': {
        'depth' : 1.33,
        'width' : 1.25,
        'in_channels': [256, 512, 1024],
        #'pretrain_file': root_dir + '/code/reference-00/YOLOX-main/weights/yolox_x.pth'
    },
}
# Architecture used by default throughout the notebook.
arch = 'yolo-s'


class Net(nn.Module):
    """Single-class detector: YOLOPAFPN backbone followed by MyYOLOXHead.

    The value returned by ``forward`` is selected by ``self.output_mode``:
        'none'      -> raw head output
        'loss'      -> ``head.predict_to_loss(predict, target)``
        'inference' -> ``head.predict_to_inference(predict)``
    """

    def __init__(self, arch=arch, pretrain_file=None):
        """Build the network.

        Args:
            arch: key into ``configure`` ('yolo-s' or 'yolo-x'); an unknown key
                raises KeyError.
            pretrain_file: optional path to a checkpoint whose ``'model'``
                entry holds a state dict. When given, its weights are loaded
                non-strictly, skipping every key containing 'head.cls_preds'.
                The default ``None`` performs no loading.
        """
        super().__init__()
        cfg = configure[arch]
        depth = cfg['depth']
        width = cfg['width']
        in_channels = cfg['in_channels']
        self.output_mode = 'none'

        self.backbone = YOLOPAFPN(
            depth=depth,
            width=width,
            in_features=('dark3', 'dark4', 'dark5'),
            in_channels=in_channels,
            depthwise=False,
            act='silu',
        )
        self.head = MyYOLOXHead(
            num_class=1,
            width=width,
            in_channel=in_channels,
            act='silu',
        )

        # Override eps / momentum on every BatchNorm2d layer in the network.
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eps = 1e-3
                m.momentum = 0.03

        if pretrain_file is not None:
            # map_location returns the storage unchanged (no device remapping).
            state_dict = torch.load(pretrain_file, map_location=lambda storage, loc: storage)['model']
            # Drop the class-prediction weights before the non-strict load.
            state_dict = {k: v for k, v in state_dict.items() if 'head.cls_preds' not in k}
            self.load_state_dict(state_dict, strict=False)


    def forward(self, x, target=None):
        """Run backbone and head, then post-process according to ``output_mode``.

        Args:
            x: input batch passed to the backbone.
            target: ground truth; only read when ``output_mode == 'loss'``.

        Raises:
            ValueError: if ``output_mode`` is not 'none', 'loss' or 'inference'
                (previously this case silently returned None).
        """
        # fpn output content features of [dark3, dark4, dark5]
        feature = self.backbone(x)
        predict = self.head(feature)

        if self.output_mode == 'none':
            return predict

        if self.output_mode == 'loss':
            return self.head.predict_to_loss(predict, target)

        if self.output_mode == 'inference':
            return self.head.predict_to_inference(predict)

        raise ValueError(
            "unknown output_mode %r (expected 'none', 'loss' or 'inference')" % (self.output_mode,)
        )
        

In [None]:
# Trained weights to restore into the network.
checkpoint = '../input/tfcots-ext/yolox-s-1440x2560-aug3-fold0-00005250.model.pth'

net = Net(arch)

# The identity map_location leaves the loaded storages where torch.load
# deserialized them instead of remapping them to another device.
f = torch.load(checkpoint, map_location=lambda storage, loc: storage)
net.load_state_dict(f['state_dict'], strict=True)

# eval() and cuda() modify the module in place and return it, so the result
# does not need to be re-bound.
net.eval()
if is_cuda:
    net.cuda()

In [None]:
# (height, width) of the padded network input; default target size of resize_with_pad.
inference_size =  (1440,2560)
# Score cut-off: forwarded to postprocess and re-applied when the submission string is built.
confthre    = 0.40
# Forwarded to postprocess right after confthre; by its name the NMS threshold
# (NOTE(review): confirm against yolox.models.boxes.postprocess).
nmsthre     = 0.45

def resize_with_pad(image, size=inference_size):
    """Fit ``image`` into a ``size`` canvas, keeping the aspect ratio.

    The image is scaled by the largest factor that still fits inside
    ``size`` (height, width), placed in the top-left corner of a zero-filled
    uint8 canvas with 3 channels, and the remainder is left as padding.

    An image that already has exactly the requested height and width is
    returned as-is (the same array object, not a copy).
    """
    src_h, src_w = image.shape[:2]
    dst_h, dst_w = size

    if dst_h == src_h and dst_w == src_w:
        return image

    canvas = np.zeros((dst_h, dst_w, 3), dtype=np.uint8)

    scale = min(dst_h / src_h, dst_w / src_w)
    new_h = int(src_h * scale)
    new_w = int(src_w * scale)

    # A scale of exactly 1 means the image only needs padding, not resampling.
    if scale == 1:
        resized = image
    else:
        # cv2.resize takes the target size as (width, height).
        resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

    canvas[:new_h, :new_w] = resized
    return canvas

def image_to_tensor(image):
    """Convert an H x W x C image array into a 1 x C x H x W float tensor.

    The image is first letterboxed with ``resize_with_pad`` (default size).
    Pixel values are only cast to float; they are not divided by 255.
    """
    padded = resize_with_pad(image)
    # channels-last -> channels-first, made contiguous for torch.from_numpy
    chw = np.ascontiguousarray(np.transpose(padded, (2, 0, 1)))
    batch = torch.from_numpy(chw).unsqueeze(0)
    return batch.float()

def yolox_inference(image, net):
    """Detect objects in a single image with ``net``.

    Args:
        image: H x W x 3 image array, fed to the network in the channel order
            it is given in.
        net: ``Net`` instance. It is switched to eval mode and its
            ``output_mode`` is set to ``'inference'``; both are left that way
            when the function returns.

    Returns:
        Tuple ``(p_bbox, p_score, p_label)``:
            p_bbox:  int32 array (N, 4) of ``x, y, w, h`` in the coordinate
                     frame of the original ``image``.
            p_score: array (N,), detection column 4 multiplied by column 5.
            p_label: int32 array (N,) of class indices.
        All three arrays are empty when ``postprocess`` yields no detection.
    """
    x = image_to_tensor(image)
    if is_cuda: x = x.cuda()

    net.eval()
    net.output_mode = 'inference'
    with torch.no_grad():
        predict = net(x)
        predict = postprocess( predict, 1, confthre, nmsthre, class_agnostic=True )

    if predict[0] is None:
        p_score  = np.zeros((0), np.float32)
        p_label  = np.zeros((0), np.int32)
        p_bbox   = np.zeros((0, 4), np.int32)

    else:
        predict = predict[0].data.cpu().numpy()
        bbox  = predict[:, 0:4]
        score = predict[:, 4] * predict[:, 5]

        # Column 5 is a confidence (it is multiplied into the score above), so
        # it cannot also be the class index: casting it to int32 would just
        # truncate the confidence. Assumes the usual YOLOX detection layout
        # [x1, y1, x2, y2, obj_conf, cls_conf, cls_id] -- TODO confirm against
        # yolox.models.boxes.postprocess. With a single class the index is 0,
        # which is also the fallback when no seventh column exists.
        if predict.shape[1] > 6:
            label = predict[:, 6]
        else:
            label = np.zeros(len(predict), dtype=predict.dtype)

        # Undo the letterbox scaling applied by resize_with_pad (same ratio).
        scale = min(x.shape[2] / image.shape[0], x.shape[3] / image.shape[1])
        bbox /= scale
        bbox[:, 2:] = bbox[:, 2:] - bbox[:, :2]  # x,y,w,h format

        p_bbox  = np.round(bbox).astype(np.int32)
        p_score = score
        p_label = label.astype(np.int32)

    return p_bbox, p_score, p_label



In [None]:
if 1:
    # Competition API: frames are served one at a time and each one must be
    # answered through env.predict() with the filled-in sample_df.
    env = greatbarrierreef.make_env()   # initialize the environment
    iter_test = env.iter_test()

    for (image, sample_df) in iter_test:

        # Reverse the channel axis ("change to opencv BGR").
        image = np.ascontiguousarray(image[:, :, ::-1])

        # Disabled debug view of the incoming frame.
        if 0:
            print(image.shape)
            print(sample_df)

            fig = plt.figure(figsize=(24, 24))
            plt.imshow(image[:,:,::-1])
            #break

        p_bbox, p_score, p_label = yolox_inference(image, net)

        # One " score x y w h" group per kept detection (each group starts
        # with a space, so the final string has a leading space); detections
        # scoring below confthre are skipped.
        prediction = ''.join(
            ' %0.8f %d %d %d %d' % (score, *box)
            for box, score in zip(p_bbox, p_score)
            if not (score < confthre)
        )
        sample_df['annotations'] = prediction

        env.predict(sample_df)
        print('prediction:', sample_df)


    #submit_df = pd.read_csv('submission.csv')
    #submit_df.head()

In [None]:
# Sanity-check helpers; not run by default (see the commented-out calls at the end of this cell).

def run_check_net():
    """Smoke test: push a random batch through a freshly built ``Net``.

    Builds a new network with the default architecture (no checkpoint), runs
    it in 'inference' output mode on a 4 x 3 x 736 x 1280 random tensor and
    prints the input and output shapes.
    """
    dummy_batch = torch.randn(4, 3, 736, 1280)

    model = Net()
    model.output_mode = 'inference'
    output = model(dummy_batch)

    print('image ', dummy_batch.shape)
    print('predict ', output.shape)
    

def run_check_detect_one():
    """Visual check of the loaded model on one training frame.

    Reads a fixed training image, runs ``yolox_inference`` with the
    module-level ``net``, draws the hard-coded ground-truth boxes (light grey,
    thick) and the predicted boxes (red channel scaled by score, thin) onto
    the image, shows it with matplotlib and prints the predicted boxes and
    scores.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    #ground truth
    annotation = [{'x': 515, 'y': 511, 'width': 71, 'height': 68}, {'x': 613, 'y': 364, 'width': 55, 'height': 51}, {'x': 666, 'y': 300, 'width': 57, 'height': 50}]
    # Read the keys explicitly instead of relying on dict insertion order.
    t_bbox = [[a['x'], a['y'], a['width'], a['height']] for a in annotation]

    image_file = '../input/tensorflow-great-barrier-reef/train_images/video_1/6876.jpg'
    image = cv2.imread(image_file, cv2.IMREAD_COLOR)
    # cv2.imread signals a missing/unreadable file by returning None, which
    # would otherwise only surface later as an obscure attribute error.
    if image is None:
        raise FileNotFoundError('cannot read image file: %s' % image_file)

    p_bbox, p_score, p_label = yolox_inference(image, net)

    for box in t_bbox:
        x, y, w, h = (int(round(v)) for v in box)
        cv2.rectangle(image, (x, y), (x + w, y + h), (225, 225, 225), 6)

    for i, box in enumerate(p_bbox):
        x, y, w, h = (int(round(v)) for v in box)
        s = int(p_score[i]*255)
        # image is BGR here, so (0, 0, s) is a red whose intensity follows the score
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, s), 3)

    plt.figure(figsize=(24, 24))
    # reverse the channel axis for matplotlib
    plt.imshow(image[:,:,::-1])

    print(p_bbox)
    print(p_score)
    
    
    
    
#-------------------------------------------
#run_check_net()
#run_check_detect_one()