In [None]:
import torch
import cv2
import sys
from PIL import Image,ImageDraw
from torchvision import transforms
from matplotlib import pyplot as plt
import os
from tqdm.notebook import tqdm
# Put the bundled LANet sources on the import path.
# NOTE(review): presumably needed so the pickled LANet model class can be
# resolved when the checkpoint is loaded — confirm.
sys.path.insert(1, '../input/lanetmodelzip/LANet')

# Run on the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Create the Ultralytics config directory and copy the bundled Arial font into it.
# NOTE(review): presumably this keeps YOLOv5 from trying to download the font
# at run time — confirm against the YOLOv5 version in ../input/yolov5.
!mkdir -p /root/.config/Ultralytics
!cp ../input/yolov5-font/Arial.ttf /root/.config/Ultralytics/
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

## LANet Utils

In [None]:
# Pre-processing applied to every strip before it is fed to LANet:
# resize to 180 rows x 1280 columns, then convert the PIL image to a tensor.
transform_list = [
    transforms.Resize((180, 1280)),
    transforms.ToTensor(),
]
tsfm = transforms.Compose(transform_list)

def predict_LANet(LANet,img_original):
    """Enhance a frame with LANet, strip by strip, and return it as a uint8 image.

    The frame is cut into ``y_blocks`` horizontal strips. Each strip is passed
    through the module-level ``tsfm`` transform and then through ``LANet``; the
    outputs are stacked top-to-bottom and min-max scaled to the 0..255 range.

    Args:
        LANet: Callable model. ``LANet(batch)[0][0]`` must be a channel-first
            (C, H, W) tensor for a batch containing a single strip.
        img_original: Image object supporting ``.crop((left, top, right, bottom))``.
            The crop geometry is hard-coded for 1280x720 frames.

    Returns:
        A ``uint8`` numpy array in (height, width, channel) layout. If the model
        output is constant, an all-zero image is returned.
    """
    y_blocks = 4
    H = 720
    W = 1280
    strip_height = H / y_blocks

    strips = []
    # Pure inference: disable autograd so no graph is kept alive per strip.
    with torch.no_grad():
        for y in range(y_blocks):
            top = int(y * strip_height)
            bottom = int((y + 1) * strip_height)

            # crop() returns a new image; the original frame is left untouched.
            img_crop = img_original.crop((0, top, W, bottom))
            tensor_image = tsfm(img_crop).unsqueeze(0).to(DEVICE)

            # First model output, first (only) batch entry: shape (C, h, W).
            strips.append(LANet(tensor_image)[0][0].detach())

        # Stack the strips along the height axis to rebuild the full frame.
        stacked = torch.cat(strips, dim=1)

    # Channel-first -> channel-last. This is what the previous
    # swapaxes + cv2.flip + cv2.rotate sequence amounted to.
    final = stacked.permute(1, 2, 0).contiguous().cpu().numpy()

    lowest = final.min()
    value_range = final.max() - lowest
    if value_range == 0:
        # Constant image: scaling would divide by zero, so return all zeros.
        return (final - lowest).astype('uint8')

    return ((final - lowest) / value_range * 255).astype('uint8')

## YOLO Utils

In [None]:
def predict_yolo(model, img, size=1280, augment=False):
    """Run the detector on one image and return its boxes as (x, y, w, h).

    Args:
        model: Callable detector. ``model(img).pandas().xyxy[0]`` must be a
            DataFrame with ``xmin``, ``ymin``, ``xmax``, ``ymax`` and
            ``confidence`` columns.
        img: Image handed to ``model`` unchanged.
        size: Currently unused; kept for interface compatibility.
        augment: Currently unused; kept for interface compatibility.

    Returns:
        A list of ``(confidence, [xmin, ymin, width, height])`` tuples, one per
        detection, or ``[]`` when nothing was detected.
    """
    # NOTE(review): `size` and `augment` are not forwarded, so the model runs
    # with its own defaults. That was the behaviour before this change and is
    # kept as-is — confirm whether `model(img, size=size, augment=augment)`
    # is what is actually wanted.
    results = model(img)
    preds = results.pandas().xyxy[0]
    if not len(preds):
        return []

    # Column-wise arithmetic instead of a Python-level apply over every row.
    boxes = preds[["confidence", "xmin", "ymin", "xmax", "ymax"]].astype(float)
    widths = boxes["xmax"] - boxes["xmin"]
    heights = boxes["ymax"] - boxes["ymin"]

    return [
        (conf, [x, y, w, h])
        for conf, x, y, w, h in zip(
            boxes["confidence"].tolist(),
            boxes["xmin"].tolist(),
            boxes["ymin"].tolist(),
            widths.tolist(),
            heights.tolist(),
        )
    ]

def load_yolo(ckpt_path, conf=0.15, iou=0.50):
    """Load a custom YOLOv5 checkpoint from the local repo and set its NMS options.

    Args:
        ckpt_path: Path to the weights file passed to ``torch.hub.load`` as ``path``.
        conf: NMS confidence threshold stored on the model.
        iou: NMS IoU threshold stored on the model.

    Returns:
        The loaded model with ``conf``, ``iou``, ``classes``, ``multi_label``
        and ``max_det`` attributes set.
    """
    detector = torch.hub.load(
        '../input/yolov5',  # local repo
        'custom',
        path=ckpt_path,
        source='local',
        force_reload=True,
    )

    nms_settings = {
        'conf': conf,          # NMS confidence threshold
        'iou': iou,            # NMS IoU threshold
        'classes': None,       # optional class filter; None keeps every class
        'multi_label': False,  # NMS multiple labels per box
        'max_det': 1000,       # maximum number of detections per image
    }
    for attribute, value in nms_settings.items():
        setattr(detector, attribute, value)

    return detector

In [None]:
def predict(img,LANet,YOLO):
    """Enhance ``img`` with LANet, then detect objects on the enhanced image.

    Args:
        img: Input image, forwarded to ``predict_LANet``.
        LANet: Enhancement model, forwarded to ``predict_LANet``.
        YOLO: Detection model, forwarded to ``predict_yolo``.

    Returns:
        Whatever ``predict_yolo`` returns for the enhanced image.
    """
    enhanced = predict_LANet(LANet, img)
    return predict_yolo(YOLO, enhanced, size=1280, augment=False)

## Test with train images

In [None]:
# Load the LANet enhancement model stored under the "model" key of the checkpoint.
# map_location=DEVICE lets a checkpoint that was saved on a GPU load on a
# CPU-only machine, which matters because DEVICE falls back to the CPU.
# NOTE(review): torch.load unpickles arbitrary objects — only use trusted checkpoints.
LANet = torch.load(
    "../input/lanetmodelzip/LANet/checkpoints/model_epoch_40.pk",
    map_location=DEVICE,
)["model"].to(DEVICE)
_=LANet.eval()  # inference mode; the assignment hides the cell output

# Load the YOLOv5 detector with the default thresholds of load_yolo.
YOLO = load_yolo("../input/yolov5s6/best.pt").to(DEVICE)
_=YOLO.eval()

In [None]:
import pandas as pd
df = pd.read_csv("../input/tensorflow-great-barrier-reef/train.csv")

# Keep only the annotated frames of video 2.
has_annotations = df["annotations"].astype(str) != '[]'
is_video_2 = df["video_id"] == 2
df = df[has_annotations & is_video_2]

folder_path = "../input/tensorflow-great-barrier-reef/train_images/video_2/"

# Smoke test: run the full pipeline on the first matching frame only.
for frame_id in tqdm(df["video_frame"]):
    sample_frame = Image.open(f'{folder_path}{frame_id}.jpg')
    print(predict(sample_frame, LANet, YOLO))
    break

## Infer

In [None]:
def format_prediction(predictions):
    """Serialise predictions into one space-separated annotation string.

    Args:
        predictions: Iterable of ``(confidence, bbox)`` pairs, where ``bbox``
            has at least four indexable entries.

    Returns:
        ``"conf b0 b1 b2 b3"`` groups joined by single spaces, or ``''`` when
        there are no predictions.
    """
    pieces = [
        f'{conf} {bbox[0]} {bbox[1]} {bbox[2]} {bbox[3]}'
        for conf, bbox in predictions
    ]
    return ' '.join(pieces).strip()

def show_img(img, predictions):
    """Draw every predicted box on ``img`` in red and return the image.

    Args:
        img: Image accepted by ``ImageDraw.Draw``; it is drawn on directly.
        predictions: Iterable of ``(confidence, [xmin, ymin, width, height])``
            pairs, as produced by ``predict_yolo``.

    Returns:
        The same ``img`` object that was passed in.
    """
    draw = ImageDraw.Draw(img)

    # The loop previously unpacked into `bboxes` but read `bb`, which raised
    # NameError on the first prediction.
    for _conf, bb in predictions:
        left, top = bb[0], bb[1]
        # Boxes are stored as (x, y, w, h); rectangle() wants two corners.
        draw.rectangle([(left, top), (left + bb[2], top + bb[3])], outline="red")

    return img

In [None]:
import greatbarrierreef
# Set up the competition inference API: the environment object and the test
# iterator that the submission loop consumes.
# NOTE(review): presumably make_env() may only be called once per kernel
# session — confirm before re-running this cell.
env = greatbarrierreef.make_env()# initialize the environment
iter_test = env.iter_test()      # an iterator which loops over the test set and sample submission

In [None]:
# For every test frame: enhance and detect, serialise the boxes, and hand the
# filled-in prediction frame back to the competition environment.
for idx, (frame_array, pred_df) in enumerate(tqdm(iter_test)):
    img = Image.fromarray(frame_array)
    predictions = predict(img, LANet, YOLO)
    annot = format_prediction(predictions)
    print(annot)
    pred_df['annotations'] = annot
    env.predict(pred_df)
    # Optional visual check of the first few frames (left disabled):
    # if idx < 3:
    #     display(show_img(img, predictions))