##  Notebooks: Yolov4 pour prédire les COTS


#### Ce notebook a pour objectif de présenter une application de Yolov4 avec Darknet (voir https://www.kaggle.com/gimarcecaml/build-darknet-yolo4 ) à la compétition Great Barrier Reef. 
#### Utilisation de yolov4-tiny.conv.29


#### Kernel de référence:
https://www.kaggle.com/gimarcecaml/cots-det-yolov4-darknet-install-train-infer

In [None]:
!apt update
!apt install --yes python-opencv
!apt install --yes libopencv-dev
!/bin/bash -c 'echo "/opt/conda/lib/" > /etc/ld.so.conf.d/opencv.conf'
!ldconfig
!pip install imagesize

In [None]:
import pandas as pd
import os
import pickle
import matplotlib.pyplot as plt
import ast
import glob
import shutil
import sys
import numpy as np
import imagesize
import cv2
from tqdm.notebook import tqdm
from typing import List
import torch
from torchvision.ops import box_iou
from typing import List
import torch
from torchvision.ops import box_iou
import warnings
warnings.filterwarnings('ignore')

# Installation de darknet

In [None]:
!git clone https://github.com/AlexeyAB/darknet.git

%cd darknet

!cp '../../input/libcuda/libcuda.so' .

!sed -i 's/OPENCV=0/OPENCV=1/g' Makefile
!sed -i 's/GPU=0/GPU=1/g' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/g' Makefile
!sed -i 's/CUDNN_HALF=0/CUDNN_HALF=1/g' Makefile
!sed -i 's/LIBSO=0/LIBSO=1/' Makefile
!sed -i "s/ARCH= -gencode arch=compute_60,code=sm_60/ARCH= ${ARCH_VALUE}/g" Makefile

!sed -i 's/LDFLAGS+= -L\/usr\/local\/cuda\/lib64 -lcuda -lcudart -lcublas -lcurand/LDFLAGS+= -L\/usr\/local\/cuda\/lib64 -lcudart -lcublas -lcurand -L\/kaggle\/working\/darknet -lcuda/' Makefile
!make &> compile.log

In [None]:
# Vérifier qu'aucune erreur n'a été détectée pendant la compilation
!tail compile.log

In [None]:
# Voir si  on peut effectivement utiliser les commandes Darknet
!./darknet detector train

# Préparation des données 

In [None]:
# Base directories that get_path uses to build the image and label paths.
ROOT_DIR = '/kaggle/input'
WORKING_DIR = '/kaggle/working'


def get_path(row):
    """Add `image_path` and `label_path` entries to a train.csv row.

    Args:
        row: a row exposing `video_id` and `video_frame` as attributes and
            supporting item assignment (e.g. a pandas Series).

    Returns:
        The same row object, with the two path entries set.
    """
    image_path = f'{ROOT_DIR}/tensorflow-great-barrier-reef/train_images/video_{row.video_id}/{row.video_frame}.jpg'
    label_path = f'{WORKING_DIR}/darknet/data/obj/video_{row.video_id}_{row.video_frame}.txt'
    row['image_path'], row['label_path'] = image_path, label_path
    return row

In [None]:
# Load the competition metadata and attach the image / label paths to each row.
df = pd.read_csv(f'{ROOT_DIR}/tensorflow-great-barrier-reef/train.csv').apply(get_path, axis=1)
# The `annotations` column is stored as text; parse it into Python objects.
df['annotations'] = df['annotations'].apply(ast.literal_eval)
display(df.head(2))

In [None]:
# Number of annotated boxes per frame.
df['num_bbox'] = df['annotations'].apply(len)
# Percentage of frames with (True) and without (False) at least one box.
data = (df.num_bbox>0).value_counts()/len(df)*100
# The index of `data` is boolean: look values up by label, not by position,
# so the printed figures do not depend on which group is the most frequent.
print('% images without annotations: {}'.format(data.get(False, 0.0)))
print('% images with  annotations: {} '.format(data.get(True, 0.0)))

In [None]:
df

In [None]:
# Remove data without boxes
df = df.query("num_bbox>0")

In [None]:
df.shape

In [None]:
# Conversion des annotations entre coco et yolo
def coco2yolo(image_height, image_width, bboxes):
    """Convert COCO boxes to normalized YOLO boxes.

    coco => [xmin, ymin, w, h] (pixels)
    yolo => [xmid, ymid, w, h] (normalized by image width / height)

    Args:
        image_height: image height in pixels.
        image_width: image width in pixels.
        bboxes: array-like of shape (..., 4) holding COCO boxes; lists and
            ndarrays are both accepted and the input is never modified.

    Returns:
        A new float ndarray with the YOLO boxes. An empty input yields an
        array of shape (0, 4).
    """
    # np.array always copies; the float cast avoids integer truncation when
    # the normalized values are written back in place.
    bboxes = np.array(bboxes, dtype=float)
    if bboxes.size == 0:
        return bboxes.reshape(-1, 4)

    # normalizing
    bboxes[..., [0, 2]] = bboxes[..., [0, 2]] / image_width
    bboxes[..., [1, 3]] = bboxes[..., [1, 3]] / image_height

    # conversion (xmin, ymin) => (xmid, ymid)
    bboxes[..., [0, 1]] = bboxes[..., [0, 1]] + bboxes[..., [2, 3]] / 2

    return bboxes

def yolo2coco(image_height, image_width, bboxes):
    """Convert normalized YOLO boxes to COCO boxes.

    yolo => [xmid, ymid, w, h] (normalized by image width / height)
    coco => [xmin, ymin, w, h] (pixels)

    Args:
        image_height: image height in pixels.
        image_width: image width in pixels.
        bboxes: array-like of shape (..., 4) holding YOLO boxes; lists and
            ndarrays are both accepted and the input is never modified.

    Returns:
        A new float ndarray with the COCO boxes. An empty input yields an
        array of shape (0, 4).
    """
    # np.array always copies, so the caller's data is left untouched.
    bboxes = np.array(bboxes, dtype=float)
    if bboxes.size == 0:
        return bboxes.reshape(-1, 4)

    # denormalizing
    bboxes[..., [0, 2]] = bboxes[..., [0, 2]] * image_width
    bboxes[..., [1, 3]] = bboxes[..., [1, 3]] * image_height

    # conversion (xmid, ymid) => (xmin, ymin)
    bboxes[..., [0, 1]] = bboxes[..., [0, 1]] - bboxes[..., [2, 3]] / 2

    return bboxes

def load_image(image_path):
    """Read an image from disk and return it with channels in RGB order.

    Args:
        image_path: path of the image file.

    Returns:
        The image as returned by OpenCV, converted from BGR to RGB.

    Raises:
        FileNotFoundError: if OpenCV could not read the file.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an obscure error inside cvtColor.
    if image is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    """Draw one bounding box, and optionally its label, on `img` in place.

    Args:
        x: box corners as (x1, y1, x2, y2); values are truncated to int.
        img: image array to draw on (modified in place).
        color: colour of the box; a random one is picked when omitted.
        label: optional text drawn in a filled rectangle above the box.
        line_thickness: line thickness; derived from the image size when
            omitted.

    Returns:
        None.
    """
    # Local import: `random` is not imported at the top of the notebook, so
    # the default-colour path used to fail with a NameError.
    import random

    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        # Background rectangle sized to the text, placed just above the box corner.
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

def draw_bboxes(img, bboxes, classes, class_ids, colors = None, show_classes = None, bbox_format = 'yolo', class_name = False, line_thickness = 2):
    """Return a copy of `img` with the given boxes drawn on it.

    Args:
        img: image array; it is copied, never modified.
        bboxes: sequence of YOLO boxes [xmid, ymid, w, h], normalized.
        classes: class name of each box (same order as `bboxes`).
        class_ids: class id of each box (same order as `bboxes`).
        colors: a single colour, or a list of colours indexed by class id.
            Defaults to green (0, 255, 0).
        show_classes: class names to draw; all of `classes` when omitted.
        bbox_format: only 'yolo' is supported.
        class_name: when True the class name is used as the label, otherwise
            the class id is.
        line_thickness: thickness forwarded to plot_one_box.

    Returns:
        The annotated copy of the image.

    Raises:
        ValueError: if `bbox_format` is not 'yolo'.
    """
    image = img.copy()
    if bbox_format != 'yolo':
        raise ValueError('wrong bbox format')

    show_classes = classes if show_classes is None else show_classes
    colors = (0, 255, 0) if colors is None else colors
    img_h, img_w = image.shape[0], image.shape[1]

    for idx, bbox in enumerate(bboxes):
        cls = classes[idx]
        cls_id = class_ids[idx]
        if cls not in show_classes:
            continue

        color = colors[cls_id] if isinstance(colors, list) else colors

        # Box centre and half extents in pixels.
        x_mid = round(float(bbox[0]) * img_w)
        y_mid = round(float(bbox[1]) * img_h)
        half_w = round(float(bbox[2]) * img_w / 2)
        half_h = round(float(bbox[3]) * img_h / 2)

        voc_bbox = (x_mid - half_w, y_mid - half_h, x_mid + half_w, y_mid + half_h)
        plot_one_box(voc_bbox,
                     image,
                     color = color,
                     # The original called an undefined get_label(); the class id
                     # is already available and is the natural fallback label.
                     label = cls if class_name else str(cls_id),
                     line_thickness = line_thickness)

    return image

def get_bbox(annots):
    """Turn a list of annotation dicts into a list of box value lists.

    Each dict contributes one list holding its values, in the dict's own
    key order.
    """
    return [[*annotation.values()] for annotation in annots]

def get_imgsize(row):
    """Store the image dimensions of `row['image_path']` on the row.

    The size is obtained from imagesize.get() and written to the `width`
    and `height` entries; the same row object is returned.
    """
    width, height = imagesize.get(row['image_path'])
    row['width'] = width
    row['height'] = height
    return row

In [None]:
df['bboxes'] = df.annotations.apply(get_bbox)
df = df.apply(get_imgsize,axis=1)
display(df.width.unique(), df.height.unique())
display(df.head(2))

In [None]:
#df=df.sample(frac=0.05)

In [None]:
df.shape

# Structure du working 

We need to have the following dir structure according to [YOLOv4 tutorial](https://colab.research.google.com/drive/1_GdoqCJWXsChrOiY8sZMr_zbr_fH-0Fg#scrollTo=POozxsvFdXTu)
```
/kaggle/working/darknet
    /data
        /obj
            /video_X_XXX.jpg
            /video_X_XXX.txt
        /test
            /video_X_XXX.jpg
            /video_X_XXX.txt
        /train.txt
        /test.txt
        /obj.data
        /obj.names
    /cfg
        /yolov4-custom.cfg
```
- `video_X_XXX.txt`: contains the YOLO normalized annotations (one per line)
- `train(test).txt`: contains the filenames of the images `data/obj(test)/video_X_XXX.jpg`
- `yolov4-custom.cfg`: YOLO config provided by darknet. We updated some values accordingly for this challenge.

In [None]:
%cd data/
!mkdir obj test

# Write one YOLO label file per frame: one "<class> <xmid> <ymid> <w> <h>" line
# per box, with coordinates normalized by the image size.
cnt = 0  # number of frames written with an empty label file
for row_idx in tqdm(range(df.shape[0])):
    row = df.iloc[row_idx]
    bboxes_coco = np.asarray(row.bboxes, dtype=np.float32)
    num_bbox = len(bboxes_coco)

    # The context manager closes the file even if the conversion below raises.
    with open(row.label_path, 'w') as f:
        if num_bbox < 1:
            # No box: leave the label file empty and count the frame.
            cnt += 1
            continue

        bboxes_yolo = coco2yolo(row.height, row.width, bboxes_coco)
        # Single-class problem: every box gets class id 0.
        lines = [' '.join(['0', *box.astype(str)]) for box in bboxes_yolo]
        # Lines are separated by '\n' with no trailing newline after the last one.
        f.write('\n'.join(lines))

print('Missing boxes ', cnt)

In [None]:
df.head()

In [None]:
!cat obj/video_0_9828.txt

# Split dataset

In [None]:
from sklearn.model_selection import GroupKFold
# Assign every frame to one of 5 folds, using `sequence` as the group key so
# that frames of one sequence are not spread across folds.
kf = GroupKFold(n_splits = 5) 
# split() yields positional indices; resetting the index makes them usable
# as labels in df.loc below.
df = df.reset_index(drop=True)
df['fold'] = -1  # -1 marks rows that have not been assigned yet
# NOTE(review): `y` is passed as video_id, but the split is driven by `groups` — confirm `y` is needed.
for fold, (train_idx, val_idx) in enumerate(kf.split(df, y = df.video_id.tolist(), groups=df.sequence)):
    df.loc[val_idx, 'fold'] = fold
display(df.fold.value_counts())

In [None]:
val_df = df[df['fold']==2]
train_df = df[df['fold']!=2]
print(train_df.shape)
print(val_df.shape)

In [None]:
# # Move labels from obj/ to test/ directory
def mv_labels (row):
    """Move a row's label file into the sibling `test/` directory.

    The file at `row.label_path` is moved from `<parent>/<dir>/<file>` to
    `<parent>/test/<file>` and `row['label_path']` is updated to the new
    location. Returns the updated row.
    """
    source = row.label_path
    parts = source.split('/')
    # Drop the last two components (directory and file name), then re-root
    # the file under test/.
    target = '/'.join(parts[:-2]) + '/test/' + parts[-1]
    row['label_path'] = target
    shutil.move(source, target)
    return row

val_df= val_df.apply(lambda x: mv_labels(x), axis=1)
val_df.head(2)

In [None]:
# Copy images to working dir
'''
Labels and images must have the same name:
Images: obj/image_XX.jpg
Labels: obj/image_XX.txt
'''
def copy_images (row):
    """Copy a row's image next to its label file, under the same base name.

    The destination is the directory of `row.label_path` plus the label file
    name with its last four characters replaced by '.jpg'.
    """
    parts = row.label_path.split('/')
    filename = parts[-1][:-4] + '.jpg'
    destination = '/'.join(parts[:-1]) + '/' + filename
    shutil.copy(row.image_path, destination)
val_df.apply(lambda x: copy_images(x), axis=1)
train_df.apply(lambda x: copy_images(x), axis=1)

In [None]:
!ls obj/*.jpg | wc -l
!ls obj/*.txt | wc -l
!ls test/*.jpg | wc -l
!ls test/*.txt | wc -l

In [None]:
# Generate train.txt and test.txt
%cd ../
def _write_image_list(image_paths, list_path):
    """Write the absolute form of each path in `image_paths` to `list_path`, one per line."""
    absolute_paths = [os.path.join(os.getcwd(), path) for path in image_paths]
    # The context manager flushes and closes the file, so the list is complete
    # on disk before the following cells (and darknet) read it.
    with open(list_path, 'w') as f:
        f.write('\n'.join(absolute_paths))


train_images = glob.glob('data/obj/*.jpg')
_write_image_list(train_images, './data/train.txt')

val_images = glob.glob('data/test/*.jpg')
_write_image_list(val_images, './data/test.txt')

In [None]:
!cat data/train.txt | wc -l
!cat data/test.txt | wc -l

In [None]:
val_df.shape

# Visualization

In [None]:
# Display 10 randomly chosen training frames with their ground-truth boxes.
np.random.seed(32)
colors = [(np.random.randint(255), np.random.randint(255), np.random.randint(255))\
          for idx in range(1)]

df2 = train_df[(train_df.num_bbox>0)].sample(100) # takes samples with bbox

for idx in range(10):
    row = df2.iloc[idx]
    img           = load_image(row.image_path)
    image_height  = row.height
    image_width   = row.width
    # Each label line is "<class> <xmid> <ymid> <w> <h>"; drop the class field.
    # Splitting on whitespace also works for class ids longer than one character.
    with open(row.label_path) as f:
        bboxes_yolo = np.asarray([[float(a) for a in l.split()[1:]] for l in f])

    names         = ['starfish']*len(bboxes_yolo)
    labels        = [0]*len(bboxes_yolo)

    plt.figure(figsize = (12, 8))
    plt.imshow(draw_bboxes(img = img,
                           bboxes = bboxes_yolo, 
                           classes = names,
                           class_ids = labels,
                           class_name = True, 
                           colors = colors, 
                           bbox_format = 'yolo',
                           line_thickness = 2))
    plt.axis('OFF')
    plt.show()

# configuration des fichiers

On modifie `yolov4-custom.cfg` de darknet pour prédire une seule classe d'éléments (les étoiles de mer)

In [None]:
# Adapt yolov4-custom.cfg to one-class model
# The stock cfg uses subdivisions=16; it is lowered to 4 here. If this runs out of GPU memory, keep 16 or raise it to 32.
!sed -i 's/subdivisions=16/subdivisions=4/g' ./cfg/yolov4-custom.cfg
# To limit memory use, shrink the network input from 608x608 to 160x160 (width/height must stay multiples of 32).
!sed -i 's/width=608/width=160/g' ./cfg/yolov4-custom.cfg
!sed -i 's/height=608/height=160/g' ./cfg/yolov4-custom.cfg

# Make the rest of the changes to the cfg based on how many classes you are training your detector on.
'''
height = 416 (these can be any multiple of 32, 416 is standard, you can sometimes 
improve results by making value larger like 608 but will slow down training)

max_batches = (# of classes) * 2000 (but no less than 6000 so if you are training 
for 1, 2, or 3 classes it will be 6000, however detector for 5 classes would have max_batches=10000)

steps = (80% of max_batches), (90% of max_batches) 
(so if your max_batches = 10000, then steps = 8000, 9000)

filters = (# of classes + 5) * 3 (so if you are training for one class then your 
filters = 18, but if you are training for 4 classes then your filters = 27)
'''
!sed -i 's/max_batches = 500500/max_batches = 1000/g' ./cfg/yolov4-custom.cfg
!sed -i 's/steps=400000,450000/steps=800,900/g' ./cfg/yolov4-custom.cfg
!sed -i 's/classes=80/classes=1/g' ./cfg/yolov4-custom.cfg
!sed -i 's/filters=255/filters=18/g' ./cfg/yolov4-custom.cfg

# Let's build obj.data and obj.names needed by darknet
# obj.data tells darknet where to find the image lists, the class names and
# the directory used to save weights.
with open('./data/obj.data', 'w') as f:
    f.write('classes = 1\ntrain = data/train.txt\nvalid = data/test.txt\nnames = data/obj.names\nbackup = backup\n')
# obj.names lists the class names, one per line (a single class here).
with open('./data/obj.names', 'w') as f:
    f.write('starfish')

#  Train à partir d'un modèle pré-entraîné 

In [None]:
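# On utilise les poids pré-entraînés yolov4-tiny.conv.29 (les 29 premières couches de yolov4-tiny).
# NOTE : ces poids correspondent à l'architecture yolov4-tiny ; vérifier leur compatibilité avec cfg/yolov4-custom.cfg.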
!wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29


In [None]:
# Entraînement avec la commande train
#%cd darknet
!./darknet detector train data/obj.data cfg/yolov4-custom.cfg yolov4-tiny.conv.29 -dont_show -map | tee output.log


In [None]:
# On vérifie que les poids entraînés ont bien été enregistrés
!ls backup

#  Inference

In [None]:
# need to set our custom cfg to test mode 
%cd cfg
!sed -i 's/batch=64/batch=1/' yolov4-custom.cfg
!sed -i 's/subdivisions=16/subdivisions=1/' yolov4-custom.cfg
%cd ..

In [None]:
test_image = './data/obj/video_0_9670.jpg'
# ext_output 
!./darknet detector test data/obj.data cfg/yolov4-custom.cfg backup/yolov4-custom_last.weights {test_image} -thresh 0.1 -ext_output -dont_show

In [None]:
from PIL import Image

np.random.seed(32)
colors = [(np.random.randint(255), np.random.randint(255), np.random.randint(255))\
          for idx in range(1)]

IMAGEPATH = test_image

def change_path(row):
    """Map a row's original image path to its copy under ./data/obj.

    The new file name is the last directory name and the file name of
    `row.image_path` joined with an underscore.
    """
    parts = row.image_path.split('/')
    videoname, filename = parts[-2], parts[-1]
    return os.path.join('./data/obj', f'{videoname}_{filename}')

# Find the training row that corresponds to the image used for inference and
# load its ground-truth boxes from the label file stored next to the image.
tmp_df = train_df.copy()
tmp_df['image_path'] = tmp_df.apply(change_path, axis=1)

df2 = tmp_df[(tmp_df.image_path==IMAGEPATH)]
if df2.empty:
    # Fail with an explicit message instead of an IndexError from iloc[0].
    raise ValueError(f'{IMAGEPATH} was not found among the training images')
row = df2.iloc[0]
img           = load_image(row.image_path)
image_height  = row.height
image_width   = row.width
# Each label line is "<class> <xmid> <ymid> <w> <h>"; drop the class field.
with open(row.image_path[:-4] + '.txt') as f:
    bboxes_yolo = np.asarray([[float(a) for a in l.split()[1:]] for l in f])

names         = ['starfish']*len(bboxes_yolo)
labels        = [0]*len(bboxes_yolo)


# 2. Plot ground truth and prediction side by side (one row, two columns)
plt.figure(figsize = (19, 8))
plt.subplot(1, 2, 1)

plt.imshow(draw_bboxes(img = img,
                      bboxes = bboxes_yolo, 
                      classes = names,
                      class_ids = labels,
                      class_name = True, 
                      bbox_format = 'yolo',
                       colors = colors,
                      line_thickness = 2))

plt.axis('OFF')
plt.title('Ground truth test set')

plt.subplot(1, 2, 2)
#plt.figure(figsize = (12, 8))
plt.axis('OFF')
plt.title('Prediction test set')
img = Image.open('predictions.jpg')
plt.imshow(img)
plt.show()

#### Modèle initialisé avec les poids yolov4-tiny.conv.29, sans aucun prétraitement des images

In [None]:
def load_model(conf=0.25, iou=0.50, size=(608, 608)):
    """Load the trained darknet network as an OpenCV detection model.

    Args:
        conf: unused; kept so existing calls keep working.
        iou: unused; kept so existing calls keep working.
        size: (width, height) the input image is resized to before inference.
            Defaults to the previously hard-coded (608, 608).

    Returns:
        A cv2.dnn_DetectionModel with pixel values scaled by 1/255 and the
        first and last channels swapped (swapRB=True).
    """
    net = cv2.dnn.readNet('/kaggle/working/darknet/cfg/yolov4-custom.cfg',
                          '/kaggle/working/darknet/backup/yolov4-custom_last.weights')
    net = cv2.dnn_DetectionModel(net)
    net.setInputParams(size=size, scale=1/255, swapRB=True)
    return net

In [None]:
# Minimum detection confidence passed to net.detect() inside predict().
confThreshold = 0.1
# NOTE(review): not referenced anywhere in the visible code — looks like a leftover duplicate of confThreshold.
confthre = 0.1
# Default value of predict()'s `size` argument.
# NOTE(review): the cfg is edited to 160x160 and load_model() defaults to 608x608 — confirm which input size is intended.
IMG_SIZE=416
# Class names, one per line, read back from the obj.names file.
with open('/kaggle/working/darknet/data/obj.names', 'rt') as f:
    names = f.read().rstrip('\n').split('\n')

def predict(net, img, size=IMG_SIZE):
    """Run the detection model on one image.

    Args:
        net: detection model exposing detect(img, confThreshold=..., nmsThreshold=...).
        img: image array handed to `net.detect` as is.
        size: unused; the input size is fixed when the model is loaded.
            Kept so existing calls keep working.

    Returns:
        (bboxes, scores): the boxes as returned by `net.detect` and a 1-D
        float array of confidences rounded to two decimals, or ([], []) when
        nothing was detected.
    """
    _, scores, bboxes = net.detect(img, confThreshold=confThreshold, nmsThreshold=0.4)

    if len(bboxes) == 0:
        return [], []

    # Flatten first: depending on the OpenCV version the scores may come back
    # as an (N, 1) array, whose elements cannot be formatted individually.
    score = np.round(np.asarray(scores, dtype=float).reshape(-1), 2)
    return bboxes, score
def format_prediction(bboxes, confs):
    """Serialize detections as a single space-separated string.

    Every box contributes "conf xmin ymin w h"; boxes are separated by one
    space. An empty `bboxes` gives an empty string.
    """
    pieces = []
    for idx, (xmin, ymin, w, h) in enumerate(bboxes):
        conf = confs[idx]
        pieces.append(f'{conf} {xmin} {ymin} {w} {h}')
    return ' '.join(pieces).strip(' ')

In [None]:
img = Image.open('/kaggle/input/tensorflow-great-barrier-reef/train_images/video_0/1010.jpg')
numpydata = np.asarray(img)
net = load_model(conf=0.1, iou=0.1)
bboxes, confs  = predict(net, numpydata, size=IMG_SIZE)
bboxes

In [None]:
val_df_1=val_df.sample(100)

In [None]:
'''
On calcule pour chaque image, les bboxes issues de notre algorithme entrainé 
'''

def annoter_pred(df):
    """Add a `preds` column holding the model's predicted boxes for each row.

    Args:
        df: DataFrame with an `image_path` column; it is modified in place.

    Raises:
        FileNotFoundError: if an image cannot be read.
    """
    net = load_model(conf=0.1, iou=0.40)
    preds = []
    for image_path in df['image_path']:
        # load_model() configures the network with swapRB=True, i.e. it swaps
        # the first and last channels of its input. cv2.imread returns BGR,
        # so the network ends up seeing RGB; an RGB PIL image would be fed
        # with its channels reversed.
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        bboxes, _ = predict(net, image, size=IMG_SIZE)
        # len() works for both the empty list and the ndarray that predict()
        # can return, unlike `bboxes == []`.
        preds.append([] if len(bboxes) == 0 else np.asarray(bboxes).tolist())
    # Assign the whole column at once instead of chained per-cell assignment,
    # which pandas does not guarantee to write through.
    df['preds'] = pd.Series(preds, index=df.index, dtype=object)

In [None]:
annoter_pred(val_df_1)

## Évaluation

In [None]:
def calculate_score(
    preds: List[torch.Tensor],
    gts: List[torch.Tensor],
    iou_th: float
) -> float:
    """Compute the F2 score of predictions against ground truth at one IoU threshold.

    Args:
        preds: per-image predicted boxes as [xmin, ymin, w, h] tensors
            (or an empty sequence when there is no prediction).
        gts: per-image ground-truth boxes in the same format.
        iou_th: IoU at or above which a ground-truth box counts as detected.

    Returns:
        5*TP / (5*TP + 4*FN + FP), or 0.0 when there is neither a prediction
        nor a ground-truth box to score.
    """
    num_tp = 0
    num_fp = 0
    num_fn = 0
    for p, GT in zip(preds, gts):
        if len(p) and len(GT):
            # Convert [xmin, ymin, w, h] to [xmin, ymin, xmax, ymax] for box_iou.
            gt = GT.clone()
            gt[:, 2] = gt[:, 0] + gt[:, 2]
            gt[:, 3] = gt[:, 1] + gt[:, 3]
            pp = p.clone()
            pp[:, 2] = pp[:, 0] + pp[:, 2]
            pp[:, 3] = pp[:, 1] + pp[:, 3]
            # Best IoU achieved by any prediction, for every ground-truth box.
            best_iou_per_gt = box_iou(pp, gt).max(0)[0]
            tp = int((best_iou_per_gt >= iou_th).sum())
            fn = int((best_iou_per_gt < iou_th).sum())
            # One prediction can cover several ground-truth boxes, making tp
            # exceed len(p); never count a negative number of false positives.
            fp = max(len(p) - tp, 0)
            num_tp += tp
            num_fp += fp
            num_fn += fn
        elif len(p) == 0 and len(GT):
            num_fn += len(GT)
        elif len(p) and len(GT) == 0:
            num_fp += len(p)
    denominator = 5 * num_tp + 4 * num_fn + num_fp
    if denominator == 0:
        # Nothing to score: avoid a ZeroDivisionError.
        return 0.0
    return 5 * num_tp / denominator

In [None]:
def _as_box_tensor(boxes):
    """Return `boxes` as a float tensor, or [] when it is missing (NaN) or empty."""
    # isinstance also catches numpy float NaN, which `type(x) != float` let through.
    if isinstance(boxes, float) or len(boxes) == 0:
        return []
    return torch.tensor(boxes, dtype=torch.float32)


def calculer_f2(df):
    """Compute the mean F2 score over IoU thresholds 0.3 to 0.8 in steps of 0.05.

    Args:
        df: DataFrame with a `preds` column (predicted boxes) and a `bboxes`
            column (ground-truth boxes), one list of [xmin, ymin, w, h] per row.

    Returns:
        The mean of calculate_score() over all thresholds.
    """
    predictions = [_as_box_tensor(boxes) for boxes in df['preds']]
    gts = [_as_box_tensor(boxes) for boxes in df['bboxes']]
    iou_ths = np.arange(0.3, 0.85, 0.05)
    scores = [calculate_score(predictions, gts, iou_th) for iou_th in iou_ths]
    return np.mean(scores)

In [None]:
calculer_f2(val_df_1)