# ❗️ Inputs needed
- `libcuda.so` imported from [here](https://www.kaggle.com/denispotapov/libcuda)
- Turn on GPU

<a id="section-one"></a>
## Importing libraries

In [None]:
import os, stat
import cv2
from PIL import Image
train_on_gpu = True

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## Setting up Darknet

In [None]:
# Fetch the AlexeyAB darknet sources into ./darknet, relative to the current working directory.
!git clone https://github.com/AlexeyAB/darknet

In [None]:
#Build Darknet with GPU enable settings
# Move into the cloned repository; every relative path from here on is resolved from inside darknet/.
%cd darknet
# Copy the libcuda.so shipped with the attached input dataset into the darknet directory
# (the LDFLAGS rewrite at the end of this cell points the linker at it).
!cp '../../input/libcuda/libcuda.so' .

# OpenCV support is left switched off: this substitution is commented out.
# !sed -i 's/OPENCV=0/OPENCV=1/g' Makefile
# Flip the GPU, CUDNN, CUDNN_HALF and LIBSO switches in the Makefile from 0 to 1.
!sed -i 's/GPU=0/GPU=1/g' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/g' Makefile
!sed -i 's/CUDNN_HALF=0/CUDNN_HALF=1/g' Makefile
!sed -i 's/LIBSO=0/LIBSO=1/' Makefile
# NOTE(review): ARCH_VALUE is not assigned anywhere in the visible part of this notebook. If it is also
# unset in the shell environment, the double-quoted ${ARCH_VALUE} expands to an empty string and any
# matching line becomes a bare "ARCH= " - confirm where the value is supposed to come from.
!sed -i "s/ARCH= -gencode arch=compute_60,code=sm_60/ARCH= ${ARCH_VALUE}/g" Makefile

# Rewrite the LDFLAGS line: -lcuda moves to the end, preceded by -L/kaggle/working/darknet, so the linker
# also searches that directory for libcuda.
# NOTE(review): assumes the notebook runs from /kaggle/working, so the copy above lands there - confirm.
!sed -i 's/LDFLAGS+= -L\/usr\/local\/cuda\/lib64 -lcuda -lcudart -lcublas -lcurand/LDFLAGS+= -L\/usr\/local\/cuda\/lib64 -lcudart -lcublas -lcurand -L\/kaggle\/working\/darknet -lcuda/' Makefile

In [None]:
# verify CUDA
# Prints the version banner of the nvcc compiler installed under /usr/local/cuda/bin.
!/usr/local/cuda/bin/nvcc --version

In [None]:
# make darknet (builds darknet so that you can then use the darknet executable file to run or train object detectors)
# Runs in the current directory (darknet/ after the earlier %cd) against the Makefile patched above.
!make

In [None]:
#Verify build
# Called without data/cfg arguments on purpose.
# NOTE(review): presumably darknet only prints its usage text here, which is enough to show that the
# executable was produced - confirm.
!./darknet detector train

## Helper functions

In [None]:
def rle_decode(mask_rle: str = '', shape: tuple = (1400, 2100)):
    '''
    Turn a run-length encoded mask into a 2-D binary array.

    The encoding is a whitespace separated sequence "start length start length ...",
    where every start is a 1-based position in the column-major flattened image.

    :param mask_rle: run-length encoding as a string
    :param shape: (height, width) of the array to return
    Returns a numpy uint8 array of the given shape, 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    # even positions hold the starts (shifted to 0-based), odd positions the run lengths
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_ends = run_starts + run_lengths

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(run_starts, run_ends):
        flat[begin:end] = 1

    # order='F': the flat positions run down the columns first
    return flat.reshape(shape, order='F')

def get_mask_info(mask_rle: str = '', shape: tuple = (1400, 2100)):
    '''
    Compute the bounding box that encloses a run-length encoded mask.

    Darknet wants a .txt file for each image with a line for each ground truth object that looks like:
    <object-class> <x> <y> <width> <height>
    This function extracts the box numbers from the encoded mask; they are returned in pixels.

    :param mask_rle: run-length encoding as a string ("start length start length ...", 1-based starts)
    :param shape: (height, width) of the image the mask belongs to
    Returns a single tuple (x, y, width, height): x and y are the 1-based column and row of the
    top-left mask pixel, width and height are the max-minus-min column and row distances.
    Raises ValueError when the decoded mask contains no pixels.
    '''
    tokens = mask_rle.split()
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1  # array index starts with 0
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_ends = run_starts + run_lengths

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(run_starts, run_ends):
        flat[begin:end] = 1
    grid = flat.reshape(shape, order='F')

    # the first index array holds rows (y), the second holds columns (x)
    rows, cols = np.nonzero(grid)

    # extreme coordinates of the mask, shifted to 1-based, give the bounding box
    left, top = cols.min() + 1, rows.min() + 1
    right, bottom = cols.max() + 1, rows.max() + 1

    return (left, top, right - left, bottom - top)

def imShow(path):
    '''
    Display the image stored at `path` in the notebook, enlarged three times.

    :param path: location of an image file readable by OpenCV
    :raises FileNotFoundError: if OpenCV cannot read an image from `path`
    '''
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None instead of raising
        raise FileNotFoundError(f'Could not read an image from {path!r}')

    height, width = image.shape[:2]
    resized_image = cv2.resize(image, (3 * width, 3 * height), interpolation=cv2.INTER_CUBIC)

    fig = plt.gcf()
    fig.set_size_inches(18, 10)
    plt.axis("off")
    # OpenCV loads pixels as BGR, matplotlib expects RGB
    plt.imshow(cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB))
    plt.show()

## Data overview

Let's have a look at the data first.

In [None]:
# Root folder of the competition data, relative to the current working directory.
path = '../../input/understanding_cloud_organization'
# Last expression of the cell, so the notebook shows the folder listing.
os.listdir(path)

We have folders with train and test images, file with train image ids and masks and sample submission.

In [None]:
# Training annotations: the cells below read the Image_Label and EncodedPixels columns.
train = pd.read_csv(f'{path}/train.csv')
# Sample submission; it also carries an Image_Label column.
sub = pd.read_csv(f'{path}/sample_submission.csv')

In [None]:
# Preview the first rows of the training table.
train.head()

In [None]:
# Count the files in the train and test image folders and report both totals.
train_listing = os.listdir(f'{path}/train_images')
test_listing = os.listdir(f'{path}/test_images')
n_train, n_test = len(train_listing), len(test_listing)
for message in (
    f'There are {n_train} images in train dataset',
    f'There are {n_test} images in test dataset',
):
    print(message)

In [None]:
# Rows per class: the class name is the part of Image_Label after the underscore.
class_of_row = train['Image_Label'].apply(lambda image_label: image_label.split('_')[1])
class_of_row.value_counts()

So we have ~5.5k images in train dataset and they can have up to 4 masks: Fish, Flower, Gravel and Sugar.

In [None]:
# Same count, restricted to rows that actually carry a mask.
has_mask = train['EncodedPixels'].notnull()
labelled_rows = train.loc[has_mask, 'Image_Label']
labelled_rows.apply(lambda image_label: image_label.split('_')[1]).value_counts()

In [None]:
# How many images have 1, 2, 3 or 4 masks: count masks per image, then count those counts.
has_mask = train['EncodedPixels'].notnull()
image_of_row = train.loc[has_mask, 'Image_Label'].apply(lambda image_label: image_label.split('_')[0])
masks_per_image = image_of_row.value_counts()
masks_per_image.value_counts()

But there are a lot of empty masks. In fact only 266 images have all four masks. It is important to remember this.

In [None]:
# Split Image_Label ("<image file>_<class>") into separate label and im_id columns,
# for the training table and for the submission table alike.
for frame in (train, sub):
    pieces = frame['Image_Label'].apply(lambda image_label: image_label.split('_'))
    frame['label'] = pieces.apply(lambda parts: parts[1])
    frame['im_id'] = pieces.apply(lambda parts: parts[0])

Let's have a look at the images and the masks.

In [None]:
# Draw four randomly chosen training images, one grid row per image and one panel per class row,
# each with its decoded mask laid over the picture.
fig = plt.figure(figsize=(25, 16))
for j, im_id in enumerate(np.random.choice(train['im_id'].unique(), 4)):
    for i, (idx, row) in enumerate(train.loc[train['im_id'] == im_id].iterrows()):
        ax = fig.add_subplot(5, 4, j * 4 + i + 1, xticks=[], yticks=[])
        im = Image.open(f"{path}/train_images/{row['Image_Label'].split('_')[0]}")
        plt.imshow(im)
        mask_rle = row['EncodedPixels']
        if isinstance(mask_rle, str):
            mask = rle_decode(mask_rle)
        else:
            # label might not be there: pandas stores the missing encoding as NaN (a float),
            # so show an empty mask instead of hiding every possible error behind a bare except
            mask = np.zeros((1400, 2100))
        plt.imshow(mask, alpha=0.5, cmap='gray')
        ax.set_title(f"Image: {row['Image_Label'].split('_')[0]}. Label: {row['label']}")

We can see that masks can overlap. Also we can see that clouds are really similar to fish, flower and so on. Another important point: masks are often quite big and can have seemingly empty areas.

## Preparing data for modelling

We want to prepare the data for use with YOLOv4.   
Darknet wants a .txt file for each image with a line for each ground truth object in the image that looks like:
```
<object-class> <x> <y> <width> <height>
```
Where x, y, width, and height are relative to the image's width and height.
Our train.csv file has an image label and EncodedPixels: we are given pairs of a starting pixel and the length of pixels after it that are under the label.

End goal: Have images and label files for each image in an obj folder, which we will then put in /darknet/data/.

First, create **obj** folder which we'll put all of our images and their respective label files in. Copy the image files from **train_images** to this folder.

In [None]:
src = path + '/train_images'
# "$src"* is a shell glob on the prefix .../train_images, so it matches the train_images directory itself.
# NOTE(review): with cp -R the result depends on whether ../obj already exists (a copy named obj versus a
# nested ../obj/train_images) - confirm this cell is run exactly once from a clean working directory.
!cp -R "$src"* "../obj"
!ls

# below copies all files from train_images into existing 'obj' directory
# src = path + '/train_images/'
# !cp "$src"* "../obj"

Now, iterate through all of the data from train.csv. This gives us the labels and their encodings for each image, which we store in txt files.

In [None]:
# Write one Darknet label file per training image. ../obj/<image id>.txt gets one line per mask:
#   <class index> <x_center> <y_center> <width> <height>
# where the class index is an integer and the four box values are fractions of the image size.
CLASS_IDS = {'Fish': 0, 'Flower': 1, 'Gravel': 2, 'Sugar': 3}  # same order as data/obj.names
IMG_HEIGHT, IMG_WIDTH = 1400, 2100  # the default mask shape used by get_mask_info

label_lines = {}
for index, row in train.iterrows():
    image_id, label = row['Image_Label'].split('.jpg_')
    # register the image even if it ends up with no boxes, so it still gets an (empty) label file
    lines = label_lines.setdefault(image_id, [])

    mask_rle = row['EncodedPixels']
    if not isinstance(mask_rle, str):
        continue  # label might not be there: a missing encoding is read as NaN
    try:
        x, y, w, h = get_mask_info(mask_rle)
    except ValueError:
        continue  # the encoding decodes to an empty mask, there is nothing to box

    # get_mask_info returns the 1-based top-left pixel and max-minus-min extents;
    # the box therefore covers w + 1 by h + 1 pixels starting at (x - 1, y - 1)
    box_w = (w + 1) / IMG_WIDTH
    box_h = (h + 1) / IMG_HEIGHT
    x_center = (x - 1) / IMG_WIDTH + box_w / 2
    y_center = (y - 1) / IMG_HEIGHT + box_h / 2
    lines.append(f"{CLASS_IDS[label]} {x_center:.6f} {y_center:.6f} {box_w:.6f} {box_h:.6f}\n")

for image_id, lines in label_lines.items():
    # "w" instead of "a": running the cell again must not append duplicate boxes
    with open("../obj/" + image_id + ".txt", "w") as f:
        f.writelines(lines)

Make sure we have the right number of files.

In [None]:
# Tally the label (.txt) and image (.jpg) files that ended up in ../obj.
obj_entries = os.listdir("../obj")
label_count = sum(1 for entry in obj_entries if entry.endswith(".txt"))
im_count = sum(1 for entry in obj_entries if entry.endswith(".jpg"))

print(f'Number of label files {label_count}')
print(f'Number of image files {im_count}')

Check contents from one file to see if it looks correct.

In [None]:
# Dump the label file of one known image for a visual sanity check.
with open('../obj/0011165.txt', 'r') as label_file:
    content = label_file.read()
print(content)

Copy obj folder and test images into /darknet/data/ folder

In [None]:
# Copy the prepared obj folder into darknet's data/ directory (later cells read it as data/obj).
!cp -R "../obj" "data"
src = path + '/test_images'
# Copy the competition's test images to data/test.
!cp -R "$src" "data/test"

Verify that we have everything in our darknet/data folder

In [None]:
# Show what is now inside darknet's data/ directory.
os.listdir("data")

### Create train.txt and test.txt

In [None]:
# Generate train.txt and test.txt
# Both files land in data/ and list one image per line as data/obj/<file name>.
# No os.chdir is used, so the working directory is never left in the wrong place if something fails.
# The names are sorted because os.listdir returns them in arbitrary order; sorting keeps the split reproducible.
image_files = sorted(
    "data/obj/" + filename
    for filename in os.listdir(os.path.join("data", "obj"))
    if filename.endswith(".jpg")
)

# split train into 80% for train and 20% for validation
# (derived from the real image count; for 5546 images this is the former hard-coded 4436)
n_train_split = len(image_files) * 4 // 5

with open(os.path.join("data", "train.txt"), "w") as outfile:
    outfile.writelines(image + "\n" for image in image_files[:n_train_split])
with open(os.path.join("data", "test.txt"), "w") as outfile:  # 20% for validation
    outfile.writelines(image + "\n" for image in image_files[n_train_split:])

In [None]:
# train.txt and test.txt should now be listed next to obj/ and test/.
!ls data/

In [None]:
# Verify we have the right number in each file
# (train list first, then the validation list; one image path per line)
for list_file in (r"data/train.txt", r"data/test.txt"):
    with open(list_file, 'r') as fp:
        num_lines = sum(1 for line in fp)
        print('Total lines:', num_lines)

## Configure files for training
We must properly configure our custom .cfg, obj.data, obj.names, train.txt and test.txt files.

Let's look at what's currently in our custom cfg file.

In [None]:
# Disabled: the code below is wrapped in a string literal, so this cell does not read the file.
# NOTE(review): the path starts with darknet/, while the active cells at this point use paths relative to
# the darknet directory itself (cfg/..., data/...) - adjust it before re-enabling; confirm.
'''
with open('darknet/cfg/yolov4-custom.cfg', 'r') as f:
    content = f.read()
    print(content)
'''

Now we need to edit the .cfg to fit our needs based on our object detector. The following code generates a link to the .cfg file. Click on it to download the file to your computer.

In [None]:
from IPython.display import FileLink
# Renders a download link for the stock custom config.
# NOTE(review): the path is prefixed with darknet/, but the surrounding cells run with darknet/ as the
# working directory (they use cfg/... and data/...) - check that the link resolves; confirm.
FileLink(r'darknet/cfg/yolov4-custom.cfg')
# FileLink(r'darknet/cfg/yolov4-tiny.cfg')

### Editing the .cfg file
batch = 64 and subdivisions = 16 should give the best results. If you run into any issues, increase subdivisions to 32.

We have 4 classes, so make the rest of the changes to the cfg based on this.

How to Configure Your Variables:

width = 416

height = 416 (these can be any multiple of 32, 416 is standard, you can sometimes improve results by making value larger like 608 but will slow down training)

Normally we'd want:
max_batches = (# of classes) * 2000 = 8000

But start with max_batches = 160 for now because it's taking too long

steps = (80% of max_batches), (90% of max_batches) = 6400, 7200 for max_batches = 8000 (with max_batches = 160 that is 128, 144)

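filters = (# of classes + 5) * 3 = (4 + 5) * 3 = 27 (change this only in the [convolutional] layer directly before each [yolo] layer, not in every convolutional layer; also set classes = 4 in each [yolo] layer)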

Optional: if you run into memory issues or find that training takes a very long time, change random = 1 to random = 0 in each of the [yolo] layers of the cfg (there are three in the full YOLOv4 config). This speeds up training and saves memory, at the cost of slightly reduced model accuracy.

Also, change mosaic=1 to mosaic=0

### Add the .cfg file to kaggle
Rename the file to 'yolov4-obj.cfg'. Click 'Add data', then 'Upload', and upload the custom config file under the title customconfig. This places your 'yolov4-obj.cfg' file in kaggle/input/customconfig (the folder name is derived from the title you chose). Then we can copy it into darknet/cfg.

In [None]:
# List the attached input datasets to find the folder name of the uploaded config.
!ls ../../input

In [None]:
# Alternative config uploads, kept for reference:
# !cp ../input/customconfig/yolov4-obj.cfg darknet/cfg/yolov4-obj.cfg
# !cp ../input/customconfigtiny/yolov4-tiny.cfg darknet/cfg/yolov4-tiny-obj.cfg
# Active choice: install the uploaded tiny config as cfg/yolov4-tiny-obj.cfg, the name the
# train and test commands further down refer to.
!cp ../../input/customconfigtinier/yolov4-tiny.cfg cfg/yolov4-tiny-obj.cfg

Verify that your file looks right

In [None]:
# Disabled: the code below is wrapped in a string literal, so nothing is read or printed.
# NOTE(review): it refers to darknet/cfg/yolov4-obj.cfg, whereas the active copy command writes
# cfg/yolov4-tiny-obj.cfg - update the path before re-enabling; confirm.
'''
with open('darknet/cfg/yolov4-obj.cfg', 'r') as f:
    content = f.read()
    print(content)
'''

### Create obj.names and obj.data
obj.names should have all of our classes, and obj.data contains the info our detector needs to know which images to train and test on.

In [None]:
!mkdir ../backup

In [None]:
# obj.names: one class name per line.
class_lines = ('Fish\n', 'Flower\n', 'Gravel\n', 'Sugar\n')
with open('data/obj.names', 'w') as names_file:
    names_file.writelines(class_lines)

# obj.data: class count, the train/validation image lists, the names file and the weights folder.
data_lines = (
    'classes = 4\n',
    'train = data/train.txt\n',
    'valid = data/test.txt\n',
    'names = data/obj.names\n',
    'backup = ../backup\n',  # where we will save the weights to of our model throughout training
)
with open('data/obj.data', 'w') as data_file:
    data_file.writelines(data_lines)

### Download pre-trained weights for convolutional layers

In [None]:
# Download yolov4.conv.137 into the current directory.
# NOTE(review): only the commented-out full-YOLOv4 training command passes this file; the active tiny
# training command does not reference it - confirm whether the download is still needed.
!wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.conv.137

### Train the detector!

In [None]:
# Full YOLOv4 run starting from the downloaded yolov4.conv.137 weights (disabled):
# !./darknet detector train data/obj.data cfg/yolov4-obj.cfg yolov4.conv.137 -dont_show -map -gpus 0
# Active run: the tiny config, with no pre-trained weights file on the command line.
!./darknet detector train data/obj.data cfg/yolov4-tiny-obj.cfg -dont_show -map -gpus 0

In [None]:
# Show the contents of ../backup, the folder obj.data names as the weights backup location.
os.listdir("../backup")

Save last weights to your computer so we don't have to retrain if we lose it

### Run our custom detector

In [None]:
# set custom cfg to test mode 
# The substitutions only fire if the cfg contains exactly "batch=64" and "subdivisions=16" (no spaces,
# those values); with any other values in the uploaded cfg the file is left unchanged.
%cd cfg
!sed -i 's/batch=64/batch=1/' yolov4-tiny-obj.cfg
!sed -i 's/subdivisions=16/subdivisions=1/' yolov4-tiny-obj.cfg
%cd ..

In [None]:
# run your custom detector on one image with this command
# (uses one of the test images; -thresh is the minimum confidence a detection needs in order to be shown)
!./darknet detector test data/obj.data cfg/yolov4-tiny-obj.cfg ../backup/yolov4-tiny-obj_last.weights data/test/002f507.jpg -thresh 0.3
# Display predictions.jpg - presumably the annotated image the command above writes into the current directory.
imShow('predictions.jpg')

In [None]:
# The same test image without detections, for comparison.
imShow('data/test/002f507.jpg')

In [None]:
# Delete the previous result so that a stale image cannot be mistaken for the output of the next run.
os.remove('predictions.jpg')

In [None]:
# run custom detector on a train image we know to see how it does (hopefully we see mostly flower and fish labels)
!./darknet detector test data/obj.data cfg/yolov4-tiny-obj.cfg ../backup/yolov4-tiny-obj_last.weights data/train/0011165.jpg -thresh 0.3
imShow('predictions.jpg')

In [None]:
# run custom detector on a train image we know to see and save output label in yolo format
!./darknet detector test data/obj.data cfg/yolov4-tiny-obj.cfg ../backup/yolov4-tiny-obj_last.weights data/train/0011165.jpg -thresh 0.3 -dont_show -save_labels < data/new_train.txt
imShow('predictions.jpg')

In [None]:
# Calculate mean average precision
# Evaluates the last saved weights; obj.data points the validation set at data/test.txt.
!./darknet detector map data/obj.data cfg/yolov4-tiny-obj.cfg ../backup/yolov4-tiny-obj_last.weights

In [None]:
# run detector on all test images, and save results of detection to result.txt  
# !./darknet detector test data/obj.data cfg/yolov4-obj.cfg ../backup/yolov4-obj_last.weights -dont_show -ext_output < data/test.txt > result.txt

In [None]:
# Pseudo-labelling - to process a list of images data/test.txt and save results of detection in Yolo training format for each image as label <image_name>.txt
# ./darknet detector test data/obj.data cfg/yolov4-obj.cfg ../backup/yolov4-obj_last.weights -thresh 0.25 -dont_show -save_labels < data/test.txt