**0. Installation of Darknet and load example data and code**

In [2]:
# For longer training and application, set a GPU under Runtime->Change Runtime settings.
# This is recommended even for the exmaple run.  
# if GPU runtime, get info on GPU
!nvidia-smi

Mon Jan 10 16:02:59 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P8    29W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# Load data from github or from local google drive

#Drive
#from google.colab import drive
#drive.mount(‘/content/gdrive’)

#Gitlab - Code and example files
!git clone https://gitlab.com/pfeldens/BoulderDetection.git


Cloning into 'BoulderDetection'...
remote: Enumerating objects: 30, done.[K
remote: Counting objects: 100% (30/30), done.[K
remote: Compressing objects: 100% (29/29), done.[K
remote: Total 30 (delta 1), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (30/30), done.
Checking out files: 100% (22/22), done.


In [3]:
# check CUDA version
# CUDA: Let's check that Nvidia CUDA is already pre-installed and which version is it. In some time from now maybe you 
!/usr/local/cuda/bin/nvcc --version

#Install stuff
!apt install libopencv-dev python-opencv

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Mon_Oct_12_20:09:46_PDT_2020
Cuda compilation tools, release 11.1, V11.1.105
Build cuda_11.1.TC455_06.29190527_0
Reading package lists... Done
Building dependency tree       
Reading state information... Done
libopencv-dev is already the newest version (3.2.0+dfsg-4ubuntu0.1).
The following NEW packages will be installed:
  python-opencv
0 upgraded, 1 newly installed, 0 to remove and 37 not upgraded.
Need to get 535 kB of archives.
After this operation, 2,944 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 python-opencv amd64 3.2.0+dfsg-4ubuntu0.1 [535 kB]
Fetched 535 kB in 2s (270 kB/s)
Selecting previously unselected package python-opencv.
(Reading database ... 155225 files and directories currently installed.)
Preparing to unpack .../python-opencv_3.2.0+dfsg-4ubuntu0.1_amd64.deb ...
Unpacking python-opencv (3.2.0+dfsg-4ubuntu0.1) ..

In [4]:
#Download darknet
!git clone https://github.com/AlexeyAB/darknet

Cloning into 'darknet'...
remote: Enumerating objects: 15395, done.[K
remote: Total 15395 (delta 0), reused 0 (delta 0), pack-reused 15395[K
Receiving objects: 100% (15395/15395), 14.02 MiB | 11.73 MiB/s, done.
Resolving deltas: 100% (10353/10353), done.


In [5]:
# Change Makefile 
!sed -i 's/OPENCV=0/OPENCV=1/g' /content/darknet/Makefile
!sed -i 's/GPU=0/GPU=1/g' /content/darknet/Makefile
!sed -i 's/CUDNN_HALF=0/CUDNN_HALF=1/g' /content/darknet/Makefile
!sed -i 's/CUDNN=0/CUDNN=1/g' /content/darknet/Makefile

In [None]:
# Install
%cd /content/darknet
!make
!chmod +x /content/darknet/darknet

In [3]:
# Test if installation is running - this should give an output
!/content/darknet/darknet detector train 

 CUDA-version: 11010 (11020), cuDNN: 7.6.5, CUDNN_HALF=1, GPU count: 1  
 CUDNN_HALF=1 
 OpenCV version: 3.2.0
usage: /content/darknet/darknet detector [train/test/valid/demo/map] [data] [cfg] [weights (optional)]


**1. Training**

*1.1 Preparation*

In [8]:
#install modules
!pip install geopandas

Collecting geopandas
  Downloading geopandas-0.10.2-py2.py3-none-any.whl (1.0 MB)
[?25l[K     |▎                               | 10 kB 30.6 MB/s eta 0:00:01[K     |▋                               | 20 kB 5.5 MB/s eta 0:00:01[K     |█                               | 30 kB 7.3 MB/s eta 0:00:01[K     |█▎                              | 40 kB 6.9 MB/s eta 0:00:01[K     |█▋                              | 51 kB 4.1 MB/s eta 0:00:01[K     |██                              | 61 kB 4.3 MB/s eta 0:00:01[K     |██▎                             | 71 kB 4.4 MB/s eta 0:00:01[K     |██▌                             | 81 kB 5.0 MB/s eta 0:00:01[K     |██▉                             | 92 kB 5.1 MB/s eta 0:00:01[K     |███▏                            | 102 kB 4.3 MB/s eta 0:00:01[K     |███▌                            | 112 kB 4.3 MB/s eta 0:00:01[K     |███▉                            | 122 kB 4.3 MB/s eta 0:00:01[K     |████▏                           | 133 kB 4.3 MB/s eta 0:00

In [4]:
#import modules
import os
import sys
sys.path.insert(0,'/content/BoulderDetection')
from PIL import Image
import numpy as np
from tqdm import tqdm
import functions as asf

In [10]:
# Make folder structure for current training run
%cd /content/
!mkdir temp
!mkdir temp/train
!mkdir temp/test
!mkdir temp/current_run 
!mkdir temp/empty



/content


In [11]:
# Cut training mosaics into small tiles with overlap
# Cutting around the individual boulders would also be possible (cut_image_around_point.py, see comments in file) 
# but this would mean many examples are present multiple times.

!python /content/BoulderDetection/cut_image_to_tiles.py \
/content/BoulderDetection/mosaics_for_train \
/content/temp/train \
64 tif --overlap=4

Using  1  cores. 
Tiling Mosaics:
1.tif
  0% 0/1 [00:00<?, ?it/s]Cutting with Overlap
0...10...20...30...40...50...60...70...80...90...100 - done.
100% 1/1 [00:17<00:00, 17.71s/it]


In [12]:
# Delete images with low standard deviation (get rid of the background)
!python /content/BoulderDetection/delete_low_stddev_images.py \
/content/temp/train \
.tif 4.0

Using  1  cores. 
Working on  5092  files.
100% 5092/5092 [00:06<00:00, 795.80it/s]


In [13]:
# Split files into test and train set
!python /content/BoulderDetection/create_random_distribution.py \
/content/temp/train /content/temp/test \
0.10 tif

Splitting all files in folder: /content/temp/train
100% 460/460 [00:00<00:00, 23822.74it/s]


In [None]:
# Upscaling would be here


In [14]:
# Relate available Tiles with manually picked boulder positions to create test and training datasets
# This is done separately for the test and train folders
# Also, this needs to be done separately for each class
# The examples are polygons in an SQLITE database exported with WKT (Well Known Text)
# "Empty/Negative" image examples are special and come in the next step

##!!!! the yolo format at the moment use hardcoded integers. 
##!!!! So ONLY 1 CLASS AT THE MOMENT for YOLO format.
##!!!! This needs to be changed in relate_database_and_images 

!python /content/BoulderDetection/relate_database_and_images.py \
--image_directory=/content/temp/train/ \
--wildcards=tif \
--database_directory=/content/BoulderDetection/vector_data/ \
--input_databases=v2pickedstone_utm.sqlite \
--input_classes=stone \
--out_directory=/content/temp/train/ \
--format=yolo

!python /content/BoulderDetection/relate_database_and_images.py \
--image_directory=/content/temp/test/ \
--wildcards=tif \
--database_directory=/content/BoulderDetection/vector_data/ \
--input_databases=v2pickedstone_utm.sqlite \
--input_classes=stone \
--out_directory=/content/temp/test/ \
--format=yolo

['v2pickedstone_utm.sqlite']
['v2pickedstone_utm']
relate point to mosaic:
Working on:  v2pickedstone_utm.sqlite stone v2pickedstone_utm
Working on  7988 training examples
Comparing with  4149 images
Found  4651  matches
Make annotated csv files
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[k] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp[['x_center_norm', 'y_center_norm', 'width_norm', 'height_norm']] = [convert([h,w], box)  for box in zip(temp.pixelxmin,temp.pixelxmax, temp.pixelymin, temp.pixelymax)]
A value is trying to be set on

In [15]:
# Add negative image examples
# This is a bit hacky
# Negative example are just points in an SQLite database
# Again this is done for the test and train folder

!python /content/BoulderDetection/relate_database_and_images.py \
--image_directory=/content/temp/train/ \
--wildcards=tif \
--database_directory=/content/BoulderDetection/vector_data/ \
--input_databases=v2empty_utm.sqlite \
--input_classes=empty \
--out_directory=/content/temp/empty/ \
--format=yolo \
--empty_examples=1

# Empty images need empty text files with the same name for YOLO
!for file in /content/temp/empty/*.txt; do rm $file; touch $file; done
# Move these files to train folder
!mv /content/temp/empty/*.txt /content/temp/train/

#Clear the folder to be sure no files are left
!rm /content/temp/empty/*.txt

!python /content/BoulderDetection/relate_database_and_images.py \
--image_directory=/content/temp/test/ \
--wildcards=tif \
--database_directory=/content/BoulderDetection/vector_data/ \
--input_databases=v2empty_utm.sqlite \
--input_classes=empty \
--out_directory=/content/temp/empty/ \
--format=yolo \
--empty_examples=1

# Empty images need empty text files with the same name for YOLO
!for file in /content/temp/empty/*.txt; do rm $file; touch $file; done

# Move these files to test folder
!mv /content/temp/empty/*.txt /content/temp/test/

#Clear the folder to be sure no files are left
!rm /content/temp/empty/*.txt


['v2empty_utm.sqlite']
['v2empty_utm']
relate point to mosaic:
Working on:  v2empty_utm.sqlite empty v2empty_utm
Working on  2193 training examples
Comparing with  4149 images
Found  379  matches
Make annotated csv files
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[k] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp[['x_center_norm', 'y_center_norm', 'width_norm', 'height_norm']] = [convert([h,w], box)  for box in zip(temp.pixelxmin,temp.pixelxmax, temp.pixelymin, temp.pixelymax)]
A value is trying to be set on a copy of a slice from a

In [16]:
# Delete all images that have no label-file
!python /content/BoulderDetection/yolo_delete_images_with_no_label.py /content/temp/train .tif
!python /content/BoulderDetection/yolo_delete_images_with_no_label.py /content/temp/test .tif

100% 4149/4149 [00:00<00:00, 20252.91it/s]
Deleted  2526  files.
100% 460/460 [00:00<00:00, 18558.87it/s]
Deleted  272  files.


In [14]:
#Convert to grayscale if needed (do not do that normally, all SSS mosaics were exported as grayscale)
#!/content/BoulderDetection/python convert_to_greyscale.py /content/temp/train /content/temp/train tif -o
#!/content/BoulderDetection/python convert_to_greyscale.py /content/temp/test /content/temp/test tif -o

In [17]:
#convert to png and remove tif
!python /content/BoulderDetection/convert_to_png.py /content/temp/train
!python /content/BoulderDetection/convert_to_png.py /content/temp/test

!rm /content/temp/train/*.tif
!rm /content/temp/test/*.tif

Using  1  cores. 
100% 1623/1623 [00:01<00:00, 1045.80it/s]
Using  1  cores. 
100% 188/188 [00:00<00:00, 978.66it/s]


In [18]:
# Rotate images and labels to make dataset larger
!python /content/BoulderDetection/yolo_rotate_img_and_boundaries.py /content/temp/train .png
!python /content/BoulderDetection/yolo_rotate_img_and_boundaries.py /content/temp/test .png

Using  1  cores. 
100% 1623/1623 [00:18<00:00, 88.71it/s]
Using  1  cores. 
100% 188/188 [00:02<00:00, 88.59it/s]


*1.2 Training*

In [19]:
#Make the file lists the the YOLO model requires
!realpath /content/temp/train/*.txt > /content/temp/train/train.txt
!realpath /content/temp/test/*.txt > /content/temp/test/test.txt

!head /content/temp/train/train.txt


/content/temp/train/1_01_66_rot_135.txt
/content/temp/train/1_01_66_rot_180.txt
/content/temp/train/1_01_66_rot_225.txt
/content/temp/train/1_01_66_rot_270.txt
/content/temp/train/1_01_66_rot_325.txt
/content/temp/train/1_01_66_rot_45.txt
/content/temp/train/1_01_66_rot_90.txt
/content/temp/train/1_01_66.txt
/content/temp/train/1_01_68.txt
/content/temp/train/1_02_63.txt


In [20]:
#Change .txt with .jpg or .png, depending on what was converted to
#Probably the realpath could also be run directly on the images, forgot what was my rationale for this step
#Note:LINUX MAC SED DIFFERENCE: see https://stackoverflow.com/questions/19456518/error-when-using-sed-with-find-command-on-os-x-invalid-command-code
#If you are on a OS X, this probably has nothing to do with the sed command. On the OSX version of sed, the -i option expects an extension argument so your command is actually parsed as the extension argument and the file path is interpreted as the command code.
#Try adding the -e argument explicitly and giving '' as argument to -i:
!sed -i 's/txt/png/g' /content/temp/train/train.txt
!sed -i 's/txt/png/g' /content/temp/test/test.txt

!head /content/temp/train/train.txt

/content/temp/train/1_01_66_rot_135.png
/content/temp/train/1_01_66_rot_180.png
/content/temp/train/1_01_66_rot_225.png
/content/temp/train/1_01_66_rot_270.png
/content/temp/train/1_01_66_rot_325.png
/content/temp/train/1_01_66_rot_45.png
/content/temp/train/1_01_66_rot_90.png
/content/temp/train/1_01_66.png
/content/temp/train/1_01_68.png
/content/temp/train/1_02_63.png


In [24]:
#Set-up the classes.txt file. The name of all classes must be in here (preferably in the correct order..)
#There is no need to specifiy the "empty" class

!touch /content/temp/train/classes.txt
!echo "stones" >> /content/temp/train/classes.txt

!touch /content/temp/test/classes.txt
!echo "stones" >> /content/temp/test/classes.txt

#Create the obj.names and obj.classes file

#Make sure the old one does not exist
!rm /content/temp/current_run/obj.names
!rm /content/temp/current_run/obj.data

#Make new obj.names
!touch /content/temp/current_run/obj.names

#Add all class names that are present in the data set on obj.names
!echo "stones" >> /content/temp/current_run/obj.names

#Make new obj.data
!touch /content/temp/current_run/obj.data
!echo "classes = 1" >> /content/temp/current_run/obj.data
!echo "train = /content/temp/train/train.txt" >> /content/temp/current_run/obj.data
!echo "valid = /content/temp/test/test.txt" >> /content/temp/current_run/obj.data
!echo "names = /content/temp/current_run/obj.names" >> /content/temp/current_run/obj.data
!echo "backup = /content/temp/current_run/" >> /content/temp/current_run/obj.data

**OPTIONAL STEP**

Calculation of optimal anchor sizes

In [25]:
# Optional step
# calculate optimal anchor sizes for configuration file
# refer to git of darknet for details
# These need to go to the darknet config file (yolov4.cfg). 
!/content/darknet/darknet detector \
calc_anchors /content/temp/current_run/obj.data -num_of_clusters 9 -width 512 -height 512 -show



 CUDA-version: 11010 (11020), cuDNN: 7.6.5, CUDNN_HALF=1, GPU count: 1  
 CUDNN_HALF=1 
 OpenCV version: 3.2.0

 num_of_clusters = 9, width = 512, height = 512 
Couldn't open file: obj.data


**TRAINING OPTION 1 - quick training**

The follwing two code lines are only for demonstration in Google-Colab because they run quicker.

They should be disregarded otherwise. 

In [30]:
#Do training with "Yolo-tiny", which is much quicker but was less accurate
# This is only for Google-Colab demonstration
!cp /content/darknet/cfg/yolov4-tiny.cfg /content/temp/current_run

# Open /content/temp/current_run/yolov4-tiny.cfg , scroll to the bottom, and set
# classes=1 in each [YOLO] block (2 in total)
# in the convolutional block above each YOLO entry, set 
# filters = 18



In [None]:
# Demonstration part 2

# This will be running for a couple hours. Dont close the broweser
!/content/darknet/darknet \
detector train \
/content/temp/current_run/obj.data \
/content/temp/current_run/yolov4-tiny.cfg \
-map -dont_show

Your model and the graphical training representations will be saved in /content/temp/current_run

**END OF OPTION 1**

**TRAINING OPTION 2**

Training of the full Yolo-v4 model

In [None]:
# Copy the standard configuration file to the current_run folder
# Stop after that and modify the config files to your liking...
!cp /content/BoulderDetection/yolov4.cfg /content/temp/current_run
!cp /content/BoulderDetection/pre_trained_models/yolov4.conv.137 /content/temp/current_run

In [None]:
# Do the actual training with "normal Yolo"
# The model references the pre-trained models that can be downloaded from the github site
# All models and graphs of progress will be in /content/temp/current_run/

!/content/darknet/darknet \
detector train \
/content/temp/current_run/obj.data \
/content/temp/current_run/yolov4.cfg \
/content/temp/current_run/yolov4.conv.137 \
-map -dont_show


The model will and graphical training progress will be saved in /content/temp/current_run

**END OF OPTION 2**

**Final step**

When Done, manually rename the "temp" folder to a suitable name, to have a complete copy of the model training.

# 2. Model application

Copy the mosaik you want to classifiy in the /content/apply/_todo

In [7]:
%mkdir /content/apply
%mkdir /content/apply/_todo
%mkdir /content/apply/_done
%mkdir /content/apply/tiles

base_folder = '/content/apply/'  #end with /
os.chdir(base_folder)

darknet = "/content/darknet/"
model_folder = base_folder + 'trained_models/SSS_v5_128/' #here the path to the trained model
obj_data = model_folder + 'obj.data'.  #put obj.data (see training) in model folder, for classes
cfg = model_folder + 'yolov4.cfg'. #copy the config file from the training run in the model folder
weights = model_folder + 'yolov4_best.weights' #give the model that is tobe used.
thresh = 0.1


apply_folder = base_folder + 'apply/'
mosaic_folder = base_folder + 'apply/_todo/' #Mosaics go here
done_folder = base_folder + 'apply/_done/'   #Results will go here
tile_folder = base_folder + "apply/tiles"     #temporary folder for mosaics subsets

pixels = 128    # should correspond to model pixel size
greyscale = 1 #for SSS images yes, for multiband MBES image no
delete_background_images = 1
make_tfw = 0 #make world files for tiles; useful e.g., for super-resolution induced changes of pixel size
convert_to_png = 1

mkdir: cannot create directory ‘/content/apply’: File exists
mkdir: cannot create directory ‘/content/apply/_todo’: File exists
mkdir: cannot create directory ‘/content/apply/tiles’: File exists


In [None]:
# Preparation
files = asf.getfiles('tif', mosaic_folder)
for file in tqdm(files):
    print("Working on file")
    full_path = mosaic_folder + file
    cmd = "mv " + full_path + " "   + apply_folder
    result = base_folder + file + str(pixels) + "detect_result.json"
    os.system(cmd)
    result_qgis = base_folder + file + str(pixels) + "detect_result.txt"

    #Empty tile folder
    for path, subdirs, files in os.walk(tile_folder):
        for filename in files:
            os.remove(path + '/' + filename)

    # convert to greyscale if needed
    if greyscale > 0:
        print("Conerting tp Greyscale")
        cmd = "python ./convert_to_greyscale.py " + apply_folder + " " + apply_folder + " --tag band1_ .tif "
        os.system(cmd)
        cmd = "mv " + apply_folder + file + " " + done_folder
        os.system(cmd)
        print("Moving ", file , " to Done Folder and continue with first band only")
        file = "band1_" + file

    # Cut into tiles
    print("Cutting")
    cmd = "python ./cut_image_to_tiles.py " + apply_folder + " " + tile_folder + " " +  str(pixels) +" .tif"
    os.system(cmd)

    # delete background images
    if delete_background_images > 0:
        cmd = "python ./delete_low_stddev_images.py " + tile_folder + " .tif 4.0 "
        os.system(cmd)

    if convert_to_png > 0:
        print("Convert to png")
        cmd = "python ./convert_to_png.py " + tile_folder
        os.system(cmd)

    if make_tfw > 0:
        print("Making tfw")
        cmd = "python ./create_tfw.py "+ tile_folder + " .tif 1 .tfw"
        os.system(cmd)

In [None]:
#Make text file with files to be used
detect_path = apply_folder + "detect.txt"
#Make test and train.txt with link to files
with open(detect_path, "w") as a:
    for path, subdirs, files in os.walk(tile_folder):
        for filename in files:
            if filename.endswith(".png"):
                f = os.path.join(path, filename)
                a.write(str(f) + os.linesep)

In [None]:
# Run detection and clean up
os.chdir(darknet)
cmd = "/content/darknet detector test " +  obj_data + " " +  cfg  +" " + weights +" -dont_show -thresh " + str(thresh) + "  -ext_output -out "+ result +"  < "  + detect_path
os.system(cmd)

os.chdir(apply_folder)
cmd = "python /content/BoulderDetection/yolo_parse_results.py --directory=" + result + " --outfile="+result_qgis
os.system(cmd)

for path, subdirs, files in os.walk(tile_folder):
    for filename in files:
      os.remove(path + '/' + filename)

full_path = apply_folder + file
cmd = "mv " + full_path + " " + done_folder
os.system(cmd)