In [None]:
import constants as c

import glob
import os
import cv2
import numpy as np
import time
import random
from shutil import copyfile
from distutils.dir_util import copy_tree

import libs.automold.Automold as am
import imgaug.augmenters as iaa

from tqdm.notebook import tqdm

# Training/Val Data generation
## **1:** Load and augment sequences (rain + fog)

In [None]:
# Fetch the Automold augmentation library into libs/automold.
# The import cell at the top loads it as `libs.automold.Automold`, so this
# checkout has to exist before that import can succeed.
!git clone https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library.git libs/automold

In [None]:
import numpy

# imgaug relies on the legacy `bit_generator` name, so expose the private
# `_bit_generator` module under that attribute.
setattr(numpy.random, "bit_generator", numpy.random._bit_generator)

# Fog ("cloud") overlays with separate settings for day (more) / night (less).
# Tuple values are passed through unchanged to imgaug's CloudLayer.
night_cloud_params = dict(
    intensity_mean=(220, 255),
    intensity_freq_exponent=(-2.0, -1.5),
    intensity_coarse_scale=2,
    alpha_min=(0.3, 0.5),
    alpha_multiplier=0.3,
    alpha_size_px_max=(2, 8),
    alpha_freq_exponent=(-4.0, -2.0),
    sparsity=0.9,
    density_multiplier=(0.3, 0.4),
)
night_clouds_aug = iaa.CloudLayer(**night_cloud_params)

day_cloud_params = dict(
    intensity_mean=240,
    intensity_freq_exponent=-2.0,
    intensity_coarse_scale=2,
    alpha_min=0.4,
    alpha_multiplier=0.6,
    alpha_size_px_max=2,
    alpha_freq_exponent=-4.0,
    sparsity=0.9,
    density_multiplier=0.5,
)
day_clouds_aug = iaa.CloudLayer(**day_cloud_params)

In [None]:
# Create a rain and a fog variant of every "*-orig" training sequence.
# Outputs are written to sibling "<name>-rain" / "<name>-fog" directories.
total_iters = len(glob.glob(f'{c.sequences_dir}/*-orig/*.jpg'))*2  # two outputs per frame
with tqdm(total=total_iters) as pbar:
    for sequence in glob.glob(f'{c.sequences_dir}/*-orig'):
        rain_dir = f'{sequence[:-5]}-rain'  # [:-5] strips the "-orig" suffix
        cloud_dir  = f'{sequence[:-5]}-fog'
        image_paths = sorted(glob.glob(f'{sequence}/*.jpg'))

        # Skip only when BOTH variants are already present. Checking the two
        # directories one after the other used to create an empty fog directory
        # and then skip the sequence for good when only the rain one existed.
        if os.path.exists(cloud_dir) and os.path.exists(rain_dir):
            pbar.update(2 * len(image_paths))  # keep the bar in sync with total_iters
            continue

        os.makedirs(cloud_dir, exist_ok=True)
        os.makedirs(rain_dir, exist_ok=True)

        # Night/dawn sequences get the lighter augmentation settings.
        if "night" in sequence or "dawn" in sequence:
            rain_kwargs = {'rain_type': 'heavy', 'slant': 10}
            clouds_aug = night_clouds_aug
        else:
            rain_kwargs = {'rain_type': 'torrential'}
            clouds_aug = day_clouds_aug

        for image_path in image_paths:
            image = cv2.imread(image_path)
            image_name = os.path.basename(image_path)

            image_rain = am.add_rain(image, **rain_kwargs)
            cv2.imwrite(os.path.join(rain_dir, image_name), image_rain)

            image_clouds = clouds_aug.augment_image(image)
            cv2.imwrite(os.path.join(cloud_dir, image_name), image_clouds)

            pbar.update(2)  # one rain + one fog image written

- Do the same for `test_sequences`!
- (fog from Automold is too harsh, thus imgaug is used instead)

In [None]:
# Create a rain and a fog variant of every "*-orig" test sequence.
# Outputs are written to sibling "<name>-rain" / "<name>-fog" directories.
total_iters = len(glob.glob(f'{c.test_sequences_dir}/*-orig/*.jpg'))*2  # two outputs per frame
with tqdm(total=total_iters) as pbar:
    for sequence in glob.glob(f'{c.test_sequences_dir}/*-orig'):
        rain_dir = f'{sequence[:-5]}-rain'  # [:-5] strips the "-orig" suffix
        cloud_dir  = f'{sequence[:-5]}-fog'
        image_paths = sorted(glob.glob(f'{sequence}/*.jpg'))

        # Skip only when BOTH variants are already present; skipping when just
        # one of them exists would leave the other variant missing forever.
        if os.path.exists(cloud_dir) and os.path.exists(rain_dir):
            pbar.update(len(glob.glob(f'{rain_dir}/*.jpg')))
            pbar.update(len(glob.glob(f'{cloud_dir}/*.jpg')))
            continue

        os.makedirs(cloud_dir, exist_ok=True)
        os.makedirs(rain_dir, exist_ok=True)

        # Night/dawn sequences get the lighter augmentation settings.
        if "night" in sequence or "dawn" in sequence:
            rain_kwargs = {'rain_type': 'heavy', 'slant': 10}
            clouds_aug = night_clouds_aug
        else:
            rain_kwargs = {'rain_type': 'torrential'}
            clouds_aug = day_clouds_aug

        for image_path in image_paths:
            image = cv2.imread(image_path)
            image_name = os.path.basename(image_path)

            image_rain = am.add_rain(image, **rain_kwargs)
            cv2.imwrite(os.path.join(rain_dir, image_name), image_rain)

            image_clouds = clouds_aug.augment_image(image)
            cv2.imwrite(os.path.join(cloud_dir, image_name), image_clouds)

            # total_iters counts two outputs per frame, so advance by two.
            pbar.update(2)

## **2:** Extract ROIs for CNN training data

In [None]:
from bmog import BMOG
from roi_extractor import regions_of_interest

# Run background subtraction over every sequence and store each detected
# region of interest as its own image under c.rois_dir/<sequence name>/.
total_iters = len(glob.glob(f'{c.sequences_dir}/*/*.jpg'))
with tqdm(total=total_iters) as pbar:
    for sequence in glob.glob(f'{c.sequences_dir}/*'):
        seq_name = os.path.basename(sequence)
        rois_dir = os.path.join(c.rois_dir, seq_name)
        frame_paths = sorted(glob.glob(f'{sequence}/*.jpg'))

        # Sequences that already have an output directory are not re-processed.
        if os.path.exists(rois_dir):
            pbar.update(len(frame_paths))  # keep the bar in sync with total_iters
            continue
        os.makedirs(rois_dir)

        bgs = BMOG(threshold_l=20, postprocessing_size=15)

        # The first frames only initialize the background distributions.
        for image_path in frame_paths[:c.frames_for_bgs_init]:
            bgs.apply(cv2.imread(image_path))
            pbar.update()

        for image_path in frame_paths[c.frames_for_bgs_init:]:
            image = cv2.imread(image_path)
            frame_name = os.path.splitext(os.path.basename(image_path))[0]

            fg_mask = bgs.apply(image)
            # NOTE(review): the kernel is passed as the tuple (5,5), not as an
            # array such as np.ones((5,5)) - confirm this is the intended
            # structuring element before changing it (it affects all crops).
            fg_mask = cv2.dilate(fg_mask,(5,5), iterations=15)

            rois = regions_of_interest(fg_mask , c.min_size, c.max_size, c.aspect_ratio)

            for roi_idx, roi in enumerate(rois):
                x,y,x2,y2 = roi
                roi_crop = image[y:y2, x:x2]
                # Name crops after sequence, frame and ROI index. Timestamps
                # can repeat for crops written in quick succession (silently
                # overwriting one another) and differ on every run. The
                # sequence prefix keeps names unique once the crops are
                # copied into the flat train/val folders.
                cv2.imwrite(f'{rois_dir}/{seq_name}_{frame_name}_{roi_idx}.jpg', roi_crop)
            pbar.update()

## **3:** Sort ROIs in classes **"other" & "agriculture"** ... _have fun_
- create folders

``` python
# For every ROI sequence folder, create one sub-folder per class label
# below c.rois_sorted_dir/<sequence name>/.
for sequence in glob.glob(f'{c.rois_dir}/*'):
    seq_name = os.path.basename(sequence)

    for class_label in c.class_labels.values():
        # NOTE: `dir` shadows the Python builtin of the same name.
        dir = os.path.join(c.rois_sorted_dir, seq_name, class_label)
        # Only create folders that do not exist yet.
        if not os.path.exists(dir): os.makedirs(dir)
```

### Use CNN to pre-sort ROIs _(for CNN see `2-fit_cnn_model.ipynb`)_

In [None]:
from libs.efficientnet.efficientnet.tfkeras import preprocess_input
import efn

# Pre-sort the extracted ROIs into class folders with a previously trained
# CNN, so that only its mistakes have to be corrected by hand afterwards.
model = efn.build_model(phi=-5, dropout=0.15)
model.load_weights("../output/old/efn_old/-5-dropout_0.15-up2/checkpoints/fit-gen_epoch-47_loss-0.18.hdf5")
image_shape = model.input_shape[1:3]
# cv2.resize expects (width, height); the slice above is taken as
# (height, width) - assumes a channels-last model input, TODO confirm.
resize_dsize = (image_shape[1], image_shape[0])

total_amount_of_rois = len(glob.glob(f'{c.rois_dir}/*/*.jpg'))
with tqdm(total=total_amount_of_rois) as pbar:
    for sequence in glob.glob(f'{c.rois_dir}/*'):
        seq_name = os.path.basename(sequence)
        sorted_dir = os.path.join(c.rois_sorted_dir, seq_name)
        roi_paths = glob.glob(f'{sequence}/*.jpg')

        # Sequences that already have a sorted folder are left untouched.
        if os.path.exists(sorted_dir):
            pbar.update(len(roi_paths))  # keep the bar in sync with the total
            continue

        for class_label in c.class_labels.values():
            class_dir = os.path.join(c.rois_sorted_dir, seq_name, class_label)
            os.makedirs(class_dir)

        for image_path in roi_paths:
            image = cv2.imread(image_path)

            # prepare for predict: BGR -> RGB, resize to the model input, add batch axis
            image_for_pred = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
            image_for_pred = cv2.resize(image_for_pred, resize_dsize)
            image_for_pred = np.expand_dims(image_for_pred, axis=0)
            image_for_pred = preprocess_input(image_for_pred)
            # predict and threshold; cast to a plain bool for the dict lookup below
            prediction = bool(model.predict(image_for_pred)[0][0] > 0.5)

            image_name = os.path.basename(image_path)
            src = os.path.abspath(image_path)
            dst = os.path.join(sorted_dir, c.class_labels[prediction], image_name)
            # A copy is used instead of a symlink.
            copyfile(src, dst)

            pbar.update()

## **4:** Train / Val split
- _test split is sorted out beforehand (extracted 3 original sequences)_

In [None]:
# Gather the paths of all sorted ROI crops per class, across every sequence.
all_agricultures = glob.glob(c.rois_sorted_dir + "/*/agriculture/*.jpg")
all_others = glob.glob(c.rois_sorted_dir + "/*/other/*.jpg")

In [None]:
# Share of each class that goes into the train / val split.
train_portion = .75
val_portion = .25

# Number of training samples per class (rounded down); the rest is validation.
train_size_agr, train_size_other = (
    int(len(paths) * train_portion) for paths in (all_agricultures, all_others)
)

In [None]:
# Shuffle reproducibly. The cells below skip train/val directories that
# already exist and the upsampling cell slices these lists again, so a
# different order on a re-run would mix a new split with the old folders
# (validation images ending up in the training data).
# Sorting first removes the dependence on the order glob returned the paths in.
all_agricultures.sort()
all_others.sort()

split_rng = random.Random(42)  # local generator, leaves the global random state alone
split_rng.shuffle(all_agricultures)
split_rng.shuffle(all_others)

In [None]:
# Copy the shuffled ROI lists into the train and the val directory.
# A split whose directory already exists is left untouched.
split_plan = (
    # (target directory, agriculture paths, other paths)
    (c.train_dir, all_agricultures[:train_size_agr], all_others[:train_size_other]),  # train split
    (c.val_dir, all_agricultures[train_size_agr:], all_others[train_size_other:]),    # val split
)

for split_dir, agr_paths, other_paths in split_plan:
    if os.path.exists(split_dir):
        continue

    # One sub-folder per class label.
    for class_label in c.class_labels.values():
        class_dir = os.path.join(split_dir, class_label)
        if not os.path.exists(class_dir):
            os.makedirs(class_dir)

    # Files keep their base name inside the class folder.
    for label, paths in (("agriculture", agr_paths), ("other", other_paths)):
        for src in paths:
            copyfile(src, os.path.join(split_dir, label, os.path.basename(src)))

### Balance the unbalanced dataset

In [None]:
# classes are very unbalanced
# The validation split is everything after the training slice, so report the
# real remainder instead of int(len * val_portion), which can be off by one.
val_size_agr = len(all_agricultures) - train_size_agr
val_size_other = len(all_others) - train_size_other

print("Agriculutre (all, train, val) \t",
      len(all_agricultures),
      train_size_agr,
      val_size_agr)

print("Others (all, train, val) \t",
      len(all_others),
      train_size_other,
      val_size_other)

# Without samples in both classes the ratio below is undefined; fail with a
# clear message instead of a bare ZeroDivisionError.
if train_size_agr == 0 or train_size_other == 0:
    raise ValueError("Both classes need at least one training sample to compute the class ratio.")

# thus augment minority class with factor X
ratio = train_size_agr / train_size_other
aug_factor = 1/ratio  # how many times larger the "other" class is
print("Ratio: \t\t\t\t", aug_factor)

In [None]:
# augment minority class (upsample) - 3 additional augmentations for minority class...
import libs.automold.Automold as am
import imgaug.augmenters as iaa

import numpy
# (the numpy bit_generator alias for imgaug is set in the augmenter cell further up)

# Third augmentation: one of blur / channel shuffle / gaussian noise per image.
aug = iaa.OneOf([
    iaa.GaussianBlur((0, 3.0)),
    iaa.ChannelShuffle(p=1.0),
    iaa.AdditiveGaussianNoise(scale=0.1*255)
    ])

agr_up_dir = os.path.join(c.train_dir_up, "agriculture")

# Like the other generation cells, do nothing when the output already exists;
# an unguarded os.makedirs raised FileExistsError on every re-run.
if not os.path.exists(agr_up_dir):
    os.makedirs(agr_up_dir)

    for agr in all_agricultures[:train_size_agr]:
        agr_name = os.path.basename(agr)
        dst = os.path.join(agr_up_dir, agr_name)
        copyfile(agr, dst)  # keep the original next to its augmented versions

        orig_img = cv2.imread(agr)

        aug_1 = am.darken(orig_img)
        aug_2 = am.brighten(orig_img)
        aug_3 = aug.augment_image(orig_img)

        # "<name>_aug_<n>.jpg" - strip the extension instead of slicing 4 characters
        dst_stem = os.path.splitext(dst)[0]
        cv2.imwrite(dst_stem+"_aug_1.jpg", aug_1)
        cv2.imwrite(dst_stem+"_aug_2.jpg", aug_2)
        cv2.imwrite(dst_stem+"_aug_3.jpg", aug_3)


In [None]:
# The majority class is taken over unchanged into the upsampled training set.
# Files are copied rather than symlinked, since symlinks create difficulties
# for windows / docker usage.
other_src_dir = os.path.join(c.train_dir, "other")
other_dst_dir = os.path.join(c.train_dir_up, "other")
copy_tree(other_src_dir, other_dst_dir)