# Second CodaLab competition - Image segmentation

Model finetuning:

To better tune this model on the new set (previously the test set), I continued training the previous best-performing model with the last learning rate selected by ReduceLROnPlateau.

Previous description:

For this challenge I have chosen to train the model only on the Bipbip Haricot dataset as, to predict the other dataset, it is only necessary to retrain the model on the other dataset.
Some image preprocessing has to be done on the Pead dataset (perspective transformation to enlarge the farthest elements) and on the Roseau dataset (image color normalization).

I started with a VGG-16 as an encoder and a decoder made of UpSampling2D and Conv2D layers (as in the exercise session example).

I trained this model with data augmentation and hyperparameter tuning (depth and filters of the decoder) but the results were not so encouraging so I opted for a U-Net architecture.

I tried two main models: a hand-written U-Net-like network and the `custom_unet` implemented in the library https://github.com/karolzak/keras-unet.

During the trials I tried different techniques to achieve the maximum IoU on the test set, for example splitting the image into patches (12 patches of size 512x512), but the results were not very different from the traditional approach. I also tried training the model on a single class at a time and then merging the predictions using argmax.

The main problem with the custom U-Net was that different classes were predicted as one, but with a pretty high IoU over both classes.

Finally, I chose to use a relatively high LR with ReduceLROnPlateau, a high number of filters per conv layer, and BatchNormalization layers.

During this challenge I used Weights & Biases instead of TensorBoard to collect statistics about how the model is performing and to perform hyperparameter tuning in a simpler way. Here you can find the entire project with all the runs: https://wandb.ai/lrsb/codalab2


# Download dataset

In [None]:
import json
from google.colab import drive

# Install the experiment tracker (wandb) and the keras-unet package that
# provides `custom_unet`, used later in the notebook.
!pip install --upgrade wandb
!pip install keras-unet

#@markdown Insert here your credentials
wandb_key = ''#@param {type:'string'}

# Authenticate the wandb CLI with the key entered in the form field above.
!wandb login {wandb_key}

# NOTE(review): the mount call is commented out, yet the `cp` commands below
# read from /content/drive -- presumably Drive was mounted by other means;
# confirm before running on a fresh runtime.
#drive.mount('/content/drive')

# Copy both dataset archives from Drive to the local disk, then unpack them
# quietly (-q) into the current working directory.
!echo "Copying dataset..."
!cp '/content/drive/MyDrive/Colab Notebooks/CodaLab2/Development_Dataset.zip' '/content/Development_Dataset.zip'
!cp '/content/drive/MyDrive/Colab Notebooks/CodaLab2/Final_Dataset.zip' '/content/Final_Dataset.zip'
!echo "Extracting dataset..."
!unzip -q /content/Development_Dataset.zip
!unzip -q /content/Final_Dataset.zip

# Setup

### Making results more reproducible and setting params

In [None]:
import tensorflow as tf

# Seed TensorFlow's global random generator so runs are more repeatable.
SEED = 1234#@param {type:'number'}
tf.random.set_seed(SEED)

# Spatial size every image/mask is resized to before entering the network.
img_w = 1024#@param {type:'number'}
img_h = 1024#@param {type:'number'}

# (height, width, channels) of a single network input.
input_shape = (img_h, img_w, 3)

# Segmentation classes; list positions are the class indices, and
# class_colors[k] is the RGB colour used to draw class k.
num_classes = 3
class_names = ['background', 'crop', 'weed']
class_colors = [[0, 0, 0], [255, 255, 255], [216, 67, 82]]

# Folder names iterated when predicting the test set (team / crop type).
teams = ['Bipbip', 'Pead', 'Roseau', 'Weedelec']
crop_types = ['Haricot', 'Mais']

### Supporting functions

In [None]:
def rle_encode(img):
  '''
  Run-length encode a binary mask.

  img: numpy array, 1 - foreground, 0 - background
  Returns a space-separated string of "start length" pairs, where start is
  the 1-based position of a foreground run in the flattened array. The
  string is empty when the mask has no foreground.
  '''
  # Pad with a background value on both sides so that runs touching either
  # border still produce an opening and a closing transition.
  padded = np.concatenate([[0], img.flatten(), [0]])
  # 1-based positions at which the value changes; they alternate between
  # "run starts" and "run ends".
  changes = np.flatnonzero(padded[1:] != padded[:-1]) + 1
  # Turn every end position into a run length (end - start).
  changes[1::2] -= changes[::2]
  return ' '.join(map(str, changes))

def rle_decode(rle, shape):
  '''
  Rebuild a binary mask from a run-length string.

  rle: space-separated "start length" pairs with 1-based starts
  shape: two-element (rows, cols) of the mask to rebuild
  Returns a uint8 numpy array of that shape, 1 inside the runs, 0 elsewhere
  '''
  tokens = rle.split()
  # Even positions hold the 1-based starts, odd positions the run lengths.
  first = np.asarray(tokens[0::2], dtype=int) - 1
  last = first + np.asarray(tokens[1::2], dtype=int)
  flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
  for begin, end in zip(first, last):
    flat[begin:end] = 1
  return flat.reshape(shape)

def get_patches(img_arr, size=256, stride=256):
    """
    Takes single image or array of images and returns
    crops using sliding window method.
    If stride < size it will do overlapping.

    Only windows that fit entirely inside the image are returned; for a
    batch, the patches of the first image come first, then those of the
    second image, and so on (rows before columns inside each image).

    Args:
        img_arr (numpy.ndarray): one image with 3 dimensions (the first two
            are the ones being windowed) or a batch of images with 4
            dimensions (windowing is done over dimensions 1 and 2).
        size (int, optional): side of the square window. Defaults to 256.
        stride (int, optional): step between two consecutive windows; must
            divide ``size``. Defaults to 256.

    Raises:
        ValueError: if ``size`` is not a multiple of ``stride``.
        ValueError: if ``img_arr`` has neither 3 nor 4 dimensions.
        ValueError: raised by ``np.stack`` when no window fits (e.g. the
            image is smaller than ``size`` or the batch is empty).

    Returns:
        numpy.ndarray: all patches stacked along a new first axis.
    """
    # check size and stride
    if size % stride != 0:
        raise ValueError("size % stride must be equal 0")

    # Treat a single image as a batch of one so both cases share one loop.
    if img_arr.ndim == 3:
        images = [img_arr]
    elif img_arr.ndim == 4:
        images = img_arr
    else:
        raise ValueError("img_arr.ndim must be equal 3 or 4")

    # Number of trailing window positions that would run past the border
    # when windows overlap (0 when stride == size).
    overlapping = (size // stride) - 1

    patches_list = []
    for im in images:
        # Rows and columns get their own limit: reusing the row limit for
        # the columns breaks on non-square images.
        i_max = im.shape[0] // stride - overlapping
        j_max = im.shape[1] // stride - overlapping
        for i in range(i_max):
            for j in range(j_max):
                patches_list.append(
                    im[
                        i * stride : i * stride + size,
                        j * stride : j * stride + size,
                    ]
                )

    return np.stack(patches_list)

### Code for creating datasets

In [None]:
import os, math
from PIL import Image

class CustomDataset(tf.keras.utils.Sequence):
  """Per-image dataset of (image, one-hot mask) pairs for one team / crop type.

  Images are read from ``<dataset_dir>/<team>/<crop_type>/Images`` and the
  matching masks (same base name, ``.png`` extension) from the sibling
  ``Masks`` folder. The sorted file list is split by position: the last
  ``floor(n * validation_split)`` files form the validation subset and all
  the files before them form the training subset.

  Args:
    dataset_dir: root folder containing one sub-folder per team.
    which_subset: 'training' selects the training part of the split; any
      other value selects the validation part.
    team: team folder name.
    crop_type: crop folder name inside the team folder.
    img_generator: optional object exposing ``get_random_transform`` and
      ``apply_transform`` (e.g. an ImageDataGenerator). It is only used for
      the 'training' subset.
    preprocessing_function: optional callable applied to the image array
      just before it is returned.
    validation_split: fraction of the files reserved for validation.
    out_shape: (height, width) every image and mask is resized to.
  """

  # RGB colour found in the mask files -> class index stored in the target.
  _MASK_COLOR_TO_CLASS = (
      ((0, 0, 0), 0),
      ((216, 124, 18), 0),
      ((255, 255, 255), 1),
      ((216, 67, 82), 2),
  )

  def __init__(self, dataset_dir, which_subset, team, crop_type, img_generator=None,
               preprocessing_function=None, validation_split=0.1, out_shape=(256, 256)):
    super().__init__()

    self.crop_dir = os.path.join(dataset_dir, team, crop_type)
    # os.listdir returns names in arbitrary order; sorting keeps the
    # train/validation split identical across instances and runs.
    self.subset_filenames = sorted(os.listdir(os.path.join(self.crop_dir, 'Images')))

    self.which_subset = which_subset
    self.dataset_dir = dataset_dir
    self.team = team
    self.crop_type = crop_type
    self.img_generator = img_generator
    self.preprocessing_function = preprocessing_function
    self.validation_split = validation_split
    self.out_shape = out_shape

  def _num_validation(self):
    """Number of files at the end of the list reserved for validation."""
    return math.floor(len(self.subset_filenames) * self.validation_split)

  def _num_training(self):
    """Number of files at the start of the list used for training."""
    return len(self.subset_filenames) - self._num_validation()

  def __len__(self):
    if self.which_subset == 'training':
      return self._num_training()
    return self._num_validation()

  def _load_pair(self, curr_filename):
    """Read one image and its mask, resized to out_shape, as numpy arrays."""
    # PIL expects (width, height) while out_shape is (height, width).
    target_size = (self.out_shape[1], self.out_shape[0])

    img_path = os.path.join(self.crop_dir, 'Images', curr_filename)
    mask_path = os.path.join(self.crop_dir, 'Masks', os.path.splitext(curr_filename)[0] + '.png')

    # Context managers close the underlying files once the pixels are copied.
    with Image.open(img_path) as img:
      img_arr = np.array(img.resize(target_size))
    with Image.open(mask_path) as mask:
      # Nearest neighbour keeps the exact class colours; an interpolating
      # filter would create in-between colours that match no class.
      mask_arr = np.array(mask.resize(target_size, resample=Image.NEAREST))

    return img_arr, mask_arr

  def _augment(self, img_arr, mask_arr):
    """Apply one random transform of img_generator to the image and its mask."""
    # We can get a random transformation from the ImageDataGenerator using get_random_transform
    # and we can apply it to the image using apply_transform
    img_t = self.img_generator.get_random_transform(img_arr.shape)
    img_arr = self.img_generator.apply_transform(img_arr, img_t)
    # ImageDataGenerator use bilinear interpolation for augmenting the images.
    # Thus, when applied to the masks it will output 'interpolated classes', which
    # is an unwanted behaviour. As a trick, we can transform each class mask
    # separately and then we can cast to integer values (as in the binary segmentation notebook).
    # Finally, we merge the augmented binary masks to obtain the final segmentation mask.
    out_mask = np.zeros_like(mask_arr)
    for c in np.unique(mask_arr):
      if c > 0:
        curr_class_arr = np.float32(mask_arr == c)
        curr_class_arr = self.img_generator.apply_transform(curr_class_arr, img_t)
        # from [0, 1] to {0, 1}
        curr_class_arr = np.uint8(curr_class_arr)
        # recover original class
        curr_class_arr = curr_class_arr * c
        out_mask += curr_class_arr
    return img_arr, out_mask

  def __getitem__(self, index):
    """Return (image array, float32 one-hot mask) for position index of the subset.

    Raises:
      IndexError: if index is outside ``range(len(self))``.
    """
    if not 0 <= index < len(self):
      raise IndexError('index {} out of range for subset of size {}'.format(index, len(self)))

    if self.which_subset == 'training':
      curr_filename = self.subset_filenames[index]
    else:
      # Validation files start right after the last training file.
      curr_filename = self.subset_filenames[self._num_training() + index]

    img_arr, mask_arr = self._load_pair(curr_filename)

    # Translate the RGB mask into a single-channel map of class indices.
    new_mask_arr = np.zeros(mask_arr.shape[:2], dtype=mask_arr.dtype)
    for color, class_index in self._MASK_COLOR_TO_CLASS:
      new_mask_arr[np.all(mask_arr == np.array(color), axis=-1)] = class_index

    mask_arr = np.expand_dims(new_mask_arr, -1)

    # Data augmentation is only performed on the training subset.
    if self.which_subset == 'training' and self.img_generator is not None:
      img_arr, out_mask = self._augment(img_arr, mask_arr)
    else:
      out_mask = mask_arr

    # One hot encoding
    out_mask = tf.keras.utils.to_categorical(out_mask, num_classes=num_classes)

    if self.preprocessing_function is not None:
      img_arr = self.preprocessing_function(img_arr)

    return img_arr, np.float32(out_mask)

In [None]:
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def GetDataset(dir, team, crop_type, apply_data_augmentation, validation_split, batch_size, preprocessing_function):
  """Build the training and validation pipelines for one team / crop type.

  Args:
    dir: root folder of the dataset, forwarded to CustomDataset.
    team: team folder name.
    crop_type: crop folder name.
    apply_data_augmentation: when true, an ImageDataGenerator with random
      rotations, shifts, zoom and flips is handed to both CustomDataset
      instances.
    validation_split: fraction of the files reserved for validation.
    batch_size: batch size of both tf.data pipelines.
    preprocessing_function: optional callable forwarded to CustomDataset.

  Returns:
    Tuple ``(train_dataset, dataset, valid_dataset, dataset_valid)``: the
    batched, endlessly repeating tf.data pipelines for training and
    validation, each followed by the CustomDataset it is generated from.
  """
  img_data_gen = None
  if apply_data_augmentation:
    img_data_gen = ImageDataGenerator(rotation_range=20,
                                      width_shift_range=10,
                                      height_shift_range=10,
                                      zoom_range=0.2,
                                      rescale=1./255,
                                      horizontal_flip=True,
                                      vertical_flip=True,
                                      fill_mode='reflect')

  def build_subset(subset_name):
    # Both subsets share every setting except the subset name.
    return CustomDataset(dir, subset_name,
                         team, crop_type,
                         img_generator=img_data_gen,
                         validation_split=validation_split,
                         preprocessing_function=preprocessing_function,
                         out_shape=[img_h, img_w])

  def to_pipeline(sequence):
    # Wrap the sequence into a tf.data pipeline, batch it and loop forever.
    pipeline = tf.data.Dataset.from_generator(
        lambda: sequence,
        output_types=(tf.float32, tf.float32),
        output_shapes=([img_h, img_w, 3], [img_h, img_w, num_classes]))
    return pipeline.batch(batch_size).repeat()

  dataset = build_subset('training')
  dataset_valid = build_subset('validation')

  train_dataset = to_pipeline(dataset)
  valid_dataset = to_pipeline(dataset_valid)

  return train_dataset, dataset, valid_dataset, dataset_valid

### Code for saving testset results

In [None]:
import ntpath
from PIL import Image

# Size each team's predictions are resized to before encoding; passed as the
# `size` argument of tf.image.resize and stored as 'shape' in the submission.
sizes = {'Bipbip': [1536, 2048], 'Pead': [2464, 3280], 'Roseau': [819, 1227], 'Weedelec': [3456, 5184]}

def PredictDatasets(file, model, test_dir='/content/Test_Final'):
  """Predict every test image and write the submission JSON.

  For each team / crop type, the images under
  ``<test_dir>/<team>/<crop_type>/Images`` are fed to the model. Every
  prediction is resized to the team's size, turned into a class map with
  argmax and stored as run-length encoded 'crop' (class 1) and 'weed'
  (class 2) masks, keyed by the image file name without extension.

  Args:
    file: path of the JSON file to write.
    model: trained model exposing ``predict``.
    test_dir: root folder of the test set. Defaults to '/content/Test_Final'.
  """
  submission_dict = {}
  test_img_generator = ImageDataGenerator()

  for team in teams:
    for crop_type in crop_types:
      test_gen = test_img_generator.flow_from_directory(os.path.join(test_dir,
                                                                     team,
                                                                     crop_type),
                                                        batch_size=1,
                                                        target_size=(img_h, img_w),
                                                        classes=['Images'],
                                                        shuffle=False)

      filenames = test_gen.filenames
      if not filenames:
        # Nothing to predict for this team / crop type.
        continue

      # `steps` must be passed by keyword: the second positional parameter
      # of predict is batch_size, not the number of steps.
      predictions = model.predict(test_gen, steps=len(test_gen), verbose=1)

      # shuffle=False keeps predictions in the same order as the file names.
      for filename, p in zip(filenames, predictions):
        p = tf.image.resize(p, sizes[team], method='nearest').numpy()
        mask_arr = np.argmax(p, -1)
        img_name = os.path.splitext(ntpath.basename(filename))[0]

        submission_dict[img_name] = {
            'shape': sizes[team],
            'team': team,
            'crop': crop_type,
            'segmentation': {
                'crop': rle_encode(mask_arr == 1),
                'weed': rle_encode(mask_arr == 2),
            },
        }

  with open(file, 'w') as f:
    json.dump(submission_dict, f)

# U-Net



In [None]:
import wandb
from wandb.keras import WandbCallback
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras import layers
import ntpath
from keras_unet.models import custom_unet

def GetUnetModel():
  """Build the hand-written encoder/decoder segmentation network.

  The network halves the resolution four times (one strided convolution and
  three max-pooling blocks) and doubles it back four times, with a projected
  residual connection around every block. The last layer is a per-pixel
  softmax over ``num_classes`` channels.

  Returns:
    An uncompiled tf.keras.Model taking (img_h, img_w, 3) inputs.
  """
  # Height comes first, matching `input_shape` and the arrays produced by the
  # datasets.
  inputs = tf.keras.Input(shape=(img_h, img_w, 3))

  ### [First half of the network: downsampling inputs] ###

  # Entry block
  x = layers.Conv2D(32, 3, strides=2, padding="same")(inputs)
  x = layers.BatchNormalization()(x)
  x = layers.Activation("relu")(x)

  previous_block_activation = x  # Set aside residual

  # Blocks 1, 2, 3 are identical apart from the feature depth.
  for filters in [64, 128, 256]:
    x = layers.Activation("relu")(x)
    x = layers.SeparableConv2D(filters, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)

    x = layers.Activation("relu")(x)
    x = layers.SeparableConv2D(filters, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)

    x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

    # Project residual: the strided 1x1 convolution matches both the halved
    # resolution and the new channel count of x.
    residual = layers.Conv2D(filters, 1, strides=2, padding="same")(previous_block_activation)
    x = layers.add([x, residual])  # Add back residual
    previous_block_activation = x  # Set aside next residual

  ### [Second half of the network: upsampling inputs] ###

  for filters in [256, 128, 64, 32]:
    x = layers.Activation("relu")(x)
    x = layers.Conv2DTranspose(filters, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)

    x = layers.Activation("relu")(x)
    x = layers.Conv2DTranspose(filters, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)

    x = layers.UpSampling2D(2)(x)

    # Project residual: upsample, then use a 1x1 convolution to match the
    # channel count of x.
    residual = layers.UpSampling2D(2)(previous_block_activation)
    residual = layers.Conv2D(filters, 1, padding="same")(residual)
    x = layers.add([x, residual])  # Add back residual
    previous_block_activation = x  # Set aside next residual

  # Add a per-pixel classification layer
  outputs = layers.Conv2D(num_classes, 3, activation="softmax", padding="same")(x)

  # Define the model
  model = tf.keras.Model(inputs, outputs)
  return model

def GetCustomUnetModel():
  """Build the U-Net provided by keras-unet (`custom_unet`).

  Returns:
    The model returned by custom_unet for (img_h, img_w, 3) inputs, with
    batch normalization, 64 base filters, dropout 0.3 and ``num_classes``
    output channels.
  """
  # Height comes first, matching `input_shape` and the arrays produced by the
  # datasets.
  # NOTE(review): the output activation is 'sigmoid' although the targets are
  # one-hot over mutually exclusive classes; 'softmax' looks like the
  # intended choice -- confirm before changing, as it alters training.
  model = custom_unet(input_shape=(img_h, img_w, 3),
                      use_batch_norm=True,
                      num_classes=num_classes,
                      filters=64,
                      dropout=0.3,
                      output_activation='sigmoid')

  return model

# Run the nvidia-smi shell command to show the GPU available to this runtime.
!nvidia-smi

def GetModelFor(team, crop_type, custom):
  """Train a segmentation model for one team / crop type and return it.

  The model is first fitted on '/content/Development_Dataset/Training' and
  then fitted again, with the same callbacks and optimizer state, on
  '/content/Test_Dev'. Each call starts a wandb run in the 'codalab2'
  project.

  Args:
    team: team folder name.
    crop_type: crop folder name.
    custom: when true use GetCustomUnetModel, otherwise GetUnetModel.

  Returns:
    The trained Keras model.
  """
  #@markdown Set hyperparameters used during training

  data_augmentation = True#@param {type:'boolean'}
  validation_split = 0.1#@param {type:'number'}
  batch_size = 1#@param {type:'number'}
  epochs = 70#@param {type:'number'}
  use_early_stopping = True#@param {type:'boolean'}

  wandb.init(project='codalab2', config={
      'team': team,
      'crop_type': crop_type,
      'custom': custom,
      'data_augmentation': data_augmentation,
      'validation_split': validation_split,
      'batch_size': batch_size,
      'epochs': epochs,
      'use_early_stopping': use_early_stopping
    })

  # GetDataset returns (train pipeline, train sequence, validation pipeline,
  # validation sequence); the sequences are only needed for their length.
  train_ds, train_seq, valid_ds, valid_seq = GetDataset('/content/Development_Dataset/Training',
                                                        team,
                                                        crop_type,
                                                        data_augmentation,
                                                        validation_split,
                                                        batch_size,
                                                        None)
  # The second dataset is always read one image per batch.
  new_train_ds, new_train_seq, new_valid_ds, new_valid_seq = GetDataset('/content/Test_Dev',
                                                                        team,
                                                                        crop_type,
                                                                        data_augmentation,
                                                                        validation_split,
                                                                        1,
                                                                        None)

  if custom:
    model = GetCustomUnetModel()
  else:
    model = GetUnetModel()

  callbacks = [WandbCallback(data_type='image',
                             log_weights=True,
                             input_type='image',
                             output_type='segmentation_mask',
                             class_colors=class_colors)]
  callbacks.append(tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                        factor=0.1,
                                                        patience=4,
                                                        cooldown=1,
                                                        min_delta=0,
                                                        verbose=1))

  if use_early_stopping:
    callbacks.append(tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=8, restore_best_weights=True))

  # NOTE(review): tf.keras.metrics.MeanIoU is documented for class-index
  # labels, while the targets here are one-hot and the outputs are per-class
  # scores -- the logged IoU is presumably not meaningful; confirm.
  model.compile(optimizer='rmsprop',
                loss='categorical_crossentropy',
                metrics=['accuracy', tf.keras.metrics.MeanIoU(num_classes)])

  # Ceiling division: the pipelines keep the final partial batch, so one pass
  # over n images takes ceil(n / batch_size) steps (floor would also give 0
  # steps whenever n < batch_size).
  train_steps = (len(train_seq) + batch_size - 1) // batch_size
  valid_steps = (len(valid_seq) + batch_size - 1) // batch_size

  model.fit(x=train_ds,
            epochs=epochs,
            steps_per_epoch=train_steps,
            validation_data=valid_ds,
            validation_steps=valid_steps,
            callbacks=callbacks)

  print('Train on final...')
  # The optimizer is not re-created, so this second fit starts from the
  # learning rate left by ReduceLROnPlateau during the first one.
  model.fit(x=new_train_ds,
            epochs=epochs,
            steps_per_epoch=len(new_train_seq),
            validation_data=new_valid_ds,
            validation_steps=len(new_valid_seq),
            callbacks=callbacks)

  return model

# Train the hand-written network (custom=False) on Bipbip / Haricot and write
# its submission file to Drive.
model = GetModelFor('Bipbip', 'Haricot', False)
PredictDatasets('/content/drive/MyDrive/Colab Notebooks/CodaLab2/submission_final.json',
                model)

# Same procedure with the keras-unet model (custom=True), written to a
# separate submission file.
custom_model = GetModelFor('Bipbip', 'Haricot', True)
PredictDatasets('/content/drive/MyDrive/Colab Notebooks/CodaLab2/submission_c_final.json',
                custom_model)

### Results

https://wandb.ai/lrsb/codalab2/runs/2n7mtkb9 (Score on Bipbip Haricot: 0.6443)

https://wandb.ai/lrsb/codalab2/runs/31hoh2e5 (Score on Bipbip Haricot: 0.6149)

# Utilities

### Display results

In [None]:
# Load a saved submission and draw the predicted mask of one image.
with open(os.path.join('/content/drive/MyDrive/Colab Notebooks/CodaLab2', 'submission.json'), 'r') as f:
  submission_dict = json.load(f)
  # Key of the image to display (image file name without extension).
  img_name = 'Bipbip_mais_im_04121'
  img_shape = submission_dict[img_name]['shape']

  rle_encoded_crop = submission_dict[img_name]['segmentation']['crop']
  rle_encoded_weed = submission_dict[img_name]['segmentation']['weed']

  # Reconstruct crop and weed binary masks
  crop_mask = rle_decode(rle_encoded_crop, shape=img_shape)
  weed_mask = rle_decode(rle_encoded_weed, shape=img_shape)

  # Reconstruct original mask
  # weed_mask * 2 allows to convert ones into target 2 (weed label)
  reconstructed_mask = crop_mask + (weed_mask * 2)


  # Just for visualisation purposes, save RGB reconstructed mask
  # Use again the dictionary in 'RGBtoTarget.txt'.
  # img_shape is a list (loaded from JSON), so `+ [3]` appends the channel axis.
  reconstructed_rgb_arr = np.zeros(shape=img_shape + [3])
  reconstructed_rgb_arr[reconstructed_mask == 1] = [255, 255, 255]
  reconstructed_rgb_arr[reconstructed_mask == 2] = [216, 67, 82]

  reconstructed_rgb_img = Image.fromarray(
      np.uint8(reconstructed_rgb_arr))
  
  # `display` is not defined in this file; presumably the IPython builtin.
  display(reconstructed_rgb_img)

### Test dataset

In [None]:
import time
from matplotlib import cm
import matplotlib.pyplot as plt

# Render matplotlib figures inside the notebook output.
%matplotlib inline

# Augmentation enabled, validation_split 0, batch size 1. Despite its name,
# `valid_dataset` holds the whole tuple returned by GetDataset; element 0 is
# the training pipeline.
valid_dataset = GetDataset('/content/Development_Dataset/Training', 'Bipbip', 'Haricot', True, 0, 1, None)

iterator = iter(valid_dataset[0])

# 3 rows x 12 columns: columns 0-3 show the inputs, 4-7 the targets and
# 8-11 the predictions of the same four samples.
fig, ax = plt.subplots(3, 12, figsize=(30, 8))

for i in range(0, 3):
  for j in range(0, 4):
    augmented_img, target = next(iterator)
    augmented_img = augmented_img[0]   # First element
    # Re-add the batch dimension expected by predict.
    prediction = model.predict(x=np.expand_dims(augmented_img, axis=0))

    # Collapse the one-hot / per-class score axis into class indices.
    target = tf.expand_dims(tf.argmax(target, -1), -1)
    prediction = tf.argmax(prediction, -1)[0]

    # Paint every predicted class with its display colour.
    prediction_img = np.zeros([prediction.shape[0], prediction.shape[1], 3])
    for k in range(0, num_classes):
      prediction_img[np.where(prediction == k)] = class_colors[k]

    target = np.array(target[0, ..., 0])   # First element (squeezing channel dimension)

    # Same colouring for the ground-truth classes.
    target_img = np.zeros([target.shape[0], target.shape[1], 3])
    for k in range(0, num_classes):
      target_img[np.where(target == k)] = class_colors[k]

    ax[i][j].imshow(np.uint8(augmented_img))
    ax[i][4+j].imshow(np.uint8(target_img))
    ax[i][8+j].imshow(np.uint8(prediction_img))

plt.show()

### Predict dataset using saved model

In [None]:
# Custom objects needed to deserialize the saved model.
# NOTE(review): `meanIoU` is not defined anywhere in the visible source --
# presumably a custom metric from an earlier version of the notebook; this
# cell raises NameError unless it is defined first. Confirm.
dependencies = {
    'meanIoU': meanIoU
}

# Load the saved model from Drive; this rebinds the global `model`.
model = tf.keras.models.load_model('/content/drive/MyDrive/Colab Notebooks/CodaLab2/model-best.h5', custom_objects=dependencies)

# Predict the whole test set with the loaded model and write submission.json.
PredictDatasets('/content/drive/MyDrive/Colab Notebooks/CodaLab2/submission.json', model)