# Combine the 2 networks into one prediction over the database

In [None]:
import tensorflow as tf
import numpy as np
from PIL import Image
import os
from numpy.random import shuffle
import shutil
import patchify as p
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import time
from matplotlib import cm

# Set the seed for random operations.
# This makes our experiments reproducible.
SEED = 1234
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Script Comprehension

This notebook only shows how to compute and merge the predictions from the two networks.
The two networks were trained in separate notebooks similar to the ones provided in the submission, except for a custom generator class, which is reported below.

This generator takes an additional parameter called 'class_index', which removes the pixels of the non-pertinent class from the masks (e.g., for the 'crop' network, every mask pixel with value 2 is replaced with 0).

In [None]:
class create_generator(tf.keras.utils.Sequence):
  """Keras ``Sequence`` that yields one ``(image, mask)`` pair per index.

  File names are read from ``<dataset_dir>/img.txt``; images are loaded from
  ``<dataset_dir>/Images`` and RGB masks from ``<dataset_dir>/Masks``.

  Beware: ``class_index`` destroys the mask for the given index, i.e. every
  mask pixel equal to ``class_index`` is overwritten with 0.

  Args:
    dataset_dir: folder containing ``img.txt``, ``Images`` and ``Masks``.
    which_subset: augmentation is applied only when this is ``'training'``.
    img_generator: object exposing ``get_random_transform`` and
      ``apply_transform`` (e.g. an ``ImageDataGenerator``); optional.
    mask_generator: object exposing ``apply_transform``; optional.
    preprocessing_function: callable applied to the image array; optional.
    class_index: class value to erase from the mask; optional.
  """

  def __init__(self, dataset_dir, which_subset, img_generator=None, mask_generator=None, 
               preprocessing_function=None, class_index = None):
    super().__init__()

    subset_file = os.path.join(dataset_dir, 'img.txt')
    with open(subset_file, 'r') as f:
      subset_filenames = [line.strip() for line in f]

    self.which_subset = which_subset
    self.dataset_dir = dataset_dir
    self.subset_filenames = subset_filenames
    self.img_generator = img_generator
    self.mask_generator = mask_generator
    self.preprocessing_function = preprocessing_function
    self.class_index = class_index

  def __len__(self):
    """Return the number of samples listed in ``img.txt``."""
    return len(self.subset_filenames)

  def __getitem__(self, index):
    """Return the ``(image, mask)`` pair at ``index``."""
    return self.__data__generation(index)

  def __data__generation(self, index):
    """Load, optionally augment and post-process the sample at ``index``.

    Returns ``(None, None)`` when ``index`` is past the last sample,
    otherwise ``(img_arr, mask)`` with the mask cast to ``float32``.
    """
    if index >= len(self.subset_filenames):
      return None, None
    curr_filename = self.subset_filenames[index]

    # Context manager so the image file handle is released right away.
    with Image.open(os.path.join(self.dataset_dir, 'Images', curr_filename)) as img:
      img_arr = np.array(img)
    mask_arr = read_rgb_mask(os.path.join(self.dataset_dir, 'Masks', curr_filename))

    # Masks are handled as (H, W, 1).
    if len(mask_arr.shape) == 2:
      mask_arr = np.expand_dims(mask_arr, -1)

    # Default to the unmodified mask. Previously a 'training' subset without
    # both generators left the mask as None and broke the steps below.
    out_mask = mask_arr

    augment = (self.which_subset == 'training'
               and self.img_generator is not None
               and self.mask_generator is not None)
    if augment:
      # The same random transform is applied to the image and to the mask.
      img_t = self.img_generator.get_random_transform(img_arr.shape)
      img_arr = self.img_generator.apply_transform(img_arr, img_t)

      # Transform each class as its own binary layer so that interpolation
      # cannot blend two class ids into a third, unrelated id.
      out_mask = np.zeros_like(mask_arr)
      for c in np.unique(mask_arr):
        if c > 0:
          curr_class_arr = np.float32(mask_arr == c)
          curr_class_arr = self.mask_generator.apply_transform(curr_class_arr, img_t)
          # Cast back to integers, then restore the class id.
          curr_class_arr = np.uint8(curr_class_arr)
          curr_class_arr = curr_class_arr * c
          out_mask += curr_class_arr

    if self.class_index is not None:
      out_mask[out_mask == self.class_index] = 0

    if self.preprocessing_function is not None:
      img_arr = self.preprocessing_function(img_arr)

    return img_arr, np.float32(out_mask)

Converting RGB masks to class indices (and back)


In [None]:
# Converts an RGB mask image to an array of target class values
def read_rgb_mask(img_path):
    '''
    Load an RGB mask and translate its colours into target class values.

    img_path: path to the mask file
    Returns a numpy array with the mask's height and width holding the
    target value of every pixel; colours not listed below map to 0.
    '''
    rgb = np.array(Image.open(img_path))
    targets = np.zeros(rgb.shape[:2], dtype=rgb.dtype)

    # RGB -> target pairs, following the dictionary in 'RGBtoTarget.txt'
    color_to_target = (
        ([216, 124, 18], 0),
        ([255, 255, 255], 1),
        ([216, 67, 82], 2),
    )
    for color, target in color_to_target:
        # A pixel matches only when all of its channels equal the colour.
        targets[np.all(rgb == color, axis=-1)] = target

    return targets

def mask_to_rgb(mask_arr):
    '''
    Turn a (H, W, 1) array of target values into a (H, W, 3) RGB array.

    Target 0 becomes black, 1 becomes white and 2 becomes [216, 67, 82];
    any other value is left black. The result keeps the dtype of mask_arr.
    '''
    class_ids = np.squeeze(mask_arr, axis=2)
    height, width = mask_arr.shape[0], mask_arr.shape[1]
    rgb = np.zeros((height, width, 3), dtype=mask_arr.dtype)

    palette = (
        (0, [0, 0, 0]),
        (1, [255, 255, 255]),
        (2, [216, 67, 82]),
    )
    for class_id, color in palette:
        rgb[class_ids == class_id] = np.array(color)

    return rgb

RLE Encoding

In [None]:
def rle_encode(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - foreground, 0 - background
    Returns the run lengths as a space-separated string of
    "start length" pairs, with 1-based starts over the flattened array.
    '''
    # Pad with background on both sides so every run has a start and an end.
    padded = np.concatenate([[0], img.flatten(), [0]])
    # 1-based positions where the value changes: start, end, start, end, ...
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn each end position into the length of its run.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))

Script for splitting the data into a training set and a validation set. It creates the corresponding .txt files (train.txt and val.txt).

In [None]:
# plant = 'haricot' OR 'mais' OR 'all'
# mode = 'all' OR 'bipbip' OR 'pead' OR 'roseau' OR 'weedelec'
def split_set(base_dir = './Development_Dataset/Training', split = 0.2, plant = 'haricot', mode = 'all', is_test = False):
    """Prepare the file lists (or the flat test folder) for a dataset root.

    With ``is_test=False`` the image names found under
    ``<base_dir>/<Dataset>/<Plant>/Images`` are shuffled and written, one
    name per line and without extension, to ``<base_dir>/train.txt`` and
    ``<base_dir>/val.txt``.

    With ``is_test=True`` every image of every dataset and plant is copied
    into a freshly created ``<base_dir>/temptest`` folder; ``plant``,
    ``mode`` and ``split`` are ignored in that case.

    Args:
        base_dir: root folder holding one sub-folder per dataset.
        split: fraction of the images assigned to the validation list.
        plant: 'haricot', 'mais' or 'all'.
        mode: 'all', 'bipbip', 'pead', 'roseau' or 'weedelec'.
        is_test: switch between the two behaviours described above.
    """
    all_datasets = ['Bipbip', 'Pead', 'Roseau', 'Weedelec']
    all_plants = ['haricot', 'mais']

    if is_test:
        # Start from an empty staging folder.
        path = os.path.join(base_dir, "temptest")
        if os.path.exists(path):
            shutil.rmtree(path)
        os.mkdir(path)

        for pl in all_plants:
            for dataset in all_datasets:
                images_dir = os.path.join(base_dir, dataset, pl.capitalize(), 'Images')
                for file in sorted(os.listdir(images_dir)):
                    shutil.copy(os.path.join(images_dir, file), path)
        return

    plants = all_plants if plant == 'all' else [plant]
    datasets = all_datasets if mode == 'all' else [mode.capitalize()]

    files_ls = []
    for dataset in datasets:
        for pl in plants:
            images_dir = os.path.join(base_dir, dataset, pl.capitalize(), 'Images')
            # os.listdir order is arbitrary: sort first so that a seeded
            # shuffle always yields the same split. splitext removes only
            # the extension, keeping any other dots in the file name.
            files_ls.extend(os.path.splitext(filename)[0] + '\n'
                            for filename in sorted(os.listdir(images_dir)))

    shuffle(files_ls)

    final_element = int(len(files_ls) * split)
    val_ls = files_ls[:final_element]
    train_ls = files_ls[final_element:]
    print('# images for training: ' + str(len(train_ls)))
    print('# images for validation: ' + str(len(val_ls)))

    with open(os.path.join(base_dir,"train.txt"), "w") as train_txt:
        train_txt.writelines(train_ls)

    with open(os.path.join(base_dir,"val.txt"), "w") as val_txt:
        val_txt.writelines(val_ls)

Code for handling tiling

In [None]:
def produce_patches(img_o, mask_o, window_size, step = 20, with_pad = True):
  """Cut an image and/or a mask into square patches.

  img_o: image array with 3 channels, or None
  mask_o: mask array with 1 channel, or None
  window_size: side length of a patch
  step: stride between consecutive patches
  with_pad: zero-pad the bottom/right so the patches tile the input exactly,
            which allows a precise reconstruction

  Returns (img_patches, mask_patches, pads). Each patch array is None when
  its input is None. pads holds the [rows, cols] padding that was added
  (taken from the mask when both inputs are given), or None if nothing
  was padded.
  """

  def _pad_to_grid(arr):
    # Grow the array so that (size - window_size) is a multiple of step.
    excess = np.array([arr.shape[0] - window_size, arr.shape[1] - window_size])
    n_steps = np.ceil(excess/step)
    covered = np.multiply([step, step], n_steps)
    extra = window_size+covered-np.array(arr.shape[:2])
    padded = np.pad(arr, ((0, int(extra[0])), (0, int(extra[1])), (0, 0)), 'constant')
    return padded, extra

  def _flat_patches(arr, channels):
    # patchify returns a grid of patches; flatten it in row-major order.
    grid = p.patchify(arr, (window_size, window_size, channels), step=step)
    rows, cols = grid.shape[0], grid.shape[1]
    return np.array([grid[r, c, 0] for r in range(rows) for c in range(cols)])

  img = mask = pads = None
  if with_pad:
    if img_o is not None:
      img, pads = _pad_to_grid(img_o)
    if mask_o is not None:
      mask, pads = _pad_to_grid(mask_o)
  else:
    img, mask = img_o, mask_o

  img_patches = _flat_patches(img, 3) if img is not None else None
  mask_patches = _flat_patches(mask, 1) if mask is not None else None

  return img_patches, mask_patches, pads

def restore_with_interp(img_pa, or_shape, win_size, step, wi, hi):
    """Rebuild a full array from overlapping patches, averaging the overlaps.

    Patches are expected in row-major order: patch ``i`` is placed with its
    top-left corner at row ``(i // hi) * step`` and column ``(i % hi) * step``.
    Every output pixel is the mean of all patches covering it; pixels that no
    patch covers stay 0.

    img_pa: array of patches, indexed by patch number on the first axis
    or_shape: shape of the array to rebuild, must have length 3
    win_size: side length of a patch
    step: stride used when the patches were produced
    wi: number of patches along the rows (first axis)
    hi: number of patches along the columns (second axis)

    Returns the rebuilt array in the dtype of img_pa.
    Raises ValueError if or_shape is 2D.
    """
    if len(or_shape) == 2:
      raise ValueError("or_shape must be 3D")

    out_dtype = img_pa.dtype
    patches = img_pa.astype(np.float32, copy=False)

    accum = np.zeros(or_shape, np.float32)
    # Number of patches covering each pixel; the trailing axis of size 1
    # broadcasts over the channels.
    coverage = np.zeros((int(or_shape[0]), int(or_shape[1]), 1), np.float32)

    i = 0
    for w in range(wi):
      top = w*step
      for h in range(hi):
        left = h*step
        accum[top:top+win_size, left:left+win_size] += patches[i]
        coverage[top:top+win_size, left:left+win_size] += 1
        i += 1

    # Dividing by the real coverage handles edges, corners and non-square
    # grids uniformly. The max() guards against division by zero on
    # pixels that were never covered.
    result = accum / np.maximum(coverage, 1)

    if out_dtype != np.float32:
      result = result.astype(out_dtype)

    return result
  
def reconstruct_patches(img_patches, mask_patches, or_shape, pads, step=20):
  """Rebuild an image and/or a mask from their patches.

  img_patches: array of image patches, or None
  mask_patches: array of mask patches, or None
  or_shape: shape (rows, cols, channels) of the original, unpadded array
  pads: [rows, cols] padding added when the patches were produced;
        None is treated as no padding
  step: stride used when the patches were produced

  Returns (img, mask), each cropped back to the first two dimensions of
  or_shape, or None when the corresponding patches are None.
  Raises ValueError if the number of patches does not match the grid
  implied by the padded shape, the patch size and the step.
  """
  target_rows, target_cols, channel = or_shape[0], or_shape[1], or_shape[2]
  if pads is None:
    pads = (0, 0)
  # Plain ints keep the slicing and the floor divisions below exact.
  padded_shape = [int(target_rows + pads[0]), int(target_cols + pads[1]), channel]

  def _rebuild(patches):
    # Shared by image and mask; the patch size is read per input so that a
    # missing image no longer breaks mask-only reconstruction.
    if patches is None:
      return None
    win_size = patches.shape[1]
    wi = (padded_shape[0] - patches.shape[1])//step + 1
    he = (padded_shape[1] - patches.shape[2])//step + 1
    if patches.shape[0] != wi * he:
      raise ValueError("hw != h*w")
    full = restore_with_interp(patches, padded_shape, win_size, step, wi, he)
    # Drop the padding again.
    return full[:target_rows, :target_cols]

  return _rebuild(img_patches), _rebuild(mask_patches)

Code used for counting the number of pixels of each value in a mask. Optimized to be fast.

In [None]:
# Return a dictionary of distinct values of mask
def f_pp(mask, nmax=1000):
    """Count how many pixels of each distinct value a mask contains.

    mask: numpy array of any shape
    nmax: unused, kept so existing calls keep working

    Returns a dict mapping each distinct value (cast to np.uint8) to the
    number of pixels holding it.
    """
    # One vectorised pass instead of a Python-level set plus one full scan
    # of the mask per distinct value.
    values, counts = np.unique(np.ravel(mask), return_counts=True)
    return {np.uint8(value): int(count) for value, count in zip(values, counts)}

Utility script that saves the patches in a folder so that they are ready to be loaded quickly

In [None]:
def save_patch(save_fld, sub, dat_dir, win_size, exclude = 0.95):
  """Cut every image of a subset into patches and save them to a folder.

  The file names listed in train.txt or val.txt (inside dat_dir) are loaded
  together with their masks and cut into non-overlapping
  win_size x win_size patches. Patches are written to <save_fld>/Images and
  <save_fld>/Masks, and their names are listed in <save_fld>/img.txt.

  save_fld: output folder; it is deleted and recreated if it already exists
  sub: 'training' (reads train.txt) or 'validation' (reads val.txt)
  dat_dir: dataset root containing the .txt lists and the dataset folders
  win_size: side length of a patch
  exclude: a patch is kept only if its number of pixels with value 0 is
           below this fraction of the patch area

  Raises ValueError if sub is neither 'training' nor 'validation'.
  """
  list_names = {'training': 'train.txt', 'validation': 'val.txt'}
  if sub not in list_names:
    # Fail before touching the file system; previously an unknown subset
    # ended in an obscure error from open(None).
    raise ValueError("sub must be 'training' or 'validation', got %r" % (sub,))
  subset_file = os.path.join(dat_dir, list_names[sub])

  with open(subset_file, 'r') as f:
    subset_filenames = [line.strip() for line in f]

  image_names = []
  folder = "Images"
  folder_m = "Masks"

  if os.path.exists(save_fld):
    shutil.rmtree(save_fld)
  os.makedirs(save_fld)
  os.makedirs(os.path.join(save_fld, folder))
  os.makedirs(os.path.join(save_fld, folder_m))

  patch_area = win_size*win_size

  for i, curr_filename in enumerate(subset_filenames):
    # File names look like <Dataset>_<plant>_...
    split_filename = curr_filename.split('_')
    curr_dataset = split_filename[0]
    curr_plant = split_filename[1]
    plant_dir = os.path.join(dat_dir, curr_dataset, curr_plant.capitalize())

    # Roseau images are stored as .png, the other datasets as .jpg.
    extension = '.png' if curr_dataset == 'Roseau' else '.jpg'
    with Image.open(os.path.join(plant_dir, 'Images', curr_filename + extension)) as img:
      img_arr = np.array(img)

    mask_arr = read_rgb_mask(os.path.join(plant_dir, 'Masks', curr_filename + '.png'))
    mask_arr = np.expand_dims(mask_arr, -1)

    # We put WITH_PAD = FALSE in order to avoid creating overlapping images in training.
    # This is done because introducing overlapping images would cause redundancy.
    img_p, mask_p, _ = produce_patches(img_arr, mask_arr, win_size, step=win_size, with_pad=False)

    for j in range(mask_p.shape[0]):
      dict_colors = f_pp(mask_p[j])
      if dict_colors.get(0, 0) < patch_area*exclude:
        filename = "img_" + str(len(image_names)) + ".png"
        Image.fromarray(img_p[j]).save(os.path.join(save_fld, folder, filename))
        Image.fromarray(mask_to_rgb(mask_p[j])).save(os.path.join(save_fld, folder_m, filename))
        image_names.append(filename + '\n')
    print(i+1, "images processed. Still: ", len(subset_filenames)-i-1)

  with open(os.path.join(save_fld,"img.txt"), "w") as val_txt:
    val_txt.writelines(image_names)

# Data Preparation

In [None]:
cwd = './'

# Root of the dataset and its two sub-folders used by this notebook.
dataset_dir = "Development_Dataset"
training_dir, test_dir = (
    os.path.join(dataset_dir, subset) for subset in ("Training", "Test_Dev")
)

In [None]:
# Restrict the train/validation split to a single dataset and a single plant.
mode = "bipbip"
plant = "mais"

# Writes train.txt and val.txt inside training_dir.
split_set(base_dir= training_dir, mode=mode, plant=plant)

Dictionary mapping each class to its RGB color, used for visualization

In [None]:
# RGB colour associated with each class name.
# NOTE(review): the background colour here is [254, 124, 18], while
# read_rgb_mask matches [216, 124, 18] for class 0 - confirm which value
# 'RGBtoTarget.txt' actually specifies.
dict_rgb = {
    "background" : [254, 124, 18],
    "crop" : [255, 255, 255],
    "weed" : [216, 67, 82]
}

In [None]:
# Patch side length used as input size of the first and second network.
win_size_1, win_size_2 = 512, 800
# Stride between patches: the window size minus an overlap of 6 pixels.
step1, step2 = (size - 6 for size in (win_size_1, win_size_2))

In [None]:
def create_model_1(depth, start_f, num_classes, input_shape, trainable = None):
    """Build the first segmentation network: DenseNet121 encoder + decoder.

    depth: number of decoder stages (each one upsamples by a factor of 2)
    start_f: number of filters of the first decoder stage, halved per stage
    num_classes: number of channels of the softmax prediction layer
    input_shape: shape of the input images
    trainable: optional argument for model.load_weights (a weights path);
               when None no weights are loaded on top of ImageNet

    Returns the Sequential model with every encoder layer frozen.
    """
    model = tf.keras.Sequential()

    # Encoder: ImageNet-pretrained DenseNet121 without its classifier head
    encoder = tf.keras.applications.DenseNet121(weights='imagenet', include_top=False, input_shape=input_shape)
    model.add(encoder)

    # Decoder: upsample -> conv -> batch norm -> ReLU, repeated depth times
    filters = start_f
    for _ in range(depth):
        model.add(tf.keras.layers.UpSampling2D(2, interpolation='bilinear'))
        model.add(tf.keras.layers.Conv2D(filters=filters,
                                         kernel_size=(3, 3),
                                         strides=(1, 1),
                                         padding='same'))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.ReLU())
        filters //= 2

    # Prediction layer: per-pixel class probabilities
    model.add(tf.keras.layers.Conv2D(filters=num_classes,
                                     kernel_size=(1, 1),
                                     strides=(1, 1),
                                     padding='same',
                                     activation='softmax'))

    # The encoder is frozen whether or not weights are loaded.
    for layer in encoder.layers:
        layer.trainable = False
    if trainable is not None:
        model.load_weights(trainable)

    return model

In [None]:
def create_model_2(depth, start_f, num_classes, input_shape, trainable = None):
    """Build the second segmentation network: DenseNet121 encoder + decoder.

    depth: number of decoder stages (each one upsamples by a factor of 2)
    start_f: number of filters of the first decoder stage, halved per stage
    num_classes: number of channels of the softmax prediction layer
    input_shape: shape of the input images
    trainable: optional argument for model.load_weights (a weights path);
               when None no weights are loaded on top of ImageNet

    Returns the Sequential model with every encoder layer frozen.
    """
    layers = tf.keras.layers
    model = tf.keras.Sequential()

    # Encoder
    backbone = tf.keras.applications.DenseNet121(weights='imagenet', include_top=False, input_shape=input_shape)
    model.add(backbone)

    # Decoder
    n_filters = start_f
    for _stage in range(depth):
        model.add(layers.UpSampling2D(2, interpolation='bilinear'))
        model.add(layers.Conv2D(filters=n_filters, kernel_size=(3, 3),
                                strides=(1, 1), padding='same'))
        model.add(layers.BatchNormalization())
        model.add(layers.ReLU())
        n_filters = n_filters // 2

    # Prediction Layer
    model.add(layers.Conv2D(filters=num_classes, kernel_size=(1, 1),
                            strides=(1, 1), padding='same',
                            activation='softmax'))

    # Freeze the encoder first, then optionally restore the saved weights.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False
    if trainable is not None:
        model.load_weights(trainable)

    return model

In [None]:
# Build the first network for win_size_1 x win_size_1 patches; the value
# given as 'trainable' is handed to model.load_weights by create_model_1.
first_network = create_model_1(depth=5, 
                     start_f=512, 
                     num_classes=3, input_shape = (win_size_1, win_size_1, 3), trainable = "/content/class1/weights")

first_network.summary()

In [None]:
# Build the second network for win_size_2 x win_size_2 patches; the value
# given as 'trainable' is handed to model.load_weights by create_model_2.
second_network = create_model_2(depth=5, 
                     start_f=512, 
                     num_classes=3, input_shape = (win_size_2, win_size_2, 3), trainable = "/content/class2/weights")

# Summarise the network built in this cell (previously this printed the
# first network a second time).
second_network.summary()

In [None]:
# With is_test=True, split_set copies the images of every dataset and plant
# into <test_dir>/temptest; the 'mode' and 'plant' arguments do not affect
# that branch.
split_set(base_dir=test_dir, mode='bipbip', plant='haricot', is_test = True)
temptest = os.path.join(test_dir, "temptest")

In [None]:
import json
import zipfile

# Predict every staged test image with both networks, merge the two
# predictions into one mask and collect the RLE-encoded result.
images = os.listdir(temptest)
submission_dict = {}

for img_name in images:

  # Load the image; the context manager releases the file handle.
  with Image.open(os.path.join(temptest, img_name)) as imgs:
    img_arr = np.array(imgs)
  or_shape = img_arr.shape

  predictions_class_1 = []
  predictions_class_2 = []
  # Replace win_size with input size of network, step with win_size - overlap(typically 6)
  img_p1, _, pads1 = produce_patches(img_arr, None, win_size_1, step = step1)
  img_p2, _, pads2 = produce_patches(img_arr, None, win_size_2, step = step2)

  # Both networks end with a softmax layer: one probability map per patch.
  for i in range(img_p1.shape[0]):
    out_softmax1 = first_network.predict(x=tf.expand_dims(img_p1[i], axis=0))
    predictions_class_1.append(out_softmax1)

  for i in range(img_p2.shape[0]):
    out_softmax2 = second_network.predict(x=tf.expand_dims(img_p2[i], axis=0))
    predictions_class_2.append(out_softmax2)

  predictions_class_1 = np.concatenate(predictions_class_1, axis = 0)
  predictions_class_2 = np.concatenate(predictions_class_2, axis = 0)
  # Only replace step
  predicted_class_1, _ = reconstruct_patches(predictions_class_1, None, or_shape, pads1, step=step1)
  predicted_class_2, _ = reconstruct_patches(predictions_class_2, None, or_shape, pads2, step=step2)

  # Class index 1 was destroyed for network 1 and class index 2 for
  # network 2, so channel 1 is taken from network 2 and channel 2 from
  # network 1; the background channel is the mean of the two networks.
  print(img_name)
  background_prediction = (predicted_class_1[:,:,0] + predicted_class_2[:,:,0])/2
  predicted_class = np.stack([background_prediction, predicted_class_2[:,:,1], predicted_class_1[:,:,2]], axis=-1)
  predicted_class = tf.argmax(predicted_class, -1)
  mask_arr = np.array(predicted_class)

  # Show network 1, network 2, the merged mask and the input side by side.
  fig, ax = plt.subplots(1, 4)
  ax[0].imshow(np.uint8(np.argmax(predicted_class_1, -1)))
  ax[1].imshow(np.uint8(np.argmax(predicted_class_2, -1)))
  ax[2].imshow(np.uint8(mask_arr))
  ax[3].imshow(np.uint8(img_arr))
  plt.show()

  # File names look like <Dataset>_<plant>_...
  split_filename = img_name.split('_')
  curr_dataset = split_filename[0]
  curr_plant = split_filename[1]
  curr_plant = curr_plant.capitalize()

  # The submission is keyed by the file name without its extension.
  img_name = os.path.splitext(img_name)
  img_name = img_name[0]

  submission_dict[img_name] = {}
  submission_dict[img_name]['shape'] = or_shape
  submission_dict[img_name]['team'] = curr_dataset
  submission_dict[img_name]['crop'] = curr_plant
  submission_dict[img_name]['segmentation'] = {}

  # RLE encoding
  # crop
  rle_encoded_crop = rle_encode(mask_arr == 1)
  # weed
  rle_encoded_weed = rle_encode(mask_arr == 2)

  submission_dict[img_name]['segmentation']['crop'] = rle_encoded_crop
  submission_dict[img_name]['segmentation']['weed'] = rle_encoded_weed

# Finally, save the results into the submission.json file
with open('submission.json', 'w') as f:
    json.dump(submission_dict, f)

# Closing the archive is what finalises it on disk, hence the context manager.
with zipfile.ZipFile('submission.zip', mode='w') as archive:
    archive.write("submission.json")