This is the architecture that gave us the best result in Pead Mais. 

It is a very simple U-Net with fewer than a million parameters. It was trained on square tiles of 512x512 pixels, as described in our report.

Here, filtered tiles are used. You need to first run the filter_tiles.py script in the "script" folder.


Note that this notebook is very similar to notebook unet_ADD_SKIP, so please refer to that one for commented code.

In [None]:
 from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import os

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Reproducibility and training hyper-parameters
SEED = 1234
img_w = 512
img_h = 512
bs = 8
lr = 1e-3
num_epochs = 100
patience = 7
num_classes = 3


# kfold
val_split_perc = 0.2
enable_kfold = True
# With k-fold disabled, the number of splits is derived from the validation share
k = 5 if enable_kfold else int(1 / val_split_perc)


# boolean flags
# what you want to include in training phase
# each row is [enabled, team, crop, image extension]
bool_arr = [
    [False, "Bipbip", "Haricot", ".jpg"],
    [False, "Bipbip", "Mais", ".jpg"],
    [False, "Pead", "Haricot", ".jpg"],
    [True, "Pead", "Mais", ".jpg"],
    [False, "Roseau", "Haricot", ".png"],
    [False, "Roseau", "Mais", ".png"],
    [False, "Weedelec", "Haricot", ".jpg"],
    [False, "Weedelec", "Mais", ".jpg"],
]

# what you want to test
# to generate a valid json though, you need to include all of these
bool_arr_test = [
    [True, "Bipbip", "Haricot", ".jpg"],
    [True, "Bipbip", "Mais", ".jpg"],
    [True, "Pead", "Haricot", ".jpg"],
    [True, "Pead", "Mais", ".jpg"],
    [True, "Roseau", "Haricot", ".png"],
    [True, "Roseau", "Mais", ".png"],
    [True, "Weedelec", "Haricot", ".jpg"],
    [True, "Weedelec", "Mais", ".jpg"],
]

model_name = 'unet_pead_mais'

# Fix TensorFlow's global random seed to SEED
tf.random.set_seed(SEED) 

In [None]:
# Mount Google Drive under /content/drive (requires the Colab runtime)
from google.colab import drive
drive.mount('/content/drive')

In [None]:
cwd = os.getcwd() # should be /content

dataset_version = 'Filtered_Development_Dataset_512'

path_to_zip = '/content/drive/My\ Drive/challenge2/dataset/' + dataset_version + '.zip'

if not os.path.exists(os.path.join(cwd, dataset_version)):
    !unzip {path_to_zip}

In [None]:
import pandas as pd

filenames_images = []
filenames_masks = []

base_folder = os.path.join(cwd, dataset_version, "Training")

# Each bool_arr row is [enabled, team, crop, extension]. Iterating over the
# rows themselves (instead of a fixed range of 8) keeps this loop valid when
# rows are added or removed.
for enabled, team_name, crop_name, *_ in bool_arr:
  if not enabled:
    continue

  base_curr = os.path.join(base_folder, team_name, crop_name)
  images_dir = os.path.join(base_curr, "Images")
  masks_dir = os.path.join(base_curr, "Masks")

  # Images and masks are paired purely by their sorted position
  fn_images = [os.path.join(images_dir, name) for name in sorted(os.listdir(images_dir))]
  fn_masks = [os.path.join(masks_dir, name) for name in sorted(os.listdir(masks_dir))]

  # A count mismatch would silently shift every following image/mask pair
  if len(fn_images) != len(fn_masks):
    raise ValueError(
        "{} has {} images but {} masks".format(base_curr, len(fn_images), len(fn_masks)))

  filenames_images += fn_images
  filenames_masks += fn_masks

# One row per tile: path of the image and path of its mask
data = pd.DataFrame({"images": filenames_images, "masks": filenames_masks})

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# VGG16 preprocessing function; it is not passed to any generator below
prepr_func = tf.keras.applications.vgg16.preprocess_input

# Geometric augmentation shared by images and masks, so both generators are
# guaranteed to be configured with the same transformations
augmentation_args = dict(rotation_range=30,
                         width_shift_range=10,
                         height_shift_range=10,
                         zoom_range=0.3,
                         horizontal_flip=True,
                         vertical_flip=True,
                         fill_mode='reflect')

# Create training ImageDataGenerator object
# We need two different generators for images and corresponding masks
# Images are additionally rescaled to [0, 1]
train_img_data_gen = ImageDataGenerator(rescale=1./255, **augmentation_args)

# Masks hold class colours, so they must not be interpolated: with the default
# interpolation_order=1, rotation/zoom blends colours at class borders and
# those pixels no longer match any class value in prepare_target.
# Order 0 (nearest neighbour) keeps the original colour values.
train_mask_data_gen = ImageDataGenerator(interpolation_order=0, **augmentation_args)

# Create validation and test ImageDataGenerator objects (no augmentation)
valid_img_data_gen = ImageDataGenerator(rescale=1./255)
valid_mask_data_gen = ImageDataGenerator()

In [None]:
def prepare_target(x_, y_):
    '''
    Turn a colour mask into a single-channel class-index target.

    A pixel is matched to a value when at least one of its channels equals it:
    values 0 and 124 contribute class 0, value 255 contributes class 1 and
    value 67 contributes class 2. The contributions are summed.

    Parameters
    ----------
    x_: image tensor, returned untouched
    y_: mask tensor whose last axis holds the colour channels

    Returns
    -------
    (x_, target) where target is float32 with a last axis of size 1
    '''
    def _any_channel_equals(value):
        # 1.0 where any channel of the pixel equals `value`, 0.0 elsewhere
        return tf.cast(tf.reduce_any(y_ == value, axis=-1, keepdims=True), tf.float32)

    background_a = _any_channel_equals(0) * 0
    background_b = _any_channel_equals(124) * 0
    class_one = _any_channel_equals(255) * 1
    class_two = _any_channel_equals(67) * 2

    target = background_a + background_b + class_one + class_two
    return x_, target

In [None]:
from tensorflow.keras.layers import *

def upsampleLayer(in_layer, concat_layer, input_size):
  '''
  Decoder building block: upsample, merge with a skip connection, convolve.

  Parameters
  ----------
  in_layer: tensor to upsample (2x2 transposed convolution with stride 2)
  concat_layer: tensor concatenated with the upsampled one
  input_size: number of filters of both the transposed and the 3x3 convolution

  Returns
  -------
  Output tensor of the final ReLU convolution
  '''
  upsampled = Conv2DTranspose(filters=input_size,
                              kernel_size=(2, 2),
                              strides=(2, 2),
                              padding='same')(in_layer)
  merged = concatenate([concat_layer, upsampled])
  return Conv2D(filters=input_size,
                kernel_size=(3, 3),
                activation='relu',
                padding='same')(merged)

def create_model(num_classes):
  '''
  Build the U-Net: three conv+pool encoder stages, a bottleneck, and three
  upsampling stages with skip connections.

  Parameters
  ----------
  num_classes: number of channels of the final softmax layer

  Returns
  -------
  tf.keras.Model taking (img_h, img_w, 3) inputs
  '''
  inputs_1 = tf.keras.Input((img_h, img_w, 3))

  # encoder: keep each pre-pooling activation for the skip connections
  skip_tensors = []
  current = inputs_1
  for n_filters in (32, 64, 128):
    current = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(current)
    skip_tensors.append(current)
    current = MaxPool2D(pool_size=(2, 2))(current)

  # bottleneck
  current = Conv2D(256, (3, 3), activation='relu', padding='same')(current)

  # decoder: consume the skip connections from deepest to shallowest
  for n_filters, skip in zip((128, 64, 32), reversed(skip_tensors)):
    current = upsampleLayer(in_layer=current, concat_layer=skip, input_size=n_filters)

  # per-pixel class probabilities
  outputs = Conv2D(num_classes, (1, 1), activation='softmax')(current)

  return tf.keras.Model(inputs=inputs_1, outputs=outputs)

In [None]:
def meanIoU(y_true, y_pred):
    '''
    Mean intersection-over-union over the non-background classes.

    y_pred holds per-class scores on its last axis; y_true holds class indices.
    Classes 1 .. num_classes-1 are averaged, class 0 is left out.
    '''
    # get predicted class from softmax
    predicted = tf.expand_dims(tf.argmax(y_pred, -1), -1)

    def _single_class_iou(class_id):
        # Binary maps selecting only pixels of `class_id`
        pred_hits = tf.cast(tf.where(predicted == class_id, 1, 0), tf.float32)
        true_hits = tf.cast(tf.where(y_true == class_id, 1, 0), tf.float32)
        overlap = tf.reduce_sum(true_hits * pred_hits)
        combined = tf.reduce_sum(true_hits) + tf.reduce_sum(pred_hits) - overlap
        # The small constant keeps the ratio defined when the class is absent
        return (overlap + 1e-7) / (combined + 1e-7)

    # exclude the background class 0
    return tf.reduce_mean([_single_class_iou(c) for c in range(1, num_classes)])

def cropIoU(y_true, y_pred):
    '''
    Intersection-over-union restricted to class 1.

    y_pred holds per-class scores on its last axis; y_true holds class indices.
    '''
    target_class = 1
    predicted = tf.expand_dims(tf.argmax(y_pred, -1), -1)

    # Binary maps selecting only pixels of the target class
    pred_hits = tf.cast(tf.where(predicted == target_class, 1, 0), tf.float32)
    true_hits = tf.cast(tf.where(y_true == target_class, 1, 0), tf.float32)

    overlap = tf.reduce_sum(true_hits * pred_hits)
    combined = tf.reduce_sum(true_hits) + tf.reduce_sum(pred_hits) - overlap
    # The small constant keeps the ratio defined when the class is absent
    return (overlap + 1e-7) / (combined + 1e-7)

In [None]:
def display(display_list):
  '''
  Plot the given arrays side by side in a single row.

  The panels are titled, in order, 'Input Image', 'True Mask' and
  'Predicted Mask', so at most three items can be shown.
  '''
  title = ['Input Image', 'True Mask', 'Predicted Mask']
  panel_count = len(display_list)

  plt.figure(figsize=(15, 15))
  for position, item in enumerate(display_list, start=1):
    plt.subplot(1, panel_count, position)
    plt.title(title[position - 1])
    plt.imshow(tf.keras.preprocessing.image.array_to_img(item))
    plt.axis('off')
  plt.show()

def create_mask(pred_mask):
  '''
  Return the class-index mask of the first element of a prediction batch,
  with a trailing axis of size 1.
  '''
  class_indices = tf.argmax(pred_mask, axis=-1)
  return tf.expand_dims(class_indices, axis=-1)[0]

def show_predictions(dataset=None, num=1):
  '''
  Plot model predictions next to the input image and the true mask.

  Parameters
  ----------
  dataset: optional dataset of (image batch, mask batch) pairs; when given,
           the first element of each of the first `num` batches is shown
  num: number of batches taken from `dataset`

  When no dataset is given, the module-level samples train_sample_image /
  train_sample_mask and valid_sample_image / valid_sample_mask are used.
  The module-level `model` produces the predictions in both cases.
  '''
  # Test explicitly for None rather than relying on the truthiness of the object
  if dataset is not None:
    for image, mask in dataset.take(num):
      pred_mask = model.predict(image)
      display([image[0], mask[0], create_mask(pred_mask)])
  else:
    display([train_sample_image, train_sample_mask,
             create_mask(model.predict(train_sample_image[tf.newaxis, ...]))])
    display([valid_sample_image, valid_sample_mask,
             create_mask(model.predict(valid_sample_image[tf.newaxis, ...]))])
    
class DisplayCallback(tf.keras.callbacks.Callback):
  '''Callback that plots the sample predictions after each epoch.'''

  def on_epoch_end(self, epoch, logs=None):
    show_predictions()
    finished_epoch = epoch + 1  # report the epoch number starting from 1
    print ('\nSample Prediction after epoch {}\n'.format(finished_epoch))

In [None]:
def get_callbacks(exp_dir, early_stop=True):
  '''
  Build the list of training callbacks.

  Parameters
  ----------
  exp_dir: experiment folder; checkpoints go to its 'ckpts' sub-folder
  early_stop: when True (default), add early stopping on 'val_loss' with the
              module-level `patience`, restoring the best weights

  Returns
  -------
  List with the display callback, the checkpoint callback and, if enabled,
  the early stopping callback, in that order
  '''
  ckpt_dir = os.path.join(exp_dir, 'ckpts')
  # exist_ok avoids the check-then-create race of a separate os.path.exists test
  os.makedirs(ckpt_dir, exist_ok=True)

  # Saves the whole model (not only the weights) each time val_loss improves
  ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(ckpt_dir, 'cp.ckpt'),
                                                    monitor='val_loss',
                                                    mode='min',
                                                    save_weights_only=False,
                                                    save_best_only=True,
                                                    verbose=0)

  callbacks = [DisplayCallback(), ckpt_callback]

  # Early Stopping
  if early_stop:
    es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                  patience=patience,
                                                  restore_best_weights=True)
    callbacks.append(es_callback)

  return callbacks

In [None]:
from datetime import datetime

# Root folder holding one sub-folder per experiment
exps_dir = '/content/drive/My Drive/challenge2/Teo/Models'
# exist_ok avoids the check-then-create race of a separate os.path.exists test
os.makedirs(exps_dir, exist_ok=True)

# Timestamp formatted as abbreviated month + day, then hour-minute-second
now = datetime.now().strftime('%b%d_%H-%M-%S')

# Folder of this run: "<model_name>_<timestamp>"
exp_dir = os.path.join(exps_dir, model_name + '_' + now)
os.makedirs(exp_dir, exist_ok=True)

In [None]:
from sklearn.model_selection import KFold

# K-fold cross-validation training.
# For every split: build the image/mask generators, wrap them in tf.data
# datasets, train a fresh model and record the best validation loss together
# with the meanIoU of that same epoch.
kfold = KFold(n_splits=k, random_state=SEED, shuffle=True)

# Index of the current split, used to name the per-split history file
loop_iteration = 0

# Best validation loss of each split, and the val_meanIoU at that epoch
loss_arr = []
meanIoU_arr = []

for train_index, val_index in kfold.split(X=data["images"], y=data["masks"]):
  training_data = data.iloc[train_index]
  validation_data = data.iloc[val_index]

  # Image and mask generators for training.
  # Both receive the same seed, shuffle flag and batch size; the pairing of
  # each image with its mask relies on that.
  train_img_data_generator = train_img_data_gen.flow_from_dataframe(training_data,
                                                                    x_col = "images",
                                                                    shuffle = True,
                                                                    class_mode = None,
                                                                    target_size=(img_h, img_w),
                                                                    batch_size=bs,
                                                                    interpolation="nearest",
                                                                    seed=SEED)
  train_mask_data_generator = train_mask_data_gen.flow_from_dataframe(training_data,
                                                                      x_col = "masks",
                                                                      shuffle = True,
                                                                      class_mode = None,
                                                                      target_size=(img_h, img_w),
                                                                      batch_size=bs,
                                                                      interpolation="nearest",
                                                                      seed=SEED)
  # Pair every image batch with the mask batch drawn at the same step
  train_gen = zip(train_img_data_generator, train_mask_data_generator)

  # Image and mask generators for validation (same seed for both, as above)
  valid_img_data_generator = valid_img_data_gen.flow_from_dataframe(validation_data,
                                                       x_col = "images",
                                                       shuffle = True,
                                                       class_mode = None,
                                                       target_size=(img_h, img_w),
                                                       batch_size=bs,
                                                       interpolation="nearest",
                                                       seed = SEED)
  valid_mask_data_generator = valid_mask_data_gen.flow_from_dataframe(validation_data, 
                                                      x_col = "masks",
                                                      shuffle = True,
                                                      class_mode = None,
                                                      target_size=(img_h, img_w),
                                                      batch_size=bs,
                                                      interpolation="nearest",
                                                      seed = SEED)
  valid_gen = zip(valid_img_data_generator, valid_mask_data_generator)

  #######################

  # Wrap the zipped generators in tf.data datasets; prepare_target converts
  # each 3-channel mask batch into the class-index target
  train_dataset = tf.data.Dataset.from_generator(lambda: train_gen,
                                               output_types=(tf.float32, tf.float32),
                                               output_shapes=([None, img_h, img_w, 3], [None, img_h, img_w, 3]))
  train_dataset = train_dataset.map(prepare_target)
  train_dataset = train_dataset.repeat()

  valid_dataset = tf.data.Dataset.from_generator(lambda: valid_gen, 
                                                output_types=(tf.float32, tf.float32),
                                                output_shapes=([None, img_h, img_w, 3], [None, img_h, img_w, 3]))
  valid_dataset = valid_dataset.map(prepare_target)
  valid_dataset = valid_dataset.repeat()

  # First element of one training batch; show_predictions() reads
  # train_sample_image / train_sample_mask as globals
  for imageBatch, maskBatch in train_dataset.take(1):
    train_sample_image, train_sample_mask = imageBatch[0], maskBatch[0]

  display([train_sample_image, train_sample_mask])

  # Same for validation (valid_sample_image / valid_sample_mask)
  for imageBatch, maskBatch in valid_dataset.take(1):
    valid_sample_image, valid_sample_mask = imageBatch[0], maskBatch[0] 

  #####################################################################################################################

  # A new model is created and compiled for every split
  model = create_model(num_classes)
  model.summary()

  loss = tf.keras.losses.SparseCategoricalCrossentropy() 

  optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

  metrics = ['accuracy', meanIoU, cropIoU]

  model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

  # NOTE(review): exp_dir is the same for every split, so get_callbacks points
  # each split's checkpoint at the same 'ckpts/cp.ckpt' path - confirm that
  # overwriting the previous split's checkpoint is intended
  callbacks = get_callbacks(exp_dir)

  # The datasets repeat indefinitely, so the number of steps per epoch is
  # taken from the length of the underlying generators
  history = model.fit(x=train_dataset,
              epochs=num_epochs,
              steps_per_epoch=len(train_img_data_generator),
              validation_data=valid_dataset,
              validation_steps=len(valid_img_data_generator), 
              callbacks=callbacks)
  
  # Keep the lowest validation loss and the val_meanIoU of that same epoch
  minLoss = min(history.history['val_loss'])
  minLossIndex = history.history['val_loss'].index(minLoss)
  loss_arr.append(minLoss)
  meanIoU_arr.append(history.history['val_meanIoU'][minLossIndex])
  
  # print metrics to file: one line with the metric name, one with its values
  with open(os.path.join(exp_dir, 'historySplit' + str(loop_iteration) + '.txt'), 'w') as f:
    for key in history.history.keys():
      print(str(key), file=f)
      print(history.history[key], file=f)

  # Without k-fold only the first split is trained
  if not enable_kfold:
    break
  
  loop_iteration += 1

# Average of the per-split results; `model` is left bound to the model of the
# last trained split
with open(os.path.join(exp_dir, 'cv_results' + '.txt'), 'w') as f2:
  print("avg loss = {}".format(np.mean(loss_arr)), file=f2)
  print("avg meanIoU = {}".format(np.mean(meanIoU_arr)), file=f2)

In [None]:
# List of (image path, team, crop, image name without extension) tuples
test_images = []

base_folder = os.path.join(cwd, dataset_version, "Test_Dev")

# Each bool_arr_test row is [enabled, team, crop, extension]. Iterating over
# the rows themselves (instead of a fixed range of 8) keeps this loop valid
# when rows are added or removed.
for enabled, team_name, crop_name, *_ in bool_arr_test:
  if not enabled:
    continue

  images_dir = os.path.join(base_folder, team_name, crop_name, "Images")

  for file_name in sorted(os.listdir(images_dir)):
    # splitext removes the extension whatever its length; slicing off the
    # last 4 characters was only correct for 3-letter extensions
    image_name = os.path.splitext(file_name)[0]
    test_images.append(
        (os.path.join(images_dir, file_name), team_name, crop_name, image_name))

In [None]:
def rle_encode(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - foreground, 0 - background
    Returns the runs as a space-separated string of "start length" pairs,
    where start is the 1-based position in the flattened array.
    An all-background mask gives an empty string.
    '''
    # Pad with background on both sides so runs touching the edges are closed
    padded = np.concatenate([[0], img.flatten(), [0]])

    # Positions (1-based) where the value changes: run starts and run ends alternate
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    starts = boundaries[::2]
    lengths = boundaries[1::2] - starts

    # Interleave as start_0, length_0, start_1, length_1, ...
    interleaved = np.column_stack((starts, lengths)).ravel()
    return ' '.join(map(str, interleaved))

In [None]:
# tiling utils
# Side length in pixels of the square tiles used when splitting test images
tile_size = 512

def get_patches(img_arr, size=256, stride=256):
    '''
    Split an image into square patches, scanned row by row.

    Parameters
    ----------
    img_arr: array whose first two axes are height and width
    size: side length of every patch
    stride: distance between the top-left corners of neighbouring patches

    Returns
    -------
    Array of shape (n_patches, size, size, ...) holding only full-size
    patches; any remainder at the bottom/right border is dropped.

    Raises
    ------
    ValueError: if the image is smaller than a single patch
    '''
    height, width = img_arr.shape[0], img_arr.shape[1]

    # Number of patch positions that fit entirely inside the image. Counting
    # shape // stride ignored `size`, which produced truncated patches (and a
    # failing np.stack) whenever size > stride.
    i_max = (height - size) // stride + 1
    j_max = (width - size) // stride + 1

    if i_max <= 0 or j_max <= 0:
        raise ValueError(
            "image of shape {} is smaller than a {}x{} patch".format(
                img_arr.shape[:2], size, size))

    patches_list = [
        img_arr[i * stride : i * stride + size,
                j * stride : j * stride + size]
        for i in range(i_max)
        for j in range(j_max)
    ]

    return np.stack(patches_list)

def reconstruct_from_patches(img_arr, org_img_size, stride=None, size=None):
    '''
    Reassemble full images from patches laid out row by row.

    Parameters
    ----------
    img_arr: patches of shape (n_patches, size, size, layers); a single
             3-D patch is also accepted
    org_img_size: (height, width) of the images to rebuild
    stride: distance between neighbouring patches; defaults to `size`
    size: patch side length; defaults to the patch height found in img_arr

    Returns
    -------
    Array of shape (n_images, height, width, layers). Where patches overlap,
    the patch written last wins. Areas not covered by any patch stay 0.
    '''
    if img_arr.ndim == 3:
        img_arr = np.expand_dims(img_arr, axis=0)

    if size is None:
        size = img_arr.shape[1]

    if stride is None:
        stride = size

    nm_layers = img_arr.shape[3]

    # Patch positions per column / per row of one image
    i_max = (org_img_size[0] // stride) + 1 - (size // stride)
    j_max = (org_img_size[1] // stride) + 1 - (size // stride)

    # img_arr may hold the patches of several images back to back
    total_nm_images = img_arr.shape[0] // (j_max * i_max)

    images_list = []
    kk = 0  # index of the next patch to place
    for _ in range(total_nm_images):
        img_bg = np.zeros(
            (org_img_size[0], org_img_size[1], nm_layers), dtype=img_arr[0].dtype
        )

        for i in range(i_max):
            for j in range(j_max):
                # Copy all layers of the patch in one assignment
                img_bg[
                    i * stride : i * stride + size,
                    j * stride : j * stride + size,
                    :,
                ] = img_arr[kk]
                kk += 1

        images_list.append(img_bg)
    return np.stack(images_list)

In [None]:
# json generation

import json
from PIL import Image

# Maps each test image name to its shape, team, crop and RLE-encoded masks
submission_dict = {}

for image_path, team_name, crop_name, img_name in test_images:

  # Force 3 channels so the array always matches the model input,
  # whatever the mode of the stored file
  with Image.open(image_path) as source_image:
    image = source_image.convert('RGB')
  width, height = image.size

  # resize image to a whole number of tiles (at least one per side) and create crops
  tiled_width = max(1, width // tile_size) * tile_size
  tiled_height = max(1, height // tile_size) * tile_size
  image = image.resize((tiled_width, tiled_height))
  img_arr = np.array(image)
  image_crops = get_patches(img_arr, size=tile_size, stride=tile_size)

  # prediction on all tiles of the image, bs tiles at a time;
  # inputs are rescaled to [0, 1] like the training images
  out_softmax = model.predict(x=image_crops * 1. / 255, batch_size=bs)

  # most likely class per pixel, with a trailing channel axis for the reconstruction
  mask_crops = np.argmax(out_softmax, axis=-1)[..., np.newaxis]

  # reconstruct and resize
  mask_reconstructed = reconstruct_from_patches(mask_crops, org_img_size=(image.height, image.width), stride=tile_size, size=tile_size)

  # Bring the class-index mask (0 background, 1 crop, 2 weed) back to the
  # original resolution with nearest-neighbour resampling. Resizing an RGB
  # rendering with the default filter blended colours at class borders, and
  # those pixels then matched no class colour and fell back to background.
  class_mask = Image.fromarray(mask_reconstructed[0, ..., 0].astype(np.uint8))
  new_mask_arr = np.array(class_mask.resize((width, height), resample=Image.NEAREST))

  # RLE encoding of the crop (class 1) and weed (class 2) masks
  submission_dict[img_name] = {
      'shape': new_mask_arr.shape,
      'team': team_name,
      'crop': crop_name,
      'segmentation': {
          'crop': rle_encode(new_mask_arr == 1),
          'weed': rle_encode(new_mask_arr == 2),
      },
  }


# Finally, save the results into the submission.json file
with open(os.path.join(exp_dir, "submission.json"), 'w') as f:
  json.dump(submission_dict, f)