
# Homework 2 - Image Segmentation
The notebook is divided into several sections:

- Setup - Importing libraries, defining the rle_encode function, mounting Drive and unzipping the dataset in the proper Drive directory. Indeed, the notebook was created using the Drive integration with Colab, therefore the main directory is the folder /AN2DL/ImageSegmentation, which was created in advance with the dataset in it.
- Data preparation:
  - Organizing dataset folders - Masks are converted into integer labels. Additionally, the images and the masks of all the teams are put in a single folder, and the same is done for the test images. The resulting dataset structure is explained at the beginning of the section.
  - Creating the generators and the custom dataset class - The generators for data augmentation are created, and the CustomDataset class is defined to be used later on in the models.
- Models:
  - First Model (F-CNN)
  - Second Model (U-NET)
  - Third Model (VGG-16)

  In each model section, the dataset objects are created, the architecture is defined, the optimization parameters are set, the callbacks are created, the model is trained and finally the predictions on the test set are computed, exporting the results to a JSON file (submission.json).

# Setup

In [None]:
# Importing the necessary libraries and setting the seed(s) to make the code replicable
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import os
import tensorflow as tf
import numpy as np
from PIL import Image
import shutil
import json
from datetime import datetime

SEED = 1234
tf.random.set_seed(SEED)
np.random.seed(SEED)

In [None]:
# Defining the rle_encode function, which is used to correctly format the output submission.json file in the prediction phase.
def rle_encode(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - foreground, 0 - background
    Returns the encoding as a space-separated string of "start length" pairs,
    where starts are 1-based positions in the flattened array
    '''
    flat = img.flatten()
    # Padding with a zero on both sides turns every foreground run into one
    # rising and one falling edge, even when it touches the array borders
    padded = np.concatenate([[0], flat, [0]])
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Odd positions hold run ends: turn them into run lengths
    edges[1::2] = edges[1::2] - edges[::2]
    return ' '.join(map(str, edges))

In [None]:
# Mounting Drive to Colab, as the Drive folder /AN2DL/ImageSegmentation is the main directory for this homework
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Unzipping the dataset (named "Development_Dataset.zip"), which has to be previously put in the homework directory
!unzip '/content/drive/My Drive/AN2DL/ImageSegmentation/Development_Dataset.zip'

# Saving the directories for the dataset, the training set and the test set (to be used later)
cwd = os.getcwd()                                                               # This is the current working directory, in which the dataset has been unzipped
dataset_dir = os.path.join(cwd, 'Development_Dataset')                          # This is the dataset directory, which contains the training and the test folders, along with the json
training_dir = os.path.join(dataset_dir, 'Training')                            # This is the training directory, which contains the training samples
test_dir = os.path.join(dataset_dir, 'Test_Dev')                                # This is the test directory, which contains the test samples

# Data preparation

## Organizing dataset folders
Here the mask images are converted into integer labels. Additionally, the dataset folder structure is reorganized as follows:
  - Training
    - Images
    - Masks
  - Test_Dev
    - Images

In [None]:
# Defining the read_rgb_mask function, which will be used to convert the RGB masks into integer labels
def read_rgb_mask(img_path):
    '''
    Convert an RGB mask file into a 2D array of integer class labels.

    img_path: path to the mask file
    Returns the numpy array containing target values (same dtype as the
    loaded image); pixels whose colour is not listed below stay 0
    '''
    rgb = np.array(Image.open(img_path))
    labels = np.zeros(rgb.shape[:2], dtype=rgb.dtype)

    # RGB dictionary from 'RGBtoTarget.txt': two colours map to label 0,
    # labels 1 and 2 are the ones later exported as 'crop' and 'weed'
    color_to_target = (
        ([0, 0, 0], 0),
        ([254, 124, 18], 0),
        ([255, 255, 255], 1),
        ([216, 67, 82], 2),
    )
    for color, target in color_to_target:
        labels[np.all(rgb == color, axis=-1)] = target

    return labels

In [None]:
# Reading the RGB masks from the different teams and transforming them into integer labels
# The resulting labels are saved into the Training/Masks folder.
target_dir = os.path.join(cwd, training_dir, 'Masks')
if not os.path.exists(target_dir):
    os.makedirs(target_dir)

# One Masks folder per (team, crop) pair; all of them are processed the same way
mask_folders = ['Bipbip/Mais/Masks',
                'Bipbip/Haricot/Masks',
                'Pead/Mais/Masks',
                'Pead/Haricot/Masks',
                'Roseau/Mais/Masks',
                'Roseau/Haricot/Masks',
                'Weedelec/Mais/Masks',
                'Weedelec/Haricot/Masks']

for mask_folder in mask_folders:
  source_dir = os.path.join(training_dir, mask_folder)
  for f in os.listdir(source_dir):
    # RGB mask -> integer labels, saved under the same file name in Training/Masks
    mask = read_rgb_mask(os.path.join(source_dir, f))
    mask_img = Image.fromarray(mask, 'P')
    mask_img.save(os.path.join(target_dir, f))

In [None]:
# Moving the training images from the different teams into the new folder Training/Images
target_dir = os.path.join(training_dir, 'Images')
if not os.path.exists(target_dir):
    os.makedirs(target_dir)

# (source folder, re-encode as JPEG?) - the Roseau images are not moved but
# re-saved with a ".jpg" name, since CustomDataset opens "<name>.jpg"
image_folders = [('Bipbip/Mais/Images', False),
                 ('Bipbip/Haricot/Images', False),
                 ('Pead/Mais/Images', False),
                 ('Pead/Haricot/Images', False),
                 ('Roseau/Mais/Images', True),
                 ('Roseau/Haricot/Images', True),
                 ('Weedelec/Mais/Images', False),
                 ('Weedelec/Haricot/Images', False)]

for image_folder, convert_to_jpeg in image_folders:
    source_dir = os.path.join(training_dir, image_folder)
    for file_name in os.listdir(source_dir):
        source_path = os.path.join(source_dir, file_name)
        if convert_to_jpeg:
            img = Image.open(source_path)
            img.save(os.path.join(target_dir, file_name[:-4] + ".jpg"), "JPEG")
        else:
            shutil.move(source_path, target_dir)

In [None]:
# Deleting the now useless folder, since images and masks have been moved into the new folder.
# This is not mandatory, but it is done just for clarity since there are many folders with the same names along the dataset.

# Deleting the now useless per-team folders Training/Bipbip, Training/Pead,
# Training/Roseau and Training/Weedelec (errors are ignored on purpose)
for team_folder in ('Bipbip', 'Pead', 'Roseau', 'Weedelec'):
    shutil.rmtree(os.path.join(training_dir, team_folder), ignore_errors=True)

In [None]:
# Creating the text files for the training-validation split
import random

# os.listdir returns the names in arbitrary order, so they are sorted first:
# only then does the seeded shuffle produce the same split on every run
file_names = sorted(os.listdir('/content/Development_Dataset/Training/Images'))
random.seed(1234)
random.shuffle(file_names)

# The split files store the sample names without extension
sample_names = [os.path.splitext(file_name)[0] for file_name in file_names]

# The first 80% of the shuffled names go to training, the rest to validation
split_point = 0.8 * len(sample_names)
train_names = [name for i, name in enumerate(sample_names) if i < split_point]
val_names = [name for i, name in enumerate(sample_names) if i >= split_point]

# The files are opened in "w" mode so that re-running this cell replaces the
# split instead of appending duplicated (and partially merged) lines to it
with open("/content/Development_Dataset/train.txt", "w") as train_file:
    train_file.write("\n".join(train_names))

with open("/content/Development_Dataset/val.txt", "w") as val_file:
    val_file.write("\n".join(val_names))

In [None]:
# Moving the Test_Dev images of every team into a single folder, which is Test_Dev/Images.
# This is done because the submission.json file must be filled with the prediction over all the Test_Dev images.
# Hence, for simplicity, all the images are moved into that single folder.
target_dir = os.path.join(test_dir, 'Images')
if not os.path.exists(target_dir):
    os.makedirs(target_dir)

# (source folder, re-encode as JPEG?) - the Roseau images are re-saved with a
# ".jpg" name instead of being moved, every other team is moved as it is
test_image_folders = [('Bipbip/Mais/Images', False),
                      ('Bipbip/Haricot/Images', False),
                      ('Pead/Mais/Images', False),
                      ('Pead/Haricot/Images', False),
                      ('Roseau/Mais/Images', True),
                      ('Roseau/Haricot/Images', True),
                      ('Weedelec/Mais/Images', False),
                      ('Weedelec/Haricot/Images', False)]

for image_folder, convert_to_jpeg in test_image_folders:
    source_dir = os.path.join(test_dir, image_folder)
    for file_name in os.listdir(source_dir):
        source_path = os.path.join(source_dir, file_name)
        if convert_to_jpeg:
            img = Image.open(source_path)
            img.save(os.path.join(target_dir, file_name[:-4] + ".jpg"), "JPEG")
        else:
            shutil.move(source_path, target_dir)

In [None]:
# Deleting the now useless folder, since images and masks have been moved into the new folder.
# This is not mandatory, but it is done just for clarity since there are many folders with the same names along the dataset.

# Deleting the now useless per-team folders Test_Dev/Bipbip, Test_Dev/Pead,
# Test_Dev/Roseau and Test_Dev/Weedelec (errors are ignored on purpose)
for team_folder in ('Bipbip', 'Pead', 'Roseau', 'Weedelec'):
    shutil.rmtree(os.path.join(test_dir, team_folder), ignore_errors=True)

## Creating the generators and the custom dataset class

In [None]:
# Creating the ImageDataGenerator objects - one for images and one for masks - performing data augmentation
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Images and masks must undergo the same geometric transformations, so both
# generators are built from one shared set of augmentation parameters
augmentation_params = dict(rotation_range=10,
                           width_shift_range=10,
                           height_shift_range=10,
                           zoom_range=0.3,
                           horizontal_flip=True,
                           vertical_flip=True,
                           fill_mode='reflect')

img_data_gen = ImageDataGenerator(**augmentation_params)
mask_data_gen = ImageDataGenerator(**augmentation_params)

In [None]:
# Defining the CustomDataset class, which will be used inside each model section to create the training and validation datasets
# This implementation is taken from the lab notebook, adjusting some details
class CustomDataset(tf.keras.utils.Sequence):
  """
  Sequence of (image, mask) samples read from the Training folder.

  dataset_dir: folder containing train.txt / val.txt and Training/Images, Training/Masks
  which_subset: 'training' (reads train.txt) or 'validation' (reads val.txt)
  img_generator, mask_generator: ImageDataGenerator objects used for augmentation;
    they are applied only to the training subset and only when both are given
  preprocessing_function: optional function applied to the image array
  out_shape: [height, width] the samples are resized to; None keeps the original size

  Raises ValueError if which_subset is not one of the two supported values.
  """

  def __init__(self, dataset_dir, which_subset, img_generator=None, mask_generator=None, 
               preprocessing_function=None, out_shape=None):
    super().__init__()

    if which_subset == 'training':
      subset_file = os.path.join(dataset_dir, 'train.txt')
    elif which_subset == 'validation':
      subset_file = os.path.join(dataset_dir, 'val.txt')
    else:
      # Fail with a clear message instead of an unbound subset_file below
      raise ValueError("which_subset must be 'training' or 'validation', got %r" % (which_subset,))
    
    # One sample name (without extension) per line; blank lines are skipped
    with open(subset_file, 'r') as f:
      subset_filenames = [line.strip() for line in f if line.strip()]

    self.which_subset = which_subset
    self.dataset_dir = dataset_dir
    self.subset_filenames = subset_filenames
    self.img_generator = img_generator
    self.mask_generator = mask_generator
    self.preprocessing_function = preprocessing_function
    self.out_shape = out_shape
    # Source of the per-sample augmentation seeds: seeded with SEED so that the
    # sequence of transformations is still reproducible from run to run
    self._rng = np.random.RandomState(SEED)

  def __len__(self):
    # Number of samples (not batches): batching is done outside this class
    return len(self.subset_filenames)

  def __getitem__(self, index):
    """
    Returns the index-th sample as (image array, float32 mask array with a trailing channel axis)
    """
    curr_filename = self.subset_filenames[index]
    img = Image.open(os.path.join(self.dataset_dir, 'Training/Images', curr_filename + '.jpg'))
    mask = Image.open(os.path.join(self.dataset_dir, 'Training/Masks', curr_filename + '.png'))

    if self.out_shape is not None:
      # out_shape is [height, width], while PIL expects the size as (width, height)
      out_h, out_w = self.out_shape
      img = img.resize((out_w, out_h))
      # Nearest neighbour keeps the mask values valid class labels
      mask = mask.resize((out_w, out_h), resample=Image.NEAREST)
    
    img_arr = np.array(img)
    mask_arr = np.array(mask)
    mask_arr = np.expand_dims(mask_arr, -1)

    # Without augmentation the mask is returned as it is (this also covers the
    # training subset created without generators)
    out_mask = mask_arr

    augment = (self.which_subset == 'training'
               and self.img_generator is not None
               and self.mask_generator is not None)
    if augment:
      # A new seed is drawn for every sample and shared by the two generators:
      # image and mask get the same transformation, but different samples (and
      # different epochs) get different ones. Passing the constant SEED here
      # would make every sample receive exactly the same transformation.
      transform_seed = int(self._rng.randint(0, 2**31 - 1))
      img_t = self.img_generator.get_random_transform(img_arr.shape, seed=transform_seed)
      mask_t = self.mask_generator.get_random_transform(mask_arr.shape, seed=transform_seed)
      img_arr = self.img_generator.apply_transform(img_arr, img_t)

      # Each non-background class is transformed separately as a binary map and
      # then written back with its label, so that interpolation cannot blend
      # two labels into a third one
      out_mask = np.zeros_like(mask_arr)
      for c in np.unique(mask_arr):
        if c > 0:
          curr_class_arr = np.float32(mask_arr == c)
          curr_class_arr = self.mask_generator.apply_transform(curr_class_arr, mask_t)
          curr_class_arr = np.uint8(curr_class_arr)
          curr_class_arr = curr_class_arr * c 
          out_mask += curr_class_arr
    
    if self.preprocessing_function is not None:
        img_arr = self.preprocessing_function(img_arr)

    return img_arr, np.float32(out_mask)

# F-CNN

In [None]:
# Creating the CustomDataset objects for the model, followed by the actual dataset that will be used for segmentation

# Image dimensions to work with
img_h, img_w = 512, 512

# Batch size
bs = 4

# Element types and per-sample shapes, shared by the training and validation pipelines
sample_types = (tf.float32, tf.float32)
sample_shapes = ([img_h, img_w, 3], [img_h, img_w, 1])

# Training: augmented samples, batched and repeated indefinitely
dataset = CustomDataset('/content/Development_Dataset', 'training', 
                        img_generator=img_data_gen, mask_generator=mask_data_gen, out_shape=[img_h, img_w])

train_dataset = tf.data.Dataset.from_generator(lambda: dataset,
                                               output_types=sample_types,
                                               output_shapes=sample_shapes).batch(bs).repeat()

# Validation: no generators are passed, so the samples are not augmented
dataset_valid = CustomDataset('/content/Development_Dataset', 'validation', out_shape=[img_h, img_w])

valid_dataset = tf.data.Dataset.from_generator(lambda: dataset_valid,
                                               output_types=sample_types,
                                               output_shapes=sample_shapes).batch(bs)

In [None]:
# Defining the model structure
# This is taken from the lab notebook: it is a simple encoder-decoder structure
# The encoder is composed of a convolutional, a ReLU and a max-pooling layer for each depth level
# The intermediate bottleneck is composed only of a convolution and a ReLU, and its result is given to the decoder
# The decoder is composed of an upsampling, a convolutional and a ReLU layer for each depth level
# Finally, the prediction is done using 3 filters (as the number of classes) and a SoftMax activation function
def create_model(depth, start_f, num_classes, dynamic_input_shape):
    """
    Build the sequential encoder-bottleneck-decoder segmentation network.

    depth: number of encoder levels (and of decoder levels)
    start_f: number of filters of the first convolution; doubled at every encoder level
    num_classes: number of filters of the final 1x1 softmax convolution
    dynamic_input_shape: if True the input is [None, None, 3], otherwise
      [img_h, img_w, 3] using the module-level img_h and img_w
    Returns the (uncompiled) tf.keras.Sequential model
    """
    conv_args = dict(kernel_size=(3, 3), strides=(1, 1), padding='same')
    net = tf.keras.Sequential()
    n_filters = start_f

    # Encoder: every level halves the spatial resolution and doubles the filters
    for level in range(depth):
        if level > 0:
            level_input_shape = [None]
        elif dynamic_input_shape:
            level_input_shape = [None, None, 3]
        else:
            level_input_shape = [img_h, img_w, 3]

        net.add(tf.keras.layers.Conv2D(filters=n_filters,
                                       input_shape=level_input_shape,
                                       **conv_args))
        net.add(tf.keras.layers.ReLU())
        net.add(tf.keras.layers.MaxPool2D(pool_size=(2, 2)))
        n_filters *= 2

    # Bottleneck: convolution + ReLU at the lowest resolution
    net.add(tf.keras.layers.Conv2D(filters=n_filters, **conv_args))
    net.add(tf.keras.layers.ReLU())
    n_filters = n_filters // 2

    # Decoder: every level doubles the spatial resolution and halves the filters
    for _ in range(depth):
        net.add(tf.keras.layers.UpSampling2D(2, interpolation='bilinear'))
        net.add(tf.keras.layers.Conv2D(filters=n_filters, **conv_args))
        net.add(tf.keras.layers.ReLU())
        n_filters = n_filters // 2

    # Prediction layer: per-pixel class probabilities
    net.add(tf.keras.layers.Conv2D(filters=num_classes,
                                   kernel_size=(1, 1),
                                   strides=(1, 1),
                                   padding='same',
                                   activation='softmax'))

    return net

In [None]:
# Creating the model and visualizing a summary of it
# depth=5 gives five Conv/ReLU/MaxPool encoder levels mirrored by five decoder levels;
# num_classes=3 matches the integer labels 0, 1 and 2 written by read_rgb_mask
model = create_model(depth=5, start_f=8, num_classes=3, dynamic_input_shape=False)

model.summary()

In [None]:
# Optimization parameters
# The custom validation metric is the mean intersection over union defined in the lab notebook

# Loss function (the targets are integer labels, hence the sparse version)
loss = tf.keras.losses.SparseCategoricalCrossentropy()

# Learning rate and optimizer
lr = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Validation metrics
def meanIoU(y_true, y_pred):
    """
    Mean intersection over union of classes 1 and 2 (background class 0 excluded).

    y_true: integer labels with a trailing channel axis
    y_pred: per-class scores; the predicted label is their argmax over the last axis
    """
    predicted_labels = tf.expand_dims(tf.argmax(y_pred, -1), -1)

    def class_iou(class_id):
      pred_map = tf.cast(tf.where(predicted_labels == class_id, 1, 0), tf.float32)
      true_map = tf.cast(tf.where(y_true == class_id, 1, 0), tf.float32)
      overlap = tf.reduce_sum(true_map * pred_map)
      combined = tf.reduce_sum(true_map) + tf.reduce_sum(pred_map) - overlap
      # The small constant avoids 0/0 when the class is absent from both maps
      return (overlap + 1e-7) / (combined + 1e-7)

    return tf.reduce_mean([class_iou(class_id) for class_id in range(1, 3)])
    
metrics = ['accuracy', meanIoU]

# Compile Model
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

In [None]:
# Setting up the callbacks and Early Stopping
# The purpose of this piece of code is to create a "multiclass_segmentation_experiments" folder inside the directory of this homework (if not already created).
# Inside it, it creates a folder called "F-CNN_" followed by the date and the time of execution, to recognize the experiment.
# Then, it sets up the callback for the training of the model, saving the model weights after each epoch inside the previously mentioned folder, only if the model improved in meanIoU on the Validation set.
# Moreover, Early Stopping is also inserted in the callbacks, to monitor the loss on the Validation set and to stop the training procedure if it does not improve for "patience" epochs.
# Finally, the model is fitted using the training and validation data defined before.

# Creating the "multiclass_segmentation_experiments" folder if not already created
exps_dir = os.path.join(cwd, 'drive/My Drive/AN2DL/ImageSegmentation/', 'multiclass_segmentation_experiments')

now = datetime.now().strftime('%b%d_%H-%M-%S')

# Folder of this experiment (model name + timestamp) and of its checkpoints
model_name = 'F-CNN'
exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))
ckpt_dir = os.path.join(exp_dir, 'ckpts')

# Creating the folders, from the outermost to the innermost, if not already created
for folder in (exps_dir, exp_dir, ckpt_dir):
    if not os.path.exists(folder):
        os.makedirs(folder)

# Saving the model weights after an epoch only if the validation meanIoU improved
ckpt_callback = tf.keras.callbacks.ModelCheckpoint(ckpt_dir, 
                                                   monitor='val_meanIoU',
                                                   mode='max',
                                                   verbose=0,
                                                   save_best_only=True,
                                                   save_weights_only=True)
callbacks = [ckpt_callback]

# Early Stopping monitors the validation loss with a patience of 8 epochs
early_stop = True
if early_stop:
    es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=8)
    callbacks.append(es_callback)


# Fitting the model
# The upper bound is 100 epochs, but Early Stopping can end the training sooner.
# The number of steps is derived from the number of samples and the batch size.
train_steps = len(dataset) // bs
valid_steps = len(dataset_valid) // bs
model.fit(x=train_dataset,
          epochs=100,
          steps_per_epoch=train_steps,
          validation_data=valid_dataset,
          validation_steps=valid_steps, 
          callbacks=callbacks)

In [None]:
# Loading the best weights of the trained model
# exp_dir is already an absolute path (it is built from cwd), so os.path.join
# discards the first argument and full_path is the same as exp_dir
full_path = os.path.join('/content/drive/My Drive/AN2DL/ImageSegmentation/multiclass_segmentation_experiments', exp_dir)
# NOTE(review): latest_checkpoint returns None when no checkpoint is found, which
# would make load_weights fail - confirm that training saved at least once
latest = tf.train.latest_checkpoint(full_path)
model.load_weights(latest)

In [None]:
# Checking the model predictions on the validation set
import time
from matplotlib import cm
import matplotlib.pyplot as plt
%matplotlib inline

# Assigning a color to each class
evenly_spaced_interval = np.linspace(0, 1, 20)
colors = [cm.rainbow(x) for x in evenly_spaced_interval]

iterator = iter(valid_dataset)

In [None]:
# Visualizing an image in the validation set, its mask and its model prediction
fig, ax = plt.subplots(1, 3, figsize=(8, 8))
fig.show()

# Taking the first sample of the next validation batch
batch_images, batch_targets = next(iterator)
image = batch_images[0]
target = batch_targets[0, ..., 0]

# Predicting on a batch of one image and keeping the most likely class per pixel
out_sigmoid = model.predict(x=tf.expand_dims(image, 0))
predicted_class = tf.argmax(out_sigmoid, -1)[0, ...]

target_img = np.zeros([target.shape[0], target.shape[1], 3])
prediction_img = np.zeros([target.shape[0], target.shape[1], 3])

# Colouring both label maps in the same way: class 0 in black, classes 1 and 2
# with the first two entries of the colour list
for label_map, rgb_canvas in ((target, target_img), (predicted_class, prediction_img)):
  rgb_canvas[np.where(label_map == 0)] = [0, 0, 0]
  for i in range(1, 3):
    rgb_canvas[np.where(label_map == i)] = np.array(colors[i-1])[:3] * 255

# Input image, ground truth and prediction, side by side
for axis, picture in zip(ax, (image, target_img, prediction_img)):
  axis.imshow(np.uint8(picture))

fig.canvas.draw()
time.sleep(1)

In [None]:
# Computing the prediction on the Test_Dev images and generating the submission.json file
submission_dict = {}

# Storing the names of the images
test_img_dir = os.path.join(test_dir, 'Images')
file_names_img = os.listdir(test_img_dir)

# Size the prediction is resized back to for each team, in the
# (height, width) order expected by tf.image.resize
team_output_size = {'Bipbip': (1536, 2048),
                    'Pead': (2464, 3280),
                    'Weedelec': (3456, 5184),
                    'Roseau': (820, 1225)}

# For each image, we resize it to the dimensions on which the model has been trained
# We give it as input to the model and then we resize the model output to match the original image size (according to the team it belongs)
# Finally, we compute the argmax to get the predicted class for each pixel and we fill the submission_dict according to the requested structure and encoding
for i in file_names_img:
  img_name = i[:-4]
  name_parts = i.split('_')
  team = name_parts[0]

  # An unknown team must stop the export: otherwise the output of the previous
  # image would be silently reused (or a NameError raised on the first image)
  if team not in team_output_size:
    raise ValueError("Cannot determine the output size of '%s': unknown team '%s'" % (i, team))

  image = Image.open(os.path.join(test_img_dir, i)).convert('RGB')
  image = image.resize((img_h, img_w))
  img_array = np.array(image)
  out_softmax = model.predict(tf.expand_dims(img_array, 0))

  resized_out_softmax = tf.image.resize(out_softmax, team_output_size[team], method='nearest')
  
  predicted_class = tf.argmax(resized_out_softmax, -1)
  predicted_class = np.array(predicted_class)

  # NOTE(review): predicted_class still carries the leading batch axis, so the
  # stored shape has three entries - confirm this is what the evaluation expects
  crop_name = name_parts[1]
  submission_dict[img_name] = {
      'shape': predicted_class.shape,
      'team': team,
      'crop': crop_name[0].upper() + crop_name[1:],
      'segmentation': {
          'crop': rle_encode(predicted_class == 1),
          'weed': rle_encode(predicted_class == 2),
      },
  }

# Exporting the submission_dict created into a submission.json file
with open('/content/drive/My Drive/AN2DL/ImageSegmentation/submission.json', 'w') as f:
    json.dump(submission_dict, f) 

# U-NET

In [None]:
# Creating the CustomDataset objects for the model, followed by the actual dataset that will be used for segmentation
# Note that the images are preprocessed using the VGG-16 preprocess_input function
from tensorflow.keras.applications.vgg16 import preprocess_input

# Image dimensions to work with
img_h, img_w = 768, 768

# Batch size
bs = 4

# Settings shared by the training and validation pipelines
data_root = '/content/Development_Dataset'
element_types = (tf.float32, tf.float32)
element_shapes = ([img_h, img_w, 3], [img_h, img_w, 1])

# Training: augmented and preprocessed samples, batched and repeated indefinitely
dataset = CustomDataset(data_root, 'training', 
                        img_generator=img_data_gen, mask_generator=mask_data_gen,
                        preprocessing_function=preprocess_input, out_shape=[img_h, img_w])

train_dataset = tf.data.Dataset.from_generator(lambda: dataset,
                                               output_types=element_types,
                                               output_shapes=element_shapes)
train_dataset = train_dataset.batch(bs).repeat()

# Validation: same preprocessing, but no generators and therefore no augmentation
dataset_valid = CustomDataset(data_root, 'validation',
                              preprocessing_function=preprocess_input, out_shape=[img_h, img_w])

valid_dataset = tf.data.Dataset.from_generator(lambda: dataset_valid,
                                               output_types=element_types,
                                               output_shapes=element_shapes)
valid_dataset = valid_dataset.batch(bs)

In [None]:
# Defining the model structure
# This is the U-Net Xception-style model - Taken from https://keras.io/examples/vision/oxford_pets_image_segmentation/
from tensorflow.keras import layers

def get_model(img_size, num_classes):
    """
    Build the U-Net Xception-style segmentation model.

    img_size: (height, width) tuple; it is concatenated with (3,) to form the input shape
    num_classes: number of filters of the final softmax convolution
    Returns the (uncompiled) tf.keras.Model
    """
    inputs = tf.keras.Input(shape=img_size + (3,))

    ### [First half of the network: downsampling inputs] ###

    # Entry block (stride 2)
    features = layers.Conv2D(32, 3, strides=2, padding="same")(inputs)
    features = layers.BatchNormalization()(features)
    features = layers.Activation("relu")(features)

    shortcut_source = features  # Kept aside for the first residual connection

    # Three downsampling blocks, identical apart from the feature depth
    for filters in [64, 128, 256]:
        # Two (ReLU, separable convolution, batch normalization) stages
        for _ in range(2):
            features = layers.Activation("relu")(features)
            features = layers.SeparableConv2D(filters, 3, padding="same")(features)
            features = layers.BatchNormalization()(features)

        features = layers.MaxPooling2D(3, strides=2, padding="same")(features)

        # The shortcut is projected with a strided 1x1 convolution so that it
        # matches the pooled features before being added back
        shortcut = layers.Conv2D(filters, 1, strides=2, padding="same")(
            shortcut_source
        )
        features = layers.add([features, shortcut])
        shortcut_source = features  # Kept aside for the next block

    ### [Second half of the network: upsampling inputs] ###

    # Four upsampling blocks, identical apart from the feature depth
    for filters in [256, 128, 64, 32]:
        # Two (ReLU, transposed convolution, batch normalization) stages
        for _ in range(2):
            features = layers.Activation("relu")(features)
            features = layers.Conv2DTranspose(filters, 3, padding="same")(features)
            features = layers.BatchNormalization()(features)

        features = layers.UpSampling2D(2)(features)

        # The shortcut is upsampled and projected with a 1x1 convolution so
        # that it matches the upsampled features before being added back
        shortcut = layers.UpSampling2D(2)(shortcut_source)
        shortcut = layers.Conv2D(filters, 1, padding="same")(shortcut)
        features = layers.add([features, shortcut])
        shortcut_source = features  # Kept aside for the next block

    # Per-pixel classification layer
    outputs = layers.Conv2D(num_classes, 3, activation="softmax", padding="same")(features)

    return tf.keras.Model(inputs, outputs)

# Free up RAM in case the model definition cells were run multiple times
tf.keras.backend.clear_session()

In [None]:
# Creating the model and visualizing a summary of it
# The image size is passed as a tuple because get_model concatenates it with (3,);
# the second argument is the number of classes
model_2 = get_model((img_h, img_w), 3)

model_2.summary()

In [None]:
# Optimization parameters
# The custom validation metric is the mean intersection over union defined in the lab notebook

# Loss function
loss = tf.keras.losses.SparseCategoricalCrossentropy()

# Learning rate and optimizer
lr = 1e-4
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Validation metrics
def meanIoU(y_true, y_pred):
    """Mean intersection over union of classes 1 and 2 (background class 0 is excluded).

    y_true: tensor of class labels, compared element-wise against each class index.
    y_pred: tensor with the per-class scores on its last axis; the predicted label is their argmax.
    Returns the mean of the two per-class IoU values.
    """
    # From scores to labels, restoring a trailing axis of size 1 after the argmax
    predicted_labels = tf.expand_dims(tf.argmax(y_pred, -1), -1)

    def class_iou(class_id):
        # Masks holding 1. where the class is present and 0. elsewhere
        pred_mask = tf.cast(predicted_labels == class_id, tf.float32)
        true_mask = tf.cast(y_true == class_id, tf.float32)

        overlap = tf.reduce_sum(true_mask * pred_mask)
        combined = tf.reduce_sum(true_mask) + tf.reduce_sum(pred_mask) - overlap

        # The small constant keeps the ratio defined when the class appears in neither mask
        return (overlap + 1e-7) / (combined + 1e-7)

    return tf.reduce_mean([class_iou(class_id) for class_id in (1, 2)])

# Metrics tracked during training: the built-in accuracy and the custom meanIoU function
# (the checkpoint callback later monitors the latter on the validation set as 'val_meanIoU')
metrics = ['accuracy', meanIoU]

# Compile Model
model_2.compile(optimizer=optimizer, loss=loss, metrics=metrics)

In [None]:
# Setting up the callbacks and Early Stopping
# The purpose of this piece of code is to create a "multiclass_segmentation_experiments" folder inside the directory of this homework (if not already created).
# Inside it, it creates a folder called "U_NET_" followed by the date and the time of execution, to recognize the experiment.
# Then, it sets up the callback for the training of the model, saving the model weights at the end of an epoch only if the model improved in meanIoU on the Validation set.
# Moreover, Early Stopping is also inserted in the callbacks, monitoring the loss on the Validation set with a patience of 8.
# Finally, the model is fitted using the training and validation data defined before.

# Experiment folders: <exps_dir>/<model name>_<timestamp>/ckpts
exps_dir = os.path.join(cwd, 'drive/My Drive/AN2DL/ImageSegmentation/', 'multiclass_segmentation_experiments')

now = datetime.now().strftime('%b%d_%H-%M-%S')

# Folder of this experiment, in which the model weights will be saved
model_name = 'U_NET'
exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))
ckpt_dir = os.path.join(exp_dir, 'ckpts')

# ckpt_dir is nested inside exp_dir, which is nested inside exps_dir: a single call creates
# whatever is missing, and exist_ok=True makes it a no-op when the folders are already there
os.makedirs(ckpt_dir, exist_ok=True)

# Setting up the callback to save the model weights only if there is an improvement in term of validation meanIoU
callbacks_2 = []

# NOTE(review): the path given to ModelCheckpoint is the 'ckpts' folder itself, not a file inside it.
# The weight-loading cell searches for the checkpoint starting from exp_dir, so confirm where the
# files actually land before changing this path.
ckpt_callback = tf.keras.callbacks.ModelCheckpoint(ckpt_dir,
                                                   monitor='val_meanIoU',
                                                   mode='max',
                                                   verbose=0,
                                                   save_best_only=True,
                                                   save_weights_only=True)
callbacks_2.append(ckpt_callback)

# Early Stopping is inserted in the callbacks, monitoring the validation loss with a patience of 8
early_stop = True
if early_stop:
    es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=8)
    callbacks_2.append(es_callback)

# Fitting the model
# It can go on up to 100 epochs, but the Early Stopping callback explained before allows to stop much earlier
# steps_per_epoch and validation_steps are the number of full batches of size bs contained in the
# training and in the validation set respectively
model_2.fit(x=train_dataset,
            epochs=100,
            steps_per_epoch=(len(dataset) // bs),
            validation_data=valid_dataset,
            validation_steps=(len(dataset_valid) // bs), 
            callbacks=callbacks_2)

In [None]:
# Loading the best weights of the trained model
# exp_dir is joined to the experiments folder; when exp_dir is already an absolute path, os.path.join returns it unchanged
full_path = os.path.join('/content/drive/My Drive/AN2DL/ImageSegmentation/multiclass_segmentation_experiments', exp_dir)
latest = tf.train.latest_checkpoint(full_path)

# latest_checkpoint returns None when it finds no checkpoint: fail with a clear message
# instead of passing None to load_weights
if latest is None:
    raise FileNotFoundError('No checkpoint found in ' + full_path)

model_2.load_weights(latest)

In [None]:
# Checking the model predictions on the validation set
import time
from matplotlib import cm
import matplotlib.pyplot as plt
%matplotlib inline

# Assigning a color to each class: 20 evenly spaced points in [0, 1] are mapped through the rainbow colormap
evenly_spaced_interval = np.linspace(0, 1, num=20)
colors = list(map(cm.rainbow, evenly_spaced_interval))

# Iterator over the validation dataset, consumed one batch at a time by the visualization cell
iterator = iter(valid_dataset)

In [None]:
# Visualizing an image in the validation set, its mask and its model prediction
# Three panels side by side: input image, ground-truth mask, predicted mask
fig, ax = plt.subplots(1, 3, figsize=(8, 8))
fig.show()

# Take the next validation batch and keep only its first element
image, target = next(iterator)
image = image[0]
target = target[0, ..., 0]

# Predict on a batch made of the single image and take the most likely class of every pixel
out_sigmoid = model_2.predict(x=tf.expand_dims(image, 0))
predicted_class = tf.argmax(out_sigmoid, -1)[0, ...]

# RGB canvases on which the label maps are painted
canvas_shape = [target.shape[0], target.shape[1], 3]
target_img = np.zeros(canvas_shape)
prediction_img = np.zeros(canvas_shape)

# Class 0 (background) is black, classes 1 and 2 take the first two entries of the colors list
for canvas, labels in ((target_img, target), (prediction_img, predicted_class)):
    canvas[np.where(labels == 0)] = [0, 0, 0]
    for i in range(1, 3):
        canvas[np.where(labels == i)] = np.array(colors[i-1])[:3] * 255

for panel, picture in zip(ax, (image, target_img, prediction_img)):
    panel.imshow(np.uint8(picture))

fig.canvas.draw()
time.sleep(1)

In [None]:
# Computing the prediction on the Test_Dev images and generating the submission.json file
submission_dict = {}

# Storing the names of the images
test_img_dir = os.path.join(test_dir, 'Images')
file_names_img = os.listdir(test_img_dir)

# (height, width) to which the model output is resized, according to the team the image belongs to
team_output_sizes = {
    'Bipbip': (1536, 2048),
    'Pead': (2464, 3280),
    'Weedelec': (3456, 5184),
    'Roseau': (820, 1225),
}

# For each image, we resize it to the dimensions on which the model has been trained and we apply the preprocess_input function on it
# We give it as input to the model and then we resize the model output to match the original image size (according to the team it belongs)
# Finally, we compute the argmax to get the predicted class for each pixel and we fill the submission_dict according to the requested structure and encoding
for i in file_names_img:
    # File names look like <team>_<crop>_...: the first two fields fill the submission entry
    img_name = os.path.splitext(i)[0]
    name_parts = i.split('_')
    team = name_parts[0]

    # An unknown team would otherwise silently reuse the output computed for the previous image
    if team not in team_output_sizes:
        raise ValueError('Unknown team "' + team + '" for test image ' + i)

    # The context manager closes the file as soon as the pixels have been converted
    with Image.open(os.path.join(test_img_dir, i)) as img_file:
        image = img_file.convert('RGB')

    # PIL expects the size as (width, height), while the model input is (img_h, img_w, 3)
    image = image.resize((img_w, img_h))
    img_array = np.array(image)
    img_array = preprocess_input(img_array)
    out_softmax = model_2.predict(tf.expand_dims(img_array, 0))

    resized_out_softmax = tf.image.resize(out_softmax, team_output_sizes[team], method='nearest')

    predicted_class = tf.argmax(resized_out_softmax, -1)
    predicted_class = np.array(predicted_class)

    rle_encoded_crop = rle_encode(predicted_class == 1)
    rle_encoded_weed = rle_encode(predicted_class == 2)

    # NOTE(review): predicted_class still carries the batch axis of the single-image batch, so the
    # stored shape has three entries - confirm this is what the evaluation expects
    crop_name = name_parts[1]
    submission_dict[img_name] = {
        'shape': predicted_class.shape,
        'team': team,
        'crop': crop_name[0].upper() + crop_name[1:],
        'segmentation': {
            'crop': rle_encoded_crop,
            'weed': rle_encoded_weed,
        },
    }

# Exporting the submission_dict created into a submission.json file
with open('/content/drive/My Drive/AN2DL/ImageSegmentation/submission_2.json', 'w') as f:
    json.dump(submission_dict, f)

# VGG-16

In [None]:
# Creating the CustomDataset objects for the model, followed by the actual dataset that will be used for segmentation
# The images are preprocessed using the VGG-16 preprocess_input function
from tensorflow.keras.applications.vgg16 import preprocess_input

# Image dimensions to work with (square images)
img_h = img_w = 2048

# Batch size
bs = 4

# Types and shapes of the (image, mask) pairs produced by both generators:
# a 3-channel image and a single-channel mask, both float32
sample_types = (tf.float32, tf.float32)
sample_shapes = ([img_h, img_w, 3], [img_h, img_w, 1])

# Training: the only split that receives the augmentation generators
dataset = CustomDataset('/content/Development_Dataset', 'training',
                        img_generator=img_data_gen, mask_generator=mask_data_gen,
                        preprocessing_function=preprocess_input, out_shape=[img_h, img_w])
train_dataset = tf.data.Dataset.from_generator(
    lambda: dataset,
    output_types=sample_types,
    output_shapes=sample_shapes,
).batch(bs)

# Validation: same preprocessing, no augmentation generators
dataset_valid = CustomDataset('/content/Development_Dataset', 'validation',
                              preprocessing_function=preprocess_input, out_shape=[img_h, img_w])
valid_dataset = tf.data.Dataset.from_generator(
    lambda: dataset_valid,
    output_types=sample_types,
    output_shapes=sample_shapes,
).batch(bs)

In [None]:
# Importing the pre-trained VGG-16 network (ImageNet weights) excluding the top part
vgg = tf.keras.applications.VGG16(include_top=False,
                                  weights='imagenet',
                                  input_shape=(img_h, img_w, 3))

# Setting the Fine-Tuning parameter
finetuning = True

if not finetuning:
    # No fine-tuning: the whole encoder is frozen
    vgg.trainable = False
else:
    # Fine-tuning: only the first freeze_until layers are frozen, the remaining ones stay trainable
    freeze_until = 13
    for frozen_layer in vgg.layers[:freeze_until]:
        frozen_layer.trainable = False

In [None]:
# Defining the model structure
# The encoder is composed by the imported VGG-16, while the decoder part is taken from the lab notebook
# It is composed by an upsampling, a convolutional and a ReLU layers for each depth level
# Finally, the prediction is done using num_classes filters and a SoftMax activation function
def create_model(depth, start_f, num_classes):
    """Build the segmentation network: VGG-16 encoder followed by an upsampling decoder.

    depth: number of decoder levels; each one doubles the spatial resolution.
    start_f: overwritten below, so the value passed by the caller has no effect.
    num_classes: number of filters of the final 1x1 softmax convolution.
    Returns the tf.keras.Sequential model (not compiled).
    """
    # The decoder always starts from 256 filters, whatever start_f was passed
    start_f = 256

    model = tf.keras.Sequential()

    # Encoder: the notebook-level vgg network
    model.add(vgg)

    # Decoder: the number of filters is halved at every level (256, 128, 64, ...)
    for level in range(depth):
        decoder_block = [
            tf.keras.layers.UpSampling2D(2, interpolation='bilinear'),
            tf.keras.layers.Conv2D(filters=start_f >> level,
                                   kernel_size=(3, 3),
                                   strides=(1, 1),
                                   padding='same'),
            tf.keras.layers.ReLU(),
        ]
        for decoder_layer in decoder_block:
            model.add(decoder_layer)

    # Prediction Layer: one softmax score per class for every pixel
    prediction_layer = tf.keras.layers.Conv2D(filters=num_classes,
                                              kernel_size=(1, 1),
                                              strides=(1, 1),
                                              padding='same',
                                              activation='softmax')
    model.add(prediction_layer)

    return model

In [None]:
# Creating the model and visualizing a summary of it
# start_f=8 has no effect here: create_model overwrites start_f with 256 before building the decoder
model_3 = create_model(depth=5, start_f=8, num_classes=3)

# Visualize created model as a table
model_3.summary()

In [None]:
# Optimization parameters

# Loss function
# Sparse categorical cross-entropy; the notebook converts the masks into integer labels during data preparation
loss = tf.keras.losses.SparseCategoricalCrossentropy()

# Learning rate and optimizer
# Adam is used with a fixed learning rate of 1e-4
lr = 1e-4
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Validation metrics
def meanIoU(y_true, y_pred):
    """Mean intersection over union of classes 1 and 2 (background class 0 is excluded).

    y_true: tensor of class labels, compared element-wise against each class index.
    y_pred: tensor with the per-class scores on its last axis; the predicted label is their argmax.
    Returns the mean of the two per-class IoU values.
    """
    # From scores to labels, restoring a trailing axis of size 1 after the argmax
    predicted_labels = tf.expand_dims(tf.argmax(y_pred, -1), -1)

    def class_iou(class_id):
        # Masks holding 1. where the class is present and 0. elsewhere
        pred_mask = tf.cast(predicted_labels == class_id, tf.float32)
        true_mask = tf.cast(y_true == class_id, tf.float32)

        overlap = tf.reduce_sum(true_mask * pred_mask)
        combined = tf.reduce_sum(true_mask) + tf.reduce_sum(pred_mask) - overlap

        # The small constant keeps the ratio defined when the class appears in neither mask
        return (overlap + 1e-7) / (combined + 1e-7)

    return tf.reduce_mean([class_iou(class_id) for class_id in (1, 2)])

# Metrics tracked during training: the built-in accuracy and the custom meanIoU function
# (the checkpoint callback later monitors the latter on the validation set as 'val_meanIoU')
metrics = ['accuracy', meanIoU]

# Compile Model
model_3.compile(optimizer=optimizer, loss=loss, metrics=metrics)

In [None]:
# Setting up the callbacks and Early Stopping
# The purpose of this piece of code is to create a "multiclass_segmentation_experiments" folder inside the directory of this homework (if not already created).
# Inside it, it creates a folder called "VGG-16_" followed by the date and the time of execution, to recognize the experiment.
# Then, it sets up the callback for the training of the model, saving the model weights at the end of an epoch only if the model improved in meanIoU on the Validation set.
# Moreover, Early Stopping is also inserted in the callbacks, monitoring the loss on the Validation set with a patience of 8.
# Finally, the model is fitted using the training and validation data defined before.

# Experiment folders: <exps_dir>/<model name>_<timestamp>/ckpts
exps_dir = os.path.join(cwd, 'drive/My Drive/AN2DL/ImageSegmentation/', 'multiclass_segmentation_experiments')

now = datetime.now().strftime('%b%d_%H-%M-%S')

# Folder of this experiment, in which the model weights will be saved
model_name = 'VGG-16'
exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))
ckpt_dir = os.path.join(exp_dir, 'ckpts')

# ckpt_dir is nested inside exp_dir, which is nested inside exps_dir: a single call creates
# whatever is missing, and exist_ok=True makes it a no-op when the folders are already there
os.makedirs(ckpt_dir, exist_ok=True)

# Setting up the callback to save the model weights only if there is an improvement in term of validation meanIoU
callbacks_3 = []

# NOTE(review): the path given to ModelCheckpoint is the 'ckpts' folder itself, not a file inside it.
# The weight-loading cell searches for the checkpoint starting from exp_dir, so confirm where the
# files actually land before changing this path.
ckpt_callback = tf.keras.callbacks.ModelCheckpoint(ckpt_dir,
                                                   monitor='val_meanIoU',
                                                   mode='max',
                                                   verbose=0,
                                                   save_best_only=True,
                                                   save_weights_only=True)
callbacks_3.append(ckpt_callback)

# Early Stopping is inserted in the callbacks, monitoring the validation loss with a patience of 8
early_stop = True
if early_stop:
    es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=8)
    callbacks_3.append(es_callback)

# Fitting the model
# It can go on up to 100 epochs, but the Early Stopping callback explained before allows to stop much earlier
# steps_per_epoch and validation_steps are the number of full batches of size bs contained in the
# training and in the validation set respectively
model_3.fit(x=train_dataset,
           epochs=100,
           steps_per_epoch=(len(dataset) // bs),
           validation_data=valid_dataset,
           validation_steps=(len(dataset_valid) // bs), 
           callbacks=callbacks_3)

In [None]:
# Loading the best weights of the trained model
# exp_dir is joined to the experiments folder; when exp_dir is already an absolute path, os.path.join returns it unchanged
full_path = os.path.join('/content/drive/My Drive/AN2DL/ImageSegmentation/multiclass_segmentation_experiments', exp_dir)
latest = tf.train.latest_checkpoint(full_path)

# latest_checkpoint returns None when it finds no checkpoint: fail with a clear message
# instead of passing None to load_weights
if latest is None:
    raise FileNotFoundError('No checkpoint found in ' + full_path)

model_3.load_weights(latest)

In [None]:
# Checking the model predictions on the validation set
import time
from matplotlib import cm
import matplotlib.pyplot as plt
%matplotlib inline

# Assigning a color to each class: 20 evenly spaced points in [0, 1] are mapped through the rainbow colormap
evenly_spaced_interval = np.linspace(0, 1, num=20)
colors = list(map(cm.rainbow, evenly_spaced_interval))

# Iterator over the validation dataset, consumed one batch at a time by the visualization cell
iterator = iter(valid_dataset)

In [None]:
# Visualizing an image in the validation set, its mask and its model prediction
# Three panels side by side: input image, ground-truth mask, predicted mask
fig, ax = plt.subplots(1, 3, figsize=(8, 8))
fig.show()

# Take the next validation batch and keep only its first element
image, target = next(iterator)
image = image[0]
target = target[0, ..., 0]

# Predict on a batch made of the single image and take the most likely class of every pixel
out_sigmoid = model_3.predict(x=tf.expand_dims(image, 0))
predicted_class = tf.argmax(out_sigmoid, -1)[0, ...]

# RGB canvases on which the label maps are painted
canvas_shape = [target.shape[0], target.shape[1], 3]
target_img = np.zeros(canvas_shape)
prediction_img = np.zeros(canvas_shape)

# Class 0 (background) is black, classes 1 and 2 take the first two entries of the colors list
for canvas, labels in ((target_img, target), (prediction_img, predicted_class)):
    canvas[np.where(labels == 0)] = [0, 0, 0]
    for i in range(1, 3):
        canvas[np.where(labels == i)] = np.array(colors[i-1])[:3] * 255

for panel, picture in zip(ax, (image, target_img, prediction_img)):
    panel.imshow(np.uint8(picture))

fig.canvas.draw()
time.sleep(1)

In [None]:
# Computing the prediction on the Test_Dev images and generating the submission.json file
submission_dict = {}

# Storing the names of the images
test_img_dir = os.path.join(test_dir, 'Images')
file_names_img = os.listdir(test_img_dir)

# (height, width) to which the model output is resized, according to the team the image belongs to
team_output_sizes = {
    'Bipbip': (1536, 2048),
    'Pead': (2464, 3280),
    'Weedelec': (3456, 5184),
    'Roseau': (820, 1225),
}

# For each image, we resize it to the dimensions on which the model has been trained and we apply the preprocess_input function on it
# We give it as input to the model and then we resize the model output to match the original image size (according to the team it belongs)
# Finally, we compute the argmax to get the predicted class for each pixel and we fill the submission_dict according to the requested structure and encoding
for i in file_names_img:
    # File names look like <team>_<crop>_...: the first two fields fill the submission entry
    img_name = os.path.splitext(i)[0]
    name_parts = i.split('_')
    team = name_parts[0]

    # An unknown team would otherwise silently reuse the output computed for the previous image
    if team not in team_output_sizes:
        raise ValueError('Unknown team "' + team + '" for test image ' + i)

    # The context manager closes the file as soon as the pixels have been converted
    with Image.open(os.path.join(test_img_dir, i)) as img_file:
        image = img_file.convert('RGB')

    # PIL expects the size as (width, height), while the model input is (img_h, img_w, 3)
    image = image.resize((img_w, img_h))
    img_array = np.array(image)
    img_array = preprocess_input(img_array)
    out_softmax = model_3.predict(tf.expand_dims(img_array, 0))

    resized_out_softmax = tf.image.resize(out_softmax, team_output_sizes[team], method='nearest')

    predicted_class = tf.argmax(resized_out_softmax, -1)
    predicted_class = np.array(predicted_class)

    rle_encoded_crop = rle_encode(predicted_class == 1)
    rle_encoded_weed = rle_encode(predicted_class == 2)

    # NOTE(review): predicted_class still carries the batch axis of the single-image batch, so the
    # stored shape has three entries - confirm this is what the evaluation expects
    crop_name = name_parts[1]
    submission_dict[img_name] = {
        'shape': predicted_class.shape,
        'team': team,
        'crop': crop_name[0].upper() + crop_name[1:],
        'segmentation': {
            'crop': rle_encoded_crop,
            'weed': rle_encoded_weed,
        },
    }

# Exporting the submission_dict created into a submission.json file
with open('/content/drive/My Drive/AN2DL/ImageSegmentation/submission_3.json', 'w') as f:
    json.dump(submission_dict, f)