<a href="https://www.kaggle.com/taraprole/cityscapes-segmentation?scriptVersionId=88975331" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

!pip install keras-unet
from keras_unet.models import vanilla_unet


from keras.layers import Input, concatenate, Conv2D, MaxPooling2D, Conv2DTranspose
from tensorflow.keras import Model
from tensorflow.keras.utils import img_to_array
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy, binary_crossentropy, SparseCategoricalCrossentropy

In [None]:
import os
import glob
from IPython.display import Image, display

# Sorted file lists for the training split: camera frames and their colour-coded
# ground-truth masks. Sorting keeps the two lists in a stable order so that the
# entry at a given index in one list is paired with the same index in the other.
input_dir = sorted(glob.glob('../input/cityscapes-dataset/leftImg8bit_trainvaltest/leftImg8bit/train/*/*.png', recursive=True)) #TRAINING
target_dir = sorted(glob.glob('../input/cityscapes-dataset/gtFine_trainvaltest/gtFine/train/**/*_gtFine_color.png', recursive=True)) #TRAINING ANSWERS


print("Number of samples:", len(input_dir))

# zip() silently truncates to the shorter list, so surface a mismatch instead of
# quietly dropping samples.
if len(input_dir) != len(target_dir):
    print("Warning: found", len(input_dir), "inputs but", len(target_dir), "targets")

imgSize = (1024, 2048)  # passed to load_img as target_size, i.e. (height, width)
classes = 30
batchSize = 2

# Materialise the pairs as a list: a bare zip() is a one-shot iterator, so taking
# its length via list(...) would leave it empty for any later use.
cityscapesDataset = list(zip(input_dir, target_dir))
print("Length of dataset:", len(cityscapesDataset))

# for input_path, target_path in zip(input_dir, target_dir):
#     print(input_path, "|", target_path)

In [None]:
from IPython.display import Image, display
from tensorflow.keras.preprocessing.image import load_img
import PIL
from PIL import ImageOps
import numpy
from numpy import ndarray
from numpy import asarray




def rgb_to_onehot(rgb_arr, color_dict):
    """Convert an RGB label image into a one-hot class array.

    Args:
        rgb_arr: array whose first two axes are the image rows and columns and
            whose elements regroup into RGB triples.
        color_dict: mapping from channel index (0 .. len-1) to the RGB colour
            of that class.

    Returns:
        An int8 array of shape rows x cols x len(color_dict) holding 1 where
        the pixel colour equals the class colour and 0 elsewhere.
    """
    height_width = rgb_arr.shape[:2]
    onehot = np.zeros(height_width + (len(color_dict),), dtype=np.int8)
    for channel in range(len(color_dict)):
        # Compare every pixel triple against this class colour in one pass.
        pixels = rgb_arr.reshape((-1, 3))
        matches = np.all(pixels == color_dict[channel], axis=1)
        onehot[:, :, channel] = matches.reshape(height_width)
    return onehot

def onehot_to_rgb(onehot, color_dict):
    """Convert a one-hot (or per-class score) array back into an RGB image.

    Args:
        onehot: array whose last axis holds one value per class.
        color_dict: mapping from class index to the RGB colour of that class.

    Returns:
        A uint8 array with a trailing axis of 3, where each pixel carries the
        colour of its highest-scoring class. Pixels whose winning index is not
        a key of color_dict stay black.
    """
    class_map = np.argmax(onehot, axis=-1)
    rgb = np.zeros(onehot.shape[:2] + (3,))
    for class_id, color in color_dict.items():
        rgb[class_map == class_id] = color
    return np.uint8(rgb)

# Display the training input image at index 9
display(Image(filename=input_dir[9]))

# Display a grayscale version of the corresponding colour-coded target mask
img = PIL.ImageOps.grayscale(load_img(target_dir[9]))
display(img)

# Convert the grayscale mask to an array and print its shape
imgAsArray = img_to_array(img)
shapeOfArray = imgAsArray.shape
print(shapeOfArray)

In [None]:
class Cityscapes(keras.utils.Sequence):
    """Keras Sequence that yields batches of images and masks as numpy arrays.

    Args:
        batchSize: number of samples per batch.
        imgSize: (height, width) that every image and mask is resized to on load.
        input_dir: list of input image file paths.
        target_dir: list of target mask file paths, paired with input_dir by index.
    """

    def __init__(self, batchSize, imgSize, input_dir, target_dir):
        # Let the Keras base class set up any state it needs.
        super().__init__()
        self.batchSize = batchSize
        # Stored as a tuple so it can be concatenated into array shapes below.
        self.imgSize = tuple(imgSize)
        self.input_dir = input_dir
        self.target_dir = target_dir

    def __len__(self):
        """Return the number of full batches; a trailing partial batch is dropped."""
        # Bound by the shorter list so every batch has both an image and a mask.
        samples = min(len(self.input_dir), len(self.target_dir))
        return samples // self.batchSize

    def __getitem__(self, index):
        """Return the (images, masks) pair for batch number `index`.

        Returns:
            images: float32 array of shape (batchSize, height, width, 3).
            masks: float32 array of shape (batchSize, height, width) holding the
                grayscale values of the mask images.
        """
        start = index * self.batchSize
        stop = start + self.batchSize
        batch_input_img_paths = self.input_dir[start:stop]
        batch_target_img_paths = self.target_dir[start:stop]

        imgArray = np.zeros((self.batchSize,) + self.imgSize + (3,), dtype='float32')
        for j, path in enumerate(batch_input_img_paths):
            # load_img already resizes to target_size, so no further (hard-coded)
            # resize is needed and any imgSize works.
            imgArray[j] = np.asarray(load_img(path, target_size=self.imgSize))

        maskArray = np.zeros((self.batchSize,) + self.imgSize, dtype='float32')
        for j, path in enumerate(batch_target_img_paths):
            # Nearest-neighbour keeps label colours unblended when resizing.
            img = load_img(path, target_size=self.imgSize, interpolation='nearest')
            # NOTE(review): this yields grayscale intensities of the mask image,
            # not class indices — confirm this is what the loss expects.
            maskArray[j] = np.asarray(PIL.ImageOps.grayscale(img))

        return imgArray, maskArray

In [None]:
# Sorted file lists for the validation and test splits (inputs and colour-coded targets).
# NOTE(review): pairing relies on both sorted lists lining up by index — confirm the
# input and target globs return the same number of files per split.
val_input_img_paths = sorted(glob.glob('../input/cityscapes-dataset/leftImg8bit_trainvaltest/leftImg8bit/val/*/*.png', recursive=True))
val_target_img_paths = sorted(glob.glob('../input/cityscapes-dataset/gtFine_trainvaltest/gtFine/val/*/*_gtFine_color.png', recursive=True))
test_input_img_paths = sorted(glob.glob('../input/cityscapes-dataset/leftImg8bit_trainvaltest/leftImg8bit/test/*/*.png', recursive=True))
test_target_img_paths = sorted(glob.glob('../input/cityscapes-dataset/gtFine_trainvaltest/gtFine/test/*/*_gtFine_color.png', recursive=True))

# Instantiate data Sequences for each split, all sharing batchSize and imgSize
train_gen = Cityscapes(batchSize, imgSize, input_dir, target_dir)
val_gen = Cityscapes(batchSize, imgSize, val_input_img_paths, val_target_img_paths)
test_gen = Cityscapes(batchSize, imgSize, test_input_img_paths, test_target_img_paths)

In [None]:
# Dice coefficient loss function- credit to Soriba D. on Medium for providing this loss function

def dice_coef(y_true, y_pred, smooth=1.0):
    """Return the Dice coefficient between two tensors.

    Computed over all elements at once as
    (2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth).

    Args:
        y_true: ground-truth tensor.
        y_pred: predicted tensor with the same number of elements as y_true.
        smooth: constant added to numerator and denominator so the ratio stays
            defined when both inputs sum to zero.

    Returns:
        A scalar tensor.
    """
    # Uses the module-level `tf` import: the original referenced `K` and `smooth`,
    # neither of which is defined anywhere in this file.
    y_pred_f = tf.reshape(y_pred, [-1])
    # Match dtypes so integer masks can be multiplied with float predictions.
    y_true_f = tf.cast(tf.reshape(y_true, [-1]), y_pred_f.dtype)
    intersection = tf.reduce_sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) + smooth)


def dice_coef_loss(y_true, y_pred):
    """Return the negated Dice coefficient, so that minimising it maximises overlap."""
    score = dice_coef(y_true, y_pred)
    return -score

In [None]:
# # Implementation of the UNet architecture by soribadiaby on Github

# def getUNet():
#     inputs = Input(shape=(1024, 2048, 3))
#     conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
#     conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
#     pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

#     conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
#     conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
#     pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

#     conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
#     conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
#     pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

#     conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
#     conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
#     pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

#     conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
#     conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)

#     up6 = concatenate([Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv5), conv4], axis=3)
#     conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
#     conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)

#     up7 = concatenate([Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv6), conv3], axis=3)
#     conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
#     conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)

#     up8 = concatenate([Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv7), conv2], axis=3)
#     conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
#     conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)

#     up9 = concatenate([Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv8), conv1], axis=3)
#     conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
#     conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)

#     conv10 = Conv2D(1, (1, 1), activation='softmax')(conv9)

#     model = Model(inputs=[inputs], outputs=[conv10])

# #     model.compile(optimizer=Adam(learning_rate=1e-3), loss=binary_crossentropy, metrics=[dice_coef])

#     return model



In [None]:
# model = getUNet()
# Build the network at the resolution the data Sequences yield (imgSize) rather than a
# hard-coded 512x512, which would not accept the (1024, 2048) batches.
# NOTE(review): vanilla_unet is left on its defaults for class count and padding —
# confirm its output shape and channels match the masks before training.
model = vanilla_unet(input_shape=tuple(imgSize) + (3,))
# summary() prints the table itself and returns None, so wrapping it in print()
# only adds a stray "None" line.
model.summary()

In [None]:
# Configure the model for training.
# NOTE(review): the Cityscapes Sequence yields masks of shape (batch, height, width)
# holding grayscale values; CategoricalCrossentropy presumably needs one-hot targets —
# confirm the loss and target encoding agree.
model.compile(optimizer=Adam(learning_rate=0.001), loss=CategoricalCrossentropy())

# Write checkpoints to "cityscapes_segmentation.h5", keeping only the best model.
callbacks = [
    keras.callbacks.ModelCheckpoint("cityscapes_segmentation.h5", save_best_only=True)
]


# Train for 10 epochs on train_gen, validating on val_gen.
model.fit(train_gen, epochs=10, validation_data=val_gen, callbacks=callbacks)

In [None]:
# Run the model over the whole test Sequence; display_mask() indexes into this result.
test_preds = model.predict(test_gen)

def display_mask(i):
    """Show the predicted mask for test sample `i`.

    Takes the highest-scoring channel per pixel from test_preds[i] and renders
    the resulting index map as an auto-contrasted image.
    """
    class_map = np.argmax(test_preds[i], axis=-1)
    # array_to_img needs an explicit channel axis.
    class_map = class_map[..., np.newaxis]
    rendered = keras.preprocessing.image.array_to_img(class_map)
    display(PIL.ImageOps.autocontrast(rendered))


# Display results for the test image at index 10
i = 10

# Display input image
display(Image(filename=test_input_img_paths[i]))

# Display ground-truth target mask (auto-contrasted colour image)
img = PIL.ImageOps.autocontrast(load_img(test_target_img_paths[i]))
display(img)

# Display mask predicted by our model
display_mask(i)  # Rendered directly from test_preds[i], at the model's output size.