In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import datasets, layers, models
from tensorflow.keras import utils as U
import os
import cv2
from tensorflow.keras.layers import Layer
from tensorflow.keras.layers import MaxPooling2D, Dropout, Conv2DTranspose, Conv2D, concatenate
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation
from tensorflow.keras import Model, Input
import tensorflow.compat.v1
import tensorflow.keras.backend as K

In [2]:
class MaxPoolingWithArgmax2D(Layer):

    def __init__(
            self,
            pool_size=(2, 2),
            strides=2,
            padding='same',
            **kwargs):
        super(MaxPoolingWithArgmax2D, self).__init__(**kwargs)
        self.padding = padding
        self.pool_size = pool_size
        self.strides = strides

    def call(self, inputs, **kwargs):
        padding = self.padding
        pool_size = self.pool_size
        strides = self.strides
        output, argmax = tf.nn.max_pool_with_argmax(
            inputs,
            ksize=pool_size,
            strides=strides,
            padding=padding.upper(),
            output_dtype=tf.int64)
        return output, argmax

In [3]:
def conv2d_block(input_tensor, n_filters=16, filter_size=3, activation='relu', pad='same', batch_norm=True):
    """
    Custom block method to perform consecutive convolutions with optional batch normalization

    Inputs
    --
    input_tensor: tensor
        Input image tensor data structure defined within Keras
    n_filters: int
        Depth for the convolution layer outputs
    filter_size: int
        Dimensions of the filter convolved with the tensor inputs
    activation: string
        Activation function for the intermediate layers between convolutions
    pad: string
        Determination of if input shape is maintained in convolution
    batch_norm: bool
        Flag if batch normalization is used

    Outputs
    --
    x: tensor
        Twice convolved input with optional batch normalization and activation non-linearities
    """
    
    x = Conv2D(filters=n_filters, kernel_size=(filter_size, filter_size),
               kernel_initializer='he_normal', padding=pad)(input_tensor)
    if batch_norm:
        x = BatchNormalization()(x)
    x = Activation(activation)(x)

    x = Conv2D(filters=n_filters, kernel_size=(filter_size, filter_size),
               kernel_initializer='he_normal', padding=pad)(x)
    if batch_norm:
        x = BatchNormalization()(x)
    x = Activation('relu')(x)

    return x

In [4]:
class MaxUnpooling2D(Layer):
    def __init__(self, size=(2, 2), **kwargs):
        super(MaxUnpooling2D, self).__init__(**kwargs)
        self.size = size

    def call(self, inputs, output_shape=None):
        updates, mask = inputs[0], inputs[1]
        with tf.name_scope(self.name):
            mask = tf.cast(mask, 'int32')
            #input_shape = tf.shape(updates, out_type='int32')
            input_shape = updates.get_shape()

            # This statement is required if I don't want to specify a batch size
            #  calculation new shape
            if output_shape is None:
                output_shape = (
                        input_shape[0],
                        input_shape[1]*self.size[0],
                        input_shape[2]*self.size[1],
                        input_shape[3])

            # calculation indices for batch, height, width and feature maps
            one_like_mask = tf.ones_like(mask, dtype='int32')
            batch_shape = tf.concat(
                    [[input_shape[0]], [1], [1], [1]],
                    axis=0)
            batch_range = tf.reshape(
                    tf.range(output_shape[0], dtype='int32'),
                    shape=batch_shape)
            b = one_like_mask * batch_range
            y = mask // (output_shape[2] * output_shape[3])
            x = (mask // output_shape[3]) % output_shape[2]
            feature_range = tf.range(output_shape[3], dtype='int32')
            f = one_like_mask * feature_range

            # transpose indices & reshape update values to one dimension
            updates_size = tf.size(updates)
            indices = tf.transpose(tf.reshape(
                tf.stack([b, y, x, f]),
                [4, updates_size]))
            values = tf.reshape(updates, [updates_size])
            ret = tf.scatter_nd(indices, values, output_shape)
            ret_shape = tf.TensorShape([None]).concatenate(ret.get_shape()[1:])
            ret = tensorflow.compat.v1.placeholder_with_default(ret, shape=ret_shape)
            return ret

In [5]:
def SegNet(input_shape=(256,256,3), conv_block=conv2d_block, n_filters=64, padding='same',pool_size = 2,batch_norm = True):
    """
    Segnet architecture as originally outlined in https://arxiv.org/pdf/1511.00561.pdf 
    Inputs
    --
    input_shape: tuple(int)
        Tuple in 3D corresponding to the dimensions of the input images
    conv_block: func
        Custom block method to perform consecutive convolutions with optional batch normalization
    n_filters: int
        Number of filters corresponding to depth of input for next layer
    padding: string
        Descriptor determining if padding maintain size during convolutions
    pool_size: int
        Size of window for max pooling and unpolling
    batch_norm: bool
        Determines if batch normalization is used

    Outputs
    --
    model: Model
        Returns model architecture without compile
    """

    tensor = Input(shape=input_shape)
    c1 = conv2d_block(tensor, n_filters * 1, filter_size=3, activation='relu', pad=padding,batch_norm=batch_norm)
    c2 = conv2d_block(c1, n_filters * 1, filter_size=3, activation='relu', pad=padding,batch_norm=batch_norm)
    
    p1,indices1 = MaxPoolingWithArgmax2D()(c2)
    
    c3 = conv2d_block(p1, n_filters * 2, filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    c4 = conv2d_block(c3, n_filters * 2, filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    
    p2,indices2 = MaxPoolingWithArgmax2D()(c4)
    
    c5 = conv2d_block(p2, n_filters * 4,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    c6 = conv2d_block(c5, n_filters * 4,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    c7 = conv2d_block(c6, n_filters * 4,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    
    p3,indices3 = MaxPoolingWithArgmax2D()(c7)
    
    c8 = conv2d_block(p3, n_filters * 8,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    c9 = conv2d_block(c8, n_filters * 8,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    c10 = conv2d_block(c9, n_filters * 8,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    
    p4,indices4 = MaxPoolingWithArgmax2D()(c10)
    
    c11 = conv2d_block(p4, n_filters * 8,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    c12 = conv2d_block(c11, n_filters * 8,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    c13 = conv2d_block(c12, n_filters * 8,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    p5,indices5 = MaxPoolingWithArgmax2D()(c13)


    print("Decoding")
    u1 = MaxUnpooling2D(p5,indices5)
    
    c14 = conv2d_block(u1, n_filters * 8,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    c15 = conv2d_block(c14, n_filters * 8,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    c16 = conv2d_block(c15, n_filters * 8,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    
    u2 = MaxUnpooling2D()([c16,indices4])
    
    c17 = conv2d_block(u2, n_filters * 8,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    c18 = conv2d_block(c17, n_filters * 8,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    c19 = conv2d_block(c18, n_filters * 4,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    u3 = MaxUnpooling2D()([c19,indices3])
    
    c20 = conv2d_block(u3, n_filters * 4,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    c21 = conv2d_block(c20, n_filters * 4,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    c22 = conv2d_block(c21, n_filters * 2,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    
    u4 = MaxUnpooling2D()([c22,indices2])
    c23 = conv2d_block(u4, n_filters * 2,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    c24 = conv2d_block(c23, n_filters * 1,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    
    u5 = MaxUnpooling2D()([c24,indices1])
    c25 = conv2d_block(u5, n_filters * 1,  filter_size=3, activation='relu', pad=padding, batch_norm=batch_norm)
    outputs = conv2d_block(c25,13, 1, activation='softmax',pad = padding)
    model = Model(inputs=[tensor], outputs=[outputs])

    # Return model architecture
    return model

In [None]:
model= SegNet(input_shape=(256,256,3), conv_block=conv2d_block, n_filters=64, padding='same', batch_norm=True)

In [10]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 256, 256, 3) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 256, 256, 64) 1792        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 256, 256, 64) 256         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 256, 256, 64) 0           batch_normalization[0][0]        
______________________________________________________________________________________________

In [11]:
def dataset_setup(data_dir='', n_ims=2975, offset_bias=0, img_dim=256):
    """
    Method to import the training data from CityScape and divide into image-label pairs

    Inputs
    --
    data_dir: string
        Location for the data that is being imported
    n_ims: int
        Number of images contained in the folder chosen
    offset_bias: int
        Optionally, skip some images by starting at a position further than 0
    img_dim: int
        Expected image dimension (assuming square images)

    Outputs
    --
    X: list
        Images
    y: list
        Image labels per pixel
    """

    flist = os.listdir(data_dir)
    img0 = cv2.imread(data_dir+flist[0])

    y_dim,x_dim,_ = np.shape(img0)
    X = np.zeros((n_ims,y_dim,int(x_dim/2),3))
    y = np.zeros((n_ims,y_dim,int(x_dim/2),3))
    
    k = 0
    for f in flist[offset_bias:offset_bias+n_ims]:
        X[k] = cv2.imread(data_dir+f)[:,:img_dim]/img_dim
        y[k] = cv2.imread(data_dir+f)[:,img_dim:]/img_dim
        
        k = k+1

    return X, y

In [12]:
# Directories containing the train and val data
train_files = "./data/cityscapes_data/train/"
val_files = "./data/cityscapes_data/val/"

# Setup image-label pairs
x_train, y_train = dataset_setup(data_dir=train_files, n_ims=2975, offset_bias=0, img_dim=256)
x_val, y_val = dataset_setup(data_dir=val_files, n_ims=500, offset_bias=0, img_dim=256)

In [13]:
x_train1 = x_train[:20]
y_train1 = y_train[:20]

In [14]:
from sklearn.cluster import KMeans
colors = []
for i in range(len(x_train1)//2):
    colors.append(y_train1[i].reshape(y_train1[i].shape[0]*y_train1[i].shape[1], 3))
colors = np.array(colors)
colors = colors.reshape((colors.shape[0]*colors.shape[1],3))

km = KMeans(13)
km.fit(colors)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=13, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=None, tol=0.0001, verbose=0)

In [15]:
def ColorsToClass(seg):
    s = seg.reshape((seg.shape[0]*seg.shape[1],3))
    s = km.predict(s)
    s = s.reshape((seg.shape[0], seg.shape[1]))
    
    n = len(km.cluster_centers_)
    
    cls = np.zeros((seg.shape[0], seg.shape[1], n))
    
    for i in range(n):
        m = np.copy(s)
        m[m!=i] = 0
        m[m!=0] = 1
        
        cls[:,:,i]=m
        
    return cls

In [16]:
x_train1 = x_train[:500]
y_train1 = y_train[:500]

In [17]:
labels = []
for i in range(500):
    labels.append(ColorsToClass(y_train[i]))
labels = np.array(labels)

In [20]:
nb_epoch = 1
batch_size = 1
model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=['accuracy'])

In [21]:
history = model.fit(x_train[:500], labels, batch_size=batch_size, epochs=nb_epoch,verbose=1)

Train on 500 samples


In [22]:
def LayersToRGBImage(img):
    colors = [(255,0,0), (0,255,0), (0,0,255),
             (255,255,0), (255,0,255), (0,255,255),
             (255,255,255), (200,50,0),(50,200,0),
             (50,0,200), (200,200,50), (0,50,200),
             (0,200,50), (0,0,0)]
    
    nimg = np.zeros((img.shape[0], img.shape[1], 3))
    for i in range(img.shape[2]):
        c = img[:,:,i]
        col = colors[i]
        
        for j in range(3):
            nimg[:,:,j]+=col[j]*c
    nimg = nimg/255.0
    return nimg

In [26]:
pp = model.predict(x_val[0:5,:,:,:])
pred = LayersToRGBImage(pp)
plt.figure(figsize=(10,30))
plt.subplot(ni,3,1)
plt.imshow(x_val[0])
plt.subplot(ni,3,2)
plt.imshow(y_val[0])
plt.subplot(ni,3,3)
plt.imshow(pred)

InvalidArgumentError:  Shapes of all inputs must match: values[0].shape = [1,128,128,64] != values[1].shape = [5,128,128,64]
	 [[node model/max_unpooling2d_4/max_unpooling2d_4/stack (defined at <ipython-input-7-05741c515acf>:44) ]] [Op:__inference_distributed_function_45501]

Errors may have originated from an input operation.
Input Source operations connected to node model/max_unpooling2d_4/max_unpooling2d_4/stack:
 model/max_unpooling2d_4/max_unpooling2d_4/mod (defined at <ipython-input-7-05741c515acf>:37)	
 model/max_unpooling2d_4/max_unpooling2d_4/floordiv (defined at <ipython-input-7-05741c515acf>:36)	
 model/max_unpooling2d_4/max_unpooling2d_4/mul_1 (defined at <ipython-input-7-05741c515acf>:39)	
 model/max_unpooling2d_4/max_unpooling2d_4/mul (defined at <ipython-input-7-05741c515acf>:35)

Function call stack:
distributed_function


In [24]:
x_val[0,:,:,:].shape

(256, 256, 3)

In [None]:
pp = model.predict(x_val[0,:,:,:])
ni = 5
for k in range(ni):
    pred = LayersToRGBImage(pp[k])
    plt.figure(figsize=(10,30))
    plt.subplot(ni,3,1+k*3)
    plt.imshow(x_val[k])
    plt.subplot(ni,3,2+k*3)
    plt.imshow(y_val[k])
    plt.subplot(ni,3,3+k*3)
    plt.imshow(pred)