In [1]:
import os
import pickle
import random

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import cv2
from tqdm import tqdm
# %matplotlib inline

from keras.models import Model
from keras.layers import *
from keras.optimizers import *
from keras.callbacks import *
from keras import backend as K
from keras.layers.advanced_activations import *
from keras import metrics
from keras.applications import *
from keras.preprocessing import image
from keras.activations import *
import tensorflow as tf

from IPython.display import Image, display
from keras.utils.vis_utils import plot_model

Using TensorFlow backend.


In [6]:
class HighwayDriving_Process:
    """Index a directory of frames and colour-coded labels and serve training batches.

    File names are expected to consist of four ``split_char``-separated fields
    (``<sequence>_<s1>_<s2>_<s3>``). The label image of a frame is looked up in
    ``labels_dir`` as ``<sequence>_<label_suffix>_<s3>``.
    """

    def imread(self , name):
        """Read the image file `name` with OpenCV and return it converted from BGR to RGB."""
        return cv2.cvtColor(cv2.imread(name) , cv2.COLOR_BGR2RGB)

    def image2class(self , image):
        """Convert a colour label image (H, W, 3) into a class-index map (H, W).

        Pixels whose colour is not in `self.color_list` keep the initial value 0,
        i.e. they fall into the first ('Undefined') class.
        """
        img = np.zeros((image.shape[0],image.shape[1]))
        for i,t in enumerate(self.color_list):
            img[np.all(image == t[0] , axis = -1)] = i
        return img

    def class2onehot(self , classm):
        """One-hot encode a class-index map along a new trailing axis (int8)."""
        return (np.arange(self.nb_classes) == classm[...,None]).astype(np.int8)

    def class2image(self , classm):
        """Render a class-index map (H, W) back into a uint8 colour image (H, W, 3)."""
        img = np.zeros((classm.shape[0],classm.shape[1],3))
        for i,t in enumerate(self.color_list):
            img[classm == i] = np.array(t[0])
        return img.astype(np.uint8)

    def image_file(self , name):
        """Return the path of frame `name` inside the images directory."""
        return self.images_dir+'/'+name

    def label_file(self , name):
        """Return the path of the label image that belongs to frame `name`."""
        n , _ , _ , s3 = name.split(self.split_char)
        return self.labels_dir+'/'+self.split_char.join([n,self.label_suffix ,s3])

    def __init__(self ,images_dir , labels_dir , classes = None):
        '''
        Original video sampling by the cameras was at 30 Hz,
        i.e. 30 frames per second.
        This was then turned into a 1 Hz sampling, i.e. 1 frame
        per second. The sub-sampling is done to get rid of
        repetitive frames.

        For our experiment we create these kinds of data set
        organisation:

        1. frame => label (training segmentation network)
        2. frame => future label (training segmentation with optical flow)
                    i.e. capture future intuition
                    Try with 1 second, try with 2 seconds, try with 3 seconds
        3. frame_seq => frame_seq_label (training segmentation sequence model)
        4. frame_seq => future_frame_seq_label (shifted by two or three frames)

        Using all these the frame vision field can be limited.

        Parameters
        ----------
        images_dir : directory listed for frame file names.
        labels_dir : directory listed for label file names.
        classes    : optional list of class names to keep; 'Undefined' (index 0)
                     is always kept. None keeps every class.
        '''

        self.color_list =   [           ([255, 255, 255]         ,  'Undefined'), ## The position is their class so void stays at 0
                                        ([0, 255, 255]     ,  'Sky'),
                                        ([128,128,128]     ,  'Road'),
                                        ([255,255,0]      ,  'Lane'),
                                        ([255,0,0]     ,  'Fence'),
                                        ([128, 0, 128]       ,  'Construction'),
                                        ([255, 0, 128]      ,  'Traffic sign'),
                                        ([0, 0, 128]      ,  'Car'),
                                        ([0,128,128]    ,  'Truck'),
                                        ([0,128,0]   ,  'vegetation'),

                                    ]

        if classes is not None:
            assert(isinstance(classes,list) ) ,"log:give a list of classes, check dataset or code"
            # Keep 'Undefined' at index 0 and append only the requested classes.
            self.color_list = [self.color_list[0]] + [t for t in self.color_list[1:] if t[1] in classes]

        self.nb_classes  = len(self.color_list)
        print("log: after filtering there are ",self.nb_classes," classes for training")

        self.label_suffix = 'ColorLabel' ## dataset specific
        self.split_char = '_' ## dataset specific
        image_name_list = os.listdir(images_dir)
        image_name_list.sort()
        labels_name_list = os.listdir(labels_dir)

        # Group frames by sequence id: {sequence: ['<s1>_<s2>_<s3>', ...]} in sorted order.
        image_sequence_dict = {}
        for name in image_name_list:
            n , s1 , s2 , s3 = name.split(self.split_char)
            image_sequence_dict.setdefault(n , []).append(self.split_char.join([s1,s2,s3]))

        self.images_dir = images_dir
        self.labels_dir = labels_dir
        self.image_name_list = image_name_list
        self.labels_name_list = labels_name_list
        self.image_sequence_dict = image_sequence_dict

    def _resize(self , array , size):
        """Nearest-neighbour resize; `size` is handed to cv2.resize unchanged.

        NOTE: cv2.resize interprets its size argument as (width, height).
        """
        assert( len(size) == 2) , "log: give (y , x) format size"
        return cv2.resize(array , size , interpolation = cv2.INTER_NEAREST)

    def _load_frame_label_pair(self , img_file , size = None):
        """Load frame `img_file` and its label; return (RGB image, one-hot label)."""
        image = self.imread(self.image_file(img_file))
        label = self.image2class(self.imread(self.label_file(img_file)))
        if size is not None:
            image = self._resize(image , size)
            label = self._resize(label , size)
        return image , self.class2onehot(label)

    def _frame2label_batches(self , names , batch_size , size , normalise):
        """Yield (IMAGES, LABELS) arrays for consecutive `batch_size` chunks of `names`.

        Every name is served exactly once and no empty batch is produced.
        """
        if not names:
            return
        for beg in range(0 , len(names) , batch_size):
            pairs = [self._load_frame_label_pair(n , size) for n in names[beg:beg + batch_size]]
            IMAGES = np.array([p[0] for p in pairs])
            LABELS = np.array([p[1] for p in pairs])
            if normalise:
                IMAGES = IMAGES / 255
            yield IMAGES , LABELS

    def frame2label_val_datagen(self , size = None  , normalise = True):
        '''
            Using this makes sense when validation is True

            Yields the last 10% of the sorted frame list (shuffled) as one
            (IMAGES, LABELS) batch. Yields nothing when that slice is empty.
        '''
        val_images = self.image_name_list[int(len(self.image_name_list)*0.9):]
        random.shuffle(val_images)
        for batch in self._frame2label_batches(val_images , len(val_images) , size , normalise):
            yield batch

    def frame2label_train_datagen(self ,size = None  ,  batch_size = 16 , validation = True , normalise = True):
        """Yield shuffled (IMAGES, LABELS) training batches of at most `batch_size` frames.

        With `validation` set, only the first 90% of the sorted frame list is used
        (the remainder is served by `frame2label_val_datagen`).
        """
        if validation == True:
            train_images = self.image_name_list[:int(len(self.image_name_list)*0.9)]
        else:
            # Work on a copy: shuffling self.image_name_list in place would scramble
            # the sorted order that the train/validation split relies on.
            train_images = list(self.image_name_list)

        random.shuffle(train_images)
        for batch in self._frame2label_batches(train_images , batch_size , size , normalise):
            yield batch

    @staticmethod
    def jaccard_index(y_pred , y):
        """Intersection-over-union of two masks (non-zero entries count as set).

        Returns NaN (with a NumPy warning) when both masks are empty.
        """
        intersection = np.logical_and(y, y_pred)
        union = np.logical_or(y, y_pred)
        iou_score = np.sum(intersection) / np.sum(union)
        return iou_score

    def get_class_weights(self , c=1.02 , size = (256 , 256)):
        """Return per-class weights 1 / log(c + p), p being each class's pixel share.

        All frames are loaded in a single batch (resized to `size` unless it is
        None), so this needs memory for the whole label set.
        """
        _ , labels = next(self.frame2label_train_datagen(size = size  ,  batch_size = len(self.image_name_list) ,
                    validation = False , normalise = False))
        labels = np.argmax(labels , axis = -1)
        print(labels.shape)
        all_labels = labels.flatten()
        each_class = np.bincount(all_labels, minlength=self.nb_classes)
        prospensity_score = each_class / len(all_labels)
        class_weights = 1 / (np.log(c + prospensity_score))
        return class_weights

    def frameSequence_train_datagen(self , size = None , batch_size = 16, time_steps = 4 , skip = 4, normalise = True , log=False):
        """Yield (frames, future_labels) window batches, one video sequence at a time.

        For every start index j of a sequence, a window holds the frames
        j, j+skip, ... (time_steps of them) and the labels of the frames one
        `skip` later. Windows are emitted in batches of at most `batch_size`;
        the last batch of a sequence may be smaller. Batches have shape
        (n, time_steps, H, W, 3) and (n, time_steps, H, W, nb_classes).

        Sequences with no more than time_steps*skip frames cannot form a window
        and are skipped.
        """
        assert(time_steps >= 2) , 'log: give more than or equal to two time steps'
        assert(skip >= 1), 'log: give time skip >= 1'

        for sequence in self.image_sequence_dict.keys():
            if log:
                print("log: sequence started:",sequence)
            train_images = self.image_sequence_dict[sequence]

            # The previous length assertion was inverted with respect to its own
            # message and rejected every sequence longer than
            # batch_size + time_steps*skip. The only real requirement is that at
            # least one window fits.
            itr = len(train_images) - time_steps*skip
            if itr <= 0:
                continue

            images_numpy = []
            labels_numpy = []
            for img_file in train_images:
                image , label = self._load_frame_label_pair(self.split_char.join([sequence,img_file]) , size)
                images_numpy.append(image)
                labels_numpy.append(label)

            IMAGES = []
            LABELS_FUTURE = []
            for j in range(itr):
                IMAGES.append([images_numpy[j+k*skip] for k in range(time_steps)])
                LABELS_FUTURE.append([labels_numpy[j+(k+1)*skip] for k in range(time_steps)])

                if (j+1)%batch_size == 0 or (j+1) == itr:
                    x = np.array(IMAGES)
                    y = np.array(LABELS_FUTURE)
                    if normalise:
                        x = x / 255
                    IMAGES = []
                    LABELS_FUTURE = []
                    yield x , y

    def frame2futurelabel_train_datagen(self , size=None , batch_size = 16, time_step = 3 , normalise = True):
        """Yield (frames, future_frames, future_labels) batches per video sequence.

        Each sequence is cut into chunks of batch_size + time_step frames; inside
        a chunk, frame i is paired with the frame and label `time_step` positions
        later, so a full chunk contributes `batch_size` samples.
        """
        assert(batch_size > time_step) ,'log: batch size must be greater than time step'
        batch_size = batch_size+time_step

        for sequence in self.image_sequence_dict.keys():
            train_images = self.image_sequence_dict[sequence]
            for beg in range(0 , len(train_images) , batch_size):
                train_batch = train_images[beg:beg + batch_size]
                IMAGES = []
                IMAGES_FUTURE = []
                LABELS_FUTURE = []
                for j in range(len(train_batch) - time_step):
                    frame1_image = self.imread(self.image_file(self.split_char.join([sequence,train_batch[j]])))
                    if size is not None:
                        frame1_image = self._resize(frame1_image , size)
                    frame2_image , label2_onehot = self._load_frame_label_pair(
                        self.split_char.join([sequence,train_batch[j+time_step]]) , size)
                    IMAGES.append(frame1_image)
                    IMAGES_FUTURE.append(frame2_image)
                    LABELS_FUTURE.append(label2_onehot)
                if(len(IMAGES) == 0):
                    # Chunk too short to hold a (frame, future frame) pair.
                    continue
                IMAGES = np.array(IMAGES)
                IMAGES_FUTURE = np.array(IMAGES_FUTURE)
                LABELS_FUTURE = np.array(LABELS_FUTURE)

                if normalise:
                    IMAGES_FUTURE = IMAGES_FUTURE / 255
                    IMAGES = IMAGES / 255

                yield IMAGES , IMAGES_FUTURE , LABELS_FUTURE

In [7]:
# Frame and label directories of the Highway Driving dataset, relative to the notebook.
data_path , labels_path = "highway/HighwayDataset/images" , "highway/HighwayDataset/label"

# classes = None keeps every class of the colour table.
proc = HighwayDriving_Process(images_dir = data_path , labels_dir = labels_path , classes = None)

# Number of output channels the segmentation network has to predict.
nb_classes = proc.nb_classes

log: after filtering there are  10  classes for training


In [8]:
class Sequenced_Unet:
    """U-Net over frame sequences: TimeDistributed conv blocks with ConvLSTM2D stages.

    The built Keras model is stored in `self.model`. It maps an input of shape
    (time_step,) + size to a per-frame softmax over `nb_classes` channels.
    """

    def jaccard_distance(self , y_true, y_pred, smooth=100):
        """Smoothed Jaccard distance over the last axis, used as a training metric."""
        intersection = K.sum(K.abs(y_true * y_pred), axis=-1)
        sum_ = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1)
        jac = (intersection + smooth) / (sum_ - intersection + smooth)
        return (1 - jac) * smooth

    def conv2d(self , x, n_filters=64 , kernel_size = 3, batchnorm = True  , name = None):
        """Per-frame Conv2D -> optional BatchNormalization -> ReLU.

        `name` is required in practice: it is used as the prefix of the layer names.
        """
        x = TimeDistributed(Conv2D(filters = n_filters, kernel_size = (kernel_size, kernel_size),
                 kernel_initializer = 'glorot_uniform', padding = 'same', activation=None ) , name = name)(x)
        if batchnorm:
            x = TimeDistributed(BatchNormalization() , name = name+'_bn')(x)
        x = TimeDistributed(Activation('relu') , name = name+'_relu')(x)
        return x

    def conv2d_block(self , x , n_filters=64 , kernel_size = 3, batchnorm = True , name = None):
        """Two stacked `conv2d` units named `<name>_conv1` and `<name>_conv2`."""
        x = self.conv2d(x , n_filters , kernel_size , batchnorm  , name= name+"_conv1")
        x = self.conv2d(x , n_filters , kernel_size , batchnorm  , name= name+"_conv2")
        return x

    def convlstm2d_block(self , x , filters = 64 , kernel_size = 3 , name = None , return_sequence = True):
        """Two stacked ConvLSTM2D layers with `filters` channels each.

        The first layer always returns the full sequence; `return_sequence`
        controls the second one.
        """
        # Use the `filters` argument. The body used to read the notebook-level
        # global `n_filters`, so every ConvLSTM stage got the same width regardless
        # of what the caller asked for. Weights saved from that earlier build have
        # different shapes and will not load into this one.
        x = ConvLSTM2D(filters = filters , kernel_size = kernel_size , strides = 1 , padding = 'same' , return_sequences = True , name = name+"_1")(x)
        x = ConvLSTM2D(filters = filters , kernel_size = kernel_size , strides = 1 , padding = 'same' , return_sequences = return_sequence , name = name+"_2")(x)
        return x

    def __init__(self ,nb_classes , size = (512 , 512 , 3) , n_filters = 16 , time_step = 4, batchnorm = True):
        """Build the network and print its summary.

        nb_classes : number of softmax output channels.
        size       : shape of a single frame, (H, W, channels).
        n_filters  : width of the first stage; it doubles at every down-sampling step.
        time_step  : number of frames per input sequence.
        batchnorm  : whether the conv blocks use BatchNormalization.
        """
        self.size = size
        self.nb_class = nb_classes

        inp = Input(shape = (time_step,)+size)

        # Encoder: conv block -> ConvLSTM block -> 2x2 max-pool, four times.
        c1 = self.conv2d_block(inp, n_filters * 1, kernel_size = 3, batchnorm = batchnorm , name = 'c1')
        seqc1 = self.convlstm2d_block(c1 , filters = n_filters * 1 , kernel_size = 3 , name = 'seqc1')
        p1 = TimeDistributed(MaxPooling2D((2, 2)) , name='mp1')(seqc1)

        c2 = self.conv2d_block(p1, n_filters * 2, kernel_size = 3, batchnorm = batchnorm , name="c2")
        seqc2 = self.convlstm2d_block(c2 , filters = n_filters * 2 , kernel_size = 3 , name = 'seqc2')
        p2 = TimeDistributed(MaxPooling2D((2, 2)) , name='mp2')(seqc2)

        c3 = self.conv2d_block(p2, n_filters * 4, kernel_size = 3, batchnorm = batchnorm , name = "c3")
        seqc3 = self.convlstm2d_block(c3 , filters = n_filters * 4 , kernel_size = 3 , name = 'seqc3')
        p3 = TimeDistributed(MaxPooling2D((2, 2)), name='mp3')(seqc3)

        c4 = self.conv2d_block(p3, n_filters * 8, kernel_size = 3, batchnorm = batchnorm , name = "c4")
        seqc4 = self.convlstm2d_block(c4 , filters = n_filters * 8 , kernel_size = 3 , name = 'seqc4')
        p4 = TimeDistributed(MaxPooling2D((2, 2)), name='mp4')(seqc4)

        # Bottleneck.
        c5 = self.conv2d_block(p4, n_filters * 16, kernel_size = 3, batchnorm = batchnorm , name = "c5")

        ## Remember: since we intend to recreate the frames, this stage is meant to
        ## act as an identity function, i.e. use (1,1) filters.
        seq = self.convlstm2d_block(c5 , filters = n_filters * 16 , kernel_size = 3 , name = 'seq')

        # Decoder: transposed-conv up-sampling, skip connection from the matching
        # ConvLSTM output, then a conv block.
        u6 = TimeDistributed(Conv2DTranspose(n_filters * 8, (3, 3), strides = (2, 2), padding = 'same') , name = "up1")(seq)
        u6 = Concatenate(axis = -1)([u6, seqc4])
        c6 = self.conv2d_block(u6, n_filters * 8, kernel_size = 3, batchnorm = batchnorm , name = "c6")

        u7 = TimeDistributed(Conv2DTranspose(n_filters * 4, (3, 3), strides = (2, 2), padding = 'same')  , name = "up2")(c6)
        u7 = Concatenate(axis = -1)([u7, seqc3])
        c7 = self.conv2d_block(u7, n_filters * 4, kernel_size = 3, batchnorm = batchnorm , name = "c7")

        u8 = TimeDistributed(Conv2DTranspose(n_filters * 2, (3, 3), strides = (2, 2), padding = 'same') , name = "up3")(c7)
        u8 = Concatenate(axis = -1)([u8, seqc2])
        c8 = self.conv2d_block(u8, n_filters * 2, kernel_size = 3, batchnorm = batchnorm , name = "c8")

        u9 = TimeDistributed(Conv2DTranspose(n_filters * 1, (3, 3), strides = (2, 2), padding = 'same' ) , name = "up4")(c8)
        u9 = Concatenate(axis = -1)([u9, seqc1])
        c9 = self.conv2d_block(u9, n_filters * 1, kernel_size = 3, batchnorm = batchnorm , name = "c9")

        out = TimeDistributed(Conv2D(filters = nb_classes, kernel_size = 1 ,kernel_initializer = 'glorot_uniform', padding = 'same' , activation='softmax') , name='output')(c9)

        model = Model(inputs = inp , outputs = out , name='u_net_segmentation')
        self.model = model

        model.summary()

    def train(self , epochs = 30 ,lr = 1e-4 ,  batch_size = 16 , mem_rate = 10 , time_steps = 4 , skip = 4, dataset = None  ,
            pretrained = False , class_weights = None , model_name="time_model.h5"):
        """Train on `dataset.frameSequence_train_datagen` and plot a preview after each epoch.

        epochs        : number of passes over the generator.
        lr            : Adam learning rate.
        batch_size    : batch size of every `model.fit` call.
        mem_rate      : the generator is asked for batch_size * mem_rate windows at
                        a time; each such chunk is fitted for one Keras epoch.
        time_steps    : frames per window requested from the generator.
        skip          : frame stride requested from the generator.
        dataset       : object providing `frameSequence_train_datagen` and `class2image`.
        pretrained    : load weights from `model_name` before training.
        class_weights : forwarded to `model.fit(class_weight=...)`.
        model_name    : base name of the per-epoch weight files.

        Weights are written after every epoch and the list of per-chunk training
        losses is pickled at the end, both under
        saved_models/future_segmentation_generate/.
        """
        assert (dataset is not None),'log: give a dataset class with generator function for validation and training'

        save_dir = "saved_models/future_segmentation_generate/"
        # save_weights / open do not create missing directories.
        os.makedirs(save_dir , exist_ok = True)

        if pretrained:
            self.model.load_weights(model_name)

        self.model.compile(optimizer = Adam(lr = lr), loss = 'categorical_crossentropy', metrics = [self.jaccard_distance])

        train_losses = []

        for epoch in range(epochs):
            print("Epoch :", epoch)
            print("Training .....")
            for x , y in dataset.frameSequence_train_datagen(size=self.size[:-1] ,
                                                           batch_size= batch_size*mem_rate , time_steps = time_steps , skip = skip , normalise = True):
                # Zero two fixed index bands along axis 3 of the frame tensor
                # (presumably the image columns at both borders - confirm intent).
                x[:, :, :, 0:28, :] = 0
                x[:, :, :, 228:256, :] = 0

                hist = self.model.fit(x = x , y = y , batch_size=batch_size , epochs = 1 , verbose = 1 , class_weight = class_weights)
                train_losses.append(hist.history['loss'][0])
            self.model.save_weights(save_dir + model_name + "_e" + str(epoch))

            # Preview on the first chunk of the generator. The same generator settings
            # as for training are used, so the windows match the model's time
            # dimension and pass the generator's own checks whenever training did.
            x , y = next(dataset.frameSequence_train_datagen(size = self.size[:-1] ,batch_size = batch_size*mem_rate ,
                                                             time_steps = time_steps , skip = skip , normalise = True))
            # randrange excludes the upper bound; randint(0, n) could return n and
            # index one past the end.
            index = random.randrange(x.shape[0])
            p = self.model.predict(np.array([x[index]]))

            # Rows: input frames, ground-truth labels, predicted labels.
            plt.figure(figsize = (time_steps*time_steps,time_steps*time_steps))
            for t in range(time_steps):
                plt.subplot(3,time_steps,t+1)
                plt.imshow(x[index][t])
                plt.title("frame "+str(t+1))
                plt.subplot(3,time_steps,t+time_steps+1)
                plt.imshow(dataset.class2image(np.argmax(y[index][t] , axis = -1)))
                plt.title("label "+str(t+1))
                plt.subplot(3,time_steps,t+2*time_steps+1)
                plt.imshow(dataset.class2image(np.argmax(p[0][t] , axis = -1)))
                plt.title("label predicted "+str(t+1))

            plt.show()
            plt.figure(figsize = (10,10))
            plt.plot(train_losses)
            plt.show()

        with open(save_dir + "history_future_segmentation_generate_e30.pickle","wb") as pickle_out:
            pickle.dump(train_losses, pickle_out)
        
    
# Model hyper-parameters. `time_step` and `skip` are reused by the training cell.
size = (256 , 256  , 3)   # shape of one input frame: (H, W, channels)
n_filters = 32            # width of the first U-Net stage
time_step = 4             # frames per input sequence
skip = 4                  # frame stride used when building sequences

# Building the network also prints the Keras model summary.
net = Sequenced_Unet(nb_classes = nb_classes , size = size , n_filters = n_filters , time_step = time_step)


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 4, 256, 256,  0                                            
__________________________________________________________________________________________________
c1_conv1 (TimeDistributed)      (None, 4, 256, 256,  896         input_2[0][0]                    
__________________________________________________________________________________________________
c1_conv1_bn (TimeDistributed)   (None, 4, 256, 256,  128         c1_conv1[0][0]                   
__________________________________________________________________________________________________
c1_conv1_relu (TimeDistributed) (None, 4, 256, 256,  0           c1_conv1_bn[0][0]                
__________________________________________________________________________________________________
c1_conv2 (

In [None]:
# Training configuration. mem_rate * batch_size windows are requested from the
# generator per chunk; each chunk is then fitted in mini-batches of batch_size.
epochs = 30
batch_size = 4
lr = 1e-4
# Pass the `lr` variable rather than a second literal, so editing it above
# actually changes the optimiser.
net.train(epochs=epochs , lr = lr , dataset = proc , batch_size = batch_size , time_steps = time_step , skip = skip
             , pretrained = False , mem_rate = 11 , class_weights = None , model_name = "time_model.h5")

Epoch : 0
Training .....
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


In [None]:
# Alternative training path through Keras' generator API.
# NOTE: the model must already be compiled (Sequenced_Unet.train does that).
checkpoint_dir = os.path.join('saved_models', 'future_segmentation_generate')
os.makedirs(checkpoint_dir, exist_ok=True)   # the checkpoint callback does not create it

checkpointer = ModelCheckpoint(
        filepath=os.path.join(checkpoint_dir, 'future_segmentation_generate' + \
            '.{epoch:03d}-{loss:.3f}.hdf5'),
        # No validation data is passed below, so the default monitor ('val_loss')
        # would never be available and nothing would be saved.
        monitor='loss',
        verbose=1,
        save_weights_only=True,
        save_best_only=True)

proc = HighwayDriving_Process(data_path, labels_path, classes = None)


def endless_sequence_batches():
    """Restart the finite sequence generator forever, as fit_generator requires."""
    while True:
        produced = False
        for batch in proc.frameSequence_train_datagen(size = (256, 256)):
            produced = True
            yield batch
        if not produced:
            # Nothing to train on; stop instead of spinning forever.
            return


datagen = endless_sequence_batches()

# `u_model` only ever existed as a local inside Sequenced_Unet.__init__; the built
# network is exposed as `net.model`.
history = net.model.fit_generator(generator = datagen, steps_per_epoch = 60, epochs = 5, verbose = 1, workers = 4, shuffle = False, callbacks = [checkpointer], initial_epoch = 0)


In [None]:
proc = HighwayDriving_Process(data_path, labels_path, classes = None)

# Sanity check: print the frame-batch shape directly followed by the label-batch
# shape for every batch the sequence generator produces.
for x, y in proc.frameSequence_train_datagen(size = (256, 256)):
    print("{}{}".format(x.shape, y.shape))