In [1]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Input, Conv2D, Conv2DTranspose, MaxPool2D, AvgPool2D, Activation, Concatenate, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.applications import EfficientNetB0, EfficientNetB4
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.metrics import Recall, Precision
import tensorflow.keras.backend as K
import tensorflow_addons as tfa
import datetime
import numpy as np
import pandas as pd
import PIL
from PIL import ImageOps
import os
from pathlib import Path
from IPython.display import Image, display

In [2]:
#EfficientNetB4().summary()

In [3]:
tf.__version__

'2.3.0'

In [4]:
df = pd.read_csv('~/Desktop/freelancer_drone/data/class_dict_seg.csv')
num_classes = df.shape[0]

In [5]:
IMG_SIZE = 512

In [6]:
input_dir = 'data/dataset/semantic_drone_dataset/original_images'
target_dir = 'data/dataset/semantic_drone_dataset/label_images_semantic'
color_dir = 'data/RGB_color_image_masks/RGB_color_image_masks'

In [7]:
input_img_paths = sorted([i for i in Path(input_dir).glob('**/*.jpg')])
input_lbl_paths = sorted([i for i in Path(target_dir).glob('**/*.png')])
color_paths = sorted([i for i in Path(color_dir).glob('**/*.png')])

In [8]:
i_ = np.zeros((10,) + (255, 255) + (1, ), dtype='uint8')
img = load_img('data/dataset/semantic_drone_dataset/label_images_semantic/491.png', target_size=(255,255), color_mode='grayscale')

In [9]:
np.unique(np.expand_dims(img, 2))

array([ 1,  6,  8, 15, 22], dtype=uint8)

In [10]:
i_[0] = np.expand_dims(img, 2)

In [11]:
np.unique(i_[0])

array([ 1,  6,  8, 15, 22], dtype=uint8)

In [12]:
class CDG(tf.keras.utils.Sequence):
    def __init__(self, batch_size, img_size, input_image_path, target_image_path, num_classes):
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_image_path = input_image_path
        self.target_image_path = target_image_path
        self.num_classes = num_classes
    
    def __len__(self):
        return len(self.target_image_path) // self.batch_size
    
    def __random_flip(self, x, y):
        rand = np.random.randint(0, 21)
        if rand % 3 == 0:
            x = tf.image.flip_left_right(x)
            y = tf.image.flip_left_right(y)

        return x, y
                    
    def __random_cont(self, x):
        x = tf.image.random_contrast(x, .1, .2)
        #display(tf.keras.preprocessing.image.array_to_img(x[0]))
        return x
    
    def __random_zoom(self, x, y):
        ow, oh = np.random.randint(0, 100), np.random.randint(0, 100)
        tw, th = np.random.randint(256, self.img_size[0]-ow), np.random.randint(256, self.img_size[0]-oh)
        x = tf.image.crop_to_bounding_box(x, oh, ow, th, tw)
        y = tf.image.crop_to_bounding_box(y, oh, ow, th, tw)
        
        x = tf.image.resize(x, [self.img_size[0], self.img_size[1]])
        y = tf.image.resize(y, [self.img_size[0], self.img_size[1]])
        
        return x, y
        
    def __rotate(self, x, y):
        deg = np.random.randint(0, 359)
        x = tfa.image.rotate(x, deg)
        y = tfa.image.rotate(y, deg)
        return x, y
        
    def __getitem__(self, idx):
        input_batch = self.input_image_path[idx*self.batch_size : idx*self.batch_size + self.batch_size]
        target_batch = self.target_image_path[idx*self.batch_size : idx*self.batch_size + self.batch_size]
        
        x = np.zeros((self.batch_size,) + self.img_size + (3, ), dtype='float32')
        for j, path in enumerate(input_batch):
            img = load_img(path, target_size=self.img_size)
            #img = np.asarray(img) / 255
            img = np.asarray(img)
            x[j] = img
            
        
        y = np.zeros((self.batch_size,) + self.img_size + (1, ), dtype='uint8')
        for j, path in enumerate(target_batch):
            img = load_img(path, target_size=self.img_size, color_mode='grayscale')
            #img = np.asarray(img) / self.num_classes
            y[j] = np.expand_dims(img, 2)
        
        x, y = self.__random_zoom(x, y)
        #x = self.__random_cont(x)
        x, y  = self.__random_flip(x, y)
        x, y = self.__rotate(x, y)
        
        return x, y

In [13]:
def dice_coef(y_true, y_pred, smooth=1e-7):
    '''
    Dice coefficient for 24 categories. Ignores background pixel label 0
    Pass to model as metric during compile statement
    '''
    y_true_f = K.flatten(K.one_hot(K.cast(y_true, 'int32'), num_classes=23)[...,1:])
    y_pred_f = K.flatten(y_pred[...,1:])
    intersect = K.sum(y_true_f * y_pred_f, axis=-1)
    denom = K.sum(y_true_f + y_pred_f, axis=-1)
    return K.mean((2. * intersect / (denom + smooth)))

def dice_loss(y_true, y_pred):
    '''
    Dice loss to minimize. Pass to model as loss during compile statement
    '''
    return 1 - dice_coef(y_true, y_pred)


In [14]:
def conv_block(inputs, num_filters):
    x = Conv2D(num_filters, 3, padding='same')(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    
    x = Conv2D(num_filters, 3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    
    return x

In [15]:
def decoder(inputs, skip, num_filters):
    x = Conv2DTranspose(num_filters,(2, 2), strides=2, padding="same")(inputs)
    x = Concatenate()([x, skip])
    x = conv_block(x, num_filters)
    
    return x

In [16]:
def eff_unet(input_shape, num_classes):
    inputs = Input(shape=input_shape)
    
    enc = EfficientNetB4(include_top=False, weights="imagenet", input_tensor=inputs)
    s1 = enc.get_layer("input_1").output     
    s2 = enc.get_layer("block2a_expand_activation").output    
    s3 = enc.get_layer("block3a_expand_activation").output    
    s4 = enc.get_layer("block4a_expand_activation").output 
    
    bn = enc.get_layer("block6a_expand_activation").output

    
    db1 = decoder(bn, s4, 512)
    db2 = decoder(db1, s3, 256)
    db3 = decoder(db2, s2, 128)
    db4 = decoder(db3, s1, 64)
    
    
    outputs = Conv2D(num_classes, 3, padding='same', activation='softmax')(db4)
    
    model = Model(inputs, outputs)
    return model


In [17]:
x_train, y_train, c_train = input_img_paths[:int(.9*len(input_img_paths))], input_lbl_paths[:int(.9*len(input_img_paths))], color_paths[:int(.9*len(input_img_paths))]
x_val, y_val, c_val = input_img_paths[int(.9*len(input_img_paths)):], input_lbl_paths[int(.9*len(input_img_paths)):], color_paths[int(.9*len(input_img_paths)):] 

In [18]:
x_train[:3]

[PosixPath('data/dataset/semantic_drone_dataset/original_images/000.jpg'),
 PosixPath('data/dataset/semantic_drone_dataset/original_images/001.jpg'),
 PosixPath('data/dataset/semantic_drone_dataset/original_images/002.jpg')]

In [19]:
y_train[:3]

[PosixPath('data/dataset/semantic_drone_dataset/label_images_semantic/000.png'),
 PosixPath('data/dataset/semantic_drone_dataset/label_images_semantic/001.png'),
 PosixPath('data/dataset/semantic_drone_dataset/label_images_semantic/002.png')]

In [20]:
train_gen = CDG(3, (IMG_SIZE, IMG_SIZE), x_train, y_train, num_classes)
val_gen = CDG(3, (IMG_SIZE, IMG_SIZE), x_val, y_val, num_classes)

In [21]:
boxes = tf.random.uniform(shape=(5, 4))
box_indices = tf.random.uniform(shape=(5,), minval=0,
maxval=1, dtype=tf.int32)

In [22]:
np.unique(train_gen[0][1])



array([ 0.        ,  0.09547997,  0.10872078, ..., 21.999239  ,
       21.999266  , 22.        ], dtype=float32)

In [23]:
model_name = 'models/drone_segmentation_512_woscaling_10ep_effnetb4.h5'

In [24]:
input_shape = (IMG_SIZE, IMG_SIZE, 3)
model = eff_unet(input_shape, num_classes)
#opt = tf.keras.optimizers.Nadam(1e-3)
save_best = tf.keras.callbacks.ModelCheckpoint(model_name, monitor="val_loss", verbose=0, save_best_only=True)
#model.compile(optimizer=opt, loss='sparse_categorical_crossentropy')
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
hist = model.fit(train_gen, epochs=10, validation_data=val_gen, callbacks=[tensorboard_callback, save_best])

Epoch 1/10
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 2/10
Epoch 3/10

In [None]:
model = load_model(model_name)

In [None]:
preds = model.predict(val_gen)

In [None]:
preds.shape

In [None]:
def display_mask(i):
    """Quick utility to display a model's prediction."""
    mask = np.argmax(preds[i], axis=-1)
    print(np.unique(mask))
    #mask = np.expand_dims(mask, axis=-1)
    #print(mask.shape)
    
    seg_img = np.zeros((IMG_SIZE, IMG_SIZE, 3))
    seg_img_gs = np.zeros((IMG_SIZE, IMG_SIZE, 3))
    
    for c in range(num_classes): 
        c_codes = [df[' r'][c],  df[' g'][c], df[' b'][c]]
        for i in range(3):
            seg_arr_c = mask[:, :] == c
            seg_img[:, :, i] += ((seg_arr_c)*(c_codes[i])).astype('uint8')
            
    for c in range(num_classes): 
        for i in range(3):
            seg_arr_c_gs = mask[:, :] == c
            seg_img_gs[:, :, i] += ((seg_arr_c_gs)*(c)).astype('uint8')
    
            
    #img = PIL.ImageOps.autocontrast(tf.keras.preprocessing.image.array_to_img(mask))
    img = tf.keras.preprocessing.image.array_to_img(seg_img)
    display(img)
    return seg_img_gs

# Display results for validation image #10
i = 0

# Display input image
display(Image(filename=x_val[i], width=IMG_SIZE, height=IMG_SIZE))

# Display ground-truth target mask
img = PIL.ImageOps.autocontrast(load_img(y_val[i], target_size=(IMG_SIZE, IMG_SIZE)))
display(img)

# Display color mask
display(Image(filename=c_val[i], width=IMG_SIZE, height=IMG_SIZE))

# Display mask predicted by our model
seg_img_gs = display_mask(i)  # Note that the model only sees inputs at 150x150.

In [None]:
mean_iou(preds[0], np.asarray(val_gen[0][1][2]))

In [None]:
import joblib

In [None]:
joblib.dump(model, 'models/model_joblib.pkl')