In [1]:
import csv
import math
import os
import cv2

from PIL import Image, ImageDraw, ImageEnhance
import numpy as np
import pandas as pd
from keras import Model
from keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, Callback
from keras.layers import *
from keras.utils import Sequence
from keras.backend import epsilon
import keras.backend as K
from keras.optimizers import Adam
from keras.regularizers import l2

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import keras.backend as K
DATA_DIR = "images"

# 0.35, 0.5, 0.75, 1.0, 1.3, 1.4
ALPHA = 0.75

# 96, 128, 160, 192, 224
IMAGE_SIZE = 224

EPOCHS = 500
BATCH_SIZE = 32
PATIENCE = 50

MULTI_PROCESSING = True
THREADS = 20

data = pd.read_csv('training.csv')
train, test = train_test_split(data, test_size=0.1, random_state=123)

train.reset_index(inplace=True, drop=True)
test.reset_index(inplace=True, drop=True)

train.to_csv('train.csv', index=False)
test.to_csv('validation.csv', index=False)

TRAIN_CSV = "train.csv"
VALIDATION_CSV = "validation.csv"

Using TensorFlow backend.


In [2]:
df = pd.read_csv('validation.csv')

val_coords = np.zeros((len(df),4))
val_images = np.zeros((len(df), IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)

for index, row in df.iterrows():
    path = os.path.join(DATA_DIR, row['image_name'])
    x1 = row['x1']
    x2 = row['x2']
    y1 = row['y1']
    y2 = row['y2']

    img = Image.open(path)
    width, height = img.size

    val_coords[index, 0] = x1*IMAGE_SIZE / width
    val_coords[index, 1] = y1*IMAGE_SIZE / height
    val_coords[index, 2] = (x2 - x1)* IMAGE_SIZE / width
    val_coords[index, 3] = (y2 - y1)*IMAGE_SIZE / height 
    
    img = img.resize((IMAGE_SIZE, IMAGE_SIZE))
    img = img.convert('RGB')
    pil_img = img
    img = np.array(img, dtype=np.float32)
    val_images[index] = preprocess_input(img.copy())
    pil_img.close()




In [11]:

class DataGenerator(Sequence):

    def __init__(self, csv_file,rnd_rescale=True, rnd_multiply=True, rnd_color=True, rnd_crop=True, rnd_flip=True,
                 batch_size = BATCH_SIZE, rnd_dice=True):
        self.paths = []
        self.coords = []
        self.batch_size = batch_size
        self.rnd_rescale = rnd_rescale
        self.rnd_multiply = rnd_multiply
        self.rnd_color = rnd_color
        self.rnd_crop = rnd_crop
        self.rnd_flip = rnd_flip
        self.rnd_dice = rnd_dice

        with open(csv_file, "r") as file:
            self.coords = np.zeros((sum(1 for line in file)-1, 4))
            
        df = pd.read_csv(csv_file)

        for index, row in df.iterrows():
            path = os.path.join(DATA_DIR, row['image_name'])
            x1 = row['x1']
            x2 = row['x2']
            y1 = row['y1']
            y2 = row['y2']

            img = Image.open(path)
            width, height = img.size

            self.coords[index, 0] = x1
            self.coords[index, 1] = y1
            self.coords[index, 2] = x2 
            self.coords[index, 3] = y2 

            self.paths.append(path)
        
    def __len__(self):
        return math.ceil(len(self.coords)*2 / self.batch_size)

    def __getitem__(self, idx):
        batch_paths = self.paths[idx * self.batch_size//2:(idx + 1) * self.batch_size//2]
        coords = self.coords[idx * self.batch_size//2:(idx + 1) * self.batch_size//2].copy()
        batch_coords = np.zeros((self.batch_size,4))
        batch_images = np.zeros((self.batch_size, IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)
        i = 0
        for j, f in enumerate(batch_paths):
            img = Image.open(f)
            x0,y0,x1,y1 = coords[j]
            image_width = img.width
            image_height = img.height
            img2 = img.resize((IMAGE_SIZE, IMAGE_SIZE))
            img2 = img2.convert('RGB')
            img2 = np.array(img2, dtype=np.float32)
            batch_images[i] = preprocess_input(img2.copy())
            
            batch_coords[i, 0] = x0 * IMAGE_SIZE / image_width
            batch_coords[i, 1] = y0 * IMAGE_SIZE / image_height
            batch_coords[i, 2] = (x1 - x0) * IMAGE_SIZE / image_width
            batch_coords[i, 3] = (y1 - y0) * IMAGE_SIZE / image_height 
            
            if self.rnd_dice:
                
                select = np.random.randint(5)
                
                if select==0 and self.rnd_rescale:
                    old_width = img.width
                    old_height = img.height

                    rescale = np.random.uniform(low=0.6, high=1.4)
                    new_width = int(old_width * rescale)
                    new_height = int(old_height * rescale)

                    img = img.resize((new_width, new_height))

                    x0 *= new_width / old_width
                    y0 *= new_height / old_height
                    x1 *= new_width / old_width
                    y1 *= new_height / old_height

                if select==1 and self.rnd_crop:
                    start_x = np.random.randint(0, high=np.floor(0.15 * img.width))
                    stop_x = img.width - np.random.randint(0, high=np.floor(0.15 * img.width))
                    start_y = np.random.randint(0, high=np.floor(0.15 * img.height))
                    stop_y = img.height - np.random.randint(0, high=np.floor(0.15 * img.height))

                    img = img.crop((start_x, start_y, stop_x, stop_y))

                    x0 = max(x0 - start_x, 0)
                    y0 = max(y0 - start_y, 0)
                    x1 = min(x1 - start_x, img.width)
                    y1 = min(y1 - start_y, img.height)

                    if np.abs(x1 - x0) < 5 or np.abs(y1 - y0) < 5:
                        print("\nWarning: cropped too much (obj width {}, obj height {}, img width {}, img height {})\n".format(x1 - x0, y1 - y0, img.width, img.height))

                if select==2 and self.rnd_flip:
                    elem = np.random.choice([0, 90, 180, 270, 1423, 1234])
                    if elem % 10 == 0:
                        x = x0 - img.width / 2
                        y = y0 - img.height / 2

                        x0 = img.width / 2 + x * np.cos(np.deg2rad(elem)) - y * np.sin(np.deg2rad(elem))
                        y0 = img.height / 2 + x * np.sin(np.deg2rad(elem)) + y * np.cos(np.deg2rad(elem))

                        x = x1 - img.width / 2
                        y = y1 - img.height / 2

                        x1 = img.width / 2 + x * np.cos(np.deg2rad(elem)) - y * np.sin(np.deg2rad(elem))
                        y1 = img.height / 2 + x * np.sin(np.deg2rad(elem)) + y * np.cos(np.deg2rad(elem))

                        img = img.rotate(-elem)
                    else:
                        if elem == 1423:
                            img = img.transpose(Image.FLIP_TOP_BOTTOM)
                            y0 = img.height - y0
                            y1 = img.height - y1

                        elif elem == 1234:
                            img = img.transpose(Image.FLIP_LEFT_RIGHT)
                            x0 = img.width - x0
                            x1 = img.width - x1

                image_width = img.width
                image_height = img.height

                tmp = x0
                x0 = min(x0, x1)
                x1 = max(tmp, x1)

                tmp = y0
                y0 = min(y0, y1)
                y1 = max(tmp, y1)

                x0 = max(x0, 0)
                y0 = max(y0, 0)

                y0 = min(y0, image_height)
                x0 = min(x0, image_width)
                y1 = min(y1, image_height)
                x1 = min(x1, image_width)

                if select==3 and self.rnd_color:
                    enhancer = ImageEnhance.Color(img)
                    img = enhancer.enhance(np.random.uniform(low=0.5, high=1.5))

                    enhancer2 = ImageEnhance.Brightness(img)
                    img = enhancer.enhance(np.random.uniform(low=0.7, high=1.3))

                img = img.resize((IMAGE_SIZE, IMAGE_SIZE))
                img = img.convert('RGB')
                pil_img = img
                img = np.array(img, dtype=np.float32)
                pil_img.close()
                
                if select==4 and self.rnd_multiply:
                    img[...,0] = np.floor(np.clip(img[...,0] * np.random.uniform(low=0.8, high=1.2), 0.0, 255.0))
                    img[...,1] = np.floor(np.clip(img[...,1] * np.random.uniform(low=0.8, high=1.2), 0.0, 255.0))
                    img[...,2] = np.floor(np.clip(img[...,2] * np.random.uniform(low=0.8, high=1.2), 0.0, 255.0))

                batch_images[i+1] = preprocess_input(img.copy())

                batch_coords[i+1, 0] = x0 * IMAGE_SIZE / image_width
                batch_coords[i+1, 1] = y0 * IMAGE_SIZE / image_height
                batch_coords[i+1, 2] = (x1 - x0) * IMAGE_SIZE / image_width
                batch_coords[i+1, 3] = (y1 - y0) * IMAGE_SIZE / image_height 
                
            i+=2
            
        return batch_images, batch_coords

class Validation(Callback):
    def __init__(self, generator):
        self.generator = generator

    def on_epoch_end(self, epoch, logs):
        mse = 0
        intersections = 0
        unions = 0

        for i in range(len(self.generator)):
            batch_images, gt = self.generator[i]
            pred = self.model.predict_on_batch(batch_images)
            mse += np.linalg.norm(gt - pred, ord='fro') / pred.shape[0]

            pred = np.maximum(pred, 0)

            diff_width = np.minimum(gt[:,0] + gt[:,2], pred[:,0] + pred[:,2]) - np.maximum(gt[:,0], pred[:,0])
            diff_height = np.minimum(gt[:,1] + gt[:,3], pred[:,1] + pred[:,3]) - np.maximum(gt[:,1], pred[:,1])
            intersection = np.maximum(diff_width, 0) * np.maximum(diff_height, 0)

            area_gt = gt[:,2] * gt[:,3]
            area_pred = pred[:,2] * pred[:,3]
            union = np.maximum(area_gt + area_pred - intersection, 0)

            intersections += np.sum(intersection * (union > 0))
            unions += np.sum(union)

        iou = np.round(intersections / (unions + epsilon()), 4)
        logs["val_iou"] = iou

        mse = np.round(mse, 4)
        logs["val_mse"] = mse

        print(" - val_iou: {} - val_mse: {}".format(iou, mse))

        
class ValDataGenerator(Sequence):
    def __init__(self, csv_file, val_images, val_coords, batch_size = BATCH_SIZE):
        self.images = val_images
        self.coords = val_coords
        self.batch_size = batch_size
        
    def __len__(self):
        return math.ceil(len(self.coords) / self.batch_size)
    
    def __getitem__(self, idx):
        batch_coords = self.coords[idx * self.batch_size:(idx + 1) * self.batch_size].copy()
        batch_images = self.images[idx * self.batch_size:(idx + 1) * self.batch_size]
        return batch_images, batch_coords

def IOU(y_true, y_pred):
    diff_width = K.minimum(y_true[:,0] + y_true[:,2], y_pred[:,0] + y_pred[:,2]) - K.maximum(y_true[:,0], y_pred[:,0])
    diff_height = K.minimum(y_true[:,1] + y_true[:,3], y_pred[:,1] + y_pred[:,3]) - K.maximum(y_true[:,1], y_pred[:,1])
    intersection = K.maximum(diff_width, 0) * K.maximum(diff_height, 0)

    area_gt = y_true[:,2] * y_true[:,3]
    area_pred = y_pred[:,2] * y_pred[:,3]
    union = K.maximum(area_gt + area_pred - intersection, 0)

    #ntersection = K.sum(intersection * (union > 0))
    intersection = K.tf.where(union > 0, intersection, K.zeros_like(intersection))
    intersection = K.sum(intersection)
    union = K.sum(union)
    iou = (intersection / (union + epsilon()))
    return iou

def IOU_loss(y_true, y_pred):
    return -IOU(y_true, y_pred)

In [4]:
def create_model(trainable=False):
    model = MobileNetV2(input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
                        include_top=False, alpha=ALPHA, weights=None)

    # to freeze layers

    for layer in model.layers:
        layer.trainable = True

    x = model.layers[-1].output
    x = AveragePooling2D(pool_size=7,data_format="channels_last")(x)

    x = Flatten()(x)
    x = Dense(256,kernel_initializer='he_normal',kernel_regularizer=l2(1e-3))(x)
    x = Dense(4,kernel_initializer='he_normal',kernel_regularizer=l2(1e-3))(x)

    return Model(inputs=model.input, outputs=x)

In [5]:
model = create_model()
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 24) 648         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 24) 96          Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu

In [16]:
train_datagen = DataGenerator(TRAIN_CSV)
validation_datagen = Validation(generator=ValDataGenerator(VALIDATION_CSV, val_images, val_coords))

In [7]:
# model.load_weights(WEIGHTS_FILE)

In [8]:
model.compile(loss="mean_absolute_error", optimizer="adam", metrics=['accuracy',IOU])

checkpoint = ModelCheckpoint("mobilenetv2-flatten-{val_iou:.2f}.h5", monitor="val_iou", verbose=1, save_best_only=True,
                             save_weights_only=True, mode="max", period=1)
stop = EarlyStopping(monitor="val_iou", patience=PATIENCE, mode="max")
reduce_lr = ReduceLROnPlateau(monitor="val_iou", factor=0.2, patience=10, min_lr=1e-7, verbose=1, mode="max")

# model.summary()

In [17]:
model.fit_generator(generator=train_datagen,
                    epochs=EPOCHS,
                    callbacks=[validation_datagen, reduce_lr, stop, checkpoint],
                    workers=THREADS,
                    use_multiprocessing=MULTI_PROCESSING,
                    shuffle=True,
                    verbose=1)


Epoch 1/500

 - val_iou: 0.8502000000000001 - val_mse: 201.4027

Epoch 00001: val_iou did not improve from 0.87880
Epoch 2/500
 - val_iou: 0.8544 - val_mse: 185.3677

Epoch 00002: val_iou did not improve from 0.87880
Epoch 3/500

 - val_iou: 0.8858 - val_mse: 172.7478

Epoch 00003: val_iou improved from 0.87880 to 0.88580, saving model to mobilenetv2-flatten-0.89.h5
Epoch 4/500
 - val_iou: 0.8771 - val_mse: 177.7503

Epoch 00004: val_iou did not improve from 0.88580
Epoch 5/500
178/788 [=====>........................] - ETA: 1:31 - loss: 3.8628 - acc: 0.9551 - IOU: 0.9141

 - val_iou: 0.8884000000000001 - val_mse: 172.6704

Epoch 00005: val_iou improved from 0.88580 to 0.88840, saving model to mobilenetv2-flatten-0.89.h5
Epoch 6/500

 - val_iou: 0.8404 - val_mse: 208.56560000000002

Epoch 00006: val_iou did not improve from 0.88840
Epoch 7/500
 89/788 [==>...........................] - ETA: 1:43 - loss: 3.6093 - acc: 0.9712 - IOU: 0.9182

 - val_iou: 0.8323 - val_mse: 193.9399000000000

 - val_iou: 0.8871 - val_mse: 163.7015

Epoch 00028: val_iou did not improve from 0.90080
Epoch 29/500
 - val_iou: 0.8919 - val_mse: 161.2168

Epoch 00029: val_iou did not improve from 0.90080
Epoch 30/500
 - val_iou: 0.8867 - val_mse: 165.1876

Epoch 00030: val_iou did not improve from 0.90080
Epoch 31/500
 - val_iou: 0.8953000000000001 - val_mse: 166.6339

Epoch 00031: val_iou did not improve from 0.90080
Epoch 32/500
 - val_iou: 0.8812000000000001 - val_mse: 164.5499

Epoch 00032: val_iou did not improve from 0.90080
Epoch 33/500
 18/788 [..............................] - ETA: 2:02 - loss: 3.1165 - acc: 0.9722 - IOU: 0.9305

 - val_iou: 0.893 - val_mse: 161.8727

Epoch 00033: val_iou did not improve from 0.90080
Epoch 34/500
 30/788 [>.............................] - ETA: 1:49 - loss: 2.7774 - acc: 0.9698 - IOU: 0.9372

 - val_iou: 0.881 - val_mse: 169.9402

Epoch 00034: val_iou did not improve from 0.90080
Epoch 35/500

 - val_iou: 0.8998 - val_mse: 159.7588

Epoch 00035: ReduceLRO

 - val_iou: 0.9099 - val_mse: 153.5016

Epoch 00055: val_iou did not improve from 0.91060
Epoch 56/500

 - val_iou: 0.9105000000000001 - val_mse: 153.22660000000002

Epoch 00056: val_iou did not improve from 0.91060
Epoch 57/500
 - val_iou: 0.907 - val_mse: 152.31640000000002

Epoch 00057: val_iou did not improve from 0.91060
Epoch 58/500
 - val_iou: 0.9092 - val_mse: 153.4836

Epoch 00058: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05.

Epoch 00058: val_iou did not improve from 0.91060
Epoch 59/500
 - val_iou: 0.9098 - val_mse: 153.12040000000002

Epoch 00059: val_iou did not improve from 0.91060
Epoch 60/500
 - val_iou: 0.9102 - val_mse: 153.178

Epoch 00060: val_iou did not improve from 0.91060
Epoch 61/500
 - val_iou: 0.9093 - val_mse: 153.16330000000002

Epoch 00061: val_iou did not improve from 0.91060
Epoch 62/500
 - val_iou: 0.9097000000000001 - val_mse: 153.1686

Epoch 00062: val_iou did not improve from 0.91060
Epoch 63/500

 - val_iou: 0.9093 - val_mse:

 - val_iou: 0.9095000000000001 - val_mse: 153.335

Epoch 00084: ReduceLROnPlateau reducing learning rate to 1.6000001778593287e-06.

Epoch 00084: val_iou did not improve from 0.91100
Epoch 85/500
 - val_iou: 0.9096000000000001 - val_mse: 153.4049

Epoch 00085: val_iou did not improve from 0.91100
Epoch 86/500
 29/788 [>.............................] - ETA: 1:49 - loss: 1.2611 - acc: 0.9860 - IOU: 0.9693

 - val_iou: 0.9098 - val_mse: 153.4219

Epoch 00086: val_iou did not improve from 0.91100
Epoch 87/500
 90/788 [==>...........................] - ETA: 1:40 - loss: 1.3496 - acc: 0.9809 - IOU: 0.9682
 91/788 [==>...........................] - ETA: 1:40 - loss: 1.3470 - acc: 0.9808 - IOU: 0.9682
 - val_iou: 0.9101 - val_mse: 153.466

Epoch 00087: val_iou did not improve from 0.91100
Epoch 88/500

 - val_iou: 0.9098 - val_mse: 153.4504

Epoch 00088: val_iou did not improve from 0.91100
Epoch 89/500
 - val_iou: 0.9099 - val_mse: 153.4442

Epoch 00089: val_iou did not improve from 0.91100
E

 - val_iou: 0.91 - val_mse: 153.446

Epoch 00113: val_iou did not improve from 0.91100
Epoch 114/500
 - val_iou: 0.9101 - val_mse: 153.4411

Epoch 00114: ReduceLROnPlateau reducing learning rate to 1e-07.

Epoch 00114: val_iou did not improve from 0.91100


<keras.callbacks.History at 0x7f473e61fbe0>

In [None]:
sample = pd.read_csv("train.csv")
sample.head()

In [None]:
import glob
import cv2
import numpy as np
import matplotlib.pyplot as plt

from keras.applications.mobilenetv2 import preprocess_input

WEIGHTS_FILE = "densenet_no_wts-data_aug-0.91.h5"
IMAGES = "images/*png"

# model = create_model()
model.load_weights(WEIGHTS_FILE)


In [None]:

for filename in glob.glob(IMAGES)[8000:8010]:
    unscaled = cv2.imread(filename)
    image_height, image_width, _ = unscaled.shape

    image = cv2.resize(unscaled, (IMAGE_SIZE, IMAGE_SIZE))
    feat_scaled = preprocess_input(np.array(image, dtype=np.float32))

    region = model.predict(x=np.array([feat_scaled]))[0]

    x0 = int(region[0] * image_width / IMAGE_SIZE)
    y0 = int(region[1] * image_height / IMAGE_SIZE)

    x1 = int((region[0] + region[2]) * image_width / IMAGE_SIZE)
    y1 = int((region[1] + region[3]) * image_height / IMAGE_SIZE)

    cv2.rectangle(unscaled, (x0, y0), (x1, y1), (0, 255, 0), 2)
    plt.imshow(unscaled)
    plt.show()

In [None]:
sample = pd.read_csv("validation.csv")
sample.head()

In [None]:
for index, row in sample.iterrows():
    unscaled = cv2.imread('images/'+row['image_name'])
    image_height, image_width, _ = unscaled.shape

    image = cv2.resize(unscaled, (IMAGE_SIZE, IMAGE_SIZE))
    feat_scaled = preprocess_input(np.array(image, dtype=np.float32))

    region = model.predict(x=np.array([feat_scaled]))[0]

    x1 = int(region[0] * image_width / IMAGE_SIZE)
    y1 = int(region[1] * image_height / IMAGE_SIZE)

    x2 = int((region[0] + region[2]) * image_width / IMAGE_SIZE)
    y2 = int((region[1] + region[3]) * image_height / IMAGE_SIZE)

    sample.iloc[index,1] = x1
    sample.iloc[index,2] = x2
    sample.iloc[index,3] = y1
    sample.iloc[index,4] = y2

sample.head()

In [None]:
sample.to_csv('valid_pred_densenet121.csv', index=False)