
### 1.3 ResNet50V2

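DenseNet is broadly similar to MobileNetV2, with comparable depth but more parameters and a larger size. Note that the code in this section actually builds its model on a NASNetMobile backbone (see the model definition below).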

### Imports

In [1]:
import math
import os

import cv2
from keras import Model
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, Callback
from keras.layers import *
from keras.utils import Sequence
from keras.backend import epsilon
import keras.backend as K
from keras.optimizers import Adam
from keras.regularizers import l2
import numpy as np
import pandas as pd
from PIL import Image, ImageDraw, ImageEnhance

# Side length (in pixels) every image is resized to; also the model's input size.
IMAGE_SIZE = 224

# Training schedule: maximum number of epochs, default generator batch size,
# and the number of epochs EarlyStopping waits without improvement.
EPOCHS = 500
BATCH_SIZE = 32
PATIENCE = 50

# Passed to fit_generator as `use_multiprocessing` and `workers` respectively.
MULTI_PROCESSING = True
THREADS = 20
# Directory that the `image_name` column of the CSV files is joined onto.
DATA_DIR = 'images/'

# Annotation files for the training and validation splits.
TRAIN_CSV = "train.csv"
VALIDATION_CSV = "validation.csv"

Using TensorFlow backend.


### Data Generators

In [2]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` that yields training batches of images and boxes.

    Annotations are read from a CSV with the columns ``image_name``, ``x1``,
    ``y1``, ``x2`` and ``y2`` (box corners in original-image pixels). Every
    source image contributes its plain resized version and, when ``rnd_dice``
    is true, one randomly augmented version placed right after it.

    Targets are ``(x, y, width, height)`` expressed in the
    ``IMAGE_SIZE`` x ``IMAGE_SIZE`` coordinate frame of the resized image.

    Parameters
    ----------
    csv_file : str
        Path of the annotation CSV.
    rnd_rescale, rnd_multiply, rnd_color, rnd_crop, rnd_flip : bool
        Enable the individual augmentation steps (random resize, per-channel
        gain, colour/brightness jitter, border crop, rotation/flip).
    batch_size : int
        Nominal number of samples per batch (plain + augmented).
    rnd_dice : bool
        When false no augmented samples are produced at all.
    """

    # Crops that leave the object narrower/shorter than this many pixels are discarded.
    MIN_BOX_SIDE = 40

    def __init__(self, csv_file,rnd_rescale=True, rnd_multiply=True, rnd_color=True, rnd_crop=True, rnd_flip=True,
                 batch_size = BATCH_SIZE, rnd_dice=True):
        self.batch_size = batch_size
        self.rnd_rescale = rnd_rescale
        self.rnd_multiply = rnd_multiply
        self.rnd_color = rnd_color
        self.rnd_crop = rnd_crop
        self.rnd_flip = rnd_flip
        self.rnd_dice = rnd_dice

        df = pd.read_csv(csv_file)

        # The images are not opened here: their size was never used, and each
        # open file handle stayed alive for the lifetime of the generator.
        self.paths = [os.path.join(DATA_DIR, name) for name in df['image_name']]
        # Stored as (x1, y1, x2, y2) in original-image pixels.
        self.coords = df[['x1', 'y1', 'x2', 'y2']].values.astype(np.float64)

    def __len__(self):
        """Number of batches per epoch (each source image fills two slots)."""
        return math.ceil(len(self.coords)*2 / self.batch_size)

    def __getitem__(self, idx):
        """Build batch ``idx``.

        Returns a ``(images, targets)`` pair of arrays. The arrays hold exactly
        the samples produced for this batch, so a final partial batch (or a
        batch built with ``rnd_dice=False``) is smaller rather than padded
        with all-zero images and all-zero boxes.
        """
        first = idx * self.batch_size // 2
        last = (idx + 1) * self.batch_size // 2
        batch_paths = self.paths[first:last]
        boxes = self.coords[first:last]

        samples_per_image = 2 if self.rnd_dice else 1
        n_rows = len(batch_paths) * samples_per_image
        batch_coords = np.zeros((n_rows, 4))
        batch_images = np.zeros((n_rows, IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)

        row = 0
        for path, (x0, y0, x1, y1) in zip(batch_paths, boxes):
            # The context manager releases the file handle once both samples are built.
            with Image.open(path) as img:
                batch_images[row], batch_coords[row] = self._plain_sample(img, x0, y0, x1, y1)
                row += 1

                if self.rnd_dice:
                    batch_images[row], batch_coords[row] = self._augmented_sample(img, x0, y0, x1, y1)
                    row += 1

        return batch_images, batch_coords

    @staticmethod
    def _to_target(x0, y0, x1, y1, width, height):
        """Convert corner coordinates in a width x height image to the
        (x, y, w, h) target in the IMAGE_SIZE frame."""
        return (x0 * IMAGE_SIZE / width,
                y0 * IMAGE_SIZE / height,
                (x1 - x0) * IMAGE_SIZE / width,
                (y1 - y0) * IMAGE_SIZE / height)

    @staticmethod
    def _plain_sample(img, x0, y0, x1, y1):
        """Return the un-augmented (pixels, target) pair for ``img``."""
        pixels = np.array(img.resize((IMAGE_SIZE, IMAGE_SIZE)).convert('RGB'), dtype=np.float32)
        target = DataGenerator._to_target(x0, y0, x1, y1, img.width, img.height)
        return preprocess_input(pixels), target

    def _augmented_sample(self, img, x0, y0, x1, y1):
        """Return a randomly augmented (pixels, target) pair for ``img``.

        The enabled steps run in a fixed order: rescale, crop, rotate/flip,
        colour + brightness jitter, and per-channel gain.
        """
        if self.rnd_rescale:
            old_width = img.width
            old_height = img.height

            rescale = np.random.uniform(low=0.6, high=1.4)
            # Guard against a zero-sized target for very small images.
            new_width = max(1, int(old_width * rescale))
            new_height = max(1, int(old_height * rescale))

            img = img.resize((new_width, new_height))

            x0 *= new_width / old_width
            y0 *= new_height / old_height
            x1 *= new_width / old_width
            y1 *= new_height / old_height

        if self.rnd_crop:
            # Up to 15% may be trimmed from each border; randint needs high >= 1.
            margin_x = max(1, int(0.15 * img.width))
            margin_y = max(1, int(0.15 * img.height))
            start_x = np.random.randint(0, high=margin_x)
            stop_x = img.width - np.random.randint(0, high=margin_x)
            start_y = np.random.randint(0, high=margin_y)
            stop_y = img.height - np.random.randint(0, high=margin_y)

            cropped = img.crop((start_x, start_y, stop_x, stop_y))

            crop_x0 = max(x0 - start_x, 0)
            crop_y0 = max(y0 - start_y, 0)
            crop_x1 = min(x1 - start_x, cropped.width)
            crop_y1 = min(y1 - start_y, cropped.height)

            # Keep the crop only if enough of the object survives; otherwise
            # continue with the uncropped image and box.
            if (np.abs(crop_x1 - crop_x0) >= self.MIN_BOX_SIDE
                    and np.abs(crop_y1 - crop_y0) >= self.MIN_BOX_SIDE):
                img = cropped
                x0, y0, x1, y1 = crop_x0, crop_y0, crop_x1, crop_y1

        if self.rnd_flip:
            # Multiples of 10 are rotation angles in degrees; 1423 and 1234 are
            # sentinels for a vertical and a horizontal flip respectively.
            elem = np.random.choice([0, 90, 180, 270, 1423, 1234])
            if elem % 10 == 0:
                cos_a = np.cos(np.deg2rad(elem))
                sin_a = np.sin(np.deg2rad(elem))
                centre_x = img.width / 2
                centre_y = img.height / 2

                # Rotate both corners about the image centre, matching img.rotate(-elem).
                x = x0 - centre_x
                y = y0 - centre_y
                x0 = centre_x + x * cos_a - y * sin_a
                y0 = centre_y + x * sin_a + y * cos_a

                x = x1 - centre_x
                y = y1 - centre_y
                x1 = centre_x + x * cos_a - y * sin_a
                y1 = centre_y + x * sin_a + y * cos_a

                img = img.rotate(-elem)
            elif elem == 1423:
                img = img.transpose(Image.FLIP_TOP_BOTTOM)
                y0 = img.height - y0
                y1 = img.height - y1
            elif elem == 1234:
                img = img.transpose(Image.FLIP_LEFT_RIGHT)
                x0 = img.width - x0
                x1 = img.width - x1

        image_width = img.width
        image_height = img.height

        # Rotations and flips can swap the corners; restore (min, max) order
        # and clamp the box to the image.
        x0, x1 = min(x0, x1), max(x0, x1)
        y0, y1 = min(y0, y1), max(y0, y1)

        x0 = min(max(x0, 0), image_width)
        y0 = min(max(y0, 0), image_height)
        x1 = min(x1, image_width)
        y1 = min(y1, image_height)

        if self.rnd_color:
            img = ImageEnhance.Color(img).enhance(np.random.uniform(low=0.5, high=1.5))
            # Brightness must be applied to the colour-adjusted image with its
            # own enhancer; reusing the Color enhancer silently dropped the
            # first adjustment and never changed brightness.
            img = ImageEnhance.Brightness(img).enhance(np.random.uniform(low=0.7, high=1.3))

        pixels = np.array(img.resize((IMAGE_SIZE, IMAGE_SIZE)).convert('RGB'), dtype=np.float32)

        if self.rnd_multiply:
            # Independent random gain per colour channel, kept inside [0, 255].
            for channel in range(3):
                gain = np.random.uniform(low=0.8, high=1.2)
                pixels[..., channel] = np.floor(np.clip(pixels[..., channel] * gain, 0.0, 255.0))

        target = self._to_target(x0, y0, x1, y1, image_width, image_height)
        return preprocess_input(pixels), target

### Loss Function

In [3]:
def IOU(y_true, y_pred):
    """Batch-level intersection-over-union metric.

    Both tensors hold one box per row as (x, y, width, height). The
    intersection and union areas are summed over the whole batch before
    dividing, so the result is a single scalar for the batch.
    """
    true_x, true_y, true_w, true_h = y_true[:,0], y_true[:,1], y_true[:,2], y_true[:,3]
    pred_x, pred_y, pred_w, pred_h = y_pred[:,0], y_pred[:,1], y_pred[:,2], y_pred[:,3]

    # Overlap extent along each axis; negative when the boxes do not touch.
    overlap_w = K.minimum(true_x + true_w, pred_x + pred_w) - K.maximum(true_x, pred_x)
    overlap_h = K.minimum(true_y + true_h, pred_y + pred_h) - K.maximum(true_y, pred_y)
    overlap = K.maximum(overlap_w, 0) * K.maximum(overlap_h, 0)

    combined = K.maximum(true_w * true_h + pred_w * pred_h - overlap, 0)

    # Ignore the intersection of rows whose union is not positive.
    overlap = K.tf.where(combined > 0, overlap, K.zeros_like(overlap))

    # epsilon() keeps the division defined when the summed union is zero.
    return K.sum(overlap) / (K.sum(combined) + epsilon())

### Loading Validation Data into the Memory

Unlike the training data, the validation data is small enough to be loaded into memory, which helps reduce training time. Also, since we do not apply any augmentation to the validation data, we can save some time by defining a separate, simpler data generator just for validation.

In [4]:
from keras.applications.nasnet import preprocess_input

# Read the annotations from the configured path instead of a hard-coded file name.
valData = pd.read_csv(VALIDATION_CSV)

# Targets are (x, y, width, height) in the IMAGE_SIZE x IMAGE_SIZE frame;
# images are stored already resized and preprocessed.
val_coords = np.zeros((len(valData),4))
val_images = np.zeros((len(valData), IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32)

# Enumerate so the array position does not depend on the DataFrame's index labels.
for position, (_, row) in enumerate(valData.iterrows()):
    path = os.path.join(DATA_DIR, row['image_name'])
    x1 = row['x1']
    x2 = row['x2']
    y1 = row['y1']
    y2 = row['y2']

    # The context manager closes the underlying file as soon as the resized
    # RGB copy exists, so file handles are not leaked across the loop.
    with Image.open(path) as source:
        width, height = source.size
        resized = source.resize((IMAGE_SIZE, IMAGE_SIZE)).convert('RGB')

    val_coords[position, 0] = x1*IMAGE_SIZE / width
    val_coords[position, 1] = y1*IMAGE_SIZE / height
    val_coords[position, 2] = (x2 - x1)* IMAGE_SIZE / width
    val_coords[position, 3] = (y2 - y1)*IMAGE_SIZE / height

    val_images[position] = preprocess_input(np.array(resized, dtype=np.float32))

class ValDataGenerator(Sequence):
    """Keras ``Sequence`` serving pre-loaded validation images and targets.

    It slices consecutive windows of ``batch_size`` rows out of the two
    in-memory arrays; no augmentation or file access happens here.
    """

    def __init__(self, val_images, val_coords, batch_size = BATCH_SIZE):
        self.batch_size = batch_size
        self.images = val_images
        self.coords = val_coords

    def __len__(self):
        """Number of batches needed to cover every validation sample."""
        n_samples = len(self.coords)
        return math.ceil(n_samples / self.batch_size)

    def __getitem__(self, idx):
        """Return the ``(images, targets)`` pair for batch ``idx``."""
        window = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        # Targets are copied; images are returned as a slice of the stored array.
        return self.images[window], self.coords[window].copy()

Let's go ahead and define our model, using NASNetMobile as the base model.

In [5]:
from keras.applications.nasnet import NASNetMobile

def nasnet_model(trainable=True):
    """Build the bounding-box regressor on top of a NASNetMobile backbone.

    Parameters
    ----------
    trainable : bool
        Value assigned to ``trainable`` on every backbone layer.

    Returns
    -------
    keras.Model
        Maps an (IMAGE_SIZE, IMAGE_SIZE, 3) image to 4 non-negative values.
    """
    backbone = NASNetMobile(
        weights='imagenet',
        include_top=False,
        input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
    )

    for backbone_layer in backbone.layers:
        backbone_layer.trainable = trainable

    # Two unpadded convolutions shrink the final feature map down to the four
    # box values. NOTE(review): the Reshape only works if that map is 1x1 after
    # the 3x3 and 5x5 kernels, i.e. 7x7 coming out of the backbone - confirm
    # before changing IMAGE_SIZE.
    features = backbone.layers[-1].output
    head = Conv2D(256, kernel_size=3, name="mn1", activation='relu')(features)
    head = Conv2D(4, kernel_size=5, name="mn2", activation='relu')(head)
    box = Reshape((4,))(head)

    return Model(inputs=backbone.input, outputs=box)

**Note: The competition's discussion board requires "no transfer learning", which means building the backbone with `weights=None`. The cell above, as written, passes `weights='imagenet'`, which improves both the predictions and the training time; change it to `weights=None` for a rule-compliant run.**


With `weights=None` the weights are initialized randomly, so training takes a lot more time than starting off with the ImageNet weights.

In [None]:
# Build the model with every backbone layer trainable (the default) and
# print the layer-by-layer summary.
nnmob = nasnet_model()
nnmob.summary()

Now that we have defined the model, let's compile it and initialize the other objects needed for training: the checkpoint, early-stopping and learning-rate-scheduling callbacks.

In [13]:
# Train on mean absolute error of the (x, y, w, h) targets and report IOU.
# NOTE(review): 'accuracy' is of limited meaning for box regression - consider dropping it.
nnmob.compile(loss="mean_absolute_error", optimizer="adam", metrics=['accuracy',IOU])

# Save weights only, and only when validation IOU improves; the score is part of the file name.
checkpoint = ModelCheckpoint("nasnet-{val_IOU:.2f}.h5", monitor="val_IOU", verbose=1, save_best_only=True, save_weights_only=True, mode="max", period=1)

# Stop once validation IOU has not improved for PATIENCE epochs.
stop = EarlyStopping(monitor="val_IOU", patience=PATIENCE, mode="max")

# Multiply the learning rate by 0.2 after 5 epochs without validation-IOU improvement.
reduce_lr = ReduceLROnPlateau(monitor="val_IOU", factor=0.2, patience=5, min_lr=1e-7, verbose=1, mode="max")

Now, let's finally initialize our data generators and train the model.

In [14]:
# Training batches are built from disk with all augmentations enabled (the
# defaults); validation batches are sliced from the arrays loaded earlier.
train_datagen = DataGenerator(TRAIN_CSV)
validation_datagen = ValDataGenerator(val_images, val_coords)

In [None]:
# Train for up to EPOCHS epochs; the callbacks lower the learning rate, stop
# early and checkpoint the best weights based on validation IOU.
nnmob.fit_generator(generator=train_datagen,
                    validation_data=validation_datagen, 
                    epochs=EPOCHS,
                    callbacks=[reduce_lr, stop, checkpoint],
                    workers=THREADS,
                    use_multiprocessing=MULTI_PROCESSING,
                    shuffle=True,
                    verbose=1)

Epoch 1/500

Epoch 00001: val_IOU improved from -inf to 0.59068, saving model to nasnet-0.59.h5
Epoch 2/500

Epoch 00002: val_IOU did not improve from 0.59068
Epoch 3/500

Epoch 00003: val_IOU did not improve from 0.59068
Epoch 4/500

Epoch 00002: val_IOU did not improve from 0.59068
Epoch 3/500

Epoch 00004: val_IOU did not improve from 0.59068
Epoch 5/500

Epoch 00005: val_IOU did not improve from 0.59068
Epoch 6/500

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Epoch 00006: val_IOU did not improve from 0.59068
Epoch 7/500

Epoch 00007: val_IOU did not improve from 0.59068
Epoch 8/500

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.

Epoch 00008: val_IOU did not improve from 0.59068
Epoch 9/500

After training, we need to load weights from the best validation IoU file we got, and then make predictions on the test data set.

In [None]:
## Training takes a very long time, so the trained weights can instead be
## downloaded from this Google Drive folder into the working directory:
## https://drive.google.com/drive/folders/1rOVkxqHaCKjamji7c3XfdomvMbqksgU6?usp=sharing

# Checkpoint file to restore; the name follows the "nasnet-{val_IOU:.2f}.h5" pattern.
WEIGHTS_FILE = "nasnet-0.90.h5"

nnmob.load_weights(WEIGHTS_FILE)

In [None]:
# Load the test-set listing and preview its first rows.
test = pd.read_csv("test.csv")
test.head()

In [None]:
# Predict one box per test image and write it back into the frame.
for index, row in test.iterrows():
    image_path = os.path.join(DATA_DIR, row['image_name'])
    unscaled = cv2.imread(image_path)
    if unscaled is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise IOError("Could not read image: {}".format(image_path))
    image_height, image_width, _ = unscaled.shape

    image = cv2.resize(unscaled, (IMAGE_SIZE, IMAGE_SIZE))
    # OpenCV decodes to BGR, whereas the training images were decoded with PIL
    # as RGB; convert so the network sees the channel order it was trained on.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    feat_scaled = preprocess_input(np.array(image, dtype=np.float32))

    # The network outputs (x, y, width, height) in the IMAGE_SIZE frame.
    region = nnmob.predict(x=np.array([feat_scaled]))[0]

    # Scale back to the original image and convert to corner coordinates.
    x1 = (region[0] * image_width / IMAGE_SIZE)
    y1 = (region[1] * image_height / IMAGE_SIZE)

    x2 = ((region[0] + region[2]) * image_width / IMAGE_SIZE)
    y2 = ((region[1] + region[3]) * image_height / IMAGE_SIZE)

    # Positional columns 1-4 receive x1, x2, y1, y2 in that order
    # (assumes that column layout in test.csv - TODO confirm).
    test.iloc[index,1] = x1
    test.iloc[index,2] = x2
    test.iloc[index,3] = y1
    test.iloc[index,4] = y2

    if index%1000==0:
        print(index)

test.head()

In [None]:
# Persist the test-set predictions without the DataFrame index column.
test.to_csv('nnmob-prediction.csv', index=False)

Since we also require predictions on the complete training and validation set, we will also create and save CSV files for both.

In [None]:
train = pd.read_csv("training_set.csv")

# Predict one box per image and overwrite the frame's coordinate columns.
for index, row in train.iterrows():
    image_path = os.path.join(DATA_DIR, row['image_name'])
    unscaled = cv2.imread(image_path)
    if unscaled is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise IOError("Could not read image: {}".format(image_path))
    image_height, image_width, _ = unscaled.shape

    image = cv2.resize(unscaled, (IMAGE_SIZE, IMAGE_SIZE))
    # OpenCV decodes to BGR, whereas the training images were decoded with PIL
    # as RGB; convert so the network sees the channel order it was trained on.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    feat_scaled = preprocess_input(np.array(image, dtype=np.float32))

    # The network outputs (x, y, width, height) in the IMAGE_SIZE frame.
    region = nnmob.predict(x=np.array([feat_scaled]))[0]

    # Scale back to the original image and convert to corner coordinates.
    x1 = (region[0] * image_width / IMAGE_SIZE)
    y1 = (region[1] * image_height / IMAGE_SIZE)

    x2 = ((region[0] + region[2]) * image_width / IMAGE_SIZE)
    y2 = ((region[1] + region[3]) * image_height / IMAGE_SIZE)

    # Positional columns 1-4 receive x1, x2, y1, y2 in that order
    # (assumes that column layout in training_set.csv - TODO confirm).
    train.iloc[index,1] = x1
    train.iloc[index,2] = x2
    train.iloc[index,3] = y1
    train.iloc[index,4] = y2

    if index%1000==0:
        print(index)

train.to_csv('nnmob-training.csv', index=False)