In [1]:
%config IPCompleter.greedy=True

In [2]:
import cv2
import numpy as np 

import pandas as pd
import tensorflow as tf

from efficientnet.keras import *
from classification_models.keras import Classifiers

import keras
from keras import backend as K
from keras import Input
from keras.models import Model
from keras.utils import *
from keras.layers import *
from keras.losses import categorical_crossentropy

from albumentations import *
from albumentations.core.transforms_interface import DualTransform
from albumentations.augmentations import functional as F

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, KFold

from tensorflow import set_random_seed
from tqdm import tnrange, tqdm_notebook
import matplotlib.pyplot as plt

# Fix the global NumPy and TensorFlow seeds (both 2020) before any random work is done.
np.random.seed(2020)
set_random_seed(2020)

import os
import gc

Using TensorFlow backend.


In [3]:
class GridMask(DualTransform):
    """Grid-pattern masking augmentation.

    A grid mask slightly larger than the image is prepared for every grid count
    in ``num_grid``; on each call one of them is picked at random, a randomly
    offset window of the image's size is cut out of it, and the image is
    multiplied by that window.

    Args:
        num_grid: int or (min, max) tuple. One mask is prepared for every grid
            count in the inclusive range.
        fill_value: value written into the masked squares of the mask. The mask
            is multiplied with the image, so 0 blanks the covered pixels.
        rotate: int or (min, max) tuple of degrees; an int ``r`` becomes
            ``(-r, r)``. The mask is rotated only when the upper bound is > 0.
        mode: 0 masks the top-left quarter of every grid cell, 2 additionally
            masks the bottom-right quarter, 1 uses the mode-0 mask inverted
            (``1 - mask``).
        always_apply: forwarded to ``DualTransform``.
        p: forwarded to ``DualTransform``.
    """

    def __init__(self, num_grid=3, fill_value=0, rotate=0, mode=0, always_apply=False, p=0.5):
        super(GridMask, self).__init__(always_apply, p)
        if isinstance(num_grid, int):
            num_grid = (num_grid, num_grid)
        if isinstance(rotate, int):
            rotate = (-rotate, rotate)
        self.num_grid = num_grid
        self.fill_value = fill_value
        self.rotate = rotate
        self.mode = mode
        self.masks = None
        # (height, width) the cached masks were built for; None until first use.
        self._mask_shape = None
        self.rand_h_max = []
        self.rand_w_max = []

    def init_masks(self, height, width):
        """Build (or rebuild) the cached masks for images of ``height`` x ``width``.

        The masks are cached, but the cache is keyed on the image size: a mask
        built for one size cannot be windowed correctly for another, so the
        cache is rebuilt whenever the size changes.
        """
        if self.masks is not None and self._mask_shape == (height, width):
            return

        masks, rand_h_max, rand_w_max = [], [], []
        for n_g in range(self.num_grid[0], self.num_grid[1] + 1):
            grid_h = height / n_g
            grid_w = width / n_g
            # One extra grid cell in each direction so that a window shifted by
            # up to one cell still lies inside the mask.
            this_mask = np.ones((int((n_g + 1) * grid_h), int((n_g + 1) * grid_w))).astype(np.uint8)
            for i in range(n_g + 1):
                for j in range(n_g + 1):
                    this_mask[
                        int(i * grid_h): int(i * grid_h + grid_h / 2),
                        int(j * grid_w): int(j * grid_w + grid_w / 2)
                    ] = self.fill_value
                    if self.mode == 2:
                        this_mask[
                            int(i * grid_h + grid_h / 2): int(i * grid_h + grid_h),
                            int(j * grid_w + grid_w / 2): int(j * grid_w + grid_w)
                        ] = self.fill_value

            if self.mode == 1:
                this_mask = 1 - this_mask

            masks.append(this_mask)
            rand_h_max.append(grid_h)
            rand_w_max.append(grid_w)

        # Publish the new cache only once it is complete.
        self.masks = masks
        self.rand_h_max = rand_h_max
        self.rand_w_max = rand_w_max
        self._mask_shape = (height, width)

    def apply(self, image, mask, rand_h, rand_w, angle, **params):
        """Return ``image`` multiplied by the selected window of ``mask``.

        The input array is left untouched; a new array of the same dtype is
        returned.
        """
        h, w = image.shape[:2]
        mask = F.rotate(mask, angle) if self.rotate[1] > 0 else mask
        mask = mask[:, :, np.newaxis] if image.ndim == 3 else mask
        window = mask[rand_h:rand_h + h, rand_w:rand_w + w].astype(image.dtype)
        # Out-of-place multiply: do not modify the caller's image.
        return image * window

    def get_params_dependent_on_targets(self, params):
        """Pick the mask, its window offset and the rotation angle for this call."""
        img = params['image']
        height, width = img.shape[:2]
        self.init_masks(height, width)

        mid = np.random.randint(len(self.masks))
        mask = self.masks[mid]
        # The stored bounds are float cell sizes; truncate them explicitly
        # instead of relying on NumPy to coerce a float upper bound.
        rand_h = np.random.randint(int(self.rand_h_max[mid]))
        rand_w = np.random.randint(int(self.rand_w_max[mid]))
        angle = np.random.randint(self.rotate[0], self.rotate[1]) if self.rotate[1] > 0 else 0

        return {'mask': mask, 'rand_h': rand_h, 'rand_w': rand_w, 'angle': angle}

    @property
    def targets_as_params(self):
        return ['image']

    def get_transform_init_args_names(self):
        return ('num_grid', 'fill_value', 'rotate', 'mode')

In [4]:
'''
    TRAINING CONFIG
'''
# --- Data ---------------------------------------------------------------
IMG_SIZE = (128, 128, 1)                          # used as the model input_shape and batch shape
IMAGE_DATA = '../data/train_images_128x128_raw'   # directory the generator reads '<image_id>.png' from
data_name = '128x128_raw'                         # tag embedded in checkpoint / log file names
BATCH_SIZE = 64
test_size = 0.15                                  # fraction passed to train_test_split

# --- Model --------------------------------------------------------------
model_name = 'densenet169'
weights = None                                    # forwarded to the backbone constructor
pretrained_weights = '../data/model_weights/densenet169/densenet169_0.9347_128x128_raw.h5'

# --- Schedule -----------------------------------------------------------
initial_epoch = 7                                 # passed to fit_generator as initial_epoch
no_of_epochs = 100
epochs_per_cycle = 100                            # cycle length used by scheduler()
initial_lr = 0.001
min_lr = 1e-6

# --- Labels -------------------------------------------------------------
df = pd.read_csv('../data/train.csv')

# --- Augmentation -------------------------------------------------------
# Training images get a random shift/scale/rotate followed by a single
# cutout hole of at most half the image size; validation images are not augmented.
train_gen = Compose([
    ShiftScaleRotate(
        shift_limit=0.1,
        scale_limit=0.1,
        rotate_limit=20,
        border_mode=cv2.BORDER_CONSTANT,
        value=0,
    ),
    Cutout(
        num_holes=1,
        max_h_size=IMG_SIZE[0] // 2,
        max_w_size=IMG_SIZE[1] // 2,
    ),
])
val_gen = None

In [5]:
def build_model():
    """Build and compile the three-output classifier from the module-level config.

    The backbone is ``EfficientNetB0`` when ``model_name`` contains
    ``'efficientnet'``, otherwise the class returned by
    ``Classifiers.get(model_name)``. Its output goes through dropout and global
    average pooling into three softmax heads named ``grapheme`` (168 units),
    ``vowel`` (11) and ``consonant`` (7). If ``pretrained_weights`` is set, the
    weights are loaded before the model is compiled.

    Returns:
        The compiled Keras ``Model``.
    """
    if 'efficientnet' in model_name:
        backbone = EfficientNetB0(weights=weights, include_top=False, input_shape=IMG_SIZE)
    else:
        backbone_cls, _ = Classifiers.get(model_name)
        backbone = backbone_cls(weights=weights, include_top=False, input_shape=IMG_SIZE)

    features = Dropout(0.2)(backbone.output)
    features = GlobalAveragePooling2D()(features)

    # One softmax head per target, created in the order the generator yields labels.
    head_specs = (('grapheme', 168), ('vowel', 11), ('consonant', 7))
    heads = [
        Dense(units, activation='softmax', kernel_initializer='he_normal', name=head_name)(features)
        for head_name, units in head_specs
    ]

    model = Model(inputs=[backbone.input], outputs=heads)
    if pretrained_weights is not None:
        model.load_weights(pretrained_weights)

    optimizer = keras.optimizers.Adam(lr=initial_lr)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=[recall])
    return model
        
def recall(y_true, y_pred):
    """Batch recall metric: rounded true positives over rounded actual positives.

    Both the product ``y_true * y_pred`` and ``y_true`` are clipped to [0, 1]
    and rounded before being summed; ``K.epsilon()`` guards the division.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hits / (actual_positives + K.epsilon())

def scheduler(epoch, lr):
    """Cosine learning-rate schedule that restarts every ``epochs_per_cycle`` epochs.

    Returns ``initial_lr`` at the start of a cycle and decays towards
    ``min_lr`` along a half cosine. The incoming ``lr`` is not used.
    """
    amplitude = initial_lr - min_lr
    angle = np.pi * (epoch % epochs_per_cycle) / epochs_per_cycle
    return min_lr + amplitude * (1 + np.cos(angle)) / 2

def label_smoothing_loss(y_true, y_pred):
    """Categorical cross-entropy on probabilities with label smoothing of 0.2."""
    smoothed_loss = categorical_crossentropy(
        y_true,
        y_pred,
        from_logits=False,
        label_smoothing=0.2,
    )
    return smoothed_loss
    

In [6]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` yielding image batches with three one-hot label arrays.

    Args:
        df: DataFrame with columns ``image_id``, ``grapheme_root``,
            ``vowel_diacritic`` and ``consonant_diacritic``.
        list_id: image ids this generator iterates over.
        generator: optional callable invoked as ``generator(image=...)`` whose
            result's ``'image'`` entry replaces the image (e.g. an
            albumentations pipeline); ``None`` disables augmentation.
        batch_size: number of samples per batch.
    """

    def __init__(self, df, list_id, generator=None, batch_size=32):
        self.data = self.build_data(df)
        # Keep a private copy: on_epoch_end shuffles in place and must not
        # reorder the array owned by the caller.
        self.list_id = np.array(list_id)
        self.generator = generator
        self.batch_size = batch_size

    def build_data(self, df):
        """Return ``{image_id: [grapheme_root, vowel_diacritic, consonant_diacritic]}``.

        Rows are paired positionally, so the result does not depend on the
        DataFrame's index labels.
        """
        return {
            image_id: [root, vowel, consonant]
            for image_id, root, vowel, consonant in zip(
                df['image_id'],
                df['grapheme_root'],
                df['vowel_diacritic'],
                df['consonant_diacritic'],
            )
        }

    def get_data(self, image_id):
        """Load ``<IMAGE_DATA>/<image_id>.png`` as grayscale and look up its label.

        Returns:
            ``(image, label)`` where ``image`` has a trailing channel axis and
            ``label`` is the three-element list stored by ``build_data``.

        Raises:
            FileNotFoundError: if OpenCV could not read the image file.
        """
        image_path = os.path.join(IMAGE_DATA, image_id + '.png')
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('Could not read image: {}'.format(image_path))
        image = np.expand_dims(image, axis=2)
        label = self.data[image_id]
        return image, label

    def __len__(self):
        # Number of full batches; a trailing partial batch is dropped.
        return int(np.floor(len(self.list_id) / self.batch_size))

    def on_epoch_end(self):
        # Reshuffle the sample order between epochs.
        np.random.shuffle(self.list_id)

    def __getitem__(self, index):
        """Return batch ``index`` as ``(x, [y_grapheme, y_vowel, y_consonant])``."""
        batch_ids = self.list_id[index * self.batch_size:(index + 1) * self.batch_size]
        n_samples = len(batch_ids)
        x = np.empty((n_samples, *IMG_SIZE), dtype=np.uint8)
        y1 = np.zeros((n_samples, 168), dtype='float32')
        y2 = np.zeros((n_samples, 11), dtype='float32')
        y3 = np.zeros((n_samples, 7), dtype='float32')
        for i, image_id in enumerate(batch_ids):
            image, label = self.get_data(image_id)
            if self.generator is not None:
                image = self.generator(image=image)['image']
            x[i] = image
            # One-hot encode each of the three targets.
            y1[i, label[0]] = 1
            y2[i, label[1]] = 1
            y3[i, label[2]] = 1

        return x, [y1, y2, y3]

In [7]:
# Split the image ids into train / validation lists with a fixed random_state
# so the split is the same on every run.
list_id = np.asarray(df['image_id'])
train_list, val_list = train_test_split(
    list_id,
    test_size=test_size,
    random_state=2020,
)

In [None]:
# The checkpoint callback writes into this directory; create it up front so a
# missing directory does not fail the run only after the first epoch finishes.
checkpoint_dir = '../data/model_weights/' + model_name
os.makedirs(checkpoint_dir, exist_ok=True)

callbacks = [keras.callbacks.ReduceLROnPlateau(monitor='val_grapheme_recall',
                                            factor=0.5,
                                            patience=10,
                                            verbose=1,
                                            mode='max',
                                            min_delta=0.001,
                                            cooldown=0,
                                            min_lr=min_lr),
             keras.callbacks.ModelCheckpoint('../data/model_weights/' + model_name + '/' + model_name + '_{val_grapheme_recall:.4f}_' + data_name + '.h5',
                                         monitor='val_grapheme_recall',
                                         verbose=1,
                                         save_best_only=True,
                                         save_weights_only=True,
                                         mode='max',
                                         period=1),
             # When resuming (initial_epoch > 0) append to the existing log
             # instead of overwriting the epochs that were already recorded.
             keras.callbacks.CSVLogger('{}_{}.csv'.format(model_name, data_name),
                                       append=initial_epoch > 0)]

# Augmentation is applied to the training stream only (val_gen is None).
train_datagen = DataGenerator(df, train_list, generator=train_gen, batch_size=BATCH_SIZE)
valid_datagen = DataGenerator(df, val_list, generator=val_gen, batch_size=BATCH_SIZE)

model = build_model()
model.fit_generator(train_datagen,
                    steps_per_epoch=len(train_datagen),
                    epochs=no_of_epochs,
                    validation_data=valid_datagen,
                    validation_steps=len(valid_datagen),
                    callbacks=callbacks,
                    verbose=2,
                    initial_epoch=initial_epoch)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Epoch 8/100
 - 772s - loss: 0.8059 - grapheme_loss: 0.5053 - vowel_loss: 0.1610 - consonant_loss: 0.1396 - grapheme_recall: 0.8265 - vowel_recall: 0.9375 - consonant_recall: 0.9505 - val_loss: 0.3753 - val_grapheme_loss: 0.2603 - val_vowel_loss: 0.0588 - val_consonant_loss: 0.0562 - val_grapheme_recall: 0.9187 - val_vowel_recall: 0.9838 - val_consonant_recall: 0.9834

Epoch 00008: val_grapheme_recall improved from -inf to 0.91868, saving model to ../data/model_weights/densenet169/densenet169_0.9187_128x128_raw.h5
Epoch 9/100
 - 730s - loss: 0.7656 - grapheme_loss: 0.4778 - vowel_loss: 0.1558 - consonant_loss: 0.1320 - grapheme_recall: 0.8341 - vowel_recall: 0.9393 - consonant_recall: 0.9528 - val_loss: 0.2930 - val_grapheme_loss: 0.1847 - val_vowel_loss: 

 - 731s - loss: 0.5300 - grapheme_loss: 0.3309 - vowel_loss: 0.1077 - consonant_loss: 0.0914 - grapheme_recall: 0.8822 - vowel_recall: 0.9566 - consonant_recall: 0.9665 - val_loss: 0.1952 - val_grapheme_loss: 0.1212 - val_vowel_loss: 0.0346 - val_consonant_loss: 0.0394 - val_grapheme_recall: 0.9639 - val_vowel_recall: 0.9911 - val_consonant_recall: 0.9894

Epoch 00023: val_grapheme_recall did not improve from 0.96423
Epoch 24/100
 - 731s - loss: 0.5173 - grapheme_loss: 0.3209 - vowel_loss: 0.1061 - consonant_loss: 0.0903 - grapheme_recall: 0.8859 - vowel_recall: 0.9571 - consonant_recall: 0.9670 - val_loss: 0.1919 - val_grapheme_loss: 0.1156 - val_vowel_loss: 0.0367 - val_consonant_loss: 0.0396 - val_grapheme_recall: 0.9658 - val_vowel_recall: 0.9903 - val_consonant_recall: 0.9891

Epoch 00024: val_grapheme_recall improved from 0.96423 to 0.96579, saving model to ../data/model_weights/densenet169/densenet169_0.9658_128x128_raw.h5
Epoch 25/100
 - 732s - loss: 0.5163 - grapheme_loss: 0.3