In [1]:
import numpy as np
from keras import layers
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Activation
from keras.layers import Flatten
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import GlobalMaxPooling2D
from keras.layers import ZeroPadding2D
from keras.layers import AveragePooling2D
from keras.layers import GlobalAveragePooling2D
from keras.layers import BatchNormalization
from keras.layers import SeparableConv2D
from keras.models import Model
import keras.preprocessing.image
import keras.backend as K

from keras.layers import Reshape
from keras.layers import Conv2DTranspose
from keras.layers.advanced_activations import LeakyReLU

import os
import sys
import glob
import tensorflow as tf
from keras.callbacks import TensorBoard
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
import keras.backend.tensorflow_backend as KTF
from keras import optimizers
import datetime
import re
import math
import pandas as pd
import json
import keras.optimizers


import cv2
import imgaug as ia
from imgaug import augmenters as iaa
import logging
import skimage.io
import skimage.color
import skimage.transform
from keras.utils import Sequence, to_categorical
import datetime

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# --- Runtime environment -------------------------------------------------
# Restrict TensorFlow to the GPU with index '1'.
os.environ["CUDA_VISIBLE_DEVICES"] = '1'

# --- Model dimensions ----------------------------------------------------
INPUT_DIM = 64      # side length of the square RGB input image
OUTPUT_DIM = 512    # width of the backbone's final Dense feature layer
CLASS_COUNT = 285   # size of the one-hot label and of every softmax head
ATTR_DIM = 30       # length of one per-class attribute vector
WORD_DIM = 300      # length of one per-class word-embedding vector

# --- Training / bookkeeping ----------------------------------------------
BATCH_SIZE_TRAIN = 64
EPOCH_TOTAL = 20000
FILE_TAG = '180920_all'   # prefix of the output directory and checkpoint files

In [3]:
def precision(y_true, y_pred):
    """Batch-wise precision metric built from Keras backend ops.

    Both the product ``y_true * y_pred`` and ``y_pred`` itself are clipped to
    [0, 1] and rounded before being summed, so the ratio is
    (rounded true positives) / (rounded predicted positives + epsilon).
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    # K.epsilon() keeps the division finite when nothing is predicted positive.
    return K.sum(hit_mask) / (K.sum(predicted_mask) + K.epsilon())

def recall(y_true, y_pred):
    """Batch-wise recall metric built from Keras backend ops.

    Both the product ``y_true * y_pred`` and ``y_true`` itself are clipped to
    [0, 1] and rounded before being summed, so the ratio is
    (rounded true positives) / (rounded actual positives + epsilon).
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    # K.epsilon() keeps the division finite when the batch has no positives.
    return K.sum(hit_mask) / (K.sum(actual_mask) + K.epsilon())


def preprocess_input(x):
    """Rescale pixel values from the [0, 255] range to [-1, 1].

    Args:
        x: array-like of pixel values. Floating-point NumPy arrays are scaled
            in place (the same object is returned), as before. Any other
            input (integer arrays such as uint8 images, lists, ...) is first
            converted to a new float32 array, because in-place true division
            cannot be stored back into an integer buffer.

    Returns:
        The rescaled array: ``x / 255 * 2 - 1``.
    """
    pixels = np.asarray(x)
    if not np.issubdtype(pixels.dtype, np.floating):
        # astype() allocates a new array, so the caller's data is untouched.
        pixels = pixels.astype(np.float32)
    pixels /= 255.
    pixels *= 2.
    pixels -= 1
    return pixels

In [4]:
# Per-class attribute table: column 0 is the class label, the remaining
# columns are that class's attribute values.
df_label2attr = pd.read_csv(
    "./data/DatasetB_20180919/attributes_per_class.txt",
    sep='\t',
    header=None,
)

# Class indices are assigned in sorted-label order; the inverse map is
# derived from the forward map.
_sorted_labels = sorted(df_label2attr[0])
g_label2index = dict(zip(_sorted_labels, range(len(_sorted_labels))))
g_index2label = {idx: lab for lab, idx in g_label2index.items()}

# NOTE(review): sort_index() orders by the row index (file order), not by the
# label column, so the rows of g_attr_vec follow file order while
# g_label2index follows sorted-label order -- confirm this is intended.
_attr_columns = df_label2attr.sort_index(axis=0).iloc[:, 1:]
g_attr_vec = _attr_columns.values.astype('float32')

In [5]:
# Tab-separated, header-less table; the next cell reads column 0 as the class
# label and column 1 as the class name.
df_label2name = pd.read_csv(
    "./data/DatasetB_20180919/label_list.txt",
    sep='\t',
    header=None,
)

In [6]:
# Map class name (column 1) -> class label (column 0); on duplicate names the
# last row wins.
df_name2label_dict = dict(zip(df_label2name.iloc[:, 1], df_label2name.iloc[:, 0]))

In [7]:
# Space-separated, header-less word-embedding table; column 0 holds the class
# name and the remaining columns the embedding values.
df_name2word = pd.read_csv(
    "./data/DatasetB_20180919/class_wordembeddings.txt",
    sep=' ',
    header=None,
)
# Replace each class name in column 0 by its class label (KeyError if a name
# is missing from df_name2label_dict).
df_name2word.iloc[:, 0] = [df_name2label_dict[name] for name in df_name2word.iloc[:, 0]]

In [8]:
# Word-embedding matrix as float32, one row per class, label column dropped.
# NOTE(review): sort_index() orders by the row index (file order), not by the
# label written into column 0 -- confirm the row order matches g_label2index.
_word_columns = df_name2word.sort_index(axis=0).iloc[:, 1:]
g_word_vec = _word_columns.values.astype('float32')

In [9]:
import imgaug as ia
from imgaug import augmenters as iaa
import skimage.io
import skimage.color
from keras.utils import Sequence, to_categorical

def load_image(imgfile, augment):
    """Read an image file and return it as a 3-channel array.

    Args:
        imgfile: path of the image, passed to ``skimage.io.imread``.
        augment: when truthy, apply a random geometric augmentation
            (shear / rotation / scale plus horizontal and vertical flips,
            in random order).

    Returns:
        The image array. Arrays that are not 3-dimensional are expanded with
        ``skimage.color.gray2rgb``; 4-channel images have their last channel
        dropped so the result matches the 3-channel network input.
    """
    img = skimage.io.imread(imgfile)
    if img.ndim != 3:
        img = skimage.color.gray2rgb(img)
    elif img.shape[-1] == 4:
        # Presumably RGBA: keep the colour planes and discard alpha, otherwise
        # the (H, W, 4) array cannot be fed to the (H, W, 3) model input.
        img = img[..., :3]
    if augment:
        seq = iaa.Sequential([
                iaa.Affine(
                    shear=(-10,10),
                    rotate=(-10,10),
                    scale={"x": (0.9, 1.1), "y": (0.9, 1.1)}),
                iaa.Fliplr(0.5),
                iaa.Flipud(0.5),
            ], random_order=True)
        seq_det = seq.to_deterministic()
        images_aug = seq_det.augment_images([img])
        img = images_aug[0]

    return img

class FileSequence(Sequence):
    """Keras ``Sequence`` that yields batches of images with class tables.

    Each batch is ``[images, attrs, words], [labels] * 4``: the images of the
    selected files, one copy of ``g_attr_vec`` and ``g_word_vec`` per sample,
    and the same one-hot label array repeated for the four model outputs.
    Relies on the module-level ``g_file2label``, ``g_label2index``,
    ``g_attr_vec`` and ``g_word_vec``.
    """

    def __init__(self, filedict, batch_size, augment):
        """Store the file list and settings and draw the first shuffle.

        Args:
            filedict: indexable collection of image paths.
            batch_size: number of samples per batch.
            augment: whether image augmentation is applied.
        """
        self.filedict = filedict
        self.batch_size = batch_size
        self.augment = augment
        self.samples = np.arange(len(self.filedict))
        # Number of batches per epoch; the last batch may be short.
        self.block_size = int(np.ceil(len(self.samples) / float(self.batch_size)))
        self.on_epoch_end()

    def epoch_samples(self):
        """Shuffle the sample indices in place."""
        np.random.shuffle(self.samples)

    def on_epoch_end(self):
        """Reshuffle the sample order."""
        self.epoch_samples()

    def __len__(self):
        """Return the number of batches per epoch."""
        return self.block_size

    def __getitem__(self, idx):
        """Build batch number ``idx`` (taken modulo the number of batches)."""
        block = idx % self.block_size
        first = block * self.batch_size
        picked = self.samples[first:first + self.batch_size]

        images = []
        onehots = []
        for sample in picked:
            path = self.filedict[sample]
            images.append(load_image(path, self.augment))
            # file name -> label -> class index -> one-hot vector
            class_index = g_label2index[g_file2label[os.path.basename(path)]]
            onehots.append(to_categorical(class_index, CLASS_COUNT))

        batch_images = np.array(images)
        # Every sample gets the full per-class attribute and word tables.
        batch_attrs = np.array([g_attr_vec] * len(picked))
        batch_words = np.array([g_word_vec] * len(picked))

        if self.augment:
            # Photometric augmentation applied to the whole batch.
            photometric = iaa.Sequential([
                iaa.Sometimes(0.5,
                    iaa.GaussianBlur(sigma=(0, 0.5))
                ),
                iaa.ContrastNormalization((0.75, 1.5)),
                iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
                iaa.Multiply((0.8, 1.2), per_channel=0.2),
            ])
            batch_images = photometric.augment_images(batch_images)

        batch_images = preprocess_input(batch_images.astype(np.float32))
        batch_labels = np.array(onehots)
        inputs = [batch_images, batch_attrs, batch_words]
        targets = [batch_labels] * 4
        return inputs, targets


In [10]:
def detect_model():
    """Build the convolutional backbone that turns an image into a feature vector.

    The network takes an (INPUT_DIM, INPUT_DIM, 3) image and stacks plain and
    depthwise-separable convolution blocks with residual shortcuts, followed
    by global average pooling and a ReLU Dense layer of OUTPUT_DIM units.
    The block/layer names (block1 ... block14, *_sepconv*) look like those of
    the Keras Xception reference model -- presumably adapted from it.

    NOTE: several layers are unnamed and therefore auto-numbered by Keras, so
    the order in which layers are created here should not be changed casually.

    Returns:
        A Keras ``Model`` mapping the image input to the OUTPUT_DIM features.
    """
    x_in = Input(shape=(INPUT_DIM, INPUT_DIM, 3))
    x = x_in
    
    # Block 1: two plain 3x3 convolutions; the first one has stride 2.
    x = Conv2D(32, (3, 3), strides=(2, 2), use_bias=False, name='block1_conv1')(x)
    x = BatchNormalization(name='block1_conv1_bn')(x)
    x = Activation('relu', name='block1_conv1_act')(x)
    x = Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x)
    x = BatchNormalization(name='block1_conv2_bn')(x)
    x = Activation('relu', name='block1_conv2_act')(x)

    # Block 2 shortcut: strided 1x1 conv so the residual matches the pooled
    # main path in both spatial size and channel count (128).
    residual = Conv2D(128, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv1')(x)
    x = BatchNormalization(name='block2_sepconv1_bn')(x)
    # The activation below is named 'block2_sepconv2_act' although it follows
    # sepconv1; the name is kept as-is.
    x = Activation('relu', name='block2_sepconv2_act')(x)
    x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv2')(x)
    x = BatchNormalization(name='block2_sepconv2_bn')(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block2_pool')(x)
    x = layers.add([x, residual])

    # Block 3: same pattern with 256 channels.
    residual = Conv2D(256, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = Activation('relu', name='block3_sepconv1_act')(x)
    x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv1')(x)
    x = BatchNormalization(name='block3_sepconv1_bn')(x)
    x = Activation('relu', name='block3_sepconv2_act')(x)
    x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv2')(x)
    x = BatchNormalization(name='block3_sepconv2_bn')(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block3_pool')(x)
    x = layers.add([x, residual])

    # Block 4: same pattern with 728 channels.
    residual = Conv2D(728, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = Activation('relu', name='block4_sepconv1_act')(x)
    x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv1')(x)
    x = BatchNormalization(name='block4_sepconv1_bn')(x)
    x = Activation('relu', name='block4_sepconv2_act')(x)
    x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv2')(x)
    x = BatchNormalization(name='block4_sepconv2_bn')(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block4_pool')(x)
    x = layers.add([x, residual])

    # Blocks 5-12: eight identical residual blocks of three separable convs;
    # no downsampling, so the shortcut is the block input itself.
    for i in range(8):
        residual = x
        prefix = 'block' + str(i + 5)

        x = Activation('relu', name=prefix + '_sepconv1_act')(x)
        x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv1')(x)
        x = BatchNormalization(name=prefix + '_sepconv1_bn')(x)
        x = Activation('relu', name=prefix + '_sepconv2_act')(x)
        x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv2')(x)
        x = BatchNormalization(name=prefix + '_sepconv2_bn')(x)
        x = Activation('relu', name=prefix + '_sepconv3_act')(x)
        x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv3')(x)
        x = BatchNormalization(name=prefix + '_sepconv3_bn')(x)

        x = layers.add([x, residual])

    # Block 13: last downsampling block, widening 728 -> 1024 channels.
    residual = Conv2D(1024, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = Activation('relu', name='block13_sepconv1_act')(x)
    x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block13_sepconv1')(x)
    x = BatchNormalization(name='block13_sepconv1_bn')(x)
    x = Activation('relu', name='block13_sepconv2_act')(x)
    x = SeparableConv2D(1024, (3, 3), padding='same', use_bias=False, name='block13_sepconv2')(x)
    x = BatchNormalization(name='block13_sepconv2_bn')(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block13_pool')(x)
    x = layers.add([x, residual])

    # Block 14: two separable convs (1536 then 2048 channels), no shortcut.
    x = SeparableConv2D(1536, (3, 3), padding='same', use_bias=False, name='block14_sepconv1')(x)
    x = BatchNormalization(name='block14_sepconv1_bn')(x)
    x = Activation('relu', name='block14_sepconv1_act')(x)

    x = SeparableConv2D(2048, (3, 3), padding='same', use_bias=False, name='block14_sepconv2')(x)
    x = BatchNormalization(name='block14_sepconv2_bn')(x)
    x = Activation('relu', name='block14_sepconv2_act')(x)
  
    # Collapse the spatial dimensions, then project to the feature width.
    x = GlobalAveragePooling2D()(x)

    x = Dense(OUTPUT_DIM, activation='relu')(x)

    x_out = x
    model = Model(inputs=x_in, outputs=x_out)
    return model

In [11]:
# Build the image backbone once; the classification model is built on top of
# this module-level instance.
base_model = detect_model()
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
base_model.summary()

Instructions for updating:
keep_dims is deprecated, use keepdims instead
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 64, 64, 3)    0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 31, 31, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 31, 31, 32)   128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 31, 31, 32)   0           block1_conv1_bn[0][0]            
____________________________________

In [12]:
def classfy_model():
    """Build the multi-head classifier on top of the global ``base_model``.

    Inputs (in order):
        * the image input of ``base_model``;
        * an attribute table of shape (CLASS_COUNT, ATTR_DIM) per sample;
        * a word-embedding table of shape (CLASS_COUNT, WORD_DIM) per sample.

    The backbone features are projected to an attribute vector (sigmoid) and a
    word vector (tanh). Each projection is dotted with every class row of the
    matching table, giving one score per class; the scores are concatenated in
    class order and fed to a softmax Dense head.

    Outputs (in order): ``base_out`` (softmax on the raw features),
    ``attr_out``, ``word_out`` and ``out`` (softmax of the sum of the three
    heads).

    Returns:
        A Keras ``Model`` with the three inputs and four outputs above.
    """
    attr_input = Input(shape=(CLASS_COUNT, ATTR_DIM))
    word_input = Input(shape=(CLASS_COUNT, WORD_DIM))
    x_in = base_model.output

    x_attr = layers.Dense(ATTR_DIM, activation='sigmoid', name='attr_000')(x_in)
    x_word = layers.Dense(WORD_DIM, activation='tanh', name='word_000')(x_in)

    def _class_row(table, class_index):
        # Select row `class_index` of a (batch, CLASS_COUNT, dim) table.
        # The index is bound as a default argument so that each Lambda keeps
        # its own value; a plain closure over the loop variable would see the
        # final loop value if the layer function is ever invoked again.
        return layers.Lambda(lambda t, k=class_index: t[:, k, :])(table)

    # Class 0 seeds the running concatenations. The layer creation order
    # (attr slice, dot, word slice, dot, ...) is kept unchanged on purpose.
    attr_in = _class_row(attr_input, 0)
    y_attr = layers.Dot(axes=1, normalize=False)([x_attr, attr_in])

    word_in = _class_row(word_input, 0)
    y_word = layers.Dot(axes=1, normalize=False)([x_word, word_in])

    for i in range(1, CLASS_COUNT):
        attr_in = _class_row(attr_input, i)
        x = layers.Dot(axes=1, normalize=False)([x_attr, attr_in])
        y_attr = layers.Concatenate()([y_attr, x])

        word_in = _class_row(word_input, i)
        x = layers.Dot(axes=1, normalize=False)([x_word, word_in])
        y_word = layers.Concatenate()([y_word, x])

    x_baseout = layers.Dense(CLASS_COUNT, activation='softmax', name='base_out')(x_in)
    x_attrout = layers.Dense(CLASS_COUNT, activation='softmax', name='attr_out')(y_attr)
    x_wordout = layers.Dense(CLASS_COUNT, activation='softmax', name='word_out')(y_word)

    # NOTE(review): this applies a softmax to a sum of three probability
    # vectors (values in [0, 3]), which yields a fairly flat distribution;
    # the argmax is unaffected. Left unchanged to stay compatible with
    # existing checkpoints -- confirm this is intended.
    x_out = layers.Add()([x_baseout, x_attrout, x_wordout])
    x_out = layers.Activation('softmax', name='out')(x_out)

    model = Model(inputs=[base_model.input, attr_input, word_input], outputs=[x_baseout, x_attrout, x_wordout, x_out])
    return model

In [13]:
# Full classifier: backbone plus the base / attribute / word / fused heads.
model = classfy_model()
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 64, 64, 3)    0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 31, 31, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 31, 31, 32)   128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 31, 31, 32)   0           block1_conv1_bn[0][0]            
__________________________________________________________________________________________________
block1_con

dot_187 (Dot)                   (None, 1)            0           attr_000[0][0]                   
                                                                 lambda_187[0][0]                 
__________________________________________________________________________________________________
lambda_189 (Lambda)             (None, 30)           0           input_2[0][0]                    
__________________________________________________________________________________________________
concatenate_184 (Concatenate)   (None, 93)           0           concatenate_182[0][0]            
                                                                 dot_186[0][0]                    
__________________________________________________________________________________________________
dot_188 (Dot)                   (None, 1)            0           word_000[0][0]                   
                                                                 lambda_188[0][0]                 
__________

__________________________________________________________________________________________________
dot_477 (Dot)                   (None, 1)            0           attr_000[0][0]                   
                                                                 lambda_477[0][0]                 
__________________________________________________________________________________________________
lambda_479 (Lambda)             (None, 30)           0           input_2[0][0]                    
__________________________________________________________________________________________________
concatenate_474 (Concatenate)   (None, 238)          0           concatenate_472[0][0]            
                                                                 dot_476[0][0]                    
__________________________________________________________________________________________________
dot_478 (Dot)                   (None, 1)            0           word_000[0][0]                   
          

In [14]:
# Training annotations: column 0 is the image file name, column 1 its label.
df_trainlabels = pd.read_csv(
    "./data/DatasetB_20180919/train.txt",
    sep='\t',
    header=None,
)
# File name -> class label lookup used by FileSequence.
g_file2label = dict(zip(df_trainlabels.iloc[:, 0], df_trainlabels.iloc[:, 1]))
# Paths of all training images.
imgs = glob.glob('data/DatasetB_20180919/train/*.jpeg')

In [15]:
# Augmenting batch generator over the training images.
train_generator = FileSequence(
    filedict=imgs,
    batch_size=BATCH_SIZE_TRAIN,
    augment=True,
)

In [16]:
# Defaults for a fresh run: start at epoch 0 and log into a new timestamped
# directory.
init_epoch = 0
log_path = './output/{}-{:%Y%m%dT%H%M}'.format(FILE_TAG, datetime.datetime.now())

# If earlier checkpoints exist for this tag, resume from the last one in
# sorted path order and keep writing into its directory.
modelfiles = sorted(glob.glob('./output/{}*/*.h5'.format(FILE_TAG)))
if len(modelfiles) > 0:
    modelfile = modelfiles[-1]
    log_path, _weights_name = os.path.split(modelfile)
    model.load_weights(modelfile)
    filename = os.path.splitext(_weights_name)[0]
    # Checkpoint names end in "_<epoch>", e.g. "..._0136" -> 136.
    init_epoch = int(filename.rsplit('_', 1)[-1])

# Template handed to ModelCheckpoint; "{epoch:04d}" is filled in by Keras.
checkout_file = os.path.join(log_path, FILE_TAG + "_{epoch:04d}.h5")

In [17]:
# NOTE(review): this SGD instance is created but compile() below is given the
# string 'rmsprop', so it is not the optimizer actually used -- confirm.
optimizer = keras.optimizers.SGD(lr=0.0125, momentum=0.9, decay=1e-6, nesterov=False)

# Loss weight per output head; the fused 'out' head counts double.
_head_weights = {
    'base_out': 0.5,
    'attr_out': 0.5,
    'word_out': 0.5,
    'out': 1.,
}
model.compile(
    optimizer='rmsprop',
    # Every head is trained with the same categorical cross-entropy loss.
    loss={head: 'categorical_crossentropy' for head in _head_weights},
    loss_weights=_head_weights,
    metrics=['acc', precision, recall]
)

Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [18]:
# Training callbacks: TensorBoard logging into log_path, and a weights-only
# checkpoint that is written only when 'out_acc' improves.
callbacks = [
    TensorBoard(log_dir=log_path, write_images=False),
    ModelCheckpoint(
        checkout_file,
        monitor='out_acc',
        save_weights_only=True,
        save_best_only=True,
    ),
]

In [19]:
# Restore the checkpoint used for this submission.
model.load_weights('output/180920_all-20180920T1039/180920_all_0136.h5')

# Sorted so that the rows of the submission file come out in a reproducible
# order (glob returns paths in arbitrary order).
testimgs = sorted(glob.glob("output/180920_all-20180920T1039/*.jpg"))

# Number of images sent through the network per predict() call; predicting
# one image at a time was needlessly slow.
PREDICT_BATCH_SIZE = 64

test_results = []
attrs = np.expand_dims(g_attr_vec, axis=0)
words = np.expand_dims(g_word_vec, axis=0)
# Every sample needs its own copy of the class tables; build them once for a
# full batch and slice them down for the (possibly shorter) last batch.
attrs_batch = np.repeat(attrs, PREDICT_BATCH_SIZE, axis=0)
words_batch = np.repeat(words, PREDICT_BATCH_SIZE, axis=0)

for start in range(0, len(testimgs), PREDICT_BATCH_SIZE):
    chunk = testimgs[start:start + PREDICT_BATCH_SIZE]
    count = len(chunk)
    imgdata = np.array([
        preprocess_input(np.array(load_image(path, augment=False), dtype=np.float32))
        for path in chunk
    ])

    # Only the fused 'out' head is used for the final decision.
    _, _, _, preds_out = model.predict(
        [imgdata, attrs_batch[:count], words_batch[:count]],
        batch_size=count,
    )
    for path, scores in zip(chunk, preds_out):
        index = np.argmax(scores)
        test_results.append([os.path.basename(path), g_index2label[index]])
    print("{}/{}".format(start + count, len(testimgs)))

# Write "<file name>\t<label>" rows without index or header.
df_result = pd.DataFrame(test_results)
dtime = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
fname = 'submit-'+ dtime + '.txt'
df_result.to_csv(fname, index=False, sep='\t', header=False)

KeyboardInterrupt: 