TF serving demo:
* train a keras model with multiple outputs
* serve the model
    * compare timing with in-memory inference

In [237]:
# 1. Store a text file for train generator

In [238]:
import os
import numpy as np
import pandas as pd

def flatten(l): return [item for sublist in l for item in sublist]

In [239]:
# Read dataframe
df_train = pd.read_csv("./data/train_v2.csv")

# Make label maps
labels = sorted(list(set(flatten([l.split(' ') for l in df_train['tags'].values]))))

weather_labels = ['clear', 'cloudy', 'haze', 'partly_cloudy']
ground_labels = [l for l in labels if l not in weather_labels]

label_map = {l:i for i, l in enumerate(labels)}
wlabel_map = {l: i for i, l in enumerate(weather_labels)}
glabel_map = {l: i for i, l in enumerate(ground_labels)}

def get_labels_binary(s, labelmap):
    labels = np.zeros(len(labelmap), dtype=np.int64)
    idx = [v for v in [labelmap[w] for w in s.split(' ')]]
    labels[idx] = 1
    return labels

def array_to_str(arr):
    return(str(arr.tolist()))

df_train['label'] = df_train['tags'].apply(get_labels_binary, args=(label_map,))

In [240]:
# Map everything to strings
df_train['label']  = df_train['label'].map(array_to_str)

In [241]:
# Save as text file
df_train.drop('tags', axis=1).to_csv('./data/TRAIN_kaggle.csv', index=None)

In [242]:
pd.read_csv('./data/TRAIN_kaggle.csv').head()

Unnamed: 0,image_name,label
0,train_0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, ..."
1,train_1,"[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
2,train_2,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
3,train_3,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
4,train_4,"[1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, ..."


In [243]:
from keras.preprocessing.image import load_img, img_to_array
from keras.applications.vgg16 import preprocess_input
import ast 

def randomHorizontalFlip(image, p=0.5):
    """Do a random horizontal flip with probability p"""
    if np.random.random() < p:
        image = np.fliplr(image)
    return image


def randomVerticalFlip(image, p=0.5):
    """Do a random vertical flip with probability p"""
    if np.random.random() < p:
        image = np.flipud(image)
    return image

class DataSet(object):
    """Custom generator"""

    def __init__(self, file_path, data_path, im_size, batch_size, shuffle, mode='train'):
        self.fp = file_path  # path to training file
        self.dp = data_path # path to raw images
        self.imsz = im_size
        self.bsz = batch_size
        self.shuf = shuffle
        self.mode = mode
        
        self.df = pd.read_csv(self.fp)
    
    def __len__(self):
        return len(self.df)
    
    def get_instance_indexes(self):
        indexes = list(self.df.index)
        if self.shuf:
            np.random.shuffle(indexes)
        return indexes

    def get_batch_features(self, indexes):
        batch_features = np.zeros((len(indexes), self.imsz, self.imsz, 3))

        # Fill up container
        for i, ix in enumerate(indexes):

            im = load_img(os.path.join(self.dp, self.df['image_name'][ix] + '.jpg'),
                          target_size=(self.imsz, self.imsz))
            im = img_to_array(im)
            im = im / 255.

            if False:
                im = randomHorizontalFlip(im)
                im = randomVerticalFlip(im)

            batch_features[i] = im

        return batch_features

    def get_batch_labels(self, indexes):
        if self.mode == 'test':
            return None
        else:
            # Empty containers for labels
            labels = np.zeros((len(indexes), 17))
            # Fill up container
            for i, ix in enumerate(indexes):
                labels[i] = np.array(ast.literal_eval(self.df['label'][ix]), dtype=int)

            return labels
        
    def generate(self, mode='train'):
        self.mode = mode
        while True:
            indexes = self.get_instance_indexes()
            num_batches = int(np.ceil(len(self.df) / self.bsz))
            for i in range(num_batches):
                if i == (num_batches - 1):
                    batch_indexes = indexes[i * self.bsz:]
                else:
                    batch_indexes = indexes[i * self.bsz:(i + 1) * self.bsz]

                X = self.get_batch_features(batch_indexes)
                y = self.get_batch_labels(batch_indexes)
                yield (X, y)

In [244]:
# 3. Train a model
import tensorflow as tf
import numpy as np
from keras.layers import Input, Dense, Lambda, Flatten, Reshape, Layer, Concatenate, Add, Subtract
from keras.layers import BatchNormalization, Dropout, Activation
from keras.layers import MaxPooling2D
from keras.layers import Conv2D, Conv2DTranspose, Reshape, Multiply, Dot
from keras.layers import BatchNormalization
from keras.engine.topology import Layer
from keras.models import Model
from keras.applications import VGG16, ResNet50
import keras.backend as K
from keras import metrics
        
class CNN_classifier(object):

    def __init__(self, im_size,  n_labels):
        """
        CNN for multi-label image classification with binary relevance
        """
        
        self.im_size = im_size
        self.n_labels = n_labels
        self.dropout_rate = 0.15
        self.n_neurons = 128  # Number of neurons in dense layers
        # build model on init
        self.build()

    def build(self):
        # Define input
        self.x = Input(shape=(self.im_size, self.im_size, 3))

        # Convolutional layers
        conv_1 = Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu')(self.x)
        conv_1 = MaxPooling2D(padding='same')(conv_1)
        conv_2 = Conv2D(32, kernel_size=(3, 3),
                        padding='same', activation='relu')(conv_1)
        conv_2 = MaxPooling2D(padding='same')(conv_2)

        # Flatten
        conv_flat = Flatten()(conv_2)
        # Fully connected layers
        fc_1 = Dense(self.n_neurons, activation='relu')(conv_flat)
        fc_1 = Dropout(self.dropout_rate)(fc_1)
        fc_2 = Dense(self.n_neurons, activation='relu')(fc_1)
        self.fc_2 = Dropout(self.dropout_rate)(fc_2)

        # Output layers: n_classes output nodes for binary relevance
        self.y = Dense(self.n_labels, activation='sigmoid')(self.fc_2)

        self.model = Model(inputs=self.x, outputs=self.y)

In [245]:
model = CNN_classifier(64, 17).model

model.compile(loss='binary_crossentropy', optimizer='adam')
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_20 (InputLayer)        (None, 64, 64, 3)         0         
_________________________________________________________________
conv2d_39 (Conv2D)           (None, 64, 64, 32)        896       
_________________________________________________________________
max_pooling2d_39 (MaxPooling (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_40 (Conv2D)           (None, 32, 32, 32)        9248      
_________________________________________________________________
max_pooling2d_40 (MaxPooling (None, 16, 16, 32)        0         
_________________________________________________________________
flatten_20 (Flatten)         (None, 8192)              0         
_________________________________________________________________
dense_58 (Dense)             (None, 128)               1048704   
__________

In [246]:
batch_size = 32
ds = DataSet('./data/TRAIN_kaggle.csv', './data/train/', im_size=64, batch_size=batch_size, shuffle=True)
generator = ds.generate()

In [248]:
steps = int(np.ceil(len(ds) / batch_size))
model.fit_generator(generator=generator, steps_per_epoch=steps, verbose=1, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
  47/1265 [>.............................] - ETA: 3:52 - loss: 0.1261

KeyboardInterrupt: 

In [249]:
# Predict and verify accuracy
steps = int(np.ceil(len(ds) / batch_size))
predictions  = model.predict_generator(generator=ds.generate(mode='test'), verbose=1, steps=steps)

y_true = np.array([np.argmax(v) for v in df_train['label'].values])
y_hat = np.argmax(predictions, axis=1)

  51/1265 [>.............................] - ETA: 2:45

KeyboardInterrupt: 

In [153]:
def weather_accuracy(y_true, y_hat):
    return np.mean(y_true == y_hat)
    
w_acc = weather_accuracy(y_true, y_hat)
print(f'Accuracy for weather labels: {w_acc}')

Accuracy for weather labels: 0.0


  
