TF serving demo:
* train a Keras model with multiple outputs
* serve the model
    * compare timing with in-memory inference

In [1]:
# 1. Store a text file for train generator

In [38]:
# Side length (in pixels) of the square images; used below both as the
# resize target when loading images and as the model's input height/width.
IM_SIZE = 64

In [39]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf

def flatten(l):
    """Concatenate the items of every sub-iterable of ``l`` into one flat list."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

In [40]:
# Load the training table; its 'tags' column holds space-separated tag names
df_train = pd.read_csv("./data/train_v2.csv")

# Vocabulary of distinct tags, in alphabetical order
labels = sorted(set(flatten([tags.split(' ') for tags in df_train['tags'].values])))

# Split the vocabulary into weather tags and everything else
weather_labels = ['clear', 'cloudy', 'haze', 'partly_cloudy']
ground_labels = [name for name in labels if name not in weather_labels]

# Tag name -> column position, for the full vocabulary and for each subset
label_map = dict(zip(labels, range(len(labels))))
wlabel_map = dict(zip(weather_labels, range(len(weather_labels))))
glabel_map = dict(zip(ground_labels, range(len(ground_labels))))

def get_labels_binary(s, labelmap):
    """Encode a whitespace-separated tag string as a multi-hot vector.

    Parameters
    ----------
    s : str
        Tag names separated by whitespace, e.g. ``"clear primary"``.
    labelmap : dict
        Maps each tag name to its position in the output vector.

    Returns
    -------
    numpy.ndarray
        ``int64`` vector of length ``len(labelmap)`` holding 1 at the position
        of every tag found in ``s`` and 0 everywhere else.

    Raises
    ------
    KeyError
        If ``s`` contains a tag that is not a key of ``labelmap``.
    """
    encoded = np.zeros(len(labelmap), dtype=np.int64)
    # split() without a separator ignores leading/trailing/repeated whitespace,
    # so an empty or padded tag string does not produce a bogus '' tag.
    idx = [labelmap[tag] for tag in s.split()]
    if idx:
        encoded[idx] = 1
    return encoded

def array_to_str(arr):
    """Return the string form of ``arr.tolist()``, e.g. ``'[1, 0, 1]'``."""
    as_list = arr.tolist()
    return str(as_list)

# Encode every row's tags as a multi-hot vector and store it as its string form
df_train['label'] = (
    df_train['tags']
    .apply(get_labels_binary, args=(label_map,))
    .map(array_to_str)
)

# Persist the table without the raw tags column and without the index
df_train.drop('tags', axis=1).to_csv(
    './data/TRAIN_kaggle.csv',
    index=None,
)

# Read the file back and show its first rows
pd.read_csv('./data/TRAIN_kaggle.csv').head()

Unnamed: 0,image_name,label
0,train_0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, ..."
1,train_1,"[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
2,train_2,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
3,train_3,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
4,train_4,"[1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, ..."


In [41]:
# 3. Train a model
import tensorflow as tf
import numpy as np
from keras.layers import Input, Dense, Lambda, Flatten, Reshape, Layer, Concatenate, Add, Subtract
from keras.layers import BatchNormalization, Dropout, Activation
from keras.layers import MaxPooling2D
from keras.layers import Conv2D, Conv2DTranspose, Reshape, Multiply, Dot
from keras.layers import BatchNormalization
from keras.engine.topology import Layer
from keras.models import Model
from keras.applications import VGG16, ResNet50
import keras.backend as K
from keras import metrics
        
class CNN_classifier(object):
    """Small CNN for multi-label image classification with binary relevance.

    The network is two Conv2D + MaxPooling2D stages followed by two
    Dense + Dropout stages and a sigmoid output layer with one unit per label.
    The Keras model is built in the constructor and exposed as ``self.model``.

    Parameters
    ----------
    im_size : int
        Height and width of the (square, 3-channel) input images.
    n_labels : int
        Number of output units, one per label.
    dropout_rate : float, optional
        Rate passed to both Dropout layers. Defaults to 0.15.
    n_neurons : int, optional
        Number of units in each hidden Dense layer. Defaults to 128.

    Attributes set by ``build``: ``x`` (input tensor), ``fc_2`` (output of the
    last hidden stage), ``y`` (output tensor) and ``model``.
    """

    def __init__(self, im_size, n_labels, dropout_rate=0.15, n_neurons=128):
        self.im_size = im_size
        self.n_labels = n_labels
        self.dropout_rate = dropout_rate
        self.n_neurons = n_neurons  # Number of neurons in dense layers
        # build model on init
        self.build()

    def build(self):
        """Create the layer graph and store the resulting ``Model`` on ``self.model``."""
        # Define input
        self.x = Input(shape=(self.im_size, self.im_size, 3))

        # Convolutional layers
        conv_1 = Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu')(self.x)
        conv_1 = MaxPooling2D(padding='same')(conv_1)
        conv_2 = Conv2D(32, kernel_size=(3, 3),
                        padding='same', activation='relu')(conv_1)
        conv_2 = MaxPooling2D(padding='same')(conv_2)

        # Flatten
        conv_flat = Flatten()(conv_2)
        # Fully connected layers
        fc_1 = Dense(self.n_neurons, activation='relu')(conv_flat)
        fc_1 = Dropout(self.dropout_rate)(fc_1)
        fc_2 = Dense(self.n_neurons, activation='relu')(fc_1)
        self.fc_2 = Dropout(self.dropout_rate)(fc_2)

        # Output layer: n_labels sigmoid units, one independent probability per label
        self.y = Dense(self.n_labels, activation='sigmoid')(self.fc_2)

        self.model = Model(inputs=self.x, outputs=self.y)

In [42]:
# One sigmoid output per tag in the vocabulary, so the output width always
# matches the length of the label vectors built from `label_map`.
model = CNN_classifier(IM_SIZE, len(labels)).model

model.compile(loss='binary_crossentropy', optimizer='adam')
# summary() prints the table itself and returns None; wrapping it in print()
# would append a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 64, 64, 3)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 64, 64, 32)        896       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 32, 32, 32)        9248      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 16, 16, 32)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 128)               1048704   
__________

In [59]:
import os
import ast
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.utils import Sequence


class KagglePlanetSequence(Sequence):
    """Keras ``Sequence`` that yields (image batch, label batch) pairs.

    The CSV at ``file_path`` must contain an ``image_name`` column (file name
    without the ``.jpg`` extension, relative to ``data_path``) and a ``label``
    column holding a Python list literal such as ``"[0, 1, 0]"``.

    Parameters
    ----------
    file_path : str
        Path of the CSV index file.
    data_path : str
        Directory containing the ``.jpg`` images.
    im_size : int
        Images are resized to ``(im_size, im_size)`` when loaded.
    batch_size : int
        Number of samples per batch; the final batch may be smaller.
    mode : str, optional
        Stored on the instance as ``self.mode``; not otherwise used by this class.
    """
    def __init__(self, file_path, data_path, im_size, batch_size, mode='train'):
        self.df = pd.read_csv(file_path)
        self.dp = data_path
        self.imsz = im_size
        self.bsz = batch_size
        self.mode = mode

        # Take labels and list of image locations in memory
        self.labels = np.array([ast.literal_eval(v) for v in self.df['label'].values])
        self.im_list = [os.path.join(self.dp, im + '.jpg') for im in self.df['image_name'].values]

    def __len__(self):
        """Return the number of batches per epoch, counting a final partial batch."""
        # Ceiling division, so trailing samples that do not fill a whole batch
        # still get a batch of their own instead of being dropped.
        return (len(self.df) + self.bsz - 1) // self.bsz

    def get_batch_labels(self, idx):
        """Return the label rows of batch ``idx`` as an array."""
        start = idx * self.bsz
        # Slicing clamps at the end of the array, so the last batch is simply shorter.
        return self.labels[start: start + self.bsz]

    def get_batch_features(self, idx):
        """Load the images of batch ``idx`` and return them as one array."""
        start = idx * self.bsz
        paths = self.im_list[start: start + self.bsz]
        return np.array([self._load_image(path) for path in paths])

    def _load_image(self, path):
        """Load one image resized to ``(imsz, imsz)`` and divide its values by 255."""
        return img_to_array(load_img(path, target_size=(self.imsz, self.imsz))) / 255.

    def __getitem__(self, idx):
        """Return the ``(features, labels)`` pair for batch ``idx``."""
        batch_x = self.get_batch_features(idx)
        batch_y = self.get_batch_labels(idx)
        return batch_x, batch_y

In [60]:
batch_size = 32
# Batch generator over the training CSV written earlier and the image folder
seq = KagglePlanetSequence(
    file_path='./data/TRAIN_kaggle.csv',
    data_path='./data/train/',
    im_size=IM_SIZE,
    batch_size=batch_size,
)

In [62]:
# Train for five epochs, loading the batches in worker processes
model.fit_generator(generator=seq, epochs=5, verbose=1, use_multiprocessing=True)

Epoch 1/5

Process ForkPoolWorker-2:
Traceback (most recent call last):
  File "/home/stijn/miniconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/stijn/miniconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/stijn/miniconda3/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/home/stijn/venvs/sbx3/lib/python3.6/site-packages/keras/utils/data_utils.py", line 401, in get_index
    return _SHARED_SEQUENCES[uid][i]
  File "<ipython-input-59-7ccfd5f10a1c>", line 40, in __getitem__
    batch_x = self.get_batch_features(idx)
  File "<ipython-input-59-7ccfd5f10a1c>", line 37, in get_batch_features
    return np.array([img_to_array(load_img(im, target_size=(self.imsz, self.imsz))) / 255. for im in self.im_list[idx * self.bsz: (1 + idx) * self.bsz]])
  File "<ipython-input-59-7ccfd5f10a1c>", line 37, in <listcomp>
    return

Epoch 1/5


KeyboardInterrupt: 

In [None]:
# Predict and verify accuracy
# Use `seq`, the Sequence built for training: the name `ds` is not defined
# anywhere in this notebook, and a Sequence is passed to Keras directly.
steps = len(seq)
predictions = model.predict_generator(generator=seq, steps=steps, verbose=1)

# df_train['label'] holds stringified lists at this point, so take the numeric
# label matrix from the sequence instead, trimmed to the rows actually predicted.
targets = seq.labels[:len(predictions)]

# Compare only the weather columns so the score really is a weather accuracy.
# NOTE(review): assumes every image carries exactly one weather tag - confirm.
weather_idx = [label_map[name] for name in weather_labels]
y_true = np.argmax(targets[:, weather_idx], axis=1)
y_hat = np.argmax(predictions[:, weather_idx], axis=1)

In [None]:
def weather_accuracy(y_true, y_hat):
    """Return the mean of the element-wise comparison ``y_true == y_hat``."""
    matches = y_true == y_hat
    return np.mean(matches)
    
# Score the predictions against the ground truth and report the result
w_acc = weather_accuracy(y_true=y_true, y_hat=y_hat)
print(f'Accuracy for weather labels: {w_acc}')