TF serving demo:
* train a keras model with multiple outputs
* serve the model
    * compare timing with in-memory timing

In [93]:
# 1. Store a text file for train generator

In [1]:
import os
import numpy as np
import pandas as pd

def flatten(l):
    """Concatenate the items of every sub-iterable of ``l`` into one flat list."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

In [160]:
# Load the raw training table; the code below uses its 'tags' column,
# which is split on single spaces into individual label names
df_train = pd.read_csv("./data/train_v2.csv")

# Every distinct tag that occurs in the table, in sorted order
labels = sorted({tag for tags in df_train['tags'].values for tag in tags.split(' ')})

# The weather tags are listed by hand; every remaining tag is a ground label
weather_labels = ['clear', 'cloudy', 'haze', 'partly_cloudy']
ground_labels = [tag for tag in labels if tag not in weather_labels]

N_WLAB, N_GLAB = len(weather_labels), len(ground_labels)

# Label name -> position inside the binary label vectors
wlabel_map = dict(zip(weather_labels, range(N_WLAB)))
glabel_map = dict(zip(ground_labels, range(N_GLAB)))

def get_labels_binary(s, labelmap):
    """Encode a space-separated tag string as a binary indicator vector.

    Parameters
    ----------
    s : str
        Tags separated by single spaces, e.g. ``"clear water"``.
    labelmap : dict
        Maps each known tag to its position in the output vector.

    Returns
    -------
    numpy.ndarray
        ``int64`` vector of length ``len(labelmap)`` with 1 at the position of
        every tag of ``s`` found in ``labelmap`` and 0 elsewhere. Tags that are
        not in ``labelmap`` are ignored.
    """
    positions = (labelmap.get(tag) for tag in s.split(' '))
    # Identity check against None: position 0 is falsy but is a valid hit
    hits = [pos for pos in positions if pos is not None]
    encoded = np.zeros(len(labelmap), dtype=np.int64)
    encoded[hits] = 1
    return encoded

def array_to_str(arr):
    """Return the string form of ``arr.tolist()``, e.g. ``'[1, 0, 1]'``."""
    as_list = arr.tolist()
    return str(as_list)
# One binary indicator vector per row for the weather tags and one for the ground tags
df_train['wlabel'] = df_train['tags'].apply(lambda tags: get_labels_binary(tags, wlabel_map))
df_train['glabel'] = df_train['tags'].apply(lambda tags: get_labels_binary(tags, glabel_map))

In [161]:
# Every row must carry exactly one weather label (the data is known to contain
# an instance without one) -> remove every row that violates this
wlabel_counts = np.array([np.sum(v) for v in df_train['wlabel'].values])
# Select index *labels* with a boolean mask instead of taking the first position
# from np.where: this drops all offending rows, stays correct when the index is
# not 0..n-1, and is a no-op (rather than an IndexError) when nothing is left to
# drop, e.g. when this cell is run again.
# missing_w_idx now holds all dropped index labels, not one position.
missing_w_idx = df_train.index[wlabel_counts != 1]
df_train = df_train.drop(index=missing_w_idx)

In [162]:
# Turn both label arrays into list-literal strings (e.g. '[1, 0, 0, 0]')
df_train = df_train.assign(
    wlabel=df_train['wlabel'].map(array_to_str),
    glabel=df_train['glabel'].map(array_to_str),
)

In [163]:
# Save as text file: every column except the raw 'tags' string, without the
# row index (index is a boolean option, so pass False rather than None)
df_train.drop(columns='tags').to_csv('./data/TRAIN.csv', index=False)

In [164]:
# 2. Write a generator

In [165]:
class DataGenerator(object):
    """Endless batch generator over a text file of space-separated integers.

    Every line is read as ``<context ids ...> <target id> <label>``. Call
    ``.generate()`` to get the actual generator.

    NOTE(review): a second class with the same name is defined further down in
    this notebook; once that cell has run, this definition is shadowed.
    """

    def __init__(self, file_path, batch_size):
        self.fp, self.bs = file_path, batch_size
        self._get_filesize()

    def _get_filesize(self):
        """Count the lines of the data file, keep the count in ``self.fz`` and print it."""
        line_count = 0
        with open(self.fp, 'r') as handle:
            for _ in handle:
                line_count += 1
        self.fz = line_count
        print(f'Found {self.fz} lines')

    def _parse_line(self, line):
        """Split one data line into ``(context ids, target id, label)``, all as integers."""
        numbers = map(int, line.split(' '))
        *context_ids, target_id, label_id = numbers
        return context_ids, target_id, label_id

    def generate(self):
        """Yield ``([targets, contexts], labels)`` batches, re-reading the file forever.

        Each pass over the file produces ``int(fz / bs) - 1`` batches of ``bs`` lines.
        """
        batches_per_pass = int(self.fz / self.bs)
        while True:
            with open(self.fp, 'r') as handle:
                for _ in range(batches_per_pass - 1):
                    contexts, targets, labels_out = [], [], []
                    for _ in range(self.bs):
                        ctx, tgt, lab = self._parse_line(handle.readline())
                        contexts.append(ctx)
                        targets.append(tgt)
                        labels_out.append(lab)

                    yield ([np.array(targets), np.array(contexts)], np.array(labels_out))

In [166]:
from keras.preprocessing.image import load_img, img_to_array
from keras.applications.vgg16 import preprocess_input
import ast 

def randomHorizontalFlip(image, p=0.5):
    """Mirror ``image`` left-to-right with probability ``p``; otherwise return it unchanged."""
    should_flip = np.random.random() < p
    return np.fliplr(image) if should_flip else image


def randomVerticalFlip(image, p=0.5):
    """Mirror ``image`` top-to-bottom with probability ``p``; otherwise return it unchanged."""
    should_flip = np.random.random() < p
    return np.flipud(image) if should_flip else image


class DataGenerator(object):
    """Endless batch generator of images and weather/ground labels for Keras.

    The table at ``file_path`` is read once with ``pd.read_csv``; the columns
    used are ``image_name``, ``wlabel`` and ``glabel`` (the label columns hold
    list literals such as ``"[0, 1, 0, 0]"``). Images are loaded from
    ``<data_path>/<image_name>.jpg``. Call ``.generate()`` to get the actual
    generator.

    NOTE: this definition replaces the ``DataGenerator`` class defined earlier
    in this notebook.

    Parameters
    ----------
    file_path : str
        Path to the training CSV.
    data_path : str
        Directory that holds the raw ``.jpg`` images.
    im_size : int
        Side length in pixels of the square images that are produced.
    batch_size : int
        Number of instances per batch; the last batch of a pass may be smaller.
    shuffle : bool
        Reshuffle the instance order at the start of every pass.
    mode : str
        ``'test'`` yields ``None`` as labels and skips the random flips; any
        other value (default ``'train'``) yields labels and applies the flips.
    """

    def __init__(self, file_path, data_path, im_size, batch_size, shuffle, mode='train'):
        self.fp = file_path  # path to training file
        self.dp = data_path  # path to raw images
        self.imsz = im_size
        self.bsz = batch_size
        self.shuf = shuffle
        self.mode = mode

        self.df = pd.read_csv(self.fp)

    def get_instance_indexes(self):
        """Return all row labels of the table, shuffled when ``shuffle`` is set."""
        indexes = list(self.df.index)
        if self.shuf:
            np.random.shuffle(indexes)
        return indexes

    def get_batch_features(self, indexes):
        """Load and preprocess the images of the rows in ``indexes``.

        Returns an array of shape ``(len(indexes), im_size, im_size, 3)``.
        """
        batch_features = np.zeros((len(indexes), self.imsz, self.imsz, 3))

        # Fill up container
        for i, ix in enumerate(indexes):
            im = load_img(os.path.join(self.dp, self.df.at[ix, 'image_name'] + '.jpg'))
            w, h = im.size
            # Scale so that the shorter side becomes im_size; clamp so that
            # float rounding can never leave a side one pixel short
            scale = max(self.imsz / w, self.imsz / h)
            # PIL .resize() requires size as (width, height)!
            newdim = (max(self.imsz, int(round(w * scale))),
                      max(self.imsz, int(round(h * scale))))
            im = img_to_array(im.resize(newdim))

            # Centre-crop the longer side so non-square images fit the
            # container (a no-op for square images)
            top = (im.shape[0] - self.imsz) // 2
            left = (im.shape[1] - self.imsz) // 2
            im = im[top:top + self.imsz, left:left + self.imsz]

            im = preprocess_input(im)

            # Random flips are augmentation: skip them for test data
            if self.mode != 'test':
                im = randomHorizontalFlip(im)
                im = randomVerticalFlip(im)

            batch_features[i] = im

        # Return only after the whole batch has been filled
        return batch_features

    def get_batch_labels(self, indexes):
        """Return ``[weather_labels, ground_labels]`` for ``indexes`` (``None`` in test mode).

        The label widths come from the module-level ``N_WLAB`` and ``N_GLAB``.
        """
        if self.mode == 'test':
            return None

        # Empty containers for labels
        w_labels = np.zeros((len(indexes), N_WLAB))
        g_labels = np.zeros((len(indexes), N_GLAB))
        # Fill up containers from the list literals stored in the table
        for i, ix in enumerate(indexes):
            w_labels[i] = np.array(ast.literal_eval(self.df.at[ix, 'wlabel']), dtype=int)
            g_labels[i] = np.array(ast.literal_eval(self.df.at[ix, 'glabel']), dtype=int)
        return [w_labels, g_labels]

    def generate(self):
        """Yield ``(X, y)`` batches forever, one pass over the table after another.

        Raises
        ------
        ValueError
            If the table has no rows (instead of looping forever without yielding).
        """
        num_batches = int(np.ceil(len(self.df) / self.bsz))
        if num_batches == 0:
            raise ValueError(f'No instances found in {self.fp}')

        while True:
            indexes = self.get_instance_indexes()
            for i in range(num_batches):
                # Slicing past the end is safe, so the last (possibly
                # shorter) batch needs no special case
                batch_indexes = indexes[i * self.bsz:(i + 1) * self.bsz]

                X = self.get_batch_features(batch_indexes)
                y = self.get_batch_labels(batch_indexes)
                yield (X, y)

In [178]:
# 3. Train a model
import math
import tensorflow as tf
import numpy as np
from keras.layers import Input, Dense, Lambda, Flatten, Reshape, Layer, Concatenate, Add, Subtract
from keras.layers import BatchNormalization, Dropout, Activation
from keras.layers import MaxPooling2D
from keras.layers import Conv2D, Conv2DTranspose, Reshape, Multiply, Dot
from keras.layers import BatchNormalization
from keras.engine.topology import Layer
from keras.models import Model
from keras.applications import VGG16, ResNet50
import keras.backend as K
from keras import metrics

class VGG_classifier(object):
    """VGG16 backbone with a weather head (softmax) and a ground head (sigmoid).

    Building happens in the constructor; the resulting two-output Keras model
    is available as ``.model``.

    Parameters
    ----------
    im_size : int
        Side length of the square, 3-channel input images.
    n_neurons : int
        Width of the dense layer shared by both heads.
    imagenet : bool
        Load ImageNet weights into the VGG16 backbone when true, otherwise
        start from ``weights=None``.
    """

    def __init__(self, im_size, n_neurons, imagenet):
        self.im_size, self.n_channels = im_size, 3
        self.n_neurons = n_neurons
        self.imagenet = imagenet
        self.build()

    def build(self):
        """Assemble the network and store it in ``self.model``."""
        input_shape = (self.im_size, self.im_size, self.n_channels)

        # Define input
        self.x = Input(shape=input_shape)

        # VGG16 backbone without its classifier; pooling='max' is used instead
        # of a Flatten layer on the backbone output
        backbone = VGG16(include_top=False,
                         weights='imagenet' if self.imagenet else None,
                         input_tensor=self.x,
                         input_shape=input_shape,
                         pooling='max')

        features = backbone.output
        features = BatchNormalization()(features)
        features = Dropout(0.25)(features)

        # Shared dense layer -> batchnorm -> dropout
        hidden = Dense(self.n_neurons, activation='relu')(features)
        hidden = BatchNormalization()(hidden)
        self.fc_1 = Dropout(0.25)(hidden)

        # Two heads on the shared representation: weather, then ground
        self.weather = Dense(N_WLAB, activation='softmax')(self.fc_1)
        self.ground = Dense(N_GLAB, activation='sigmoid')(self.fc_1)
        self.model = Model(inputs=self.x, outputs=[self.weather, self.ground])

In [184]:
# VGG_classifier takes exactly (im_size, n_neurons, imagenet); passing a fourth
# positional argument raises TypeError, so each argument is named explicitly
model = VGG_classifier(im_size=48, n_neurons=128, imagenet=True).model
# One loss per output, in output order: weather (softmax), ground (sigmoid)
model.compile(optimizer='sgd', loss=['categorical_crossentropy', 'binary_crossentropy'])

In [185]:
# Endless stream of shuffled training batches: 48x48 images, 16 per batch
generator = DataGenerator(
    file_path='./data/TRAIN.csv',
    data_path='./data/train/',
    im_size=48,
    batch_size=16,
    shuffle=True,
).generate()

In [187]:
# Train for a single epoch of 100 generator batches, with the progress bar on
model.fit_generator(
    generator,
    steps_per_epoch=100,
    epochs=1,
    verbose=1,
)

Epoch 1/1
  5/100 [>.............................] - ETA: 1:54 - loss: nan - dense_8_loss: nan - dense_9_loss: nan

KeyboardInterrupt: 