# Data preparation

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
IM_SIZE=32

In [3]:
import os
import numpy as np
import pandas as pd

def flatten(l): return [item for sublist in l for item in sublist]

In [11]:
# Read dataframe
df_train = pd.read_csv("./data/train_v2.csv")

# Make label maps
labels = sorted(list(set(flatten([l.split(' ') for l in df_train['tags'].values]))))

weather_labels = ['clear', 'cloudy', 'haze', 'partly_cloudy']
ground_labels = [l for l in labels if l not in weather_labels]

label_map = {l:i for i, l in enumerate(labels)}
wlabel_map = {l: i for i, l in enumerate(weather_labels)}
glabel_map = {l: i for i, l in enumerate(ground_labels)}

def get_labels_binary(s, labelmap):
    labels = np.zeros(len(labelmap), dtype=np.int64)
    idx = [v for v in [labelmap.get(w, -1) for w in s.split(' ')]]
    idx = [i for i in idx if i > -1]
    labels[idx] = 1
    return labels

def array_to_str(arr):
    return(str(arr.tolist()))

# Map to binary arrays, then to strings
#df_train['label'] = df_train['tags'].apply(get_labels_binary, args=(label_map,)).map(array_to_str)
df_train['wlabel'] = df_train['tags'].apply(get_labels_binary, args=(wlabel_map,)).map(array_to_str)
df_train['glabel'] = df_train['tags'].apply(get_labels_binary, args=(glabel_map,)).map(array_to_str)

# Save as text file
df_train.drop('tags', axis=1).to_csv('./data/TRAIN_blogpost.csv', index=None)
pd.read_csv('./data/TRAIN_blogpost.csv').head()

Unnamed: 0,image_name,wlabel,glabel
0,train_0,"[0, 0, 1, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]"
1,train_1,"[1, 0, 0, 0]","[1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1]"
2,train_2,"[1, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]"
3,train_3,"[1, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]"
4,train_4,"[1, 0, 0, 0]","[1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0]"


# Model training

In [5]:
from src.data import BlogpostSequence
from src.models.cnn import BlogpostCNN

Using TensorFlow backend.


In [6]:
network = BlogpostCNN(IM_SIZE, w_labels=4, g_labels=13)
model = network.model
model.compile(loss=['categorical_crossentropy', 'binary_crossentropy'], optimizer='adam')
# print(model.summary())

In [7]:
batch_size=32
seq = BlogpostSequence('./data/TRAIN_blogpost.csv',
                       './data/train/',
                       im_size=IM_SIZE, batch_size=batch_size)

In [10]:
model.fit_generator(generator=seq,
                    verbose=1, 
                    epochs=5,
                    use_multiprocessing=True,
                    workers=4)

Epoch 1/5
Epoch 2/5
Epoch 3/5

Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fc15c496320>

# Export the model

In [9]:
from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model.signature_def_utils_impl import build_signature_def, predict_signature_def
from tensorflow.python.saved_model import tag_constants, signature_constants

import keras.backend as K

export_path = './models/1'
builder = saved_model_builder.SavedModelBuilder(export_path)
    
with K.get_session() as sess:
    K.set_learning_phase(0)
    
    signature = predict_signature_def(inputs={'images': model.input},
                              outputs={t.name:t for t in model.outputs})
    builder.add_meta_graph_and_variables(sess=sess,
                                 tags=[tag_constants.SERVING],
                                 signature_def_map={'predict': 
                                                   signature})
    builder.save()    

AssertionError: Export directory already exists. Please specify a different export directory: ./models/1

# 4. Serve model and call via gRPC request

Appendix: TF records