# Extractor

In this notebook, we will build the Extractor model for Pennyworth. This model is responsible for extracting the category, category type and attributes of the articles of clothing that were selected in the previous balancing step. Once the model achieves sufficient accuracy in predicting these qualities, we will use the Extractor's layer activations to construct a meaningful and rich description of clothing items. 

In [282]:
%matplotlib inline
from packages import *
import numpy as np, matplotlib.pyplot as plt
from datetime import datetime, timedelta
from sklearn.utils import shuffle, class_weight
import tensorflow as tf
import h5py
import time
from PIL import Image
from tensorflow.keras.layers import *
from tensorflow.keras.utils import plot_model
from ipywidgets import IntProgress, Label
from tensorflow.data.experimental import AUTOTUNE
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Floating-point precision used by Keras. float16 was the original goal, but it
# ran into too many issues with Keras and TensorFlow, so float32 is used.
fp = 'float32'
# Keras fuzz factor. 1e-7 is the Keras default and suits float32; half precision
# cannot resolve 1 - 1e-7, so any non-float32 setting gets a coarser value.
epsilon = 1e-7 if fp == 'float32' else 1e-4
tf.config.gpu.set_per_process_memory_growth(True) # Needed this per a bug
tf.config.gpu.set_per_process_memory_fraction(.9)
tf.keras.backend.set_floatx(fp)
tf.keras.backend.set_epsilon(epsilon)

## Dataset

We will be using the Tensorflow Dataset API to construct the dataset and preprocess the images on the fly. 

In [3]:
batch_size = 64  # number of examples per batch
input_shape = (300, 300, 3)  # (height, width, channels) that images are resized/padded to
shuffle_buffer_size = batch_size*4  # shuffle buffer holds four batches' worth of examples
prefetch_buffer_size = 2  # number of batches prefetched ahead of the consumer
repeat = 4  # number of times the training split is repeated

In [4]:
def _load_link(filename):
    """Unpickle `filename` from LINK_DIR using the project's `load_pickle`."""
    return load_pickle(os.path.join(LINK_DIR, filename))


# Outputs of the balancing step.
partition = _load_link('partition_balanced.pkl')
img_cat = _load_link('img_cat_balanced.pkl')
img_attr = _load_link('img_attr_balanced.pkl')
cat_str = _load_link('cat_str_balanced.pkl')
attr_str = _load_link('attr_str_balanced.pkl')

# Category types ('cat_types.pkl') are intentionally not loaded here.

In [5]:
# Randomise the order of each split in turn (no random_state is passed).
for split_name in ('train', 'val', 'test'):
    partition[split_name] = shuffle(partition[split_name])

In [6]:
# For every split, collect parallel lists of image paths, category labels and
# attribute labels, and count how many images the split contains.
split_names = ('train', 'val', 'test')
data = {name: {'path': [], 'cat': [], 'attr': []} for name in split_names}
N = dict.fromkeys(split_names, 0)
for split, relative_paths in partition.items():
    fields = data[split]
    for path in relative_paths:
        fields['cat'].append(img_cat[path])
        fields['attr'].append(img_attr[path])
        fields['path'].append(os.path.join(IMG_DIR, path))
    N[split] += len(relative_paths)

In [7]:
# Sanity check: the training split must be more than three times the size of
# the validation and test splits combined.
assert N['train'] > 3 * (N['val'] + N['test'])
N

{'train': 72008, 'val': 9001, 'test': 9001}

In [8]:
# One tf.data.Dataset per (split, field) pair, sliced element-wise from the lists.
tensor_datasets = {
    split: {
        key: tf.data.Dataset.from_tensor_slices(values)
        for key, values in fields.items()
    }
    for split, fields in data.items()
}

In [9]:
@tf.function
def color(image):
    """Randomly jitter hue, saturation, brightness and contrast of `image`.

    Assumes `image` is a float RGB tensor scaled to [-1, 1], which is what
    `load_and_preprocess` produces. The hue and saturation ops go through an
    RGB<->HSV conversion that is only well defined for floats in [0, 1], so the
    image is moved into that range for the jitter and mapped back afterwards.

    Args:
        image: float RGB image tensor with values in [-1, 1].

    Returns:
        The jittered image, clipped so that it stays within [-1, 1].
    """
    # [-1, 1] -> [0, 1]
    image = (image + 1.) / 2.
    image = tf.image.random_hue(image, 0.08)
    image = tf.image.random_saturation(image, 0.8, 1.2)
    image = tf.image.random_brightness(image, 0.04)
    image = tf.image.random_contrast(image, 0.8, 1.1)
    # Brightness/contrast can push values out of range; keep them valid.
    image = tf.clip_by_value(image, 0., 1.)
    # [0, 1] -> [-1, 1]
    return image * 2. - 1.

In [10]:
@tf.function
def flip(image):
    """Return `image` after tf.image's random left/right flip."""
    return tf.image.random_flip_left_right(image)

In [66]:
@tf.function
def load_and_preprocess(path):
    """Read a JPEG from `path` and turn it into a model-ready image tensor.

    The decoded 3-channel image is rescaled from 0..255 to -1..1, then resized
    with padding to the notebook-level `input_shape` (height, width). The
    result is cast to float16 only when the notebook-level `fp` is 'float16'.
    VGG16's own `preprocess_input` is deliberately not applied.
    """
    raw_bytes = tf.io.read_file(path)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    # 0..255 -> 0..1 -> -0.5..0.5 -> -1..1
    scaled = (pixels / 255 - 0.5) * 2.
    target_height, target_width = input_shape[0], input_shape[1]
    resized = tf.image.resize_with_pad(scaled, target_height, target_width, antialias=True)
    return tf.cast(resized, tf.float16) if fp == 'float16' else resized

In [12]:
datasets = {}
augmentations = [flip, color]


def _label_dict(image, labels):
    """Re-key the (cat, attr) label pair by the name of the output it trains."""
    return image, {'cat_classifier': labels[0], 'attr_classifier': labels[1]}


# Build (image, labels) pairs for every split; images are decoded lazily.
for split, parts in tensor_datasets.items():
    images = parts['path'].map(load_and_preprocess, num_parallel_calls=AUTOTUNE)
    label_pair = (parts['cat'], parts['attr'])
    datasets[split] = tf.data.Dataset.zip((images, label_pair)).map(_label_dict)

# Only the training stream is repeated and augmented.
datasets['train'] = datasets['train'].repeat(repeat)
for aug in augmentations:
    datasets['train'] = datasets['train'].map(lambda x, y: (aug(x), y), num_parallel_calls=AUTOTUNE)

# Every split is shuffled, batched and prefetched in the same way.
for split in datasets:
    datasets[split] = (datasets[split]
                       .shuffle(buffer_size=shuffle_buffer_size)
                       .batch(batch_size=batch_size)
                       .prefetch(buffer_size=prefetch_buffer_size))

In [13]:
# Keep the first training batch around (x: images, y: label dict) for inspection.
for x, y in datasets['train'].take(1):
    pass

## Model

### VGG
We will be applying transfer learning using the pretrained VGG16 model. However, we will only use its convolutional base (optionally including block5) with frozen weights, and train new task-specific blocks on top of it. 

In [14]:
# Pretrained VGG16 "no top" weights, expected in the current user's
# ~/.keras/models directory (resolved per user rather than hard-coded).
weights_path = os.path.join(os.path.expanduser('~'), '.keras', 'models',
                            'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5')

In [15]:
def build_vgg_base(input_shape, weigths_path, block5):
    """Build the VGG16 convolutional base and load pretrained weights into it.

    Args:
        input_shape: shape of one input image, passed to the `InputLayer`.
        weigths_path: path to an HDF5 weights file containing one group per
            conv-layer name, each carrying a `weight_names` attribute.
            (The spelling of this parameter is kept for backward compatibility.)
        block5: when truthy, the fifth convolutional block is included.

    Returns:
        A `tf.keras.Sequential` named 'vgg_base' whose conv layers hold the
        weights read from `weigths_path`.
    """
    # (filters, number of 3x3 conv layers) for each block, in order.
    block_specs = [(64, 2), (128, 2), (256, 3), (512, 3)]
    if block5:
        block_specs.append((512, 3))

    model = tf.keras.Sequential(name='vgg_base')
    model.add(InputLayer(input_shape=input_shape))
    for block_idx, (filters, n_convs) in enumerate(block_specs, start=1):
        for conv_idx in range(1, n_convs + 1):
            model.add(Conv2D(filters, (3, 3), activation='relu', padding='same',
                             name='block{}_conv{}'.format(block_idx, conv_idx)))
        model.add(MaxPooling2D((2, 2), strides=(2, 2),
                               name='block{}_pool'.format(block_idx)))

    # Read from the path that was passed in (not a notebook-level global), open
    # the file read-only, and make sure it is closed even if loading fails.
    with h5py.File(weigths_path, 'r') as f:
        for layer in model.layers:
            if 'conv' in layer.name:
                group = f[layer.name]
                weights = [np.asarray(group[w]) for w in group.attrs['weight_names']]
                layer.set_weights(weights)

    return model


In [17]:
def build_block(input_shape, name, dropout_rate, pool_type='max'):
    """Assemble a small trainable head: two 512-filter convs, pooling, flatten.

    Args:
        input_shape: shape of the feature map fed into the block.
        name: name given to the returned Sequential model.
        dropout_rate: if truthy, a Dropout layer with this rate ends the block.
        pool_type: 'max' or 'avg' picks the 2x2 pooling layer; with any other
            value no pooling layer is added.

    Returns:
        A `tf.keras.Sequential` whose output is the flattened feature vector.
    """
    pooling_by_type = {'max': MaxPooling2D, 'avg': AveragePooling2D}

    stack = [
        InputLayer(input_shape=input_shape),
        Conv2D(512, (3, 3), activation='relu', name='block1_conv1', padding='same'),
        Conv2D(512, (3, 3), activation='relu', name='block1_conv2', padding='same'),
    ]
    pooling = pooling_by_type.get(pool_type)
    if pooling is not None:
        stack.append(pooling((2, 2), strides=(2, 2), name='block1_pool'))
    stack.append(Flatten())
    if dropout_rate:
        stack.append(Dropout(dropout_rate))

    model = tf.keras.Sequential(name=name)
    for layer in stack:
        model.add(layer)
    return model

### Extractor
With the VGG base model and the `build_block` helper in place, we can begin building our Extractor model.

In [18]:
class Extractor:
    """Namespace for building the Extractor model and its loss helpers.

    None of the functions use instance state, so they are declared static;
    they can be called on the class (``Extractor.build(...)``) or on an instance.
    """

    @staticmethod
    def build(input_shape = input_shape, block5 = False, weights_path=weights_path, cat_units=len(cat_str), attr_units=len(attr_str)):
        """Build the two-headed Extractor on top of a frozen VGG base.

        The defaults are captured from the notebook-level variables at the time
        the class is defined.

        Args:
            input_shape: shape of one input image.
            block5: forwarded to `build_vgg_base`.
            weights_path: path of the pretrained VGG weights file.
            cat_units: number of units in the category softmax output.
            attr_units: number of units in the attribute sigmoid output.

        Returns:
            A `tf.keras.models.Model` named 'Extractor' whose outputs are a dict
            with the keys 'cat_classifier' and 'attr_classifier'.
        """
        inputs = Input(input_shape, name='image_input')
        vgg_base = build_vgg_base(input_shape, weights_path, block5=block5)
        # The pretrained base is frozen; only the layers added below are trained.
        vgg_base.trainable=False
        post_vgg = vgg_base(inputs)

        # Category head.
        cat = build_block(vgg_base.output_shape[1:], 'cat', .2)(post_vgg)
        cat = Dense(4096, activation='relu', name='cat_dense')(cat)
        cat = Dense(cat_units, activation='softmax', name='cat_classifier')(cat)

        # Attribute head; it also sees the category probabilities.
        attr =  build_block(vgg_base.output_shape[1:], 'attr', .2)(post_vgg)
        attr = Concatenate(name='concat_attr')([attr, cat])
        attr = Dense(4096, activation='relu', name='attr_dense')(attr)
        attr = Dense(attr_units, activation='sigmoid', name='attr_classifier')(attr)
        return tf.keras.models.Model(inputs=inputs, outputs={'cat_classifier': cat,
                                                             'attr_classifier': attr}, name='Extractor')

    @staticmethod
    @tf.function
    def loss_fn(truth, logits):
        """Compute per-output losses for a three-output variant of the model.

        NOTE(review): this expects a three-element `truth` and a
        'cat_type_classifier' entry in `logits`, neither of which the model
        returned by `build` provides. It looks like a leftover from a version
        with a category-type head; confirm before using it.

        Args:
            truth: indexable of (category, category type, attribute) targets.
            logits: dict of model outputs keyed by output name.

        Returns:
            Dict mapping each output name to its loss tensor.
        """
        cat_loss = tf.keras.losses.sparse_categorical_crossentropy(truth[0], logits['cat_classifier'])
        cat_type_loss = tf.keras.losses.sparse_categorical_crossentropy(truth[1], logits['cat_type_classifier'])
        attr_loss = tf.keras.losses.binary_crossentropy(truth[2], logits['attr_classifier'])
        return {'cat_classifier': cat_loss, 'cat_type_classifier': cat_type_loss, 'attr_classifier': attr_loss}

    @staticmethod
    def loss_to_string(loss):
        """Format the mean of each loss in `loss` (as returned by `loss_fn`).

        NOTE(review): requires the 'cat_type_classifier' key as well; see `loss_fn`.
        """
        loss_numpy = (tf.reduce_mean(loss['cat_classifier']).numpy(), tf.reduce_mean(loss['cat_type_classifier']).numpy(),
                      tf.reduce_mean(loss['attr_classifier']).numpy())
        string = 'Category loss: {:.2e}, Category type loss: {:.2e}, Attribute loss: {:.2e}'.format(*loss_numpy)
        return string

## Training

In [19]:
# 'balanced' weights, one per category index. `classes` and `y` are passed by
# keyword (recent scikit-learn releases reject them positionally) and `classes`
# is given as an ndarray.
cat_weights = class_weight.compute_class_weight('balanced',
                                                classes=np.arange(len(cat_str)),
                                                y=list(img_cat.values()))
# Y stacks the attribute label vector of every image, one row per image.
Y = np.array(list(img_attr.values()))
# For every attribute column: [weight for label 0, weight for label 1].
attr_weights = np.array([class_weight.compute_class_weight('balanced',
                                                           classes=np.array([0, 1]),
                                                           y=Y[:, i])
                         for i in range(len(attr_str))])

In [20]:
def get_weighted_loss(weights):
    """Create a per-label weighted binary cross-entropy loss.

    Args:
        weights: array-like indexed as `weights[:, 0]` (weight used where the
            target is 0) and `weights[:, 1]` (weight used where the target is 1),
            with one row per label.

    Returns:
        A function `weighted_loss(y_true, y_pred)` that returns the weighted
        binary cross-entropy averaged over the last axis.
    """
    weights = np.asarray(weights)
    negative_weights, positive_weights = weights[:, 0], weights[:, 1]

    @tf.function
    def weighted_loss(y_true, y_pred):
        # Follow the prediction dtype instead of hard-coding float32, so the
        # loss still works if the Keras float type is changed.
        dtype = y_pred.dtype
        y_true_float = tf.cast(y_true, dtype)
        w_neg = tf.cast(negative_weights, dtype)
        w_pos = tf.cast(positive_weights, dtype)
        # For 0/1 targets this selects w_neg where the target is 0 and w_pos where it is 1.
        element_weights = (w_neg**(1-y_true_float))*(w_pos**(y_true_float))
        return tf.keras.backend.mean(
            element_weights*tf.keras.backend.binary_crossentropy(y_true_float, y_pred)
            , axis=-1)
    return weighted_loss

In [21]:
def get_weighted_sparse_categorical_loss(class_weights):
    """Create a sparse categorical cross-entropy that weights examples by class.

    Args:
        class_weights: 1-D array-like; entry `i` is the weight of class index `i`.

    Returns:
        A function `(y_true, y_pred) -> per-example weighted loss`.
    """
    class_weights = np.asarray(class_weights)

    def weighted_sparse_categorical_crossentropy(y_true, y_pred):
        # Flatten so that targets shaped (batch,) and (batch, 1) both work.
        labels = tf.cast(tf.reshape(y_true, [-1]), tf.int32)
        per_example = tf.keras.losses.sparse_categorical_crossentropy(labels, y_pred)
        example_weights = tf.gather(tf.cast(class_weights, per_example.dtype), labels)
        return example_weights * per_example

    return weighted_sparse_categorical_crossentropy


extractor = Extractor.build(block5=True)
# `loss_weights` takes one scalar per model output, so it cannot carry the
# per-class weights in `cat_weights`; they are applied inside the category loss
# instead. The accuracy metric is named explicitly because the 'accuracy'
# shortcut may not resolve to the sparse variant when the loss is a custom function.
extractor.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss={'cat_classifier': get_weighted_sparse_categorical_loss(cat_weights),
                        'attr_classifier': get_weighted_loss(attr_weights)},
                  metrics= {'cat_classifier': [tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')],
                            'attr_classifier': [tf.keras.metrics.Recall(name='recall'), tf.keras.metrics.Precision(name='precision')]})
# One TensorBoard log directory per run, named after the current local time.
log_dir = os.path.join(SRC_DIR, 'logs', datetime.now().strftime('%H-%M-%S_%Y-%m-%d'))
tb = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, write_graph=True, update_freq='batch')

In [22]:
# Print the layer-by-layer summary of the compiled Extractor model.
extractor.summary()

Model: "Extractor"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
image_input (InputLayer)        [(None, 300, 300, 3) 0                                            
__________________________________________________________________________________________________
vgg_base (Sequential)           (None, 9, 9, 512)    14714688    image_input[0][0]                
__________________________________________________________________________________________________
cat (Sequential)                (None, 8192)         4719616     vgg_base[1][0]                   
__________________________________________________________________________________________________
cat_dense (Dense)               (None, 4096)         33558528    cat[1][0]                        
__________________________________________________________________________________________

In [23]:
# Number of whole batches in the repeated training stream.
num_batches = (N['train'] * repeat) // batch_size
num_batches

4500

In [None]:
# Feed the whole repeated training stream and let `steps_per_epoch` cut it into
# ten equal epochs. Using `.take(n)` instead would restart from the beginning of
# the stream on every epoch, so most of the training data would never be seen.
extractor.fit(datasets['train'],
              steps_per_epoch=num_batches // 10,
              epochs=10, callbacks=[tb],
              validation_data=datasets['val'], validation_steps=100)

Epoch 1/10

In [26]:
# Persist the trained model as a single HDF5 file under MODEL_DIR.
model_path = os.path.join(MODEL_DIR, 'extractor.h5')
extractor.save(model_path)

## Activations
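Now we will run every image (train, validation and test) through the trained Extractor and save the activations of one of its intermediate dense layers.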

In [27]:
# Every image path from all three splits, in one list.
all_files = [*partition['train'], *partition['val'], *partition['test']]

In [28]:
# Sort in place so the paths are processed in a fixed, shuffle-independent order.
all_files.sort()

In [119]:
# Prefix every entry with IMG_DIR; note this rebinds `all_files` to the joined paths.
all_files = [os.path.join(IMG_DIR, f) for f in all_files]

In [120]:
# Rebinds `all_files` once more: from a list of paths to a tf.data.Dataset of paths.
all_files = tf.data.Dataset.from_tensor_slices(all_files)

In [122]:
# Decode, batch and prefetch every image; no shuffling, so the order of
# `all_files` is preserved.
all_images = (all_files
              .map(load_and_preprocess, num_parallel_calls=AUTOTUNE)
              .batch(batch_size=batch_size)
              .prefetch(buffer_size=prefetch_buffer_size))

In [338]:
# Output shape of the sixth-from-last layer, the one tapped for activations.
# NOTE(review): presumably this is the `cat_dense` layer — confirm via extractor.layers[-6].name.
extractor.layers[-6].output_shape

(None, 4096)

In [340]:
# Backend function mapping a batch of images to the activations of the
# `cat_dense` layer. The layer is looked up by name rather than by position, so
# the tap does not silently move if layers are added or reordered.
get_layer_output = tf.keras.backend.function([extractor.input],
                                             [extractor.get_layer('cat_dense').output])

In [341]:
# One activation array per batch, in dataset order.
activations = [get_layer_output([image_batch])[0] for image_batch in all_images]

In [342]:
# Stack the per-batch arrays along the example axis into a single 2-D array
# (the final batch may be smaller than the others).
activations = np.concatenate(activations, axis=0)

In [343]:
# Store the activation matrix as a plain .npy file (pickling disabled).
activations_path = os.path.join(ACTIVATIONS_DIR, 'activations_cat.npy')
np.save(activations_path, activations, allow_pickle=False)

In [None]:
# Show the attribute labels of the first 32 examples of the sampled training
# batch as a matrix: one row per example, one column per attribute.
plt.rcParams["figure.figsize"] = [10.0, 10.0]
fig, ax = plt.subplots()
attr_labels = y['attr_classifier'].numpy()
ax.matshow(attr_labels[:32])
ax.set_xticks(np.arange(32))
ax.set_xlabel('Attributes')
_ = ax.set_ylabel('Examples')