In [52]:
import keras
import numpy as np
import pickle
import os
from keras.models import Model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, Input
from keras.initializers import he_normal
from keras import optimizers
from keras.callbacks import LearningRateScheduler, TensorBoard
from keras.layers.normalization import BatchNormalization
from keras.utils.data_utils import get_file
from keras.utils import to_categorical
from keras import backend as K

In [53]:
#-----data dir----
# Root directory that unpickle() joins file names onto.
data_dir = "./data"
#-----------------

In [58]:
def scheduler(epoch):
  """Return the learning rate for the given epoch index.

  Step schedule: 0.001 while ``epoch <= 42``, 0.0002 for epochs 43-52,
  and 0.00005 once ``epoch > 52``.
  """
  # Check the latest stage first so each threshold maps to one return.
  if epoch > 52:
    return 0.00005
  if epoch > 42:
    return 0.0002
  return 0.001

def unpickle(filename):
  """Load and return the pickled object stored at ``data_dir/filename``.

  The file is opened in binary mode and decoded with ``encoding='bytes'``.
  """
  # NOTE(review): pickle.load can execute arbitrary code; only use on trusted files.
  path = os.path.join(data_dir, filename)
  with open(path, 'rb') as handle:
    return pickle.load(handle, encoding='bytes')

class LossWeightsModifier(keras.callbacks.Callback):
  """Callback that re-balances the three loss weights at fixed epochs.

  Args:
    alpha, beta, gamma: objects whose value can be overwritten with
      ``K.set_value`` (the notebook passes the ``K.variable`` loss weights).

  At the end of each epoch listed in ``_SCHEDULE`` the three values are
  replaced by the corresponding ``(alpha, beta, gamma)`` tuple; all other
  epochs leave them untouched.
  """

  # epoch index as passed to on_epoch_end -> (alpha, beta, gamma)
  _SCHEDULE = {
      15: (0.1, 0.8, 0.1),
      25: (0.1, 0.2, 0.7),
      35: (0, 0, 1),
  }

  def __init__(self, alpha, beta, gamma):
    # Run the base Callback initialiser so the attributes it sets up exist.
    super().__init__()
    self.alpha = alpha
    self.beta = beta
    self.gamma = gamma

  def on_epoch_end(self, epoch, logs=None):
    """Overwrite the loss weights if ``epoch`` is a scheduled switch point."""
    # `logs` is unused; default is None rather than a shared mutable dict.
    new_weights = self._SCHEDULE.get(epoch)
    if new_weights is None:
      return
    targets = (self.alpha, self.beta, self.gamma)
    for variable, value in zip(targets, new_weights):
      K.set_value(variable, value)


In [66]:
#-------- dimensions ---------
# Spatial size and channel count of one input image.
height, width = 28, 28
channel = 1
# Order the axes according to the backend's image data format.
input_shape = (
    (channel, height, width)
    if K.image_data_format() == 'channels_first'
    else (height, width, channel)
)
#-----------------------------

In [59]:
# Dataset split sizes.
# NOTE(review): train_size / test_size are not referenced in the visible cells.
train_size = 60000
test_size = 10000

# Output width of each prediction head (coarse 1 -> coarse 2 -> fine).
#--- coarse 1 classes ---
coarse1_classes = 2
#--- coarse 2 classes ---
coarse2_classes = 6
#--- fine classes ---
num_classes  = 10

# Passed to model.fit as batch_size / epochs.
batch_size   = 128
epochs       = 60

In [61]:
#--- file paths ---
# Directory handed to the TensorBoard callback as log_dir.
log_filepath = './tb_log_vgg16_hierarchy_dynamic/'
# Directory and file name intended for stored model weights.
# NOTE(review): the file name still says "cifar_100" although this notebook
# loads Fashion-MNIST, and model_path is not used in the visible cells --
# confirm before relying on it.
weights_store_filepath = './vgg16_weights_hierarchy_dynamic/'
retrain_id = '101'
model_name = 'weights_vgg16_dynamic_cifar_100_'+retrain_id+'.h5'
model_path = os.path.join(weights_store_filepath, model_name)

In [62]:
#----------get VGG16 pre-trained weights--------
# get_file is asked for the VGG16 weight file at WEIGHTS_PATH (cache_subdir
# 'models'); its return value is kept as weights_path.
# NOTE(review): the only visible consumer, model.load_weights(weights_path,
# by_name=True), is commented out, so this download appears unused here.
WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5',
                         WEIGHTS_PATH,
                         cache_subdir='models')

In [16]:
# Load the Fashion-MNIST train/test images and integer labels.
fashion_mnist = keras.datasets.fashion_mnist

(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
# Human-readable names, indexed by fine label id.
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
# Names for the coarse-1 (2 classes) and coarse-2 (6 classes) levels,
# indexed by the ids used in c2_to_c1 / fine_to_c2.
class_names_c1 = ['Clothes', 'Goods']
class_names_c2 = ['Tops', 'Bottoms', 'Dresses', 'Outers', 'Accessories', 'Shoes']

In [17]:
# coarse-2 class id -> coarse-1 class id
c2_to_c1 = {
    0: 0,  # Tops        -> Clothes
    1: 0,  # Bottoms     -> Clothes
    2: 0,  # Dresses     -> Clothes
    3: 0,  # Outers      -> Clothes
    4: 1,  # Accessories -> Goods
    5: 1,  # Shoes       -> Goods
}
# fine class id -> coarse-2 class id
fine_to_c2 = {
    0: 0,  # T-shirt/top -> Tops
    1: 1,  # Trouser     -> Bottoms
    2: 0,  # Pullover    -> Tops
    3: 2,  # Dress       -> Dresses
    4: 3,  # Coat        -> Outers
    5: 5,  # Sandal      -> Shoes
    6: 0,  # Shirt       -> Tops
    7: 5,  # Sneaker     -> Shoes
    8: 4,  # Bag         -> Accessories
    9: 5,  # Ankle boot  -> Shoes
}

In [18]:
def print_mappings(mapping, source, dest):
    """Print one ``<source name> -> <dest name>`` line per mapping entry.

    Args:
        mapping: dict from an index into ``source`` to an index into ``dest``.
        source: sequence of names looked up by the mapping's keys.
        dest: sequence of names looked up by the mapping's values.
    """
    for src_idx in mapping:
        dst_idx = mapping[src_idx]
        print(source[src_idx], "->", dest[dst_idx])


In [21]:
# Show the coarse-2 -> coarse-1 mapping, a separator line, then the
# fine -> coarse-2 mapping, using the class-name lists for display.
print_mappings(c2_to_c1, class_names_c2, class_names_c1)
print("-"*10)
print_mappings(fine_to_c2, class_names, class_names_c2)

Tops -> Clothes
Bottoms -> Clothes
Dresses -> Clothes
Outers -> Clothes
Accessories -> Goods
Shoes -> Goods
----------
T-shirt/top -> Tops
Trouser -> Bottoms
Pullover -> Tops
Dress -> Dresses
Coat -> Outers
Sandal -> Shoes
Shirt -> Tops
Sneaker -> Shoes
Bag -> Accessories
Ankle boot -> Shoes


In [9]:
# Inspect the shape of the raw training image array.
train_images.shape

(60000, 28, 28)

In [41]:
# One-hot encode the fine training labels. The width is pinned to
# num_classes (the size of the fine prediction head) instead of being
# inferred from the largest label present in the data.
train_labels_fine = to_categorical(train_labels, num_classes=num_classes)
train_labels_fine.shape

(60000, 10)

In [42]:
# One-hot encode the fine test labels. The width is pinned to num_classes
# so it matches the fine prediction head even if a class were absent from
# this split.
test_labels_fine = to_categorical(test_labels, num_classes=num_classes)
test_labels_fine.shape

(10000, 10)

In [36]:
# Map every fine training label to its coarse-2 class id, then one-hot
# encode with the width pinned to coarse2_classes (the size of the
# coarse-2 prediction head).
train_labels_c2_index = [fine_to_c2[label] for label in train_labels]
train_labels_c2 = to_categorical(train_labels_c2_index, num_classes=coarse2_classes)
train_labels_c2.shape

(60000, 6)

In [37]:
# Map every fine test label to its coarse-2 class id, then one-hot encode
# with the width pinned to coarse2_classes (the size of the coarse-2
# prediction head).
test_labels_c2_index = [fine_to_c2[label] for label in test_labels]
test_labels_c2 = to_categorical(test_labels_c2_index, num_classes=coarse2_classes)
test_labels_c2.shape

(10000, 6)

In [35]:
# Map every coarse-2 training label to its coarse-1 class id, then one-hot
# encode with the width pinned to coarse1_classes (the size of the
# coarse-1 prediction head).
train_labels_c1_index = [c2_to_c1[label] for label in train_labels_c2_index]
train_labels_c1 = to_categorical(train_labels_c1_index, num_classes=coarse1_classes)
train_labels_c1.shape

(60000, 2)

In [40]:
# Map every coarse-2 test label to its coarse-1 class id, then one-hot
# encode with the width pinned to coarse1_classes (the size of the
# coarse-1 prediction head).
test_labels_c1_index = [c2_to_c1[label] for label in test_labels_c2_index]
test_labels_c1 = to_categorical(test_labels_c1_index, num_classes=coarse1_classes)
test_labels_c1.shape

(10000, 2)

In [72]:
# Insert the singleton channel axis where `input_shape` expects it: right
# after the batch axis for 'channels_first', last otherwise. Previously the
# axis was always appended last, which disagrees with `input_shape` under
# 'channels_first'.
if K.image_data_format() == 'channels_first':
    channel_slot = (slice(None), np.newaxis)
else:
    channel_slot = (Ellipsis, np.newaxis)

# Cast to float32 and rescale pixel intensities to [0, 1] so the network is
# not fed raw integer values (Fashion-MNIST pixels are documented as 0-255).
x_train = train_images[channel_slot].astype('float32') / 255.0
x_test = test_images[channel_slot].astype('float32') / 255.0

# Fine-level one-hot targets.
y_train = train_labels_fine
y_test = test_labels_fine

# Coarse-1 one-hot targets.
y_c1_train = train_labels_c1
y_c1_test = test_labels_c1

# Coarse-2 one-hot targets.
y_c2_train = train_labels_c2
y_c2_test = test_labels_c2

In [73]:
# Report the shape of every array handed to model.fit, inputs first and
# then the three target levels.
_shape_report = (
    ("x_train", x_train),
    ("x_test", x_test),
    ("y_train", y_train),
    ("y_test", y_test),
    ("y_c1_train", y_c1_train),
    ("y_c1_test", y_c1_test),
    ("y_c2_train", y_c2_train),
    ("y_c2_test", y_c2_test),
)
for _label, _array in _shape_report:
    print(_label + " shape: ", _array.shape)

x_train shape:  (60000, 28, 28, 1)
x_test shape:  (10000, 28, 28, 1)
y_train shape:  (60000, 10)
y_test shape:  (10000, 10)
y_c1_train shape:  (60000, 2)
y_c1_test shape:  (10000, 2)
y_c2_train shape:  (60000, 6)
y_c2_test shape:  (10000, 6)


In [67]:
#----------------------- model definition ---------------------------
alpha = K.variable(value=0.98, dtype="float32", name="alpha") # A1 in paper
beta = K.variable(value=0.01, dtype="float32", name="beta") # A2 in paper
gamma = K.variable(value=0.01, dtype="float32", name="gamma") # A3 in paper

img_input = Input(shape=input_shape, name='input')
img_input

<tf.Tensor 'input_2:0' shape=(None, 28, 28, 1) dtype=float32>

In [70]:
#--- block 1 ---
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = BatchNormalization()(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

#--- block 2 ---
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

#--- coarse 1 branch ---
c_1_bch = Flatten(name='c1_flatten')(x)
c_1_bch = Dense(256, activation='relu', name='c1_fc_cifar10_1')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_bch = Dense(256, activation='relu', name='c1_fc2')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_pred = Dense(coarse1_classes, activation='softmax', name='c1_predictions_cifar10')(c_1_bch)

#--- block 3 ---
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

#--- coarse 2 branch ---
c_2_bch = Flatten(name='c2_flatten')(x)
c_2_bch = Dense(1024, activation='relu', name='c2_fc_cifar100_1')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_bch = Dense(1024, activation='relu', name='c2_fc2')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_pred = Dense(coarse2_classes, activation='softmax', name='c2_predictions_cifar100')(c_2_bch)

#--- block 4 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)


#--- block 5 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
x = BatchNormalization()(x)

#--- fine block ---
x = Flatten(name='flatten')(x)
x = Dense(4096, activation='relu', name='fc_cifar100_1')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(4096, activation='relu', name='fc_cifar100_2')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
fine_pred = Dense(num_classes, activation='softmax', name='predictions_cifar100')(x)

model = Model(img_input, [c_1_pred, c_2_pred, fine_pred], name='vgg16_hierarchy')
#model.load_weights(weights_path, by_name=True)

In [None]:
#----------------------- compile and fit ---------------------------
# SGD with Nesterov momentum; the LearningRateScheduler callback below is
# given scheduler(), which supplies the per-epoch learning rate.
sgd = optimizers.SGD(lr=0.001, momentum=0.9, nesterov=True)
# The same categorical cross-entropy loss is applied to all three outputs.
# loss_weights are the alpha/beta/gamma backend variables, in the same
# order as the model outputs (coarse 1, coarse 2, fine).
model.compile(loss='categorical_crossentropy', 
              optimizer=sgd, 
              loss_weights=[alpha, beta, gamma], 
              # optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
# Callbacks: TensorBoard logging, learning-rate schedule, loss-weight schedule.
tb_cb = TensorBoard(log_dir=log_filepath, histogram_freq=0)
change_lr = LearningRateScheduler(scheduler)
change_lw = LossWeightsModifier(alpha, beta, gamma)
cbks = [change_lr, tb_cb, change_lw]

# Targets are listed in the same order as the model outputs; the test split
# is passed as validation data.
model.fit(x_train, [y_c1_train, y_c2_train, y_train],
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          callbacks=cbks,
          validation_data=(x_test, [y_c1_test, y_c2_test, y_test]))


Train on 60000 samples, validate on 10000 samples
Epoch 1/60