# Hierarchically Deep Convolutional Neural Network For Image Recognition

## Setup and Imports

**Import Packages**

In [1]:
import keras as kr
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.utils import np_utils

from keras.datasets import cifar100

from sklearn.model_selection import train_test_split
import cv2 

from random import randint
import time
import os

import matplotlib.pyplot as plt

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Create a TensorFlow (1.x API) session with custom GPU options and register
# it as the session Keras uses.
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # Don't pre-allocate memory; allocate as needed
config.log_device_placement = True  # Log which device each operation ran on
#config.gpu_options.per_process_gpu_memory_fraction = 0.3 # Only allow a fraction of the total GPU memory to be allocated
sess = tf.Session(config=config)
set_session(sess)  # Set this TensorFlow session as the default session for Keras

In [3]:
# Make sure the checkpoint directory exists before training writes weights
# into it. os.makedirs (unlike os.mkdir) also creates the missing parent
# directory 'data/', so this works on a fresh checkout.
if not os.path.exists('data/models/'):
    os.makedirs('data/models')

**Define Global Variables**

In [4]:
# Backbone switch read by the model-building cells: True takes the
# InceptionV1 branch, False takes the branch labelled "NiN".
GGN = True

# The number of coarse categories
coarse_categories = 20

# The number of fine categories
fine_categories = 61

## Import and Preprocess Dataset

**Import Cifar100 Data Set**

In [5]:
# (X, y_c), (x_test, y_c_test) = cifar100.load_data(label_mode='coarse')
# (X, y), (x_test, y_test) = cifar100.load_data(label_mode='fine')

**Import Cham Data Set**

In [6]:
# Training split: features, fine labels and coarse labels.
# Labels are later indexed as [:, 0], i.e. they are expected to be 2-D with
# the class id in column 0.
X = np.load("dataset/train_feature.npy")
y = np.load("dataset/train_label.npy")
y_c = np.load("dataset/train_label_coarse.npy")

# Test split, same layout as the training split.
x_test = np.load("dataset/test_feature.npy")
y_test = np.load("dataset/test_label.npy")
y_c_test = np.load("dataset/test_label_coarse.npy")

**Fine-To-Coarse Mapping**

(Ideally, this mapping would be learned through spectral clustering rather than read off the dataset's provided coarse labels.)

In [7]:
# Build the fine-to-coarse membership matrix: fine2coarse[f, c] is 1 when fine
# class f occurs together with coarse class c, and 0 otherwise.
# The pairs are read from the test-split labels (y_test / y_c_test).
fine2coarse = np.zeros((fine_categories,coarse_categories))
for i in range(coarse_categories):
    # Rows of the test split that carry coarse label i
    index = np.where(y_c_test[:,0] == i)[0]
    if len(index) == 0:
        # No sample has this coarse label; report the empty category.
        # print() call form works on both Python 2 and Python 3.
        print(i)
    fine_cat = np.unique([y_test[j,0] for j in index])
    for j in fine_cat:
        fine2coarse[j,i] = 1


In [8]:
# Rebind both coarse-label names to 0 so the arrays can be garbage-collected
# (saves memory; the mapping matrix has already been built from them).
y_c = y_c_test = 0

In [9]:
################################################################################
#    Title: One Hot Encoding
################################################################################
#    Description:
#        This function expands a column of integer class labels into a
#        one-hot encoded matrix
#
#    Parameters:
#        y           2-D array of label values; the class id is read from
#                    column 0
#        n_values    Optional number of classes (width of the output). When
#                    omitted it is inferred as max(y) + 1, as before. Pass it
#                    explicitly when several label arrays must share the same
#                    width (e.g. train and test splits).
#
#    Returns:
#        y_new    One hot encoded array of labels, shape (len(y), n_values)
################################################################################
def one_hot(y, n_values=None):
    y = np.asarray(y)
    if n_values is None:
        # Same inference rule as the original implementation
        n_values = int(np.max(y)) + 1
    # Cast so that labels stored as floats can still be used as indices
    labels = y[:,0].astype(int)
    y_new = np.eye(n_values)[labels]
    return y_new

In [10]:
# One-hot encode the fine labels of both splits.
# NOTE(review): each call sizes its output from that array's own maximum
# label, so the two widths only agree if both splits contain the highest
# class id - confirm.
y=one_hot(y)
y_test=one_hot(y_test)
print(np.shape(y))

(2359, 61)


**Apply ZCA Whitening**

In [11]:
################################################################################
#    Title: ZCA
################################################################################
#    Description: 
#        This function applies ZCA Whitening to the image set. The whitening
#        matrix is computed from x_1 and then applied to both x_1 and x_2.
#    
#    Parameters:
#        x_1        Array of MxNxC images used to compute the ZCA transform
#        x_2        Array of MxNxC images to which the same transform is applied
#        epsilon    Small constant added to the singular values before the
#                   inverse square root (default 1e-5)
# 
#    Returns:
#        A tuple (x_1, x_2) of whitened arrays with the input shapes
################################################################################
def zca(x_1, x_2, epsilon=1e-5):
        
    with tf.name_scope('ZCA'):
        
        # Placeholders are shaped after the concrete input arrays
        x1 = tf.placeholder(tf.float64, shape=np.shape(x_1), name='placeholder_x1')
        x2 = tf.placeholder(tf.float64, shape=np.shape(x_2), name='placeholder_x2')
        
        # Flatten each image (last three axes) into one row
        flatx = tf.cast(tf.reshape(x1, (-1, np.prod(x_1.shape[-3:])),name="reshape_flat"),tf.float64,name="flatx")
        # Feature-by-feature second-moment matrix, divided by the number of rows N.
        # NOTE(review): flatx is not mean-centred here, and N (not N-1) is used - confirm both are intended.
        sigma = tf.tensordot(tf.transpose(flatx),flatx, 1,name="sigma") / tf.cast(tf.shape(flatx)[0],tf.float64)
        s, u, v = tf.svd(sigma,name="svd")
        # Whitening matrix: U * diag(1 / sqrt(s + epsilon)) * U^T
        pc = tf.tensordot(tf.tensordot(u,tf.diag(1. / tf.sqrt(s+epsilon)),1,name="inner_dot"),tf.transpose(u),1, name="pc")
        
        # Whiten x_1 and restore its original shape
        net1 = tf.tensordot(flatx, pc,1,name="whiten1")
        net1 = tf.reshape(net1,np.shape(x_1), name="output1")
        
        # Apply the same matrix to x_2
        flatx2 = tf.cast(tf.reshape(x2, (-1, np.prod(x_2.shape[-3:])),name="reshape_flat2"),tf.float64,name="flatx2")
        net2 = tf.tensordot(flatx2, pc,1,name="whiten2")
        net2 = tf.reshape(net2,np.shape(x_2), name="output2")
        
    # Evaluate both outputs in a fresh session that is closed on exit
    with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            x_1,x_2 = sess.run([net1,net2], feed_dict={x1: x_1, x2: x_2})    
    return x_1,x_2

**Split Training set into Training and Validation sets**

In [12]:
# Hold out 10% of the training data for validation, stratified on the labels,
# with a fixed seed for reproducibility.
x_train, x_val, y_train, y_val = train_test_split(X, y, test_size=.1, stratify=y, random_state=0)
# Rebind the full arrays to 0 so only the split copies stay referenced
X = 0
y = 0

In [13]:
def resize_cifar100_data(X_train, Y_train, X_valid, Y_valid, img_rows, img_cols,
                         nb_train_samples=3000, nb_valid_samples=100):
    """Truncate the train/validation sets and resize every kept image.

    Parameters:
        X_train, X_valid    4-D image arrays; channels-first when the Keras
                            backend reports 'th' ordering, channels-last
                            otherwise
        Y_train, Y_valid    Label arrays aligned with the image arrays
        img_rows            Target image height in pixels
        img_cols            Target image width in pixels
        nb_train_samples    Maximum number of training samples kept
                            (default 3000, the previously hard-coded value)
        nb_valid_samples    Maximum number of validation samples kept
                            (default 100, the previously hard-coded value)

    Returns:
        (X_train, Y_train, X_valid, Y_valid) with the images resized to
        img_rows x img_cols and all four arrays truncated to the sample caps.
    """
    # cv2.resize expects dsize as (width, height), i.e. (cols, rows). Passing
    # (rows, cols) silently transposes the target size for non-square images.
    dsize = (img_cols, img_rows)

    if K.image_dim_ordering() == 'th':
        # Channels-first: move channels last for OpenCV, then back again
        def _resize(img):
            return cv2.resize(img.transpose(1,2,0), dsize).transpose(2,0,1)
    else:
        def _resize(img):
            return cv2.resize(img, dsize)

    # Resize training and validation images
    X_train = np.array([_resize(img) for img in X_train[:nb_train_samples]])
    X_valid = np.array([_resize(img) for img in X_valid[:nb_valid_samples]])

    # Labels are only truncated; they are expected to be encoded already
    Y_train = Y_train[:nb_train_samples]
    Y_valid = Y_valid[:nb_valid_samples]

    return X_train, Y_train, X_valid, Y_valid

**Flip, pad and randomly crop each photo**

In [14]:
################################################################################
#    Title: Preprocess Img
################################################################################
#    Description: 
#        This function augments an image set and doubles its size. One copy
#        of every image is flipped left-right and rotated by 90 degrees, the
#        other copy is left as is; both copies are cropped/padded to 40x40
#        and randomly cropped back to 32x32x3. The two copies are
#        concatenated, shuffled, and finally flipped up-down at random.
#        (For 32x32 inputs the 40x40 step amounts to 4 pixels of padding per
#        side - assumes 32x32x3 inputs, TODO confirm.)
#    
#    Parameters:
#        X    Array of images to augment
#        y    Array of labels aligned with X
# 
#    Returns:
#        A tuple (x_t, y_t): the augmented images and the labels, each with
#        twice as many rows as the inputs
################################################################################
def preprocess_img(X,y):
        
    with tf.name_scope('Preproc'):
        
        images = tf.placeholder(tf.float64, shape=np.shape(X))
        labels = tf.placeholder(tf.float64, shape=np.shape(y))
        
        # Copy 1: flip left-right, rotate 90 degrees, pad/crop to 40x40, random 32x32 crop
        net = tf.map_fn(lambda img: tf.image.flip_left_right(img), images)
        net = tf.map_fn(lambda img: tf.image.rot90(img), net)
        net = tf.image.resize_image_with_crop_or_pad(net,40,40)
        net = tf.map_fn(lambda img: tf.random_crop(img, [32,32,3]), net)

        # Copy 2: the unflipped images, pad/crop to 40x40, random 32x32 crop
        net1 = tf.image.resize_image_with_crop_or_pad(images,40,40)
        net1 = tf.map_fn(lambda img: tf.random_crop(img, [32,32,3]), net1)
        
        # Stack both copies and duplicate the labels accordingly.
        # NOTE(review): images and labels are shuffled by two separate ops that
        # only share seed=0; this relies on both ops producing the same
        # permutation - confirm the pairs stay aligned.
        net = tf.concat([net, net1],0)
        net = tf.random_shuffle(net, seed=0)
        net_labels = tf.concat([labels, labels],0)
        net_labels = tf.random_shuffle(net_labels,seed=0)
        
        net = tf.map_fn(lambda img: tf.image.random_flip_up_down(img), net)
        
    # Evaluate images and labels together in a fresh session
    with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            x_t,y_t = sess.run([net,net_labels], feed_dict={images: X, labels: y})    
    return x_t,y_t


## Single Classifier Training

**Constructing CNN**

In [15]:
from keras import optimizers
from keras.layers import Input, Conv2D, Dropout, MaxPooling2D, Flatten, Dense
from keras.models import Model

# in_layer = Input(shape=(32, 32, 3), dtype='float32', name='main_input')

# net = Conv2D(384, 3, strides=1, padding='same', activation='elu')(in_layer)
# net = MaxPooling2D((2, 2), padding='valid')(net)

# net = Conv2D(384, 1, strides=1, padding='same', activation='elu')(net)
# net = Conv2D(384, 2, strides=1, padding='same', activation='elu')(net)
# net = Conv2D(640, 2, strides=1, padding='same', activation='elu')(net)
# net = Conv2D(640, 2, strides=1, padding='same', activation='elu')(net)
# net = Dropout(.2)(net)
# net = MaxPooling2D((2, 2), padding='valid')(net)

# net = Conv2D(640, 1, strides=1, padding='same', activation='elu')(net)
# net = Conv2D(768, 2, strides=1, padding='same', activation='elu')(net)
# net = Conv2D(768, 2, strides=1, padding='same', activation='elu')(net)
# net = Conv2D(768, 2, strides=1, padding='same', activation='elu')(net)
# net = Dropout(.3)(net)
# net = MaxPooling2D((2, 2), padding='valid')(net)

# net = Conv2D(768, 1, strides=1, padding='same', activation='elu')(net)
# net = Conv2D(896, 2, strides=1, padding='same', activation='elu')(net)
# net = Conv2D(896, 2, strides=1, padding='same', activation='elu')(net)
# net = Dropout(.4)(net)
# net = MaxPooling2D((2, 2), padding='valid')(net)

# net = Conv2D(896, 3, strides=1, padding='same', activation='elu')(net)
# net = Conv2D(1024, 2, strides=1, padding='same', activation='elu')(net)
# net = Conv2D(1024, 2, strides=1, padding='same', activation='elu')(net)
# net = Dropout(.5)(net)
# net = MaxPooling2D((2, 2), padding='valid')(net)

# net = Conv2D(1024, 1, strides=1, padding='same', activation='elu')(net)
# net = Conv2D(1152, 2, strides=1, padding='same', activation='elu')(net)
# net = Dropout(.6)(net)
# net = MaxPooling2D((2, 2), padding='same')(net)

# net = Flatten()(net)
# net = Dense(1152, activation='elu')(net)
# net = Dense(100, activation='softmax')(net)

## Metrics

In [16]:
def recall(y_true, y_pred):
    """Batch-wise recall.

    Fraction of the relevant (true) items that were selected, computed over
    the current batch only. Both the element-wise product and the targets are
    clipped to [0, 1] and rounded before counting.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    relevant_mask = K.round(K.clip(y_true, 0, 1))
    hits = K.sum(hit_mask)
    relevant = K.sum(relevant_mask)
    # epsilon guards against a batch without any positive target
    return hits / (relevant + K.epsilon())

def precision(y_true, y_pred):
    """Batch-wise precision.

    Fraction of the selected (predicted) items that are relevant, computed
    over the current batch only. Both the element-wise product and the
    predictions are clipped to [0, 1] and rounded before counting.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    selected_mask = K.round(K.clip(y_pred, 0, 1))
    hits = K.sum(hit_mask)
    selected = K.sum(selected_mask)
    # epsilon guards against a batch without any positive prediction
    return hits / (selected + K.epsilon())

def f1_score(y_true, y_pred):
    """Batch-wise F1: harmonic mean of the precision and recall metrics."""
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    # epsilon keeps the ratio finite when both precision and recall are zero
    harmonic_half = (p * r) / (p + r + K.epsilon())
    return 2 * harmonic_half

**Compile Model**

In [17]:
# Build the single (flat) classifier over all fine classes and compile it.
if GGN: 
    # Inception v1
    from inception_v1 import InceptionV1
    in_layer, net, model = InceptionV1(weights=None,classes=fine_categories) # classes = sum of all fine classes
else:
    # NiN
    # NOTE(review): in_layer and net are only assigned in the commented-out
    # network definition earlier in this file, so this branch would raise
    # NameError as written - confirm before setting GGN = False.
    model = Model(inputs=in_layer,outputs=net)

# SGD with Nesterov momentum; this optimizer instance is reused by the coarse and fine models
sgd_coarse = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer= sgd_coarse, loss='categorical_crossentropy', metrics=['accuracy', f1_score, precision, recall])

In [18]:
#model.load_weights('data/models/model_coarse'+str(30)+'.model')

# Training history visualization

In [19]:
def train_histo_vis(history,title):
    """Plot and save training/validation F1 and loss curves.

    Parameters:
        history    Iterable of Keras History objects, in training order. Each
                   object's ``history`` dict must hold per-epoch lists under
                   'f1_score', 'val_f1_score', 'loss' and 'val_loss'.
        title      Path prefix for the output files; "_f1_score.png" and
                   "_loss.png" are appended to it.

    The figures are written to disk and then shown with plt.show().
    """
    def _per_epoch(key):
        # Concatenate the per-epoch values of all History objects into one
        # flat series. Collecting one list per History object would give the
        # x-axis a single point per fit() call instead of one per epoch.
        return [value for hist in history for value in hist.history[key]]

    acc = _per_epoch('f1_score')
    val_acc = _per_epoch('val_f1_score')
    loss = _per_epoch('loss')
    val_loss = _per_epoch('val_loss')

    epochs = range(len(acc))

    # savefig does not create directories, so make sure the target exists
    out_dir = os.path.dirname(title)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    plt.figure()
    plt.plot(epochs, acc, 'b', label='Training F1_score')
    plt.plot(epochs, val_acc, 'r', label='Validation F1_score')
    plt.title('Training and validation F1_score')
    plt.ylabel('F1_score')
    plt.xlabel('Epoch')
    plt.legend()
    plt.savefig(title+"_f1_score.png",bbox_inches='tight')

    plt.figure()
    plt.plot(epochs, loss, 'b', label='Training loss')
    plt.plot(epochs, val_loss, 'r', label='Validation loss')
    plt.title('Training and validation loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend()
    plt.savefig(title+"_loss.png",bbox_inches='tight')

    plt.show()

In [20]:
import json
def save_his(history_model, history_path):
    """Save the per-epoch metrics of one or more training runs as JSON.

    Parameters:
        history_model    Iterable of objects exposing a ``history`` dict that
                         maps a metric name to its list of per-epoch values
                         (e.g. Keras History objects), in training order
        history_path     Path of the JSON file to write

    The file holds one dict mapping each metric name to the concatenation of
    its values across all runs.
    """
    history_dict = {}
    for hist in history_model:
        for metric, values in hist.history.items():
            # Extend rather than dict.update(): update() would replace the
            # values of earlier runs with those of the last one.
            # float() turns numpy scalars into JSON-serializable numbers.
            history_dict.setdefault(metric, []).extend(float(v) for v in values)
    # Context manager guarantees the file is flushed and closed
    with open(history_path, 'w') as handle:
        json.dump(history_dict, handle)

**Train Model**

In [21]:
# TensorBoard callback: logs to ./data/graph/elu_drop/ with graph and image
# writing enabled; histograms are disabled (histogram_freq=0).
tbCallBack = kr.callbacks.TensorBoard(log_dir='./data/graph/elu_drop/', histogram_freq=0, write_graph=True, write_images=True)

In [22]:
# Mini-batch size passed to the fit() and predict() calls in this file
batch = 8

In [None]:
# Train the single classifier in chunks of `step` epochs until `stop` is
# reached, saving the weights after every chunk.
index= 0
step = 1000
stop = 1000

# Collects one History object per fit() call. It must be initialized here:
# nothing earlier in the file defines it, so the append below would otherwise
# raise NameError after the first chunk has finished training.
history = []
while index < stop:
    hist = model.fit(x_train, y_train, batch_size=batch, initial_epoch=index, epochs=index+step, validation_data=(x_val, y_val), callbacks=[tbCallBack])
    index += step
    history.append(hist)
    model.save_weights('data/models/model_coarse'+str(index)+'.h5')
# Epoch count of the most recently saved weights
save_index = index

Train on 2123 samples, validate on 236 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1

In [None]:
# Plot and persist the training curves of the single classifier
train_histo_vis(history, "history/Single_cls")
save_his(history, "history/single_cls.json")

### Load Most Recent Model

In [None]:
# Same SGD settings as sgd_coarse but with a 10x lower learning rate
# (0.001 vs 0.01); used when models are recompiled for a second training phase.
sgd_fine = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)

In [None]:
# Print every layer's index and name to help decide how many layers
# should be frozen.
for i, layer in enumerate(model.layers):
    print(i, layer.name)

In [None]:
# to get weights and biases of all layers
#for layer in model.layers: print(layer.get_config(), layer.get_weights())

In [None]:
# Freeze the whole single-classifier network: mark every layer as
# non-trainable.
for frozen_layer in model.layers:
    frozen_layer.trainable = False

## Fine-Tuning for Coarse Classifier

In [None]:
# Project the one-hot fine labels onto coarse classes through the
# fine-to-coarse membership matrix.
# (The result is one-hot only if every fine class maps to exactly one coarse
# class - presumably the case; verify against the label files.)
y_train_c = np.dot(y_train,fine2coarse)
y_val_c = np.dot(y_val,fine2coarse)

In [None]:
# Coarse classifier: a new head attached to the output of the eighth-from-last
# layer of the single classifier, sharing that network's input.
# The Inception and NiN variants used exactly the same two convolutions, so
# the duplicated if/else on GGN has been collapsed into one code path.
net = Conv2D(1024, 1, strides=1, padding='same', activation='relu')(model.layers[-8].output)
net = Conv2D(1152, 2, strides=1, padding='same', activation='relu')(net)
net = Dropout(.6)(net)
net = MaxPooling2D((2, 2), padding='same')(net)

net = Flatten()(net)
net = Dense(1152, activation='relu')(net)
# Output width follows the global constant (previously a hard-coded 20) so it
# always matches the width of the fine2coarse matrix used to build the targets
out_coarse = Dense(coarse_categories, activation='softmax')(net)

model_c = Model(inputs=in_layer,outputs=out_coarse)
model_c.compile(optimizer= sgd_coarse, loss='categorical_crossentropy', metrics=['accuracy', f1_score, precision, recall])

# model.load_weights('data/models/model_coarse'+str(10)+'.h5')

# for i in range(len(model_c.layers)-1):
#     model_c.layers[i].set_weights(model.layers[i].get_weights())

In [None]:
#model.load_weights('data/models/model_coarse'+str(10)+'.h5')

# Copy weights from the single classifier into the coarse model, layer by
# layer and by position, for every layer except the last one.
# NOTE(review): model_c's trailing layers are newly created and are paired
# with model's layers purely by index; confirm that the paired layers have
# matching weight shapes, otherwise set_weights will fail.
for i in range(len(model_c.layers)-1):
    model_c.layers[i].set_weights(model.layers[i].get_weights())

In [None]:
# Phase 1 of coarse-classifier training. Epoch numbering starts at 1000,
# the value at which the single-classifier loop stops.
index = 1000
step = 1000
stop = 2000

# One History object per fit() call
history = []
while index < stop:
    hist = model_c.fit(x_train, y_train_c, batch_size=batch, initial_epoch=index, epochs=index+step, validation_data=(x_val, y_val_c), callbacks=[tbCallBack])
    index += step
    history.append(hist)

In [None]:
# Plot and persist the curves of the first coarse training phase
train_histo_vis(history, "history/Coarse_cls1")
save_his(history, "history/coarse_cls1.json")

In [None]:
# Phase 2 of coarse-classifier training: recompile with the lower-learning-rate
# optimizer and continue up to epoch 3000. `index` and `step` keep the values
# left behind by the phase-1 loop.
model_c.compile(optimizer=sgd_fine, loss='categorical_crossentropy', metrics=['accuracy', f1_score, precision, recall])
stop = 3000

# Start a fresh history list for this phase
history = []
while index < stop:
    hist = model_c.fit(x_train, y_train_c, batch_size=batch, initial_epoch=index, epochs=index+step, validation_data=(x_val, y_val_c), callbacks=[tbCallBack])
    index += step
    history.append(hist)

In [None]:
# Plot and persist the curves of the second coarse training phase
train_histo_vis(history, "history/Coarse_cls2")
save_his(history, "history/coarse_cls2.json")

## Fine-Tuning for Fine Classifiers

### Construct Fine Classifiers

In [None]:
def fine_model():
    """Build and compile one fine classifier on top of the single classifier.

    A new head (two convolutions, dropout, max-pooling, one hidden dense
    layer and a softmax over all fine classes) is attached to the output of
    the eighth-from-last layer of the global `model`; the network input is
    the global `in_layer`.

    Returns:
        The compiled Keras model.
    """
    net = Conv2D(1024, 1, strides=1, padding='same', activation='relu')(model.layers[-8].output)
    net = Conv2D(1152, 2, strides=1, padding='same', activation='relu')(net)
    net = Dropout(.2)(net)
    net = MaxPooling2D((2, 2), padding='same')(net)

    net = Flatten()(net)
    net = Dense(1152, activation='elu')(net)
    # Output width follows the global constant (previously a hard-coded 61),
    # keeping it consistent with the `classes=fine_categories` backbone
    out_fine = Dense(fine_categories, activation='softmax')(net)
    model_fine = Model(inputs=in_layer,outputs=out_fine)
    model_fine.compile(optimizer= sgd_coarse,
              loss='categorical_crossentropy',
              metrics=['accuracy', f1_score, precision, recall])

    # Copy weights from the single classifier by layer position, skipping the
    # last layer.
    # NOTE(review): the new head layers are paired with `model`'s layers
    # purely by index - confirm the paired layers have matching weight shapes.
    for i in range(len(model_fine.layers)-1):
        model_fine.layers[i].set_weights(model.layers[i].get_weights())
    return model_fine

In [None]:
# Container for the fine classifiers, indexed by coarse category:
#   'models'  one freshly built fine model per coarse category
#   'yhf'     placeholder ({}) for each model's predictions, filled in later
fine_models = {
    'models': [fine_model() for _ in range(coarse_categories)],
    'yhf': [{} for _ in range(coarse_categories)],
}

### Train Fine Classifiers on Respective Data

In [None]:
def get_error(y,yh):
    """Return the fraction of rows whose arg-max prediction differs from y.

    Parameters:
        y     Array of target rows (e.g. one-hot labels)
        yh    2-D array of prediction scores, same shape as y

    Returns:
        Error rate as a float in [0, 1].
    """
    n_rows = len(yh)
    # Harden the scores: 1 at each row's arg-max column, 0 everywhere else
    hard = np.zeros(np.shape(yh))
    hard[np.arange(n_rows), yh.argmax(1)] = 1
    # A row is wrong as soon as one of its entries differs from the target
    diffs_per_row = np.count_nonzero(y - hard, 1)
    wrong_rows = np.count_nonzero(diffs_per_row)
    return float(wrong_rows) / len(y)

In [None]:
# Train one fine classifier per coarse category, in two phases: first with the
# optimizer it was compiled with, then recompiled with the lower-learning-rate
# sgd_fine. Each classifier only sees the samples of its own coarse category.
for i in range(coarse_categories):
    index= 0
    step = 500
    stop = 500

    # Fine classes that belong to coarse category i (computed once and reused
    # for both the training and the validation selection)
    fine_ids = [k for k, e in enumerate(fine2coarse[:,i]) if e != 0]

    # Get all training data for the coarse category
    ix = np.where([(y_train[:,j]==1) for j in fine_ids])[1]
    x_tix = x_train[ix]
    y_tix = y_train[ix]

    # Get all validation data for the coarse category
    ix_v = np.where([(y_val[:,j]==1) for j in fine_ids])[1]
    x_vix = x_val[ix_v]
    y_vix = y_val[ix_v]

    # Phase 1
    history = []
    while index < stop:
        hist = fine_models['models'][i].fit(x_tix, y_tix, batch_size=batch, initial_epoch=index, epochs=index+step, validation_data=(x_vix, y_vix))
        index += step
        history.append(hist)
    # The category index is part of the file name: with a fixed name every
    # iteration overwrote the plots and JSON of the previous classifier.
    train_histo_vis(history, "history/Fine_cls1_"+str(i))
    save_his(history, "history/fine_cls1_"+str(i)+".json")

    # Phase 2: lower learning rate, continue up to epoch 1000
    fine_models['models'][i].compile(optimizer=sgd_fine, loss='categorical_crossentropy', metrics=['accuracy', f1_score, precision, recall])
    stop = 1000

    history = []
    while index < stop:
        hist = fine_models['models'][i].fit(x_tix, y_tix, batch_size=batch, initial_epoch=index, epochs=index+step, validation_data=(x_vix, y_vix))
        index += step
        history.append(hist)
    train_histo_vis(history, "history/Fine_cls2_"+str(i))
    save_his(history, "history/fine_cls2_"+str(i)+".json")

    # Validation error of this classifier on its own coarse category
    yh_f = fine_models['models'][i].predict(x_vix, batch_size=batch)
    print('Fine Classifier '+str(i)+' Error: '+str(get_error(y_vix,yh_f)))

## Probabilistic Averaging

def eval_hdcnn(X, y):
    """Evaluate the single, coarse and combined HD-CNN classifiers.

    The combined prediction is the probabilistic average of the fine
    classifiers: each fine classifier's output is weighted by the coarse
    classifier's probability for its coarse category, and the weighted
    outputs are summed.

    Parameters:
        X    Array of input images
        y    One-hot fine labels aligned with X

    Returns:
        yh   Array shaped like y holding the combined prediction scores.

    The error rates of the three classifiers are printed, and every fine
    classifier's predictions are stored in fine_models['yhf'].
    """
    yh = np.zeros(np.shape(y))

    yh_s = model.predict(X, batch_size=batch)

    print('Single Classifier Error: '+str(get_error(y,yh_s)))

    yh_c = model_c.predict(X, batch_size=batch)
    y_c = np.dot(y,fine2coarse)

    print('Coarse Classifier Error: '+str(get_error(y_c,yh_c)))

    for i in range(coarse_categories):
        if i%5 == 0:
            print("Evaluating Fine Classifier: "+ str(i))
        # Predictions must be computed for the X passed in: the 'yhf' slots
        # start out as empty dicts, which cannot be multiplied with an array.
        fine_models['yhf'][i] = fine_models['models'][i].predict(X, batch_size=batch)
        # Weight by the coarse probability (column vector broadcast over classes)
        yh += np.multiply(yh_c[:,i].reshape((len(y)),1), fine_models['yhf'][i])

    print('Overall Error: '+str(get_error(y,yh)))
    return yh

# Evaluate the full pipeline on the validation split
yh = eval_hdcnn(x_val,y_val)

In [None]:
# Evaluate the single, coarse and combined classifiers on the test split.
# This repeats the steps of eval_hdcnn at module level, so X, y, yh, yh_c and
# y_c remain available as globals afterwards.
X = x_test; y = y_test

# Accumulator for the combined (probabilistically averaged) prediction
yh = np.zeros(np.shape(y))

yh_s = model.predict(X, batch_size=batch)

print('Single Classifier Error: '+str(get_error(y,yh_s)))

yh_c = model_c.predict(X, batch_size=batch)
y_c = np.dot(y,fine2coarse)

print('Coarse Classifier Error: '+str(get_error(y_c,yh_c)))

for i in range(coarse_categories):
    if i%5 == 0:
        print("Evaluating Fine Classifier: "+ str(i))
    # Store this fine classifier's predictions, then add them weighted by the
    # coarse classifier's probability for category i
    fine_models['yhf'][i] = fine_models['models'][i].predict(X, batch_size=batch)
    yh += np.multiply(yh_c[:,i].reshape((len(y)),1), fine_models['yhf'][i])

print('Overall Error: '+str(get_error(y,yh)))

In [None]:
# Debug: inline copy of get_error applied to the globals yh and y
# Threshold: 1 at each row's arg-max column, 0 elsewhere
yht = np.zeros(np.shape(yh))
yht[np.arange(len(yh)), yh.argmax(1)] = 1
# Evaluate Error: fraction of rows where the thresholded prediction differs from y
error = float(np.count_nonzero(np.count_nonzero(y-yht,1)))/len(y)
error