# Hierarchical Deep Convolutional Neural Network (HD-CNN) for Image Recognition

## Setup and Imports

**Import Packages**

In [1]:
import tensorflow as tf
import keras as kr
from keras import backend as K
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from random import randint
import time
import os
from utility import *

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
%matplotlib inline
if not os.path.exists('./data'):
    os.mkdir('./data')
if not os.path.exists('./data/models/'):
    os.mkdir('./data/models')

In [3]:
# Reuse the preprocessed arrays when any file in ./data/ carries the cache name.
cache_present = any('preped_data_cifar' in file_name for file_name in os.listdir('./data/'))
if cache_present:
    with np.load(file='./data/preped_data_cifar.npz') as big_load:
        X, x_test, y, y_test, y_c, y_c_test, fine2coarse = (
            big_load[key] for key in (
                'train_arr', 'test_arr', 'y_fine', 'y_fine_test',
                'y_c_train', 'y_c_test', 'fine2coarse'))
    # Raised after the arrays are loaded, so a "Run All" stops here instead of
    # running on into the preprocessing cells that would overwrite the cache.
    raise File_Exists_Error('preped_data_cifar.npz')

File_Exists_Error: preped_data_cifar.npz already exists,are you sure you want to overwrite it?

In [5]:
# IDs of the coarse categories to keep (CIFAR-100 offers at most 20 of them).
coarse_categories = [1, 2]
# Mini-batch size and number of epochs used by the training cells.
batch_size = 32
stop = 4
course_history = []

## Import and Preprocess Dataset

### A little bit about the CIFAR100 Data Set

In [None]:
# Display the tab-separated description table shipped next to the notebook
# (first row is the header, no column is used as the index).
pd.read_csv('./cifar100.txt', header=0, sep='\t', index_col=None)

**Import Cifar100 Data Set**

In [None]:
# Load CIFAR-100 once with coarse labels and once with fine labels; only the
# labels are kept from the second call. The loader is referenced through the
# already-imported `kr` package so this cell does not rely on a name leaked by
# `from utility import *`.
(X, y_c), (x_test, y_c_test) = kr.datasets.cifar100.load_data(label_mode='coarse')
(_, y), (_, y_test) = kr.datasets.cifar100.load_data(label_mode='fine')

In [None]:
# Show the first training image of each coarse class on a 2 x 10 grid.
super_class, first_inds = np.unique(y_c, return_index=True)
fig, axes = plt.subplots(nrows=2, ncols=10, squeeze=True, figsize=(20, 5))
# axes.flat walks the grid row by row, so `ind` equals col + ncols * row.
for ind, ax in enumerate(axes.flat):
    sample = first_inds[ind]
    ax.imshow(X[sample])
    ax.set_title('Coarse: {0}\n fine: {1}'.format(super_class[ind], y[sample]))
    ax.set_xticks([])
    ax.set_yticks([])

In [None]:
# Keep only the samples whose coarse label is one of `coarse_categories`.
# Labels are indexed as (n_samples, 1), hence the [:, 0] to get a flat mask.
train_mask = np.isin(y_c, coarse_categories)[:, 0]
test_mask = np.isin(y_c_test, coarse_categories)[:, 0]
y_c, y, X = y_c[train_mask], y[train_mask], X[train_mask]
y_c_test, y_test, x_test = y_c_test[test_mask], y_test[test_mask], x_test[test_mask]

In [None]:
# Show the first remaining training image of each fine class on a 2 x 5 grid.
fine_class, first_inds = np.unique(y, return_index=True)
fig, axes = plt.subplots(nrows=2, ncols=5, squeeze=True, figsize=(20, 5))
# axes.flat walks the grid row by row, so `ind` equals col + ncols * row.
for ind, ax in enumerate(axes.flat):
    ax.imshow(X[first_inds[ind]])
    ax.set_title('fine: {0}'.format(fine_class[ind]))
    ax.set_xticks([])
    ax.set_yticks([])

**Fine-To-Coarse Mapping**

(Ideally, this would be done through spectral clustering as opposed to hard-coding)

In [None]:
# fine2coarse[r] holds the fine label IDs that occur under the r-th entry of
# `coarse_categories`. Rows follow the position in the list (not `id - 1`), so
# the table also works for selections that do not start at 1 or have gaps, and
# it matches the position-based coarse encoding used when one-hot encoding.
# The width of 5 is hard-coded: assigning a row raises ValueError if a coarse
# category does not have exactly 5 distinct fine labels.
fine2coarse = np.zeros((len(coarse_categories), 5))
for row, coarse_id in enumerate(coarse_categories):
    fine2coarse[row] = np.unique(y[y_c[:, 0] == coarse_id, 0])

In [None]:
# Replace every label by its position in the lookup table and one-hot encode it.
# Comparing a (k,) lookup with the (n, 1) labels broadcasts to (n, k); the
# column index returned by np.where is the new class index.
fine_lookup = fine2coarse.flatten()
coarse_lookup = np.asarray(coarse_categories)

y = kr.utils.to_categorical(np.where(fine_lookup == y)[1])
y_test = kr.utils.to_categorical(np.where(fine_lookup == y_test)[1])
y_c = kr.utils.to_categorical(np.where(coarse_lookup == y_c)[1])
y_c_test = kr.utils.to_categorical(np.where(coarse_lookup == y_c_test)[1])
print(np.shape(y_c), np.shape(y_c_test), np.shape(y), np.shape(y_test))
print(np.shape(y_c), np.shape(y_c_test), np.shape(y), np.shape(y_test))

**Apply ZCA Whitening**

In [None]:
# Whiten the train and test images and report the wall-clock time taken.
# `zca` is not defined in this notebook — presumably provided by `utility`.
time1 = time.time()
X,x_test = zca(X,x_test)
time2 = time.time()
print('Time Elapsed - ZCA Whitening: '+str(time2-time1));

**Resize Images to be compatible with Xception**

In [None]:
# Resize both image sets and report the wall-clock time taken.
# `resize` is not defined in this notebook — presumably provided by `utility`.
# NOTE(review): the training call passes an extra argument (10) that the test
# call omits; confirm against the helper that this asymmetry is intended.
time1 = time.time()
X = resize(X,10)
x_test = resize(x_test)
time2 = time.time()
print('Time Elapsed - Resizing: '+str(time2-time1));

In [None]:
# Sanity check: display the shape of every array that is about to be cached.
X.shape, y.shape, x_test.shape, y_test.shape, y_c.shape, y_c_test.shape, fine2coarse.shape

In [None]:
# Cache the preprocessed arrays; the keys written here are the ones the
# cache-loading cell near the top of the notebook reads back.
np.savez_compressed(file='./data/preped_data_cifar.npz', train_arr=X, test_arr=x_test, y_fine=y, y_fine_test=y_test,
                   y_c_train=y_c, y_c_test=y_c_test, fine2coarse=fine2coarse)

**Split Training set into Training and Validation sets**

In [6]:
# Split images, fine labels and coarse labels in a single call: all three get
# the same 90/10 shuffle, and the image array is only split (and copied) once
# instead of twice.
x_train, x_val, y_train, y_val, y_c_train, y_c_val = train_test_split(
    X, y, y_c, test_size=.1, random_state=0)

**Split Coarse Labels into train and validation**

## Coarse Training

**Import Xception Pretrained on Imagenet**

In [None]:
# Stop before rebuilding/retraining when a file whose name contains
# 'xception_coarse' already exists in the models directory.
saved_models = os.listdir('./data/models/')
if any('xception_coarse' in file_name for file_name in saved_models):
    raise File_Exists_Error('xception_coarse')

Citation credit for Xception model to:

Chollet Francois. “Xception: Deep Learning with Depthwise Separable Convolutions.” 2016, Oct 7 [1610.02357]   arxiv.org/abs/1610.02357

**Modify Model for Cifar100**

In the HD-CNN paper, this is represented by Shared Layers in Fig 1(b). 

In [None]:
# ImageNet-pretrained Xception fed with 128 x 128 RGB images.
in_layer = kr.layers.Input(shape=(128, 128, 3), dtype='float32', name='shared_layer_input')
xception_base = kr.applications.Xception(
    include_top=True, weights='imagenet',
    input_tensor=in_layer, input_shape=(128, 128, 3))

# Replace the original classifier: a new softmax over the selected coarse
# categories is attached to the output of the second-to-last layer.
penultimate = xception_base.layers[-2].output
out_coarse = kr.layers.Dense(len(coarse_categories), activation='softmax')(penultimate)
model = kr.Model(inputs=in_layer, outputs=out_coarse)

sgd = kr.optimizers.SGD(lr=0.045, momentum=0.9, decay=0)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy', 'top_k_categorical_accuracy', 'MAE'])

# Persist the architecture so the fine-tuning section can rebuild the model.
with open('./data/models/xception.json', 'w') as json_file:
    json_file.write(model.to_json())

In [None]:
# Write the model architecture to JSON.
# NOTE(review): the model-building cell already ends with this exact write, so
# one of the two is redundant.
with open('./data/models/xception.json', 'w') as json_file:
    json_file.write(model.to_json())

**Train Shared Layers**

In [None]:
def step_decay(epoch):
    """Return the learning rate for ``epoch`` under a stepwise exponential decay.

    The rate starts at 0.045 and is multiplied by 0.94 once every 2 epochs.

    Args:
        epoch: zero-based epoch index.

    Returns:
        The learning rate to use during that epoch.
    """
    base_rate, decay_factor, epochs_per_drop = 0.045, 0.94, 2
    completed_drops = np.floor(epoch / epochs_per_drop)
    return base_rate * decay_factor ** completed_drops

In [None]:
# One TensorBoard log directory per run, named after the current timestamp
# (spaces and colons replaced so the name is a valid path).
log_dir = './logs/'+time.ctime().replace(' ', '_').replace(':', '.')
# os.makedirs also creates the './logs' parent, which nothing else in the
# notebook creates; the isdir guard makes re-running the cell within the same
# second harmless.
if not os.path.isdir(log_dir):
    os.makedirs(log_dir)
# All three callbacks are referenced through `kr.callbacks` so the cell does
# not depend on names leaked by `from utility import *`.
callbacks=[
    kr.callbacks.TensorBoard(log_dir=log_dir),
    kr.callbacks.LearningRateScheduler(step_decay),
    kr.callbacks.History()]

In [None]:
# Print the shell command that points TensorBoard at this run's log directory.
print('To check in on tensorboard, copy and paste following line to cmd')
print('tensorboard --logdir={0}'.format(log_dir))

In [None]:
# Fit the coarse classifier on the training split and monitor it on the
# held-out validation split. Fitting on (x_val, y_c_val) while also validating
# on it makes the validation metrics meaningless and leaves the training
# split unused.
model.fit(x_train, y_c_train, batch_size=batch_size, initial_epoch=0,
          validation_data=(x_val, y_c_val), epochs=stop, callbacks=callbacks)

In [None]:
# Coarse-classifier predictions for the validation images; fine_model() later
# indexes this array by sample and coarse category.
course_preds = model.predict(x_val, batch_size=batch_size)

In [None]:
# Persist the trained coarse weights; the fine-tuning section reloads them
# from this same path.
model.save_weights('data/models/xception_coarse')

## Fine-Tuning

### Load Most Recent Model

In [40]:
# Rebuild the coarse model from its JSON architecture and saved weights.
# The file is opened in text mode: it was written as text, and
# model_from_json expects a JSON string rather than bytes.
with open('./data/models/xception.json', 'r') as json_file:
    coarse_model = kr.models.model_from_json(json_file.read())
coarse_model.load_weights('data/models/xception_coarse')

In [48]:
# Compile the reloaded model with the same SGD settings and loss used when it
# was first built; no metrics are requested here.
coarse_model.compile(
    optimizer=kr.optimizers.SGD(lr=0.045, momentum=0.9, decay=0),
    loss='categorical_crossentropy')

In [42]:
# Print index and input/output shapes of the two named activation layers; the
# indices found here are hard-coded in the cells that follow.
boundary_names = ('block4_sepconv1_act', 'block5_sepconv1_act')
for ind, layer in enumerate(coarse_model.layers):
    if layer.name not in boundary_names:
        continue
    print(layer.name, ind, layer.input_shape, layer.output_shape)

block4_sepconv1_act 26 (None, 16, 16, 256) (None, 16, 16, 256)
block5_sepconv1_act 36 (None, 8, 8, 728) (None, 8, 8, 728)


**Get output of Shared layers and cache it**

In [43]:
# The shared trunk is everything from the model input up to the output of
# layer 25, i.e. the layer just before 'block4_sepconv1_act' (index 26).
shared_layers = kr.Model(inputs=coarse_model.input, outputs=coarse_model.layers[25].output)
# Freeze the trunk; the enumerate index was never used, so iterate directly.
for layer in shared_layers.layers:
    layer.trainable = False

In [44]:
# NOTE(review): shared_layers is only used for predict() in the next cell and
# all of its layers were just frozen, so this compile (optimizer, loss,
# metrics) looks unnecessary — confirm before removing.
shared_layers.compile(optimizer=kr.optimizers.SGD(lr=0.045, momentum=0.9, decay=0),
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

In [45]:
# Cache the shared-layer activations for the validation images; fine_model()
# slices this array to build the inputs of each fine classifier.
shared_out = shared_layers.predict(x_val, batch_size=batch_size, verbose=1)



In [None]:
# Persist the cached shared-layer activations to disk.
np.savez_compressed(file='./data/shared_out.npz', shared_out=shared_out)

In [52]:
# Exploratory: shapes of weight arrays 43..54 of the coarse model.
[mem.shape for mem in coarse_model.get_weights()[43:55]]

[(256,),
 (3, 3, 256, 1),
 (1, 1, 256, 728),
 (728,),
 (728,),
 (728,),
 (728,),
 (3, 3, 728, 1),
 (1, 1, 728, 728),
 (728,),
 (728,),
 (728,)]

In [None]:
# Exploratory: keep a handle `T` on layer 26 for the inspection cell below.
# The tuple before the `;` is evaluated but is not the cell's last statement.
coarse_model.layers[26].input_shape, coarse_model.layers[26].name; T = coarse_model.layers[26]

In [None]:
# Exploratory: configuration of layer `T` and the first op consuming its input.
# The dangling `T.` left at the end of the original line was a syntax error
# and has been removed.
T.get_config(), T.input.consumers()[0]

In [None]:
# Exploratory: weights of layer 25 of the shared trunk (presumably its final
# layer, since the trunk was cut at coarse_model.layers[25]).
shared_layers.layers[25].weights

In [None]:
# NOTE(review): bare reference to the Activation class; it only displays the
# class object and looks like leftover scratch work.
kr.layers.Activation

### Construct Fine Classifiers

**To Be Clear**

All layers before index 26 should be considered the shared layers.
Layers 26–35 (inclusive) would then form the coarse classifier,
and the layers after that would all be copied into each fine classifier. How many parameters would every fine classifier then have?

20,847,932, which is far too many... What do you want to do?

In [None]:
def fine_model(course_cat):
    """Build and fit the fine classifier for one coarse category.

    Reads the notebook globals ``coarse_categories``, ``y_val``,
    ``shared_out``, ``course_preds``, ``model``, ``log_dir`` and
    ``step_decay``.

    Args:
        course_cat: coarse category ID; must be an entry of
            ``coarse_categories``.

    Returns:
        The fitted Keras model for that category.
    """
    callbacks = [
        kr.callbacks.TensorBoard(log_dir=log_dir),
        kr.callbacks.LearningRateScheduler(step_decay),
        kr.callbacks.History()]
    batch_size, stop = 32, 4
    # Position of the category in `coarse_categories`: this is the column used
    # by the one-hot coarse labels/predictions and the row-block used by the
    # one-hot fine labels. For categories [1, 2] it equals `course_cat - 1`.
    cat_pos = list(coarse_categories).index(course_cat)
    # TODO select other group
    # One-hot columns of the 5 fine classes belonging to this coarse category.
    start, end = 5 * cat_pos, 5 * (cat_pos + 1)
    label = np.argmax(y_val, axis=1)
    fine_ind = np.where(np.logical_and(label >= start, label < end))[0]
    del label
    fine_y = y_val[:, start:end][fine_ind]
    fine_x = shared_out[fine_ind]
    # The HDCNN paper has the course prediction go into the fine models...
    # Append the coarse probability of this category as one extra, spatially
    # constant channel: (n, h, w, c) -> (n, h, w, c + 1). Broadcasting replaces
    # the per-pixel np.apply_along_axis call and also copes with an empty
    # selection.
    _course_preds = course_preds[fine_ind, cat_pos]
    pred_channel = np.broadcast_to(
        _course_preds[:, None, None, None], fine_x.shape[:3] + (1,))
    fine_x = np.concatenate((fine_x, pred_channel), axis=3)
    in_layer = kr.layers.Input(
        shape=fine_x.shape[1:], dtype='float32', name='fine_input_{0}'.format(course_cat))
    # Dense expects an integer number of units, not the shape tuple.
    # FIXME: this head is still attached to the coarse model's graph rather
    # than to `in_layer` (set model.layer[x]'s input tensor to be in_layer), so
    # kr.Model below is expected to fail as a disconnected graph until the fine
    # branch is wired to `in_layer`.
    out_fine = kr.layers.Dense(fine_y.shape[1], activation='softmax')(model.layers[-2].output)
    model_fine = kr.Model(inputs=in_layer, outputs=out_fine)
    # 'top_k_accuracy' is not a Keras metric name; use the same identifier as
    # the coarse model.
    model_fine.compile(optimizer='adam',
                       loss='categorical_crossentropy',
                       metrics=['accuracy', 'top_k_categorical_accuracy'])
    # NOTE(review): the same arrays serve as training and validation data here.
    model_fine.fit(fine_x, fine_y, batch_size=batch_size, initial_epoch=0,
                   validation_data=(fine_x, fine_y), epochs=stop, callbacks=callbacks)
    return model_fine

In [None]:
# One fine classifier per selected coarse category, in list order.
fine_models = [fine_model(coarse_id) for coarse_id in coarse_categories]

### Train Fine Classifiers on Respective Data

In [None]:
# Continue training each fine classifier on the samples of its coarse category.
#
# Fixes relative to the earlier version of this cell:
#   * `coarse_categories` is a list, so iterate over range(len(...));
#   * the undefined `fine_categories` is replaced by the width of fine2coarse;
#   * `fine_models` is a plain list, indexed by position;
#   * the one-hot labels are indexed by position in fine2coarse.flatten(), so
#     the columns of category i are i*n .. (i+1)*n - 1, not the raw fine IDs.
#
# NOTE(review): fine_model() builds its networks on cached shared-layer
# features plus a coarse-probability channel, with 5-column targets, whereas
# this loop feeds raw images and full-width one-hot labels. The two need to be
# reconciled before this cell can train those models.
fine_per_coarse = fine2coarse.shape[1]
for i in range(len(coarse_categories)):
    print("Training Fine Classifier: ", str(i))

    index = 0
    step = 2
    stop = 10  # Set to this only for testing purposes, change later

    fine_cols = slice(i * fine_per_coarse, (i + 1) * fine_per_coarse)

    # Get all training data for the coarse category
    ind = np.where((y_train[:, fine_cols] == 1).any(axis=1))[0]
    y_i = y_train[ind]
    x_i = x_train[ind]
    print(np.shape(y_i))
    print(np.shape(x_i))

    # Get all validation data for the coarse category
    indv = np.where((y_val[:, fine_cols] == 1).any(axis=1))[0]
    y_iv = y_val[indv]
    x_iv = x_val[indv]
    print(np.shape(y_iv))
    print(np.shape(x_iv))

    # Skip categories without any training or validation samples.
    if np.shape(x_i)[0] > 0 and np.shape(x_iv)[0] > 0:
        # Train in chunks of `step` epochs, checkpointing after each chunk.
        while index < stop:
            fine_models[i].fit(
                x_i, y_i, batch_size=2, initial_epoch=index, epochs=index+step, validation_data=(x_iv, y_iv))
            index += step
            fine_models[i].save_weights('data/models/model_fine_'+str(i))

## Probabilistic Averaging