# Hierarchically Deep Convolutional Neural Network For Image Recognition

## Setup and Imports

**Import Packages**

In [None]:
import keras as kr
import numpy as np
import tensorflow as tf

from keras.datasets import cifar100

from sklearn.model_selection import train_test_split

from random import randint
import time

**Define Global Variables**

In [None]:
# Size of the full label hierarchy: coarse categories first, fine second
coarse_categories, fine_categories = 20, 100

# Threshold percentage used by the thresholding layer
sigma = 0.01

## Import and Preprocess Dataset

**Import Cifar100 Data Set**

In [None]:
# Load CIFAR-100 once with coarse labels and once with fine labels.
# The second call rebinds X and x_test, so the images used from here on are
# the ones returned by the fine-label call; y_c / y_c_test keep the coarse
# labels and y / y_test the fine labels.
(X, y_c), (x_test, y_c_test) = cifar100.load_data(label_mode='coarse')
(X, y), (x_test, y_test) = cifar100.load_data(label_mode='fine')

In [None]:
# Partition data into only 10 fine categories and 2 coarse categories:
# keep the samples whose coarse label (column 0 of y_c) is 0 or 1.
# The arrays are indexed with the integer index array directly instead of being
# rebuilt row by row in a Python loop; the selected rows and their order are
# unchanged.
index = np.where((y_c[:,0] == 0) | (y_c[:,0] == 1))[0]
y = y[index]
y_c = y_c[index]
X = X[index]

# Same selection for the test split
index = np.where((y_c_test[:,0] == 0) | (y_c_test[:,0] == 1))[0]
y_test = y_test[index]
y_c_test = y_c_test[index]
x_test = x_test[index]

In [None]:
# Category counts after restricting the data set to the two-coarse-label
# subset: coarse categories first, fine categories second
coarse_categories, fine_categories = 2, 10

**Fine-To-Coarse Mapping**

(Ideally, this would be done through spectral clustering as opposed to hard-coding)

In [None]:
# Row i of fine2coarse holds the distinct fine labels seen among the training
# samples whose coarse label is i
fine_per_coarse = int(fine_categories/coarse_categories)
fine2coarse = np.zeros((coarse_categories, fine_per_coarse))
for i in range(coarse_categories):
    index = np.where(y_c[:,0] == i)[0]
    fine_cat = np.unique(y[index, 0])
    fine2coarse[i] = fine_cat

# The coarse label arrays are rebound to 0 in the interest of saving mem
y_c = 0
y_c_test = 0

In [None]:
#X = X[:10]
#y = y[:10]
## This is just for testing purposes, as is the n_values = 100 in the cell below

In [None]:
################################################################################
#    Title: One Hot Encoding
################################################################################
#    Description: 
#        This function extends a column of integer labels to one-hot encoding
#    
#    Parameters:
#        y           Array of label values; column 0 of each row is the label
#        n_values    Width of the encoding (number of classes). Defaults to
#                    100, the value that was previously hard-coded here.
#                    Pass None to use np.max(y[:,0]) + 1 instead
# 
#    Returns:
#        y_new    One hot encoded array of labels, with one row per row of y
#                 and n_values columns
################################################################################
def one_hot(y, n_values=100):
    labels = np.asarray(y)[:,0]
    if n_values is None:
        # Infer the width from the largest label present
        n_values = int(np.max(labels)) + 1
    # Row k of the identity matrix is the one-hot vector for label k
    y_new = np.eye(n_values)[labels]
    return y_new

In [None]:
# One-hot encode the training and test labels, then show the shape of the
# encoded training labels
y, y_test = one_hot(y), one_hot(y_test)
print(y.shape)

**Apply ZCA Whitening**

In [None]:
# Center data
# The subtraction is done in float64 so that pixel values below 128 become
# negative; on an unsigned 8-bit array the result would wrap around instead.
# NOTE(review): assumes X and x_test still hold the raw uint8 pixels returned
# by cifar100.load_data at this point - confirm.
# The test images are centered together with the training images; x_train does
# not exist until the train/validation split further down.
X = X.astype(np.float64) - 128
x_test = x_test.astype(np.float64) - 128

In [None]:
################################################################################
#    Title: ZCA
################################################################################
#    Description: 
#        This function computes a ZCA Whitening transform from one image set
#        and applies it to that set and to a second image set
#    
#    Parameters:
#        x_1        Array of MxNxC images to compute the ZCA Whitening
#        x_2        Array of MxNxC images to apply the ZCA transform
#        epsilon    Constant added to the singular values before taking the
#                   inverse square root
# 
#    Returns:
#        A tuple (x_1, x_2) of zca whitened image arrays, each reshaped back
#        to the shape of the corresponding input
################################################################################
def zca(x_1, x_2, epsilon=1e-5):

    with tf.name_scope('ZCA'):

        x1 = tf.placeholder(tf.float64, shape=np.shape(x_1), name='placeholder_x1')
        x2 = tf.placeholder(tf.float64, shape=np.shape(x_2), name='placeholder_x2')

        # One row per image of x_1, one column per pixel/channel value
        flatx = tf.cast(tf.reshape(x1, (-1, np.prod(x_1.shape[-3:])),name="reshape_flat"),tf.float64,name="flatx")
        # flatx^T . flatx divided by the number of rows. Kept in a local called
        # `cov` so it is not confused with the module-level `sigma` threshold;
        # the op keeps its original graph name.
        cov = tf.tensordot(tf.transpose(flatx),flatx, 1,name="sigma") / tf.cast(tf.shape(flatx)[0],tf.float64) ### N-1 or N?
        s, u, _ = tf.svd(cov,name="svd")
        # Whitening matrix: u . diag(1 / sqrt(s + epsilon)) . u^T
        pc = tf.tensordot(tf.tensordot(u,tf.diag(1. / tf.sqrt(s+epsilon)),1,name="inner_dot"),tf.transpose(u),1, name="pc")

        # Whiten x_1 and restore its original shape
        net1 = tf.tensordot(flatx, pc,1,name="whiten1")
        net1 = tf.reshape(net1,np.shape(x_1), name="output1")

        # Apply the transform computed from x_1 to x_2 as well
        flatx2 = tf.cast(tf.reshape(x2, (-1, np.prod(x_2.shape[-3:])),name="reshape_flat2"),tf.float64,name="flatx2")
        net2 = tf.tensordot(flatx2, pc,1,name="whiten2")
        net2 = tf.reshape(net2,np.shape(x_2), name="output2")

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        white_1, white_2 = sess.run([net1,net2], feed_dict={x1: x_1, x2: x_2})
    return white_1, white_2

In [None]:
# Whiten the training and test images and report the wall-clock time taken
time1 = time.time()
X, x_test = zca(X, x_test)
time2 = time.time()
elapsed = time2 - time1
print('Time Elapsed - ZCA Whitening: ' + str(elapsed))

**Resize Images to be compatible with VGG16**

In [None]:
################################################################################
#    Title: Preprocess Images
################################################################################
#    Description: 
#        This function resizes images to 256x256 and then takes a random
#        224x224 crop of each batch, processing the images batch by batch
#    
#    Parameters:
#        X            Array of 32x32x3 Images
#        num_batch    Number of batches to do the computation
#        dtype        dtype of the returned array. Defaults to np.int8, the
#                     type that was previously hard-coded here
# 
#    Returns:
#        A 224x224x3 set of images, one per image in X
#
#    NOTE(review): the resized values are cast to dtype when they are stored
#    in the output array, so with the default int8 any fractional part is
#    lost - confirm this is intended for the whitened data.
################################################################################
def resize(X, num_batch=1, dtype=np.int8):
    l = len(X)
    d = l // num_batch
    # Allocate the output directly in its final dtype instead of creating a
    # float64 array of the same shape and converting it afterwards
    X_new = np.zeros((l,224,224,3), dtype=dtype)
    ind = 0
    for i in range(num_batch):
        # The last batch also takes the remainder left by the integer division
        end = l if i == num_batch-1 else ind+d
        # Nothing to do for an empty batch (e.g. num_batch > len(X))
        if end > ind:
            x_batch = X[ind:end]
            net = tf.image.resize_images(x_batch, size=(256,256))
            net = tf.random_crop(net, ((end-ind),224,224,3))
            with tf.Session() as sess:
                X_new[ind:end] = sess.run(net)
        ind = ind + d
    return X_new

In [None]:
# Resize the training images in 10 batches and report the wall-clock time taken
time1 = time.time()
X = resize(X, num_batch=10)
#x_test = resize(x_test) #Commented only for testing purposes
time2 = time.time()
elapsed = time2 - time1
print('Time Elapsed - Resizing: ' + str(elapsed))

**Split Training set into Training and Validation sets**

In [None]:
# Hold out 10% of the samples for validation; the fixed random_state keeps the
# split the same from run to run
x_train, x_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=0,
)

## Coarse Training

**Import VGG16 Pretrained on Imagenet**

In [None]:
from keras.applications.vgg16 import VGG16

Citation credit for VGG16 model to:

Simonyan, Karen, and Andrew Zisserman. “Very Deep Convolutional Networks for Large-Scale Image Recognition.” arXiv:1409.1556, 10 Apr. 2015, arxiv.org/abs/1409.1556.

In [None]:
from keras.layers import Input, Dense, Conv3D
from keras.models import Model
# VGG16 with ImageNet weights, built on an explicit 224x224x3 input layer
vgg_input_shape = (224, 224, 3)
in_layer = Input(shape=vgg_input_shape, dtype='float32', name='main_input')
model = VGG16(
    include_top=True,
    weights='imagenet',
    input_tensor=in_layer,
    input_shape=vgg_input_shape,
)

**Modify Model for Cifar100**

In [None]:
# Attach a new 100-way softmax layer to the output of the second-to-last VGG16
# layer, then build and compile the resulting model
penultimate_output = model.layers[-2].output
out_coarse = Dense(100, activation='softmax')(penultimate_output)
model = Model(inputs=in_layer, outputs=out_coarse)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

**Train Model**

In [None]:
# Train in chunks of `step` epochs until `stop` epochs have been run, saving
# the weights after every chunk.
index= 0
step = 2
stop = 10

while index < stop:
    # Never train past `stop`, even when it is not a multiple of `step`
    next_index = min(index + step, stop)
    model.fit(x_train, y_train, batch_size=32, initial_epoch=index, epochs=next_index, validation_data=(x_val, y_val))
    index = next_index
    # Suffix the file with the number of epochs completed so far. The last
    # file written is therefore 'data/models/model_coarse_<stop>', which is
    # the name the fine-tuning section loads.
    model.save_weights('data/models/model_coarse_'+str(index))

## Fine-Tuning

### Load Most Recent Model

In [None]:
# Restore the weights stored under the suffix given by `stop`
coarse_weights_path = 'data/models/model_coarse_' + str(stop)
model.load_weights(coarse_weights_path)

### Construct Fine Classifiers

In [None]:
# Layers below this index are frozen; layers at or above it are left untouched
trainable_index = 17

for frozen_layer in model.layers[:trainable_index]:
    frozen_layer.trainable = False

In [None]:
################################################################################
#    Title: Fine Model
################################################################################
#    Description: 
#        This function builds and compiles a classifier that feeds the output
#        of the second-to-last layer of the global `model` into a new softmax
#        layer. The new model takes the global `in_layer` as its input
#    
#    Parameters:
#        n_outputs    Number of units in the new softmax layer. Defaults to
#                     None, which uses the global fine_categories
# 
#    Returns:
#        model_fine    The compiled Keras model
#
#    NOTE(review): one_hot() in this notebook encodes labels with 100 columns
#    by default, while fine_categories is 10 at this point. The width of this
#    layer has to match the width of the labels passed to fit() - confirm
#    which of the two is intended.
################################################################################
def fine_model(n_outputs=None):
    if n_outputs is None:
        n_outputs = fine_categories
    out_fine = Dense(n_outputs, activation='softmax')(model.layers[-2].output)
    model_fine = Model(inputs=in_layer,outputs=out_fine)
    model_fine.compile(optimizer= 'adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
    return model_fine

In [None]:
# One fine classifier per coarse category, stored in order under 'models'
fine_models = {'models': []}
for i in range(coarse_categories):
    model_i = fine_model()
    fine_models['models'].append(model_i)

### Train Fine Classifiers on Respective Data

In [None]:
# Train each fine classifier only on the samples whose fine label belongs to
# its coarse category, saving the weights after every chunk of epochs.
for i in range(coarse_categories):
    print("Training Fine Classifier: ", str(i))
    
    index= 0
    step = 2
    stop = 10  # Set to this only for testing purposes, change later
    
    # Columns of the one-hot labels that belong to coarse category i
    fine_cols = [int(fine2coarse[i,j]) for j in range(int(fine_categories/coarse_categories))]
    
    # Get all training data for the coarse category.
    # np.where on the stacked per-column masks returns (column, row) index
    # arrays; [1] keeps the row indices. The rows are then selected with
    # integer-array indexing instead of copying them one by one in a loop.
    ind = np.where([(y_train[:,c]==1) for c in fine_cols])[1]
    y_i = y_train[ind]
    x_i = x_train[ind]
    print(np.shape(y_i))
    print(np.shape(x_i))
    
    # Get all validation data for the coarse category
    indv = np.where([(y_val[:,c]==1) for c in fine_cols])[1]
    y_iv = y_val[indv]
    x_iv = x_val[indv]
    print(np.shape(y_iv))
    print(np.shape(x_iv))
    
    # Skip the classifier when it has no training or no validation samples
    if len(x_i) > 0 and len(x_iv) > 0:
        while index < stop:
            fine_models['models'][i].fit(x_i, y_i, batch_size=2, initial_epoch=index, epochs=index+step, validation_data=(x_iv, y_iv))
            index += step
            fine_models['models'][i].save_weights('data/models/model_fine_'+str(i))

## Probabilistic Averaging