In [1]:
# %load_ext autoreload
# %autoreload 2
# %load_ext jupyternotify
# %autonotify -a 30

import math
import time
import sys
import os
import random
import pickle

import numpy as np

import keras
import keras.layers
import keras.applications
import keras.backend
import keras.preprocessing.image
import keras.utils
import tensorflow as tf

import cv2
import PIL
import PIL.Image
import matplotlib.pyplot as plt

import shutil

from IPython.display import clear_output as clr 
# configurations

## seeding
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# setting it here is not expected to change hashing in this already-running
# kernel - confirm whether it has to be exported before launching Jupyter.
os.environ['PYTHONHASHSEED'] = '3'
np.random.seed(3)
random.seed(3)
tf.set_random_seed(3)

## which gpu to use
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

## memory allocation
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# config.gpu_options.per_process_gpu_memory_fraction = 0.5
session = tf.Session(config=config)
keras.backend.set_session(session)

## data directory for CUB200 root
# raw string: the backslashes of the Windows path are kept literally instead of
# being parsed as (invalid) escape sequences; the resulting value is unchanged
PATH_DATA_ROOT_CUB200 = r"D:\project\commons\CUB_200_2011"

## network configurations
### number of output classes, 200 for CUB200
### (not referenced by the cells visible in this notebook, which pass 36 explicitly)
NO_CLASS = 200



Using TensorFlow backend.


In [2]:
def resize_image(x, size_target=None, flg_keep_aspect=False, rate_scale=1.0, flg_random_scale=False):
    """
    resize a single image with cv2.resize (bicubic interpolation)

        args
            x
                image as a numpy array, or anything np.asarray accepts
                , shape = (height, width), (height, width, channels)
                  or (n, height, width, channels)
                , for a 4-D input only the first image x[0] is resized
            size_target
                target size: an int, a 1-element sequence or (height, width)
                , None means the image's own size multiplied by rate_scale
            flg_keep_aspect
                True  : both sides are multiplied by one common factor so that
                        every side reaches at least its target size
                        (sides already larger than the target are not shrunk)
                False : the image is resized to the target size directly
            rate_scale
                additional scale factor applied to the resize
            flg_random_scale
                True  : the scale factor is drawn from uniform(1.0, rate_scale)
                False : the scale factor is exactly rate_scale

        returns
            the resized image as returned by cv2.resize

        raises
            ValueError
                if the image is not 2-D, 3-D or 4-D, or size_target has more
                than 2 elements
    """

    # convert to numpy array
    img = x if isinstance(x, np.ndarray) else np.asarray(x)

    # a 4-D input is treated as a batch, only its first image is used
    if img.ndim == 4:
        img = img[0]
    if img.ndim not in (2, 3):
        raise ValueError(
            "expected an image with 2, 3 or 4 dimensions, got shape {}".format(img.shape)
        )
    size_height_img, size_width_img = img.shape[:2]

    # resolve the target size; None has to be tested first because len(None) raises
    if size_target is None:
        size_height_target = size_height_img * rate_scale
        size_width_target = size_width_img * rate_scale
    elif np.ndim(size_target) == 0:
        size_height_target = size_width_target = size_target
    elif len(size_target) == 1:
        size_height_target = size_width_target = size_target[0]
    elif len(size_target) == 2:
        size_height_target, size_width_target = size_target
    else:
        raise ValueError(
            "size_target must be None, a scalar or a sequence of 1 or 2 values, got {!r}".format(size_target)
        )

    # calculate resize coefficients, only sides smaller than the target are enlarged
    coef_height = 1
    coef_width = 1
    if size_height_img < size_height_target:
        coef_height = size_height_target / size_height_img
    if size_width_img < size_width_target:
        coef_width = size_width_target / size_width_img

    # calculate coefficient to match small size to target size
    ## scale coefficient if specified
    low_scale = 1.0 if flg_random_scale else rate_scale
    # drawn unconditionally (as the original did) so the numpy random stream
    # advances the same way in both branches below
    coef_max = max(coef_height, coef_width) * np.random.uniform(low=low_scale, high=rate_scale)

    # method_interpolation = cv2.INTER_LINEAR
    method_interpolation = cv2.INTER_CUBIC
    # method_interpolation = cv2.INTER_NEAREST

    if flg_keep_aspect:
        # cv2 expects dsize as (width, height)
        dsize = (
            math.ceil(size_width_img * coef_max)
            , math.ceil(size_height_img * coef_max)
        )
    else:
        # width and height each get their own random draw
        dsize = (
            int(size_width_target * np.random.uniform(low=low_scale, high=rate_scale))
            , int(size_height_target * np.random.uniform(low=low_scale, high=rate_scale))
        )

    return cv2.resize(img, dsize=dsize, interpolation=method_interpolation)

def resize_images(images, **kwargs):
    """
    resize every image of a collection in place

        args
            images
                indexable, mutable collection of images
                , each element is overwritten with its resized version
            kwargs
                forwarded unchanged to resize_image

        returns
            the same `images` object that was passed in
    """
    for idx in range(len(images)):
        resized = resize_image(images[idx], **kwargs)
        images[idx] = resized
    return images

In [3]:
# Load the image array and the per-image labels from .npy files in the working directory.
data = np.load("images.npy")
labels = np.load("combined_labels.npy")

In [4]:
# Resize every image via resize_image(). resize_images() writes each result back
# into `data` and returns that same object, so `data_resized` and `data` are one array.
data_resized = resize_images(data, size_target=(224,224), flg_keep_aspect=True)

In [5]:
# Sanity check: shape of the image array after resizing.
data_resized.shape

(2423, 224, 224, 3)

## BCNN Model

In [6]:
from keras.initializers import glorot_normal

def outer_product(x):
    """
    pixel-averaged outer product of two feature tensors

        args
            x
                list of 2 tensors
                , each assumed to have shape = (size_minibatch, total_pixels, size_filter)

        returns
            batch_dot of x[0] and x[1] contracted over axis 1 of both
            , divided by the static size of axis 1 of x[0]
    """
    feat_a, feat_b = x[0], x[1]
    summed = keras.backend.batch_dot(feat_a, feat_b, axes=[1, 1])
    no_pixels = feat_a.get_shape().as_list()[1]
    return summed / no_pixels

def signed_sqrt(x):
    """
    element-wise signed square root : sign(x) * sqrt(|x| + 1e-9)

        args
            x
                a tensor

        returns
            a tensor of the same shape as x
    """
    backend = keras.backend
    # the small constant is added to |x| before taking the root
    root_of_magnitude = backend.sqrt(backend.abs(x) + 1e-9)
    return backend.sign(x) * root_of_magnitude

def L2_norm(x, axis=-1):
    """
    L2-normalize a tensor along one axis

        args
            x
                a tensor
            axis
                axis along which to normalize, the last axis by default

        returns
            result of keras.backend.l2_normalize(x, axis=axis)
    """
    normalized = keras.backend.l2_normalize(x, axis=axis)
    return normalized


def build_model(
    size_heigth=224
    ,size_width=224
    ,no_class=36
    ,no_last_layer_backbone=13
    
    ,name_optimizer="sgd"
    ,rate_learning=1.0
    ,rate_decay_learning=0.0
    ,rate_decay_weight=0.0
    
    ,name_initializer="glorot_normal"
    ,name_activation_logits="softmax"
    ,name_loss="categorical_crossentropy"

    ,flg_debug=False
    ,**kwargs
):
    """
    build and compile a symmetric bilinear CNN on top of an ImageNet VGG16

        the feature map of one VGG16 layer is used as both inputs of the
        outer product, followed by signed square root, L2 normalization
        and a single Dense classification layer

        args
            size_heigth, size_width
                input image size, the network input is (size_heigth, size_width, 3)
            no_class
                number of units of the final Dense layer
            no_last_layer_backbone
                index into the VGG16 `layers` list whose output is used as feature map
            name_optimizer
                one of "adam", "rmsprop", "sgd"
            rate_learning
                learning rate of the optimizer
            rate_decay_learning
                `decay` argument of the optimizer
            rate_decay_weight
                L2 regularization factor of the Dense kernel
            name_initializer
                name of an initializer callable visible from this function
                (e.g. "glorot_normal"), or None to pass None to the Dense layer
            name_activation_logits
                activation applied to the Dense output
            name_loss
                loss passed to compile
            flg_debug
                if True, print intermediate shapes and the model summary
            kwargs
                accepted and ignored

        returns
            the compiled keras Model ; all VGG16 layers are set to non-trainable

        raises
            KeyError
                if name_optimizer is not one of the supported names
    """

    # snapshot of the call arguments, taken before any other local exists
    params = dict(locals())

    keras.backend.clear_session()
    
    print("-------------------------------")
    print("parameters:")
    for key, val in params.items():
        # identity test: `== None` is ambiguous for array-like values
        if val is not None and key != "kwargs":
            print("\t", key, "=",  val)
    print("-------------------------------")
    
    ### 
    ### load pre-trained model
    ###
    tensor_input = keras.layers.Input(shape=[size_heigth,size_width,3])
    model_detector = keras.applications.vgg16.VGG16(
                            input_tensor=tensor_input
                            , include_top=False
                            , weights='imagenet'
                        )
    

    ### 
    ### bi-linear pooling
    ###

    # extract features from detector
    x_detector = model_detector.layers[no_last_layer_backbone].output
    shape_detector = model_detector.layers[no_last_layer_backbone].output_shape
    if flg_debug:
        print("shape_detector : {}".format(shape_detector))

    # extract features from extractor , same with detector for symmetry DxD model
    shape_extractor = shape_detector
    x_extractor = x_detector
    if flg_debug:
        print("shape_extractor : {}".format(shape_extractor))
        
    
    # reshape to (minibatch_size, total_pixels, filter_size)
    x_detector = keras.layers.Reshape(
            [
                shape_detector[1] * shape_detector[2] , shape_detector[-1]
            ]
        )(x_detector)
    if flg_debug:
        print("x_detector shape after rehsape ops : {}".format(x_detector.shape))
        
    x_extractor = keras.layers.Reshape(
            [
                shape_extractor[1] * shape_extractor[2] , shape_extractor[-1]
            ]
        )(x_extractor)
    if flg_debug:
        print("x_extractor shape after rehsape ops : {}".format(x_extractor.shape))
        
        
    # outer products of features, output shape=(minibatch_size, filter_size_detector, filter_size_extractor)
    x = keras.layers.Lambda(outer_product)(
        [x_detector, x_extractor]
    )
    if flg_debug:
        print("x shape after outer products ops : {}".format(x.shape))
        
        
    # reshape to (minibatch_size, filter_size_detector*filter_size_extractor)
    x = keras.layers.Reshape([shape_detector[-1]*shape_extractor[-1]])(x)
    if flg_debug:
        print("x shape after rehsape ops : {}".format(x.shape))
        
        
    # signed square-root 
    x = keras.layers.Lambda(signed_sqrt)(x)
    if flg_debug:
        print("x shape after signed-square-root ops : {}".format(x.shape))
        
    # L2 normalization
    x = keras.layers.Lambda(L2_norm)(x)
    if flg_debug:
        print("x shape after L2-Normalization ops : {}".format(x.shape))



    ### 
    ### attach FC-Layer
    ###

    initializer = name_initializer
    if name_initializer is not None:
        # NOTE(review): eval() executes whatever string the caller passes;
        # only call build_model with trusted, hard-coded initializer names.
        initializer = eval(name_initializer+"()")
            
    x = keras.layers.Dense(
            units=no_class
            ,kernel_regularizer=keras.regularizers.l2(rate_decay_weight)
            ,kernel_initializer=initializer
        )(x)
    if flg_debug:
        print("x shape after Dense ops : {}".format(x.shape))
    tensor_prediction = keras.layers.Activation(name_activation_logits)(x)
    if flg_debug:
        print("prediction shape : {}".format(tensor_prediction.shape))

        

    ### 
    ### compile model
    ###
    model_bilinear = keras.models.Model(
                        inputs=[tensor_input]
                        , outputs=[tensor_prediction]
                    )
    
    
    # fix pre-trained weights
    for layer in model_detector.layers:
        layer.trainable = False
        
        
    # define optimizers ; factories so that only the requested optimizer is instantiated
    optimizers ={
        "adam":lambda: keras.optimizers.Adam(
                    lr=rate_learning
                    , decay=rate_decay_learning
                )
        ,"rmsprop":lambda: keras.optimizers.RMSprop(
                    lr=rate_learning
                    , decay=rate_decay_learning
                )
        ,"sgd":lambda: keras.optimizers.SGD(
                    lr=rate_learning
                    , decay=rate_decay_learning
                    , momentum=0.9
                    , nesterov=False
                )
    }
    # an unknown name raises KeyError here, before anything is compiled
    optimizer = optimizers[name_optimizer]()
    
    model_bilinear.compile(
        loss=name_loss
        , optimizer=optimizer
        , metrics=["categorical_accuracy"]
    )
    
    
    
    if flg_debug:
        model_bilinear.summary()
    
    return model_bilinear

In [7]:
model = build_model(
            # number of output classes; 36 is also the num_classes used for the one-hot labels below
            no_class = 36

            # pretrained model specification, using VGG16 cut at layer index 13
            # NOTE(review): the debug print reports a 28x28x512 feature map for this layer,
            # so this is presumably block4_conv3 rather than block5_conv3 -
            # TODO confirm with model_detector.layers[13].name
            ,no_last_layer_backbone = 13
    
            # training parameters
            ,rate_learning=1.0
            ,rate_decay_weight=1e-8
    
            ,flg_debug=True
        )

-------------------------------
parameters:
	 flg_debug = True
	 name_loss = categorical_crossentropy
	 name_activation_logits = softmax
	 name_initializer = glorot_normal
	 rate_decay_weight = 1e-08
	 rate_decay_learning = 0.0
	 rate_learning = 1.0
	 name_optimizer = sgd
	 no_last_layer_backbone = 13
	 no_class = 36
	 size_width = 224
	 size_heigth = 224
-------------------------------
shape_detector : (None, 28, 28, 512)
shape_extractor : (None, 28, 28, 512)
x_detector shape after rehsape ops : (?, 784, 512)
x_extractor shape after rehsape ops : (?, 784, 512)
x shape after outer products ops : (?, 512, 512)
x shape after rehsape ops : (?, 262144)
x shape after signed-square-root ops : (?, 262144)
x shape after L2-Normalization ops : (?, 262144)
x shape after Dense ops : (?, 36)
prediction shape : (?, 36)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to      

In [8]:
def train_model(
        model=None
        ,name_model="BCNN_keras"
        ,X_train = None
        ,Y_train = None
        ,X_test = None
        ,Y_test = None
        ,max_epoch=50
    ):
    """
    fit a compiled keras model with logging, checkpointing, LR reduction
    and early stopping, then save the final weights

        args
            model
                compiled keras model, must expose fit() and save_weights()
            name_model
                sub-directory name under ./model/ that receives logs and weights
            X_train, Y_train
                training inputs and targets passed to model.fit
            X_test, Y_test
                validation inputs and targets passed as validation_data
            max_epoch
                maximum number of epochs

        returns
            the History object returned by model.fit
    """
    
    path_model = "./model/{}/".format(name_model)
    # makedirs also creates the parent ./model directory, which os.mkdir does not
    os.makedirs(path_model, exist_ok=True)
        
    now = time.strftime("%Y%m%d%H%M%S", time.localtime())
        
    # callback setting
    callback_logger = keras.callbacks.CSVLogger(
                            path_model +  "log_training_{}.csv".format(now)
                            , separator=','
                            , append=False
                        )
    # checkpoint file names embed the epoch, validation loss and validation accuracy
    callback_saver = keras.callbacks.ModelCheckpoint(
                        path_model
                            + "E[{epoch:02d}]"
                            + "_LOS[{val_loss:.3f}]"
                            + "_ACC[{val_categorical_accuracy:.3f}]"
                            + ".hdf5" 
                        , monitor='val_loss'
                        , verbose=0
                        , mode='auto'
                        , period=10
                        , save_best_only=True
                    )
    callback_reducer = keras.callbacks.ReduceLROnPlateau(
                                monitor='val_loss'
                                , factor=0.5
                                , patience=5
                                , min_lr=1e-6
                                , min_delta=1e-3
                            )
    callback_stopper = keras.callbacks.EarlyStopping(
                            monitor='val_loss'
                            , min_delta=1e-3
                            , patience=10
                            , verbose=0
                            , mode='auto'
                        )
    list_callback = [
        callback_logger
        ,callback_saver
        ,callback_reducer
        ,callback_stopper
    ]
            
    hist = model.fit(
                X_train,
                Y_train
                , epochs=max_epoch
                , validation_data=(X_test, Y_test)
                ,callbacks=list_callback
                ,verbose=1
                ,batch_size = 5
            )
        
    # final weights: named after the number of epochs actually run and the last validation metrics
    model.save_weights(
        path_model
            + "E[{}]".format(len(hist.history['val_loss']))
            + "_LOS[{:.3f}]".format(hist.history['val_loss'][-1])
            + "_ACC[{:.3f}]".format(hist.history['val_categorical_accuracy'][-1])
            + ".h5" 
    )
    
    return hist

In [9]:
from sklearn.model_selection import train_test_split as tts
import itertools

In [10]:
# One-hot encode the labels (36 classes, matching no_class passed to build_model),
# then make a stratified 80/20 train/test split.
# NOTE(review): no random_state is passed, so the split presumably depends on NumPy's
# global RNG state at this point - TODO confirm it is reproducible across runs.
oh_labels = keras.utils.to_categorical(labels, num_classes = 36)
x_tr, x_ts, y_tr, y_ts = tts(data_resized, oh_labels, test_size = 0.2, stratify = labels)

In [11]:
# Load previously saved weights from bcnnw.h5 into the model built above.
model.load_weights('bcnnw.h5')

In [12]:
# now all layers are trainable
for layer in model.layers:
    layer.trainable = True

# change LR
opt_sgd = keras.optimizers.SGD(
                lr=1e-3
                , decay=1e-9
                , momentum=0.9
                , nesterov=False
            )
model.compile(
    loss="categorical_crossentropy"
    , optimizer=opt_sgd
    , metrics=["categorical_accuracy"]
)

In [13]:
# hist =train_model(model=model
#             ,X_train = x_tr
#             ,Y_train = y_tr
#             ,X_test = x_ts
#             ,Y_test = y_ts
#             ,max_epoch=1)

In [14]:
def describeSURF(image):
    """
    detect SURF keypoints and compute their descriptors

        args
            image
                image passed to detectAndCompute (no mask is used)

        returns
            (keypoints, descriptors) from a SURF detector with Hessian threshold 400
    """
    detector = cv2.xfeatures2d.SURF_create()
    detector.setHessianThreshold(400)
    keypoints, descriptors = detector.detectAndCompute(image, None)
    return keypoints, descriptors

def describeSIFT(image):
    """
    detect SIFT keypoints and compute their descriptors

        args
            image
                image passed to detectAndCompute (no mask is used)

        returns
            (keypoints, descriptors) from a default SIFT detector
    """
    detector = cv2.xfeatures2d.SIFT_create()
    keypoints, descriptors = detector.detectAndCompute(image, None)
    return keypoints, descriptors

def describeORB(image):
    """
    detect ORB keypoints and compute their descriptors

        args
            image
                image passed to detectAndCompute (no mask is used)

        returns
            (keypoints, descriptors) from a default ORB detector
    """
    detector = cv2.ORB_create()
    keypoints, descriptors = detector.detectAndCompute(image, None)
    return keypoints, descriptors

def getDescriptors(images,function):
    """
    collect the descriptors of all images into one array

        args
            images
                iterable of images
            function
                callable returning (keypoints, descriptors) for one image
                , e.g. describeSIFT ; images whose descriptors are None are skipped

        returns
            numpy array built from all descriptor rows, in image order
    """
    rows = []
    for image in images:
        _keypoints, des = function(image)
        if des is None:
            continue
        # add each descriptor row individually so the result is one flat list of rows
        rows.extend(des)

    return np.asarray(rows)

def get_kMeansModel(X, k):
    """
    fit a k-means model on the descriptors

        args
            X
                training data passed to KMeans.fit
            k
                number of clusters

        returns
            the fitted KMeans model
    """
    # KMeans is not imported by any cell visible in this notebook, so it is
    # imported here to keep the function usable on a fresh kernel
    from sklearn.cluster import KMeans

    model = KMeans(n_clusters=k,init='k-means++',tol=0.0001,verbose=1).fit(X)
    return model

def get_VLAD(X,model):
    """
    build a VLAD-style vector for one image from its local descriptors

        the result is the concatenation of
            - per-cluster sums of residuals (descriptor - cluster center)
              , flattened, signed-square-rooted and L2-normalized
            - the L2-normalized histogram of cluster assignments

        args
            X
                descriptors of one image, shape = (no_descriptors, descriptor_size)
                , may be empty when no keypoint was found
            model
                fitted clustering model exposing predict(), cluster_centers_
                and n_clusters

        returns
            1-D numpy array of length k * descriptor_size + k
            , all zeros when X is empty
    """
    centers = model.cluster_centers_
    k = model.n_clusters

    X = np.asarray(X)
    if X.size == 0:
        # no descriptors: return a zero vector of the regular length so that
        # callers can still stack the per-image vectors
        return np.zeros(k * centers.shape[1] + k)

    c_vals = model.predict(X)
    d = X.shape[1]

    # sum of residuals per cluster ; clusters without any descriptor stay zero
    V = np.zeros([k, d])
    for i in range(k):
        members = c_vals == i
        if members.any():
            V[i] = np.sum(X[members, :] - centers[i], axis=0)

    V = V.flatten()

    # signed square root
    V = np.sign(V) * np.sqrt(np.abs(V))

    # L2 normalization ; skipped for an all-zero vector to avoid 0/0 = NaN
    norm_V = np.sqrt(np.dot(V, V))
    if norm_V > 0:
        V = V / norm_V

    # histogram of cluster assignments, L2-normalized
    # (X is non-empty here, so the histogram always has a positive norm)
    vec = np.zeros(k)
    assigned, counts = np.unique(c_vals, return_counts=True)
    vec[assigned] = counts
    vec = vec / np.sqrt(np.dot(vec, vec))

    vlad_vec = np.concatenate((V, vec))

    return vlad_vec

def getVLAD_Descriptors(images,function,k_model):
    """
    compute one VLAD vector per image

        args
            images
                iterable of images
            function
                callable returning (keypoints, descriptors) for one image
            k_model
                clustering model forwarded to get_VLAD

        returns
            list with the get_VLAD result of every image, in image order
            ; an image without descriptors is passed to get_VLAD as an empty array
    """
    vlad_vectors = []
    for image in images:
        _keypoints, des = function(image)
        rows = [] if des is None else list(des)
        vlad_vectors.append(get_VLAD(np.asarray(rows), k_model))
    return vlad_vectors

In [15]:
import pickle as pkl

In [16]:
# Load the pickled clustering models, one per descriptor type; they are later
# passed to getVLAD_Descriptors() as k_model.
# NOTE: unpickling can execute arbitrary code - only load .pkl files from a trusted source.
with open("k_model_sift.pkl", "rb") as file:
    k_model_sift = pkl.load(file) 

with open("k_model_surf.pkl", "rb") as file:
    k_model_surf = pkl.load(file) 
    
with open("k_model_orb.pkl", "rb") as file:
    k_model_orb = pkl.load(file) 

In [26]:
# desc_sift = getVLAD_Descriptors(x_tr, describeSIFT, k_model_sift)
# desc_orb = getVLAD_Descriptors(x_tr, describeORB, k_model_orb)
# desc_surf = getVLAD_Descriptors(x_tr, describeSURF, k_model_surf)

# desc_net_train = np.concatenate((desc_sift, desc_surf, desc_orb), axis = 1)

In [27]:
# desc_sift = getVLAD_Descriptors(x_ts, describeSIFT, k_model_sift)
# desc_orb = getVLAD_Descriptors(x_ts, describeORB, k_model_orb)
# desc_surf = getVLAD_Descriptors(x_ts, describeSURF, k_model_surf)

# desc_net_test = np.concatenate((desc_sift, desc_surf, desc_orb), axis = 1)

In [22]:
# Alias (not a copy): solve_query() reads the network through the name model_mod.
model_mod = model

In [29]:
# pred_train = model_mod.predict(x_tr)
# pred_test = model_mod.predict(x_ts)

In [30]:
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier as MLP

In [31]:
# red = PCA(n_components = 35).fit(desc_net_train)

In [32]:
# final_train_feat = np.concatenate((red.transform(desc_net_train), pred_train), axis = 1)
# final_test_feat = np.concatenate((red.transform(desc_net_test), pred_test), axis = 1)
# final_train_feat.shape, final_test_feat.shape

((1938, 71), (485, 71))

In [33]:
# c_model_c = MLP(hidden_layer_sizes =(1000))
# c_model_c.fit(final_train_feat, np.argmax(y_tr, axis=1))
# c_model_c.score(final_test_feat, np.argmax(y_ts, axis=1))

0.9855670103092784

In [45]:
# # Saving the files
# with open("mlp_model.pkl", 'wb') as file:  
#         pkl.dump(c_model_c, file)

# with open("pca_model.pkl", 'wb') as file:  
#         pkl.dump(red, file)
        

In [23]:
# Loading the files: the pickled classifier (c_model_c) and the pickled reducer (red)
# that solve_query() uses via predict() and transform().
# NOTE: unpickling can execute arbitrary code - only load .pkl files from a trusted source.
with open("mlp_model.pkl", 'rb') as file:  
        c_model_c = pkl.load(file)

with open("pca_model.pkl", 'rb') as file:  
        red = pkl.load(file)

# Alias (not a copy) of the BCNN model, read by solve_query().
model_mod = model

In [24]:
def solve_query(x):
    """
    predict class labels for a batch of query images

        every image is resized to a (224,224) target, described with VLAD
        vectors from SIFT, SURF and ORB, reduced with `red`, concatenated with
        the network prediction of `model_mod` and classified with `c_model_c`

        args
            x
                array of images, indexed along its first axis

        returns
            the output of c_model_c.predict for the fused features
    """
    resized = np.array(
        [resize_image(x[idx], size_target=(224,224)) for idx in range(x.shape[0])]
    )

    # computed in the order SIFT, ORB, SURF ...
    vlad_sift = getVLAD_Descriptors(resized, describeSIFT, k_model_sift)
    vlad_orb = getVLAD_Descriptors(resized, describeORB, k_model_orb)
    vlad_surf = getVLAD_Descriptors(resized, describeSURF, k_model_surf)

    # ... but concatenated in the order SIFT, SURF, ORB
    handcrafted = np.concatenate((vlad_sift, vlad_surf, vlad_orb), axis = 1)

    network_scores = model_mod.predict(resized)

    fused = np.concatenate((red.transform(handcrafted), network_scores), axis = 1)
    return c_model_c.predict(fused)

In [46]:
# Load the lookup arrays used by get_output() and the query data.
bnames = np.load('bnames.npy')

fnames = np.load('fine_labels.npy')

# Same files that were loaded as `data` / `labels` at the top of the notebook.
X_query = np.load('images.npy')

Y_query = np.load('combined_labels.npy')

Y_query_b = np.load('broad_labels.npy')

# Strip the trailing underscore from three of the names stored in bnames.
bnames[bnames == 'birds_'] = 'birds'
bnames[bnames == 'dogs_'] = 'dogs'
bnames[bnames == 'flowers_'] = 'flowers'

In [44]:
# Array indexed by get_output() with the mask `Y_query == pred` to look up the fine-grained name.
fine_names = np.load('fine_names2.npy')

In [57]:
def get_output(imagenames , images):
    """
    predict the images and format one output line per image

        each line has the form "<image name> <bname> <bname>@<fname>"
        , where bname / fname are the first unique entries of `bnames` /
        `fine_names` at the positions where `Y_query` equals the prediction

        args
            imagenames
                numpy array of image names, one per image
            images
                images passed to solve_query

        returns
            (list of output lines, predictions from solve_query)
    """
    query_in = imagenames.copy().astype(str)
    preds = solve_query(images)

    query_out = []
    for idx in range(preds.shape[0]):
        # all reference samples that carry the predicted label
        same_label = Y_query == preds[idx]
        bname = np.unique(bnames[same_label])[0]
        fname = np.unique(fine_names[same_label])[0]
        query_out.append(
            "{!s} {!s} {!s}@{!s}".format(query_in[idx], bname, bname, fname)
        )
    return query_out, preds

In [40]:
# Smoke test of the full pipeline on the first query image; the first entry of
# bnames is passed in the image-name slot here.
out, preds = get_output(bnames[0:1], X_query[0:1])
out

['aircrafts aircrafts aircrafts@0']

In [35]:
# Count positions where preds equals Y_query (element-wise comparison).
# NOTE(review): execution counts in this notebook are out of order, so which
# `preds` produced the stored output cannot be told from here - confirm on a fresh run.
np.sum(preds == Y_query)

98

In [60]:
# Predict every image found in read_folder and write one result line per image
# to <write_folder>/pred.txt.
read_folder = "data_test/data"
write_folder = "data_test/preds"

# make sure the output directory exists before opening the result file
os.makedirs(write_folder, exist_ok=True)

img_paths = sorted(os.listdir(read_folder))
skipped = []

# `with` closes the file even if a prediction raises half-way through
with open(os.path.join(write_folder, "pred.txt"), "w") as text_file:
    for counter, img_path in enumerate(img_paths):
        print(img_path)
        # NOTE(review): cv2.imread returns BGR channel order - confirm that
        # images.npy (used for training) stores the same channel order.
        img = cv2.imread(os.path.join(read_folder, img_path))
        if img is None:
            # cv2.imread returns None for files it cannot decode
            skipped.append(img_path)
            continue
        out, preds = get_output(np.array([img_path]), img.reshape((1,) + img.shape))
        out_w = out[0]
        print(out_w)
        text_file.write(out_w+"\n")
        # progress is derived from the actual number of files instead of a fixed total
        print('Remaining', len(img_paths)-counter)
        clr()

if skipped:
    print("Skipped unreadable files:", skipped)

In [43]:
# Second fine-label file; compared against `fnames` in a later cell.
fine_labels2 = np.load("fine_labels2.npy")

In [50]:
# Second combined-label file; not used by any other cell visible in this notebook.
comb_labels2 = np.load("combined_labels2.npy")

In [62]:
# Fraction of entries where fnames (fine_labels.npy) equals fine_labels2.
np.sum(fnames == fine_labels2)/fnames.shape[0]

1.0