In [1]:
#!/usr/bin/env python3
from argparse import ArgumentParser, FileType
from importlib import import_module
from itertools import count
import os

import h5py
import json
import numpy as np
from sklearn.metrics import average_precision_score
from shutil import rmtree
import tensorflow as tf

import common
import loss

# Enumerate GPUs by PCI bus id, then expose a single device to TensorFlow.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# The GPU id to use, usually either "0" or "1".
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Experiment whose embedding files are evaluated below.
EXP_ID = "190319_triplet-reid_pre-trained_densenet161_veri+small_512/"
EXP_DIR = os.path.join("/home/hthieu/AICityChallenge2019/track2_experiments/", EXP_ID)

# TEST_QUERY redirects the result files to a separate "temp" folder.
TEST_QUERY = False
result_folder = (
    "/home/hthieu/AICityChallenge2019/val_results_temp/"
    if TEST_QUERY
    else "/home/hthieu/AICityChallenge2019/val_results/"
)

# Batch size of the query pipeline.
batch_size = 128

In [2]:
# VALIDATE split.
# NOTE: the VALIDATE / TEST / CUSTOM EMBED cells all assign the same four
# names, so whichever of them ran last decides what gets evaluated.
query_dataset, gallery_dataset = (
    "data/track2_validate_query_v3.csv",
    "data/track2_validate_v3.csv",
)
query_embeddings, gallery_embeddings = (
    os.path.join(EXP_DIR, h5_name)
    for h5_name in ("track2_validate_query_embedding.h5",
                    "track2_validate_embedding.h5")
)

In [9]:
# TEST split.
# NOTE: the VALIDATE / TEST / CUSTOM EMBED cells all assign the same four
# names, so whichever of them ran last decides what gets evaluated.
query_dataset, gallery_dataset = (
    "data/track2_query.csv",
    "data/track2_test_v3.csv",
)
query_embeddings, gallery_embeddings = (
    os.path.join(EXP_DIR, h5_name)
    for h5_name in ("track2_query_embedding.h5",
                    "track2_test_embedding.h5")
)

In [2]:
# CUSTOM EMBED: "best images" subset.
# NOTE: the VALIDATE / TEST / CUSTOM EMBED cells all assign the same four
# names, so whichever of them ran last decides what gets evaluated.
query_dataset, gallery_dataset = (
    "data/track2_test_best_imgs_que.csv",
    "data/track2_test_best_imgs.csv",
)
query_embeddings, gallery_embeddings = (
    os.path.join(EXP_DIR, h5_name)
    for h5_name in ("track2_best_imgs_que_embedding.h5",
                    "track2_best_imgs_embedding.h5")
)

In [3]:
# Per-image ids, file names and view labels of the query and gallery sets
# (presumably one entry per CSV row -- confirm against common.load_dataset).
query_pids, query_fids, query_views = common.load_dataset(query_dataset, None)
gallery_pids, gallery_fids, gallery_views = common.load_dataset(gallery_dataset, None)
# View labels are compared and histogrammed as integers further down.
query_views, gallery_views = query_views.astype(int), gallery_views.astype(int)


def _read_embeddings(h5_path):
    """Load the 'emb' dataset of the HDF5 file at `h5_path` into a numpy array."""
    with h5py.File(h5_path, 'r') as h5_file:
        return np.array(h5_file['emb'])


query_embs = _read_embeddings(query_embeddings)
gallery_embs = _read_embeddings(gallery_embeddings)

# Both sets must live in the same embedding space.
query_dim, gallery_dim = query_embs.shape[1], gallery_embs.shape[1]
if query_dim != gallery_dim:
    raise ValueError('Shape mismatch between query ({}) and gallery ({}) '
                     'dimension'.format(query_dim, gallery_dim))



In [4]:
# Distance measure handed to loss.cdist.
metric = 'euclidean'

# Feed the query set through the graph in mini-batches of `batch_size`.
_query_batches = tf.data.Dataset.from_tensor_slices(
    (query_pids, query_fids, query_embs)
).batch(batch_size)
batch_pids, batch_fids, batch_embs = _query_batches.make_one_shot_iterator().get_next()

# Symbolic distances between the current query batch and the gallery
# (presumably shaped (batch, n_gallery) -- confirm against loss.cdist).
batch_distances = loss.cdist(batch_embs, gallery_embs, metric=metric)

In [5]:
#################
#Evaluate     :# 
#################
def calculate_ap(fid, pid_match, score, top_k=100):
    """Average precision and first-hit rank of one query within its best results.

    Args:
        fid: Identifier of the query image; only used in the NaN diagnostic.
        pid_match: 1-D boolean array, True where a gallery entry matches the
            query identity.
        score: 1-D array of scores aligned with `pid_match`; higher is better.
        top_k: Number of highest-scored gallery entries that are evaluated.

    Returns:
        Tuple ``(ap, k)``. ``ap`` is the average precision computed on the
        `top_k` best-scored entries only, ``k`` is the 0-based rank of the
        first match among them. If none of them matches, "Wrong!" is printed
        and ``(0.0, top_k)`` is returned.
    """
    # Indices of the top_k highest scores, best first.
    val_top = np.argsort(score)[::-1][:top_k]
    top_match = pid_match[val_top]

    hits = np.flatnonzero(top_match)
    if hits.size == 0:
        # Decide this before scoring: average precision is undefined without
        # any positive sample and only produces NaN plus a RuntimeWarning.
        print("Wrong!")
        return 0.0, top_k

    k = hits[0]
    ap = average_precision_score(top_match, score[val_top])
    if np.isnan(ap):
        # Safeguard only: with at least one match present this presumably
        # requires NaN values in `score`.
        print()
        print("WARNING: encountered an AP of NaN!")
        print("This usually means a person only appears once.")
        print("In this case, it's because of {}.".format(fid))
        print("I'm excluding this person from eval and carrying on.")
        print()
    return ap, k

#################
#Save log files:# 
#################
def save_test_img_index(result_folder,ques,aps):
    """Write `index.csv` into `result_folder`.

    One line per entry of `aps`, formatted as ``<ques[n]>,<aps[n] with 5 decimals>``.
    An existing `index.csv` is overwritten.
    """
    index_path = os.path.join(result_folder, "index.csv")
    with open(index_path, "w") as index_file:
        index_file.writelines(
            "{},{:.5f}\n".format(ques[pos], ap) for pos, ap in enumerate(aps)
        )

def save_predict_results(result_folder,score, pid, pid_match, fid=None, top_k=100):
    """Write the ranked gallery list of one query to ``<query name>.txt``.

    The file first lists the `top_k` best-scored gallery images (best first),
    then every gallery image with identity `pid` that did not make it into
    that list. Each line is ``<gallery file>,<score>,<is match>,<view>``.

    Reads the notebook-level arrays `gallery_pids`, `gallery_fids` and
    `gallery_views`.

    Args:
        result_folder: Existing directory that receives the file.
        score: 1-D score array over the gallery for this query.
        pid: Identity of the query.
        pid_match: 1-D boolean array over the gallery, True for matches.
        fid: File name of the query image; '.jpg' is replaced by '.txt' to
            build the output name. When omitted, the notebook-level loop
            variables `fids[i]` are used, which is what call sites that
            predate this parameter rely on.
        top_k: Length of the ranked list.
    """
    if fid is None:
        # Legacy behaviour: the name leaks in from the evaluation loop.
        fid = fids[i]

    # Indices of the top_k highest scores, best first.
    found_imgs = np.argsort(score)[::-1][:top_k]
    # Gallery images of this identity that are missing from the ranked list.
    all_imgs = np.flatnonzero(gallery_pids == pid)
    missing_imgs = all_imgs[np.isin(all_imgs, found_imgs, invert=True)]

    # NOTE(review): "{:5f}" is a minimum width of 5, not 5 decimals; it may
    # have been meant as "{:.5f}". Left unchanged to keep the file format stable.
    line = "{:s},{:5f},{},{}\n"
    with open(os.path.join(result_folder, fid.replace('.jpg','.txt')), "w") as fo:
        for group in (found_imgs, missing_imgs):
            for x in group:
                fo.write(line.format(gallery_fids[x].replace('Track2Data',''),
                                     score[x],
                                     pid_match[x],
                                     gallery_views[x]))

In [6]:
################
# Query   area #
################
def tf_argsort_des(inp_tensor):
    """Return the indices that sort `inp_tensor` in descending order."""
    descending_idx = tf.contrib.framework.argsort(inp_tensor, direction='DESCENDING')
    return descending_idx

def tf_count_view_freq(input_views, view_id_max):
    """Rank view ids by frequency for every row of `input_views`.

    For each row a histogram with ``view_id_max + 1`` bins over the range
    ``(0, view_id_max)`` is built, and the bin indices are returned sorted by
    descending count. Column 0 of the result is therefore the most frequent bin.
    """
    n_bins = view_id_max + 1

    def _row_histogram(row_views):
        return tf.histogram_fixed_width(row_views, (0, view_id_max), n_bins)

    per_row_counts = tf.map_fn(_row_histogram, input_views)
    return tf.map_fn(tf_argsort_des, per_row_counts)

def tf_get_top1_view(batch_distances):
    """Most frequent gallery view among the 10 closest gallery images of each query.

    Reads the notebook-level `gallery_views` array. `batch_distances` is
    argsorted ascending, so smaller values rank first.
    """
    max_view_id = np.max(gallery_views)
    views_of_gallery = tf.convert_to_tensor(gallery_views, dtype=tf.int32)

    nearest_first = tf.contrib.framework.argsort(batch_distances)
    views_nearest_first = tf.gather(views_of_gallery, nearest_first)
    top10_views = views_nearest_first[:, :10]
    ranked_views = tf_count_view_freq(top10_views, max_view_id)
    return ranked_views[:, 0]

def tf_query_imgs(ins):
    """Distances from `ins` to the gallery embeddings, averaged over axis 0.

    Uses the notebook-level `gallery_embs` and `metric`. Axis 0 is presumably
    the axis that enumerates the rows of `ins` -- confirm against loss.cdist.
    """
    dist_to_gallery = loss.cdist(ins, gallery_embs, metric=metric)
    mean_dist = tf.reduce_mean(dist_to_gallery, axis=0)
    return mean_dist

def tf_query_img_in_same_view(view_mask, tensor_gal_embs):
    """Query the gallery with the embeddings selected by `view_mask`.

    `view_mask` is transposed, cast to bool and used to pick rows of
    `tensor_gal_embs`; the picked embeddings go through `tf_query_imgs` and
    the result is returned as float32.
    """
    selector = tf.cast(tf.transpose(view_mask), tf.bool)
    same_view_embs = tf.boolean_mask(tensor_gal_embs, selector)
    mean_dist = tf_query_imgs(same_view_embs)
    return tf.cast(mean_dist, tf.float32)

def re_ranking_v2(top_1_view, predict_score):
    """Set the score of every gallery image whose view equals `top_1_view` to 1.0.

    `predict_score` is modified in place and also returned. Reads the
    notebook-level `gallery_views` array.
    """
    same_view_idx = np.argwhere(gallery_views == top_1_view)
    predict_score[same_view_idx] = 1.0
    return predict_score

def tf_query_extention(distance):
    """Query expansion: re-query the gallery with all images of each query's top-1 view.

    For every query row in `distance`, the top-1 view is determined with
    `tf_get_top1_view`, the gallery embeddings carrying that view are
    selected, and `tf_query_img_in_same_view` is evaluated on them.

    Reads the notebook-level `gallery_views` and `gallery_embs` arrays.

    Args:
        distance: Query-to-gallery distance tensor of the current batch. It is
            now actually used; previously the argument was ignored and the
            global `batch_distances` was read instead.

    Returns:
        float32 tensor stacking, per query, the result of
        `tf_query_img_in_same_view`.
    """
    top1_view = tf_get_top1_view(distance)

    tensor_gal_view = tf.convert_to_tensor(gallery_views, dtype=tf.int32)
    tensor_gal_embs = tf.convert_to_tensor(gallery_embs)

    # (n_query, 1) == (n_gallery,) broadcasts to one gallery mask per query.
    same_view = tf.equal(top1_view[:, None], tensor_gal_view)
    # map_fn takes its output dtype from its input unless told otherwise, and
    # the mapped function returns float32, so the mask is fed as float32.
    same_view = tf.cast(same_view, tf.float32)
    return tf.map_fn(lambda mask: tf_query_img_in_same_view(mask, tensor_gal_embs),
                     same_view)

In [7]:
################
# Query area #
################
aps = []
ques = []
cmc = np.zeros(len(gallery_pids), dtype=np.int32)
gallery_views_id, gallery_views_count = np.unique(gallery_views, return_counts=True)

# The per-query files and index.csv are written here; make sure it exists.
os.makedirs(result_folder, exist_ok=True)

# Build the graph ONCE. Creating these ops inside the batch loop would add a
# fresh copy of the whole sub-graph on every iteration.
top1_view_batch = tf_get_top1_view(batch_distances)
que_ext_re_ranking = tf_query_extention(batch_distances)

with tf.Session() as sess:
    for start_idx in count(step=batch_size):
        try:
            # A single run so that all four values come from the same batch.
            top1_views, distances, pids, fids = sess.run(
                [top1_view_batch, que_ext_re_ranking, batch_pids, batch_fids])
        except tf.errors.OutOfRangeError:
            print()  # Done!
            break
        print('\rCalculating batch {}-{}/{}'.format( start_idx, start_idx + len(fids), len(query_fids)), flush=True, end='')

        pids, fids = np.array(pids, '|U'), np.array(fids, '|U')
        pid_matches = gallery_pids[None] == pids[:,None]
        # Turn distances into similarity scores in (0, 1].
        scores = 1 / (1 + distances)

        # NOTE: `fids` and `i` must keep these names, save_predict_results
        # may read them from the notebook namespace.
        for i in range(len(distances)):
            fid = fids[i]
            pid = pids[i]
            pid_match = pid_matches[i,:]
            score = scores[i]
            top1_view = top1_views[i]
            score = re_ranking_v2(top1_view, score)
            #Save predict results:
            save_predict_results(result_folder,score, pid, pid_match)

            #Calculate AP:
            ap, k = calculate_ap(fid, pid_match, score)
            # A first hit at rank k counts for every cut-off >= k.
            cmc[k:] += 1
            aps.append(ap)
            ques.append(fid)

    # Save index.csv
    save_test_img_index(result_folder,ques,aps)

    # Compute the actual cmc and mAP values
    cmc = cmc / len(query_pids)
    mean_ap = np.mean(aps)
    print('mAP: {:.2%} | top-1: {:.2%} top-2: {:.2%} | top-5: {:.2%} | top-10: {:.2%}'.format(
        mean_ap, cmc[0], cmc[1], cmc[4], cmc[9]))

Calculating batch 256-384/1000Wrong!


  recall = tps / tps[-1]


Calculating batch 384-512/1000Wrong!
Calculating batch 512-640/1000Wrong!
Wrong!
Calculating batch 640-768/1000Wrong!
Calculating batch 768-896/1000Wrong!
Wrong!
Calculating batch 896-1000/1000
mAP: 91.10% | top-1: 97.80% top-2: 97.80% | top-5: 97.80% | top-10: 97.80%


In [None]:
####################################################################
# Outdated     #
################
# Return the top 1 view (same as query image's view in most cases)
def get_top_1_view(predict_score):
    """Return the view id that occurs most often among the 10 best-scored gallery images.

    Reads the notebook-level `gallery_views` array.
    """
    best10 = np.argsort(predict_score)[-10:]
    view_id, view_count = np.unique(gallery_views[best10], return_counts=True)
    # Last position of the ascending count order = highest count.
    most_frequent = np.argsort(view_count)[-1]
    return view_id[most_frequent]
    
def get_top_100_view_analize():
    """Write per-view statistics of the top-100 results of one query to ``<query>_views.txt``.

    NOTE(review): this helper takes no arguments and reads `predict_score`,
    `fids`, `i`, `distances`, `query_views`, `gallery_views`,
    `gallery_views_id`, `gallery_views_count` and `result_folder` from the
    enclosing namespace. `predict_score` is not assigned at notebook level in
    the visible code, so calling it as-is presumably raises NameError --
    confirm before reuse. Nothing is returned.
    """
    #Get views info of the top 100 images:
    top100 = np.argsort(predict_score)[-100:][::-1]
    view100_id, view100_count = np.unique(gallery_views[top100], return_counts=True)
    # Share of each view's gallery images that made it into the top 100.
    view100_per = np.zeros(len(view100_id))
    
    with open(os.path.join(result_folder, fids[i].replace('.jpg','_views.txt')), "w") as fo:
        # Views are written in ascending order of their top-100 count.
        for j, x in enumerate(np.argsort(view100_count)):
            # Total number of gallery images that carry this view id.
            view_count = gallery_views_count[np.where(gallery_views_id == view100_id[x])[0][0]]
            view100_per[j] = view100_count[x] / view_count
            # Row: view id, count in top 100, count in gallery, ratio.
            fo.write("{},{},{},{:.5f}\n".format(view100_id[x], 
                                         view100_count[x],
                                         view_count,
                                         view100_per[j]))
        # Redundant: the `with` block already closes the file.
        fo.close()
    view100_id = view100_id[np.argsort(view100_count)]
#                 tmp = view100_id[np.argsort(view100_per)][-2:-1]
#                 imp_view = np.concatenate([imp_view,tmp])

    # Gallery image in the first query's view with the largest distance to
    # query `i`; the result is computed but neither used nor returned.
    imgs_same_view = np.argwhere(gallery_views == query_views[0])
    max_diff_id = imgs_same_view[np.argmax(distances[i,imgs_same_view])]
    
def re_ranking(predict_score):
    """Set the score of all gallery images in the query's top-1 view to 1.0.

    The top-1 view comes from `get_top_1_view(predict_score)`.
    `predict_score` is modified in place and also returned. Reads the
    notebook-level `gallery_views` array.
    """
    top_view = get_top_1_view(predict_score)
    same_view_idx = np.argwhere(gallery_views == top_view)
    predict_score[same_view_idx] = 1.0
    return predict_score

#####################
#My query extension:# 
#####################
def query_imgs(imgId):
    """Distance tensor between the gallery embeddings at `imgId` and the whole gallery.

    Uses the notebook-level `gallery_embs` and `metric`. Every call adds new
    ops to the default graph.
    """
    selected_embs = gallery_embs[imgId, :]
    return loss.cdist(tf.convert_to_tensor(selected_embs), gallery_embs, metric=metric)

In [None]:
################
# Outdated     #
################
# Debug version of the evaluation loop: it only processes the first query of
# the first batch (see the two `range(1)` loops below), using the numpy
# helpers get_top_1_view / re_ranking / query_imgs.
aps = []
ques = []
cmc = np.zeros(len(gallery_pids), dtype=np.int32)
gallery_views_id, gallery_views_count = np.unique(gallery_views, return_counts=True)

#clear existing results
# WARNING: this deletes everything already stored in result_folder.
if (os.path.exists(result_folder)):
    rmtree(result_folder)
os.makedirs(result_folder)

with tf.Session() as sess:
    # Debug limit: a single batch. The full run would be count(step=batch_size).
    for start_idx in range(1):#count(step=batch_size):
        try:
            distances, pids, fids = sess.run([batch_distances, batch_pids, batch_fids])
            print('\rCalculating batch {}-{}/{}'.format( start_idx, start_idx + len(fids), len(query_fids)), flush=True, end='')
    
        except tf.errors.OutOfRangeError:
            print()  # Done!
            break
            
        pids, fids = np.array(pids, '|U'), np.array(fids, '|U')
        pid_matches = gallery_pids[None] == pids[:,None]
        # Turn distances into similarity scores in (0, 1].
        scores = 1 / (1 + distances)
        
        # Debug limit: a single query. The full run would be range(len(distances)).
        for i in range(1):#range(len(distances)):
            fid = fids[i]
            pid = pids[i]
            pid_match = pid_matches[i,:]
            score = scores[i]
            #My re-ranking solution:
#             score = re_ranking(score)
            
            #My query extension:
            # Re-query the gallery with every gallery image of the top-1 view
            # and average the resulting distances.
            top1_view = get_top_1_view(score)
            top1_view_imgs = np.argwhere(gallery_views == top1_view)[:,0]
            # NOTE: query_imgs() creates new graph ops on every call.
            tmp = sess.run(query_imgs(top1_view_imgs))
            tmp = np.average(tmp,axis=0)
            tmp = 1 / (1 + tmp)
            score = tmp
            print(score)
            #My re-ranking solution:
            score = re_ranking(score)
            
            #Save predict results:
            save_predict_results(result_folder,score, pid, pid_match)
            
            #Calculate AP:
            ap, k = calculate_ap(fid, pid_match, score)
            print(ap)
            cmc[k:] += 1
            aps.append(ap)
            ques.append(fid)
            
            

    # Save index.csv
    save_test_img_index(result_folder,ques,aps)  
    # Compute the actual cmc and mAP values
    # NOTE(review): cmc is divided by the full query count although the debug
    # limits above process a single query, so the printed top-k values are
    # not meaningful in this form.
    cmc = cmc / len(query_pids)
    mean_ap = np.mean(aps)
    print('mAP: {:.2%} | top-1: {:.2%} top-2: {:.2%} | top-5: {:.2%} | top-10: {:.2%}'.format(
        mean_ap, cmc[0], cmc[1], cmc[4], cmc[9]))