In [1]:
%matplotlib inline

MIN_KEYPOINTS = 30
N_CLUSTERS = 200

In [2]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import os

In [3]:
def visualize_list_img(imgs):
    """Show a list of images side by side in a single row.

    Args:
        imgs: Sequence of images, each accepted by ``Axes.imshow``.

    Returns:
        None. The figure is displayed with ``plt.show()``. Nothing is drawn
        when ``imgs`` is empty.
    """
    if len(imgs) == 0:
        return
    # squeeze=False always returns a 2-D array of Axes. Without it a single
    # image yields one bare Axes object, which cannot be iterated by zip().
    _, axes = plt.subplots(
        1, len(imgs), figsize=(20, 8 * len(imgs)), squeeze=False
    )
    for img, ax in zip(imgs, axes[0]):
        ax.imshow(img)
    plt.show()

In [4]:
def get_visual_words(img, feature_detector, min_keypoints=None):
    """Extract the local feature descriptors (visual words) of one image.

    Args:
        img: Image passed to ``feature_detector.detectAndCompute``. May be
            ``None`` (e.g. the result of a failed ``cv2.imread``).
        feature_detector: Object exposing ``detectAndCompute(img, mask)`` and
            returning ``(keypoints, descriptors)``.
        min_keypoints: Minimum number of keypoints required to keep the image.
            Defaults to the notebook-level ``MIN_KEYPOINTS`` when ``None``.

    Returns:
        The descriptors returned by the detector, or ``None`` when the image
        is missing, no descriptors were produced, or fewer than
        ``min_keypoints`` keypoints were found.
    """
    # An unreadable file reaches us as None; treat it like "not enough features"
    # instead of letting the detector raise.
    if img is None:
        return None
    if min_keypoints is None:
        min_keypoints = MIN_KEYPOINTS

    keypoints, descriptors = feature_detector.detectAndCompute(img, None)
    # The detector may hand back descriptors=None when it finds no keypoints.
    if descriptors is None or len(keypoints) < min_keypoints:
        return None
    return descriptors

### Find k-means clusters

In [5]:
from sklearn.cluster import KMeans
from pathlib import Path

def load_bov_files(input_folder):
    """Collect SIFT descriptors for every ``.jpg`` found under a folder.

    Input:
        input_folder: str: Path to the folder that is searched recursively
            for ``*.jpg`` files.

    Return:
        bovs (dict): Mapping from ``"<parent folder>/<file name>"`` to the
            descriptors returned by ``get_visual_words``. Files that cannot
            be read, or for which ``get_visual_words`` returns ``None``, are
            left out.
    """
    bovs = {}
    # SIFT lives in the main cv2 module since OpenCV 4.4; older builds only
    # expose it through the contrib namespace.
    if hasattr(cv2, "SIFT_create"):
        sift = cv2.SIFT_create()
    else:
        sift = cv2.xfeatures2d.SIFT_create()

    cnt_file, cnt_word = 0, 0
    for filename in Path(input_folder).glob("**/*.jpg"):
        cnt_file += 1
        # Key is the last two path components: parent folder + file name.
        fname = os.path.join(str(filename.parts[-2]), str(filename.parts[-1]))
        img = cv2.imread(str(filename))
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            continue
        visual_words = get_visual_words(img, sift)
        if visual_words is not None:
            bovs[fname] = visual_words
            cnt_word += 1
    print("Number of files that have words: %s / %s" % (cnt_word, cnt_file))
    return bovs

def find_k_mean_clusters(bovs, n_cluster=6, random_state=None):
    """Fit a KMeans model on all descriptors stored in a ``bovs`` dictionary.

    Args:
        bovs (dict): Mapping from file name to a 2-D array of descriptors
            (one descriptor per row), as produced by ``load_bov_files``.
        n_cluster (int): Number of clusters (visual words) to fit.
        random_state: Forwarded to ``KMeans`` so a run can be made
            reproducible; ``None`` keeps the estimator's default behaviour.

    Returns:
        The fitted ``KMeans`` estimator.

    Raises:
        ValueError: If ``bovs`` is empty.
    """
    if not bovs:
        raise ValueError("bovs is empty: there are no descriptors to cluster")

    # Stack the per-image descriptor arrays into one (n_descriptors, dim)
    # matrix in a single call instead of growing a Python list row by row.
    arr_bov = np.vstack(list(bovs.values()))

    kmean = KMeans(n_clusters=n_cluster, random_state=random_state)
    kmean.fit(arr_bov)
    return kmean

In [6]:
def build_hist(x, n_bins):
    """Count how often each integer label in ``x`` occurs.

    Args:
        x: Sequence or array of integer labels in ``[0, n_bins)``.
        n_bins (int): Length of the returned histogram.

    Returns:
        numpy.ndarray: float64 array of length ``n_bins`` where entry ``i``
        is the number of occurrences of ``i`` in ``x``.

    Raises:
        IndexError: If ``x`` holds non-integer values or a label outside
            ``[0, n_bins)``. Negative labels are rejected rather than being
            counted from the end of the histogram.
    """
    labels = np.asarray(x)
    if labels.size == 0:
        return np.zeros(n_bins)
    if not np.issubdtype(labels.dtype, np.integer):
        raise IndexError("build_hist expects integer labels")
    labels = labels.ravel()
    if labels.min() < 0 or labels.max() >= n_bins:
        raise IndexError("label out of range for %d bins" % n_bins)
    # bincount does the counting in C; cast back to float to keep the dtype
    # that np.zeros() gave the original histogram.
    return np.bincount(labels, minlength=n_bins).astype(np.float64)

In [7]:
# Extract descriptors for every .jpg below the gallery folder; `bovs` maps
# "<parent folder>/<file name>" to that image's descriptors.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
input_folder = '/home/qcuong98/Desktop/ReID/attributes/Wheel_Test/'
bovs = load_bov_files(input_folder)

print("Load success")


Number of files that have words: 1616 / 2692
Load success


In [None]:
# kmean = find_k_mean_clusters(bovs, N_CLUSTERS)
# import pickle
# pickle.dump(kmean, open("BoW_pickles/wheel_kmean_200_clusters.pkl", 'wb'))

In [12]:
import pickle

# Load the previously fitted KMeans model instead of re-fitting it.
# NOTE: unpickling executes arbitrary code embedded in the file — only load
# pickles that were produced by a trusted run of this notebook.
with open("BoW_pickles/wheel_kmean_200_clusters.pkl", 'rb') as kmean_file:
    kmean = pickle.load(kmean_file)

### Compare

In [9]:
def l2_distance(x1, x2):
    """Return the sum of squared element-wise differences of ``x1`` and ``x2``.

    No square root is taken, so this is the *squared* Euclidean distance.
    """
    delta = x1 - x2
    return np.sum(delta * delta)

In [10]:
def find_nearest(bovs, kmean, f1):
    """Rank every file in ``bovs`` by histogram distance to the file ``f1``.

    Each file's descriptors are mapped to cluster labels with
    ``kmean.predict``, turned into a histogram with ``build_hist`` and compared
    to the query histogram with ``l2_distance``.

    NOTE(review): the original body called ``compare_l2``, which is not
    defined in this notebook. The comparison above is assumed to be what it
    did — confirm against the missing helper.

    Args:
        bovs (dict): Mapping from file name to descriptor array.
        kmean: Fitted clustering model exposing ``predict`` and ``n_clusters``.
        f1: Key of the query file in ``bovs``.

    Returns:
        list: All keys of ``bovs`` sorted by increasing distance to ``f1``.
    """
    def _histogram(descriptors):
        return build_hist(kmean.predict(descriptors), kmean.n_clusters)

    query_hist = _histogram(bovs[f1])
    # Compute each distance once up front rather than inside the sort key.
    distances = {
        f2: l2_distance(query_hist, _histogram(descriptors))
        for f2, descriptors in bovs.items()
    }
    return sorted(bovs, key=distances.__getitem__)

In [13]:
# Histogram of cluster labels for every gallery image, keyed like `bovs`.
hists = {
    name: build_hist(kmean.predict(descriptors), N_CLUSTERS)
    for name, descriptors in bovs.items()
}

In [15]:
# NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
query_folder = '/home/qcuong98/Desktop/ReID/attributes/Wheel_Query/Cropped'
# SIFT lives in the main cv2 module since OpenCV 4.4; older builds only expose
# it through the contrib namespace.
if hasattr(cv2, "SIFT_create"):
    sift = cv2.SIFT_create()
else:
    sift = cv2.xfeatures2d.SIFT_create()

# Accumulators indexed by [query_id - 1, tracklet index]: the running sum of
# distances and the number of comparisons that went into it.
sum_dist = np.zeros((1052, 798))
cnt_dist = np.zeros((1052, 798))

reference_path = '/media/qcuong98/BackUp1/Dataset/AIC20_track2_reid/AIC20_track2/AIC20_ReID/test_track.txt'
with open(reference_path) as reference_file:
    reference = reference_file.readlines()
# One row per non-empty line, each a list of whitespace-separated tokens.
# dtype=object because rows may differ in length, and recent NumPy refuses to
# build an array from ragged lists without it.
tracks = np.array([x.strip().split()
                   for x in reference if len(x.strip()) != 0], dtype=object)
tracklet_ids = np.arange(len(tracks))
# Token (gallery image name) -> index of the line it appears on.
veh2tracklet_mapping = {v: i for i, x in enumerate(tracks) for v in x}

# Resolve each gallery histogram's tracklet once instead of once per query.
# The first six characters of the key plus ".jpg" are looked up as the gallery
# image name — presumably the parent folder is a 6-character image id; verify.
gallery = [(veh2tracklet_mapping[k[:6] + ".jpg"], v) for k, v in hists.items()]

pbar = [filename for filename in Path(query_folder).glob("**/*.jpg")]
cnt = 0
for filename in pbar:
    cnt += 1
    if (cnt % 100 == 0):
        print(cnt)

    # The parent folder name is parsed as the 1-based query id.
    query_id = int(str(filename.parts[-2]))
    img = cv2.imread(str(filename))
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        continue
    visual_words = get_visual_words(img, sift)
    if visual_words is None:
        continue

    hist = build_hist(kmean.predict(visual_words), N_CLUSTERS)
    for tracklet_id, gallery_hist in gallery:
        sum_dist[query_id - 1][tracklet_id] += l2_distance(hist, gallery_hist)
        cnt_dist[query_id - 1][tracklet_id] += 1

100
200


In [16]:
N_QUERIES = 1052
N_TRACKLETS = 798

# Mean distance per (query, tracklet); pairs that were never compared stay NaN.
dist = np.full((N_QUERIES, N_TRACKLETS), np.nan)
np.divide(sum_dist, cnt_dist, out=dist, where=cnt_dist != 0)

valid = ~np.isnan(dist)
# Despite its name, n_nan holds the number of NON-NaN distances per query.
n_nan = valid.sum(axis=1).astype(np.float64)

# argsort places NaN last, so the valid entries of a row occupy ranks
# 0 .. n_nan - 1 in increasing distance order.
indices = np.argsort(dist, axis=1)
# Invert the permutation: ranks[i, t] is the position of tracklet t in row i.
ranks = np.empty_like(indices)
np.put_along_axis(
    ranks, indices, np.arange(N_TRACKLETS)[np.newaxis, :], axis=1
)

# Score = 1 - rank / (number of valid entries); NaN where there is no distance.
# Rows without any valid entry divide by zero, but np.where discards them.
with np.errstate(divide="ignore", invalid="ignore"):
    scores = np.where(valid, 1 - ranks / n_nan[:, np.newaxis], np.nan)

In [18]:
# Spot-check the second query: rank scores next to the mean distances.
print(scores[1, :50])
print(dist[1, :50])

[       nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
 0.05925926        nan        nan 0.54074074        nan        nan
        nan 0.19259259        nan        nan        nan        nan
        nan        nan        nan        nan 0.64444444        nan
        nan        nan 0.77037037        nan        nan        nan
        nan        nan        nan 0.58518519        nan        nan
 0.04444444        nan        nan        nan        nan 0.2
        nan        nan]
[         nan          nan          nan          nan          nan
          nan          nan          nan          nan          nan
          nan          nan 511.6                 nan          nan
 419.5                 nan          nan          nan 458.55555556
          nan          nan          nan          nan          nan
          nan          nan          nan 412.                  nan
          nan          nan 393.33333333          na

In [19]:
# Persist the (query x tracklet) score matrix; NaN marks pairs without a score.
np.save("wheel.npy", scores)

In [28]:
import shutil
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
gallery_folder = '/media/qcuong98/BackUp1/Dataset/AIC20_track2_reid/AIC20_track2/AIC20_ReID/image_test'

# Tracklets scoring below this value are copied out for inspection.
threshold = 0.40

visualize_folder = 'visualize_false_wheels_0.40'

# exist_ok=True keeps the cell re-runnable; os.mkdir raised FileExistsError
# on the second execution.
os.makedirs(visualize_folder, exist_ok=True)
for i in range(N_QUERIES):
    query_id = i + 1
    # Skip queries that have no score at all.
    if (np.all(np.isnan(scores[i]))):
        continue
    d = os.path.join(visualize_folder, str(query_id))
    os.makedirs(d, exist_ok=True)
    for j in range(N_TRACKLETS):
        if (not np.isnan(scores[i][j])) and scores[i][j] < threshold:
            # The first image listed for the tracklet stands in for it.
            gallery_id = tracks[j][0]
            shutil.copyfile(os.path.join(gallery_folder, gallery_id), os.path.join(d, gallery_id))