In [99]:
import os
import os.path as osp
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from src.utils.load_cfg import ConfigLoader
from src.factories import ModelFactory, LossFactory, InferFactory
from src.loaders.base_loader_factory import BaseDataLoaderFactory
from trainer import train
from tester import test
import src.utils.logging as logging
import itertools
from matplotlib import pyplot as plt
import ipdb
import src.config as cfg
from src.inferences.base_infer import BaseInfer
import glob
import xml.etree.ElementTree as ET
from shutil import copyfile, copytree
import pandas as pd
import pickle as pkl
from src.utils.reid_metrics import reid_evaluate

In [4]:
"""Prepeare TEST data loaders"""
dataset_cfg = "configs/dataset_cfgs/aic20_vehicle_reid.yaml"
train_cfg   = "configs/train_cfgs/aic20_t2_trip_onlloss.yaml"
# A single load of the training config is sufficient.
train_params = ConfigLoader.load_train_cfg(train_cfg)

common_loader_params = {'batch_size': train_params['batch_size'], 'num_workers': 4}
dataset_name, dataset_params = ConfigLoader.load_dataset_cfg(dataset_cfg)
loader_fact = BaseDataLoaderFactory(dataset_name, dataset_params, train_params, common_loader_params)
test_loaders = loader_fact.test_loaders()
gal_ld = test_loaders['gallery']
que_ld = test_loaders['query']

# Prepare GALLERY & QUERY file names: each name returned by the dataset is
# converted with int() and stored in an int32 array, so the arrays can be
# indexed with gallery positions when a submission is written.
que_fname = np.array([int(name) for name in que_ld.dataset.get_img_names()]).astype(np.int32)
gal_fname = np.array([int(name) for name in gal_ld.dataset.get_img_names()]).astype(np.int32)

./dataset_splits/aic20_vehicle_reid/reid_test.csv
./dataset_splits/aic20_vehicle_reid/reid_query.csv


In [6]:
""" Read test track id file
    - input: txt file. test track id, each row contains all imgid in that tracklet
    - output: 
            - (dict) gal_tracks.test_tracks[i] = np.array(img_id)
            - (dict) galimg2track. queimg2track[img_id] = track_id"""
with open("../aic20_data/origin/test_track_id.txt") as fi:
    # Keep only non-blank rows so a trailing empty line cannot break parsing
    # or create an empty tracklet.
    lines = [line for line in fi if line.strip()]
gal_tracks = {}
galimg2track = {}
for i, track in enumerate(lines):
    # `np.int` was removed from NumPy (1.24); the builtin `int` selects the
    # same default integer dtype. split() without an argument tolerates
    # repeated whitespace between ids.
    gal_tracks[i] = np.array(track.split()).astype(int) - 1
    #note that this is now 0 index!!!
    for img_id in gal_tracks[i]:
        # reverse lookup: 0-based image index -> tracklet id
        galimg2track[img_id] = i

In [7]:
"""Get raw  distance"""
RAW_OUT_DIR = "outputs/aic20_t2_trip_onlloss/"
# Later cells index the columns of this matrix with gallery image indices and
# its rows per query.
raw_dist_path = osp.join(RAW_OUT_DIR, "dist.npy")
raw_dist = np.load(raw_dist_path)
print(raw_dist.shape)

(1052, 18290)


### Using Re-ID

In [104]:
# Per-image embeddings stored in the same output directory as dist.npy.
raw_que_emb = np.load(osp.join(RAW_OUT_DIR, "que_emb.npy"))
raw_gal_emb = np.load(osp.join(RAW_OUT_DIR, "gal_emb.npy"))

# One mean embedding per gallery tracklet. The means are stacked as columns
# and then transposed, so every row of avg_gal_emb belongs to one tracklet.
track_means = [raw_gal_emb[img_idcs].mean(axis=0) for img_idcs in gal_tracks.values()]
avg_gal_emb = np.stack(track_means, axis=1).transpose()

raw_que_emb = torch.from_numpy(raw_que_emb)
avg_gal_emb = torch.from_numpy(avg_gal_emb)

# The label vectors are all ones, so the mAP / cmc printed below carry no
# information about ranking quality; only dist_reranked is used afterwards.
# NOTE(review): the gallery label vector has 18290 entries while avg_gal_emb
# has one row per tracklet -- confirm reid_evaluate tolerates the mismatch.
idcs, mAP, cmc, dist_reranked = reid_evaluate(
    raw_que_emb, avg_gal_emb,
    np.ones(1052), np.ones(18290), is_reranking = True)
print('Reranked - Validation mAP: %.4f' % mAP)
print('Reranked - Validation cmc (hard): %.4f' % cmc)

Reranking is applied!
using GPU to compute original distance
starting re_ranking


Evaluating: 100% [############################] loss: ---------- Time:  0:00:00


Reranked - Validation mAP: 1.0000
Reranked - Validation cmc (hard): 1.0000


In [107]:
# For every query (row), order the gallery columns by ascending distance
# returned from reid_evaluate.
# NOTE(review): the "QUERY TO TRACKLET" cell below assigns que2track_idcs as
# well; whichever of the two cells runs last decides the ranking used later.
que2track_idcs = np.argsort(dist_reranked, axis = 1)

### QUERY TO TRACKLET

In [23]:
"""calculate "mean" dist between query image and tracklets"""
# Distance from a query to a tracklet = mean of the raw distances between the
# query and every image of that tracklet (one column per tracklet).
que2track_dist = np.stack(
    [raw_dist[:, img_idcs].mean(axis=1) for img_idcs in gal_tracks.values()],
    axis=1,
)
# Tracklets ordered from closest to farthest for each query.
que2track_idcs = que2track_dist.argsort(axis=1)

### SAVE SUBMISSION FILE

In [24]:
def save_predictions(tracklet_dist, out_file, top_k=100, tracks=None, img_names=None):
    """Write the top-ranked gallery images of every query to a text file.

    For each query (row of ``tracklet_dist``) the tracklets are sorted by
    ascending distance; their images are emitted tracklet by tracklet and the
    list is cut after ``top_k`` images. One line is written per query.

    Args:
        tracklet_dist: 2-D array (n_query, n_track) of query-to-tracklet
            distances; smaller means more similar.
        out_file: path of the text file to write.
        top_k: number of images kept per query (default 100).
        tracks: mapping track index -> array of gallery image indices.
            Defaults to the notebook-level ``gal_tracks``.
        img_names: array mapping gallery image index -> numeric file name.
            Defaults to the notebook-level ``gal_fname``.
    """
    if tracks is None:
        tracks = gal_tracks
    if img_names is None:
        img_names = gal_fname

    ranked_tracks = np.argsort(tracklet_dist)  # sorts along the last axis
    rows = []
    for track_order in ranked_tracks:
        picked, n_picked = [], 0
        for track in track_order:
            members = tracks[track]
            picked.append(members)
            n_picked += len(members)
            # Stop as soon as enough images were collected instead of
            # concatenating every tracklet of the gallery for each query.
            if n_picked >= top_k:
                break
        rows.append(np.concatenate(picked, axis=0)[:top_k])
    final_idcs = np.stack(rows, axis=0)

    print("Saving submission file")
    np.savetxt(out_file, img_names[final_idcs],
               delimiter=" ", fmt="%d", newline='\n')

In [255]:
# Submission ranked purely by the mean query-to-tracklet distance.
save_predictions(que2track_dist,"track2_onlloss_tracklet_2.txt")

Saving submission file


### VEHICLE TYPE ATTRIBUTE

In [108]:
#Load pickle files
# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
# `with` closes the file handles, which a bare open() inside pkl.load() never does.
with open("../aic20_attributes/test_types.pkl", "rb") as pkl_file:
    gal_veh_type_dict = pkl.load(pkl_file)
with open("../aic20_attributes/query_types.pkl", "rb") as pkl_file:
    que_veh_type_dict = pkl.load(pkl_file)
# Vehicle type per image, in the same order as the query / gallery name arrays.
que_type = np.array([que_veh_type_dict[img_id] for img_id in que_fname])
gal_type = np.array([gal_veh_type_dict[img_id] for img_id in gal_fname])
print(que_type.shape, gal_type.shape)

(1052,) (18290,)


In [109]:
def get_majority(a):
    """Return the most frequent value of ``a``.

    Ties go to the smallest value, because np.unique returns the values
    sorted and argmax picks the first maximum.
    """
    uniq_vals, uniq_counts = np.unique(a, return_counts=True)
    return uniq_vals[uniq_counts.argmax()]
# Majority vehicle type of the images in each gallery tracklet, in the
# iteration order of gal_tracks.
gal_track_type = np.array(
    [get_majority(gal_type[img_idcs]) for img_idcs in gal_tracks.values()]
)

In [110]:
#Manual check with first tracklet
# Vehicle type of the closest tracklet of every query ...
top1_track = que2track_idcs[:, 0]
first_track_type = gal_track_type[top1_track]
# ... and the queries whose own type disagrees with it.
diff = np.flatnonzero(first_track_type != que_type)
len(diff)

255

In [111]:
#penalties for vehicle types
# 1.0 where the query type differs from the tracklet's majority type, else 0.0.
vehi_type_penal = (que_type[:, np.newaxis] != gal_track_type) * 1.0

# Build the rank-based initial distance here (rank of each tracklet in the
# query's sorted list, 0 = closest). It mirrors the definition used by the
# later "initial distance by triplet-reid" cell; without it this cell raises
# NameError on a fresh kernel because que2track_init_dist is only defined there.
n_que, n_track = que2track_idcs.shape
que2track_init_dist = np.zeros((n_que, n_track))
for i in range(n_que):
    que2track_init_dist[i, que2track_idcs[i, :]] = np.arange(n_track) * 1.0

que2track_final_dist = que2track_init_dist + vehi_type_penal

In [112]:
# Shape check: argsort along the last axis yields one tracklet ordering per query.
np.argsort(que2track_final_dist).shape

(1052, 798)

In [259]:
# Submission built from que2track_final_dist as it stands when this cell runs.
# NOTE(review): the file name suggests the 1.0 vehicle-type penalty variant;
# confirm no later cell has modified que2track_final_dist before running this.
save_predictions(que2track_final_dist, "track2_tracklet_vehitype_pen1.0.txt")

Saving submission file


### Read all attributes

In [113]:
#Load pickle files
# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
# `with` closes the file handles, which a bare open() inside pkl.load() never does.
with open("../aic20_attributes/aic20_que_imgs_attribs.pkl", "rb") as pkl_file:
    que_dict = pkl.load(pkl_file)
with open("../aic20_attributes/aic20_gal_imgs_attribs.pkl", "rb") as pkl_file:
    gal_dict = pkl.load(pkl_file)

def dict2npy(que_key, gal_key):
    """Collect labels and scores of one attribute into numpy arrays.

    Reads the notebook-level ``que_dict`` / ``gal_dict`` in the order of
    ``que_fname`` / ``gal_fname`` and takes the 'lbl' and 'scr' entries
    stored under ``que_key`` (queries) and ``gal_key`` (gallery).

    Returns:
        dict with keys "que_lb", "que_sc", "gal_lb", "gal_sc".
    """
    def _column(records, img_ids, attrib_key, field):
        # One value per image, in the order given by img_ids.
        return np.array([records[img_id][attrib_key][field] for img_id in img_ids])

    return {
        "que_lb": _column(que_dict, que_fname, que_key, 'lbl'),
        "que_sc": _column(que_dict, que_fname, que_key, 'scr'),
        "gal_lb": _column(gal_dict, gal_fname, gal_key, 'lbl'),
        "gal_sc": _column(gal_dict, gal_fname, gal_key, 'scr'),
    }

# attribute name -> arrays of labels / scores for queries and gallery images,
# built from (name, query key, gallery key) triples.
attr = {
    attrib_name: dict2npy(attrib_que_key, attrib_gal_key)
    for attrib_name, attrib_que_key, attrib_gal_key in (
        ("type", "que_type8", "gal_type8"),
        ("type6", "que_type6", "gal_type6"),
        ("view", "que_view", "gal_view"),
        ("wheel", "que_wheel", "gal_wheel"),
        ("top_view", "que_topview", "gal_topview"),
    )
}

In [114]:
def get_majority(a):
    """Most frequent value in ``a`` (the smallest such value on ties).

    NOTE(review): a function with this name and behaviour is already defined
    in an earlier cell of this notebook; consider keeping a single definition.
    """
    labels, occurrences = np.unique(a, return_counts=True)
    winner = np.argmax(occurrences)  # first maximum -> smallest label on ties
    return labels[winner]

"""Generate mask with condition que_scr & gal_scr > thresh"""
def apply_threshold(attr_dict, que_thresh, gal_thresh):
    """Return boolean masks of the scores that exceed their threshold.

    Args:
        attr_dict: dict holding the score arrays under "que_sc" and "gal_sc".
        que_thresh: a query score must be strictly greater than this.
        gal_thresh: a gallery score must be strictly greater than this.

    Returns:
        (que_msk, gal_msk): element-wise comparison results for the query
        and gallery scores.
    """
    return attr_dict["que_sc"] > que_thresh, attr_dict["gal_sc"] > gal_thresh

In [115]:
def que_gal_match_attrib(attr_dict, que_thresh, gal_thresh):
    """Flag query/tracklet pairs whose confident attribute labels disagree.

    Gallery image labels whose score does not exceed ``gal_thresh`` are
    ignored. Every tracklet of the notebook-level ``gal_tracks`` then takes
    the majority label of its remaining images, or -1 when none remain.
    A query is reliable when its score exceeds ``que_thresh``.

    Args:
        attr_dict: dict with the arrays "que_lb", "que_sc", "gal_lb", "gal_sc".
        que_thresh: score threshold applied to the queries.
        gal_thresh: score threshold applied to the gallery images.

    Returns:
        (diff_mask, que_msk, gal_track_mask):
            diff_mask      -- (n_query, n_track) bool array, True only where
                              the query and the tracklet are both reliable
                              and their labels differ.
            que_msk        -- bool array, True for reliable queries.
            gal_track_mask -- bool array, True for tracklets with a label.
    """
    que_lb = attr_dict["que_lb"]
    que_msk, gal_msk = apply_threshold(attr_dict, que_thresh, gal_thresh)

    #set all label with scr < thresh to -1
    # (work on a copy so the caller's label array stays untouched)
    gal_lb_masked = attr_dict["gal_lb"].copy()
    gal_lb_masked[np.logical_not(gal_msk)] = -1

    gal_track_label = []
    for img_idcs in gal_tracks.values(): #for all gallery tracks
        #labels of the track's images, without the masked (-1) ones
        confident_lb = gal_lb_masked[img_idcs]
        confident_lb = confident_lb[confident_lb != -1]
        #majority label, or -1 when no confident image is left
        if len(confident_lb) > 0:
            gal_track_label.append(get_majority(confident_lb))
        else:
            gal_track_label.append(-1)
    gal_track_label = np.array(gal_track_label)
    #Mask for gallery tracklet labels (!= -1)
    gal_track_mask = gal_track_label != -1

    # Denominators come from the data itself rather than hard-coded counts.
    print("Total reliable tracklets: %d / %d with %d / %d images"
          % (gal_track_mask.sum(), gal_track_mask.size, gal_msk.sum(), gal_msk.size))
    print("Total reliable queries  : %d / %d" % (que_msk.sum(), que_msk.size))

    #Tracklet prediction != query prediction (true: diff, false: NO diff)
    diff_mask = que_lb[:, np.newaxis] != gal_track_label
    #Unreliable queries never count as a difference
    diff_mask[np.logical_not(que_msk), :] = False
    #Unreliable tracklets never count as a difference
    diff_mask[:, np.logical_not(gal_track_mask)] = False
    return diff_mask, que_msk, gal_track_mask

# One (diff mask, query mask, tracklet mask) triple per attribute; the
# thresholds are the per-attribute confidence cut-offs for query / gallery.
type_diff_mask, type_que_msk, type_gal_msk = que_gal_match_attrib(
    attr["type"], que_thresh=0.9, gal_thresh=0.9)

topview_diff_mask, topview_que_msk, topview_gal_msk = que_gal_match_attrib(
    attr["top_view"], que_thresh=0.9, gal_thresh=0.)

wheel_diff_mask, wheel_que_msk, wheel_gal_msk = que_gal_match_attrib(
    attr["wheel"], que_thresh=0.6, gal_thresh=0.8)

type6_diff_mask, type6_que_msk, type6_gal_msk = que_gal_match_attrib(
    attr["type6"], que_thresh=0.9, gal_thresh=0.9)

Total reliable tracklets: 647 / 798 with 8202 / 18920 images
Total reliable queries  : 420 / 1052
Total reliable tracklets: 760 / 798 with 4319 / 18920 images
Total reliable queries  : 349 / 1052
Total reliable tracklets: 134 / 798 with 608 / 18920 images
Total reliable queries  : 226 / 1052
Total reliable tracklets: 774 / 798 with 16172 / 18920 images
Total reliable queries  : 933 / 1052


In [116]:
#initial distance by triplet-reid
# The distance of a tracklet is its rank in the query's sorted list
# (0 = closest). Sizes are taken from que2track_idcs instead of hard-coded.
n_que, n_track = que2track_idcs.shape
que2track_init_dist = np.zeros((n_que, n_track))
trip_w = np.arange(n_track)
for i in range(n_que):
    que2track_init_dist[i, que2track_idcs[i, :]] = trip_w * 1.0

# Copy, so the in-place penalties added below accumulate on the final matrix
# only and re-running the penalty code cannot corrupt the initial distances.
que2track_final_dist = que2track_init_dist.copy()
# Sanity check: the top-ranked tracklet of the first query has distance 0.0.
print(que2track_final_dist[0][que2track_idcs[0, 0]])

#Use vehicle type attribute
# Every attribute disagreement adds 10 to the query/tracklet distance, in place.
for attrib_diff_mask in (type_diff_mask, topview_diff_mask, wheel_diff_mask, type6_diff_mask):
    que2track_final_dist += attrib_diff_mask * 10.

#
# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
# `with` closes the file handle, which a bare open() inside pkl.load() never does.
with open("../aic20_attributes/aic20_scencetext_attribs.pkl", "rb") as pkl_file:
    text_dict = pkl.load(pkl_file)
# Same shape as the distance matrix instead of hard-coded sizes.
scence_text_scr = np.zeros(que2track_final_dist.shape)
for img_id, text_matches in text_dict.items():
    print(img_id, text_matches)
    for selected_idcs in text_matches.values():
        # NOTE(review): assumes img_id is a 1-based query id whose row in the
        # distance matrix is img_id - 1 -- confirm against the order of que_fname.
        scence_text_scr[img_id - 1, np.array(selected_idcs)] = 1

# A scene-text match lowers the distance by 50, pulling the tracklet forward.
que2track_final_dist += scence_text_scr * -50.

wheel_cuong = np.load("../aic20_attributes/wheel.npy")
# NaN entries are overwritten with 10.0, which is above the 0.5 cut-off, so
# they never receive the penalty.
where_are_NaNs = np.isnan(wheel_cuong)
np.putmask(wheel_cuong, where_are_NaNs, 10.0)
# Entries below 0.5 add 10 to the query/tracklet distance, in place.
wheel_cuong_msk = np.less(wheel_cuong, 0.5)
que2track_final_dist += wheel_cuong_msk * 10.0

0.0
242 {'intermodal': [283, 283]}
328 {'wrestoration': [57, 57, 356]}
350 {'worrywhe': [217], '400': [185, 683, 217, 508, 217, 771, 758, 185, 185, 683, 217, 508, 217, 771, 758, 185]}
352 {'wrestoration': [57, 57, 356], '563543': [57, 57], '4887': [57]}
399 {'homecare': [260, 96]}
444 {'uctric': [484]}
647 {'268': [565, 536, 565, 553, 175, 536, 565]}
827 {'apless': [161]}
901 {'4954444': [332, 332], '563': [185, 683, 508, 771, 57, 57, 758, 185]}
916 {'2659': [166, 553]}


In [85]:
# Submission after the attribute, scene-text and wheel adjustments.
# NOTE(review): this call and the "reranked" save further down pass the same
# variable; which ranking each file holds depends on which que2track_idcs
# cell was executed before the distance was built -- confirm before re-running.
save_predictions(que2track_final_dist, "t2_track_0.9_topv10.0_type10.0_q0.6g0.8_wheel10.0_type6_10.0_scencetext_wheel_Cuong.txt")

Saving submission file


In [117]:
# Same adjusted distance matrix, saved under the "reranked" file name.
# NOTE(review): presumably meant to run after the Re-ID re-ranking cell set
# que2track_idcs -- confirm the execution order before trusting the file name.
save_predictions(que2track_final_dist, "t2_reranked_track_0.9_topv10.0_type10.0_q0.6g0.8_wheel10.0_type6_10.0_scencetext_wheel_Cuong.txt")

Saving submission file
