In [8]:
import numpy as np
import glob
import os
import xml.etree.ElementTree as ET
from shutil import copyfile, copytree
import os.path as osp
import pandas as pd
import numpy as np
import pickle as pkl

In [10]:
"""Build the test-image -> vehicle-type mapping from a structured folder.

    - input:  ../aic20_attributes/types/test/<vehicle_type>/<image_id>.jpg
    - output: ../aic20_attributes/test_types.pkl, a pickled dict with
              img2type[image_id (int)] = vehicle_type (int)"""
files = glob.glob("../aic20_attributes/types/test/*/*.jpg")
img2type = {}
for file in files:
    # Parse with os.path instead of splitting on '/', so the cell also works
    # where glob returns OS-specific path separators.
    type_dir, base_name = osp.split(file)
    file_name = int(base_name.split('.')[0])
    vehtype = int(osp.basename(type_dir))
    img2type[file_name] = vehtype
# The context manager guarantees the pickle is flushed and the handle closed.
with open("../aic20_attributes/test_types.pkl", "wb") as fo:
    pkl.dump(img2type, fo)

In [5]:
"""Parse train_label.xml into two lookup tables:
    - img2id:  image id (imageName with ".jpg" removed) -> vehicle id (int)
    - id2imgs: vehicle id (int) -> list of image ids, in document order"""

xml_train_lbl = ET.parse(
    '../aic20_data/origin/train_label.xml',
    parser=ET.XMLParser(encoding='iso-8859-5'),
)
root = xml_train_lbl.getroot()
img2id, id2imgs = {}, {}
for item in root.iter("Item"):
    attrs = item.attrib
    image_key = attrs["imageName"].replace(".jpg","")
    vehicle_key = int(attrs["vehicleID"])
    img2id[image_key] = vehicle_key
    # Create the per-vehicle list on first sight, then append to it.
    id2imgs.setdefault(vehicle_key, []).append(image_key)

In [26]:
"""Generate csv of vehicle type of each training image
    - input: annotated vehicle types with following structure: <vehi_type_id>/<vehi_id>/<vehi_tracklet>
             (requires `img2id` from the train_label.xml cell)
    - output: csv file. each row contains <img_id>, <vehi_type_id> """
anno_paths = glob.glob("../aic20_data/aic20_train_vehicle_types/*/*")
id2vehi_type = {}
for annoPath in anno_paths:
    # os.path keeps the parsing independent of the platform's path separator.
    type_dir, vehId = osp.split(annoPath)
    vehType = osp.basename(type_dir)  # kept as a string, exactly as it appears on disk
    id2vehi_type[int(vehId)] = vehType

# Sort once and reuse, so the two CSV columns are guaranteed to line up.
img_ids = sorted(img2id.keys())
# Raises KeyError if an image's vehicle id has no folder under the annotation root.
vehi_type = [id2vehi_type[img2id[img]] for img in img_ids]
df = pd.DataFrame({"img_id": img_ids, "vehicle_type": vehi_type}, columns= ['img_id', 'vehicle_type'])
df.to_csv("../aic20_data/attributes_lbls/train_vehicle_type.csv", index = False, header = True)

In [20]:
"""Convert data split files from vltanh to my format
    - input: csv file. with 2 columns: `camera_id` `vehicle_id`
             (requires `root` from the train_label.xml cell)
    - output: file name .lst, one image name per line for every labelled image
              whose "<cameraID>_<vehicleID>" pair appears in the csv"""
inp_f = "reid_gallery_hard.csv"
out_f = "gallery_hard.lst"
df = pd.read_csv(inp_f)
# Track key format: "<camera_id>_<vehicle_id zero-padded to 4 characters>".
selected_tracks = [
    str(cam_id) + "_" + str(veh_id).zfill(4)
    for cam_id, veh_id in zip(df['camera_id'], df['vehicle_id'])
]
# Set for O(1) membership tests inside the per-image loop below.
selected_track_set = set(selected_tracks)
with open(out_f, "w") as fo:
    for child in root.iter("Item"):
        name = child.attrib["imageName"]
        veh_id = child.attrib["vehicleID"]
        cam_id = child.attrib["cameraID"]
        # The XML attributes are compared as-is (no padding applied here).
        m  = str(cam_id)+ "_" + str(veh_id)
        if m in selected_track_set:
            fo.write(name + "\n")

In [None]:
# Load the track list file: one track per line, tokens separated by single spaces.
# (The first token of each line is later used as an image id.)
with open("../AIC20_track2/data/train_track_id.txt") as fi:
    lines = fi.readlines()
# train_tracks[line index] = list of tokens found on that line
train_tracks = {
    track_idx: row.strip().split(' ')
    for track_idx, row in enumerate(lines)
}

In [None]:
# Map every track to the vehicle id of its first image, then invert that mapping.
# Requires `img2id` (train_label.xml cell) and `train_tracks` (track list cell).
#
# The lookup below uses integer image ids, while the cell that builds img2id keys
# it by the image-name string. Normalising the keys to int makes the lookup work
# for either key type instead of raising KeyError.
# NOTE(review): assumes every image name is purely numeric - confirm.
img2id_by_int = {int(img): veh for img, veh in img2id.items()}
train_tracks2id = {k: img2id_by_int[int(tokens[0])] for k, tokens in train_tracks.items()}

# id2train_tracks[vehicle id] = list of track indices belonging to that vehicle
id2train_tracks = {}
for track, veh_id in train_tracks2id.items():
    id2train_tracks.setdefault(veh_id, []).append(track)

In [None]:
# Copy each track's thumbnail into a per-vehicle folder:
#   <path_train_vehicles>/<vehicle id, zero-padded to 3>/<track index, zero-padded to 3>.jpg
# Requires `id2train_tracks` from the track-to-vehicle cell.
train_thumbnails = "../AIC20_track2/data/image_train_thumbnails/"
path_train_vehicles   = "../AIC20_track2/train_vehicles"
for veh_id, vehicle_tracks in id2train_tracks.items():
    dest_dir = osp.join(path_train_vehicles, str(veh_id).zfill(3))
    os.makedirs(dest_dir, exist_ok=True)
    for track in vehicle_tracks:
        thumb_name = str(track).zfill(3) + ".jpg"
        src_file = osp.join(train_thumbnails, thumb_name)
        dst_file = osp.join(dest_dir, thumb_name)
        copyfile(src_file, dst_file)

In [None]:
# Read the draft annotation sheet and keep, per vehicle, the label of the first
# row that refers to it; a missing label is recorded as -1.
# Requires `train_tracks2id` from the track-to-vehicle cell.
df = pd.read_csv("vehicle_id_draft.csv")
train_tracks2vehicle = {}
for i, (track, raw_type) in enumerate(zip(df['track'], df['veh_type'])):
    veh_id = train_tracks2id[int(track)]
    draft_lbl = -1 if pd.isnull(raw_type) else int(raw_type)
    # First label seen for a vehicle wins; later rows never overwrite it.
    train_tracks2vehicle.setdefault(veh_id, draft_lbl)
    # Hard cutoff: rows 0..1171 are processed, then the loop stops.
    if i > 1170:
        print(i)
        break

In [None]:
# Group the per-vehicle folders by draft vehicle type:
#   <vehicle_types_fold>/<type label>/<vehicle id, zero-padded to 3>/
# Requires `train_tracks2vehicle` and `path_train_vehicles` from earlier cells.
vehicle_types_fold = "../AIC20_track2/train_vehicle_types/"
for vehicle, label in train_tracks2vehicle.items():
    type_dir = osp.join(vehicle_types_fold, str(label))
    vehicle_dir = str(vehicle).zfill(3)
    os.makedirs(type_dir, exist_ok=True)
    # Called with default arguments, copytree raises if the destination exists.
    copytree(osp.join(path_train_vehicles, vehicle_dir), osp.join(type_dir, vehicle_dir))

In [None]:
# Sanity check: number of labelled vehicles vs. number of vehicles with tracks,
# followed by the vehicle ids that have tracks but no draft label yet.
for mapping in (train_tracks2vehicle, id2train_tracks):
    print(len(mapping))
[veh for veh in id2train_tracks if veh not in train_tracks2vehicle]