In [1]:
import json
import pandas as pd
import os
import glob
import re
from tqdm import tqdm_notebook as tqdm
from PIL import Image
import shutil

In [2]:
DS_FOLDER = '/APL/Datasets/UAVBenchmark/'
GT_FOLDER = os.path.join(DS_FOLDER, 'UAV-benchmark-MOTD_v1.0','GT')
IMG_FOLDER = os.path.join(DS_FOLDER, 'UAV-benchmark-M')
ATTR_FOLDER_TRAIN = os.path.join(DS_FOLDER, 'M_attr', 'train')
ATTR_FOLDER_TEST = os.path.join(DS_FOLDER, 'M_attr', 'test')

OUTPUT_DIR = '/APL/Datasets/UAVBenchmark-COCO-format/'

In [3]:
ground_truth_files = glob.glob(os.path.join(GT_FOLDER, '*_gt_whole.txt'))

In [4]:
train_attribute_files = glob.glob(os.path.join(ATTR_FOLDER_TRAIN, '*.txt'))
test_attribute_files = glob.glob(os.path.join(ATTR_FOLDER_TEST, '*.txt'))

In [5]:
id_getter = re.compile('(?:train|test)\/M([0-9]*)_attr')

In [6]:
train_ids = [id_getter.search(i)[1] for i in train_attribute_files]
test_ids = [id_getter.search(i)[1] for i in test_attribute_files]

In [7]:
train_ids

['0401',
 '0901',
 '1305',
 '0202',
 '0604',
 '0703',
 '0704',
 '1005',
 '1006',
 '1003',
 '0210',
 '0402',
 '1304',
 '0603',
 '0605',
 '0101',
 '1002',
 '0902',
 '0207',
 '0702',
 '1008',
 '1306',
 '0501',
 '1102',
 '0204',
 '1202',
 '0206',
 '1201',
 '0301',
 '0201']

In [8]:
test_ids

['1001',
 '0209',
 '1007',
 '1301',
 '1004',
 '0701',
 '1101',
 '0802',
 '0403',
 '0801',
 '0606',
 '0208',
 '0203',
 '0602',
 '1302',
 '1009',
 '1401',
 '0601',
 '1303',
 '0205']

In [19]:
#SET = 'train'
SET = 'test'

In [20]:
new_data_format = {'info': {'description': 'UAVDT Benchmark',
  'url': 'https://sites.google.com/site/daviddo0323/projects/uavdt',
  'version': '1.0',
  'year': 2019,
  'contributor': 'Dawei Du et. al., this format: Michael Smith @McGill University',
  'date_created': '2019/07/07'},
                  'images': [],
                  'annotations': [],
                  'categories': []}

In [21]:
# Get all image data
if SET == 'train':
    id_list = train_ids
elif SET == 'test':
    id_list = test_ids
    
image_data = []
    
regex = re.compile('img([0-9]*).')

# Prepare folder to copy images to
IMG_DEST_FOLDER = os.path.join(OUTPUT_DIR, SET)
try:
    os.mkdir(IMG_DEST_FOLDER)
except FileExistsError:
    shutil.rmtree(IMG_DEST_FOLDER)
    os.mkdir(IMG_DEST_FOLDER)
    
# Create list of all image files
for ID in tqdm(id_list):
    im_files = glob.glob(os.path.join(IMG_FOLDER, 'M' + ID, '*.jpg'))
    
    for im_file in tqdm(im_files, leave=False):
        im = Image.open(im_file)
        im_size = im.size
        im.close()

        sequence_num = regex.search(os.path.basename(im_file))[1]
        
        new_id = ID + sequence_num
        new_file_name = 'img' + new_id + '.jpg'
        
        shutil.copy2(im_file, os.path.join(IMG_DEST_FOLDER, new_file_name))

        new_data_format['images'].append(
            {'file_name': new_file_name,
             'height': im_size[1],
             'width': im_size[0],
             'id': int(new_id)})

HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1859), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1576), HTML(value='')))

HBox(children=(IntProgress(value=0, max=659), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1182), HTML(value='')))

HBox(children=(IntProgress(value=0, max=269), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1308), HTML(value='')))

HBox(children=(IntProgress(value=0, max=864), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1101), HTML(value='')))

HBox(children=(IntProgress(value=0, max=514), HTML(value='')))

HBox(children=(IntProgress(value=0, max=298), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1374), HTML(value='')))

HBox(children=(IntProgress(value=0, max=265), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1007), HTML(value='')))

HBox(children=(IntProgress(value=0, max=480), HTML(value='')))

HBox(children=(IntProgress(value=0, max=719), HTML(value='')))

HBox(children=(IntProgress(value=0, max=604), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1050), HTML(value='')))

HBox(children=(IntProgress(value=0, max=372), HTML(value='')))

HBox(children=(IntProgress(value=0, max=445), HTML(value='')))

HBox(children=(IntProgress(value=0, max=646), HTML(value='')))




In [22]:
# Fill out categories
new_data_format['categories'] = [{'id': 1, 'name': 'car', 'supercategory': None}, {'id': 2, 'name': 'truck', 'supercategory': None}, {'id': 3, 'name': 'bus', 'supercategory': None}]

In [23]:
id_getter_2 = re.compile('GT\/M([0-9]*)_gt')

annot_id = 0

# Get annotations
for file in tqdm(ground_truth_files):
    set_id = id_getter_2.search(file)[1]
    data = pd.read_csv(file, header=None, names=['frame_index','target_id','bbox_left','bbox_top','bbox_width','bbox_height','out-of-view','occlusion','object_category'])
    
    for row in tqdm(data.itertuples(), leave=False):
        
        findex = '{:06d}'.format(row.frame_index)
        
        new_data_format['annotations'].append(
            {'id': annot_id,
             'image_id': int(set_id + findex),
             'category_id': row.object_category,
             'bbox': [row.bbox_left, row.bbox_top, row.bbox_width, row.bbox_height],
             'segmentation': [[row.bbox_left, row.bbox_top, row.bbox_left, row.bbox_top + row.bbox_height, row.bbox_left + row.bbox_width, row.bbox_top + row.bbox_height, row.bbox_left + row.bbox_width, row.bbox_top]],
             'area': row.bbox_width * row.bbox_height,
             'iscrowd': 0})
        annot_id += 1

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [24]:
new_data_format['annotations']

[{'id': 0,
  'image_id': 1302000601,
  'category_id': 1,
  'bbox': [486, 523, 25, 16],
  'segmentation': [[486, 523, 486, 539, 511, 539, 511, 523]],
  'area': 400,
  'iscrowd': 0},
 {'id': 1,
  'image_id': 1302000602,
  'category_id': 1,
  'bbox': [486, 522, 25, 16],
  'segmentation': [[486, 522, 486, 538, 511, 538, 511, 522]],
  'area': 400,
  'iscrowd': 0},
 {'id': 2,
  'image_id': 1302000603,
  'category_id': 1,
  'bbox': [487, 521, 25, 17],
  'segmentation': [[487, 521, 487, 538, 512, 538, 512, 521]],
  'area': 425,
  'iscrowd': 0},
 {'id': 3,
  'image_id': 1302000604,
  'category_id': 1,
  'bbox': [488, 520, 25, 17],
  'segmentation': [[488, 520, 488, 537, 513, 537, 513, 520]],
  'area': 425,
  'iscrowd': 0},
 {'id': 4,
  'image_id': 1302000605,
  'category_id': 1,
  'bbox': [489, 519, 25, 18],
  'segmentation': [[489, 519, 489, 537, 514, 537, 514, 519]],
  'area': 450,
  'iscrowd': 0},
 {'id': 5,
  'image_id': 1302000606,
  'category_id': 1,
  'bbox': [490, 518, 25, 19],
  'segme

In [25]:
new_data_format['images']

[{'file_name': 'img1001001484.jpg',
  'height': 540,
  'width': 1024,
  'id': 1001001484},
 {'file_name': 'img1001001610.jpg',
  'height': 540,
  'width': 1024,
  'id': 1001001610},
 {'file_name': 'img1001001375.jpg',
  'height': 540,
  'width': 1024,
  'id': 1001001375},
 {'file_name': 'img1001001543.jpg',
  'height': 540,
  'width': 1024,
  'id': 1001001543},
 {'file_name': 'img1001000848.jpg',
  'height': 540,
  'width': 1024,
  'id': 1001000848},
 {'file_name': 'img1001000680.jpg',
  'height': 540,
  'width': 1024,
  'id': 1001000680},
 {'file_name': 'img1001001116.jpg',
  'height': 540,
  'width': 1024,
  'id': 1001001116},
 {'file_name': 'img1001001324.jpg',
  'height': 540,
  'width': 1024,
  'id': 1001001324},
 {'file_name': 'img1001001133.jpg',
  'height': 540,
  'width': 1024,
  'id': 1001001133},
 {'file_name': 'img1001001058.jpg',
  'height': 540,
  'width': 1024,
  'id': 1001001058},
 {'file_name': 'img1001000615.jpg',
  'height': 540,
  'width': 1024,
  'id': 1001000615},

In [26]:
OUTPUT_JSON = os.path.join(OUTPUT_DIR, SET + '.json')
#Output to file
with open(OUTPUT_JSON, 'w') as f:
    json.dump(new_data_format,f)