In [2]:
# DIOR Train
!pip -q install gdown 
!gdown https://drive.google.com/file/d/1ZHbHDM6hYAEGDC_K5eiW0yF_lzVgpuir/view?usp=share_link --fuzzy --quiet
!unzip -oq JPEGImages-trainval.zip
!rm -f JPEGImages-trainval.zip

In [3]:
# DIOR Test
!gdown https://drive.google.com/file/d/11SXPqcESez9qTn4Z5Q3v35K9hRwO_epr/view?usp=share_link --fuzzy --quiet
!unzip -oq JPEGImages-test.zip
!rm -f JPEGImages-test.zip

In [4]:
# DIOR Annotations
!gdown https://drive.google.com/file/d/1KoQzqR20qvIXDf1qsXCHGxD003IPmXMw/view?usp=share_link --fuzzy --quiet
!unzip -oq Annotations.zip
!rm -f Annotations.zip

# Clean dataset

In [5]:
from glob import glob
import os
import xml.etree.ElementTree as ET

annot_files = glob(os.path.join('Annotations', 'Horizontal Bounding Boxes', '*.xml'))
len(f'Number of annotations: {len(annot_files)}')

28

In [6]:
# Make sure all images have bounding boxes
_segmented = 0
for annot_file in annot_files:
    tree = ET.parse(annot_file)
    root = tree.getroot()
    for child in root:
        if child.tag == 'segmented' and child.text == '0':
            _segmented += 1
assert _segmented == len(annot_files)

In [7]:
# Make sure all images have source = DIOR
for annot_file in annot_files:
    tree = ET.parse(annot_file)
    root = tree.getroot()
    for child in root:
        if child.tag == 'source':
            assert child[0].tag == 'database' and child[0].text == 'DIOR'

In [8]:
# Extract bounding boxes for each file

annot_dict = dict()

def parse_object(obj):
    cls_name = obj.find('name').text.lower()
    bnd_box = obj.find('bndbox')
    xmin = int(bnd_box.find('xmin').text)
    xmax = int(bnd_box.find('xmax').text)
    ymin = int(bnd_box.find('ymin').text)
    ymax = int(bnd_box.find('ymax').text)
    
    assert xmin <= xmax
    assert ymin <= ymax
    
    obj_dict = dict(
        name=cls_name,
        xmin=xmin,
        xmax=xmax,
        ymin=ymin,
        ymax=ymax
    )
    
    return obj_dict

def check_axis(obj):
    return obj['xmax'] - obj['xmin'] >= 120 and obj['ymax'] - obj['ymin'] >= 120

for annot_file in annot_files:
    root = ET.parse(annot_file).getroot()
    fname = root.find('filename').text
    objects = list(map(parse_object, root.findall('object')))
    
    # Filter out small objects 
    objects = list(filter(check_axis, objects))
    if len(objects) > 0:
        annot_dict[fname] = objects

In [9]:
print(f'Train len: {glob(os.path.join("JPEGImages-trainval", "*.jpg")).__len__()}')
print(f'Test len: {glob(os.path.join("JPEGImages-test", "*.jpg")).__len__()}')

Train len: 11725
Test len: 11738


In [10]:
# Find unique classes and their counts
cls_dict = dict()
for _, v in annot_dict.items():
    for obj in v:
        name = obj['name']
        if name not in cls_dict:
            cls_dict[name] = 0
        cls_dict[name] += 1

print(f'Number of unique classes: {len(cls_dict)}')
print(cls_dict)
_cls_dict = dict()
for k, v in cls_dict.items():
    if v >= 50:
        _cls_dict[k] = v
cls_dict = _cls_dict
print()
print(f'Number of unique classes after removing small classes: {len(cls_dict)}')
print(cls_dict)

Number of unique classes: 20
{'baseballfield': 3111, 'storagetank': 1181, 'dam': 752, 'trainstation': 441, 'bridge': 912, 'harbor': 2335, 'basketballcourt': 1172, 'overpass': 1335, 'stadium': 873, 'groundtrackfield': 1392, 'golffield': 978, 'expressway-service-area': 1560, 'airplane': 1320, 'airport': 1029, 'windmill': 166, 'chimney': 1277, 'vehicle': 35, 'ship': 771, 'tenniscourt': 916, 'expressway-toll-station': 305}

Number of unique classes after removing small classes: 19
{'baseballfield': 3111, 'storagetank': 1181, 'dam': 752, 'trainstation': 441, 'bridge': 912, 'harbor': 2335, 'basketballcourt': 1172, 'overpass': 1335, 'stadium': 873, 'groundtrackfield': 1392, 'golffield': 978, 'expressway-service-area': 1560, 'airplane': 1320, 'airport': 1029, 'windmill': 166, 'chimney': 1277, 'ship': 771, 'tenniscourt': 916, 'expressway-toll-station': 305}


In [11]:
# Filter out objects whose class has fewer than 50 members
_annot_dict = dict()
for k, v in annot_dict.items():
    _v = []
    for obj in v:
        name = obj['name']
        if name in cls_dict:
            _v.append(obj)
    if len(_v) > 0:
        _annot_dict[k] = _v
annot_dict = _annot_dict


In [12]:
class_names = sorted(list(cls_dict.keys()))
print(class_names)

image_files = glob(os.path.join('JPEGImages-*', '*.jpg'))
print(f'Number of images: {len(image_files)}')

['airplane', 'airport', 'baseballfield', 'basketballcourt', 'bridge', 'chimney', 'dam', 'expressway-service-area', 'expressway-toll-station', 'golffield', 'groundtrackfield', 'harbor', 'overpass', 'ship', 'stadium', 'storagetank', 'tenniscourt', 'trainstation', 'windmill']
Number of images: 23463


In [None]:
# Clean Images
import cv2

root = './'
train_path = os.path.join(root, 'train')
test_path = os.path.join(root, 'test')
if not os.path.exists(train_path):
    os.mkdir(train_path)
if not os.path.exists(test_path):
    os.mkdir(test_path)

train_idx = 0
test_idx = 0

def object_to_data(image_file, obj):
    img = cv2.imread(image_file)
    img = img[obj['xmin']: obj['xmax'], obj['ymin']: obj['ymax']]
    assert obj['name'] in class_names
    target = class_names.index(obj['name'])
    return img, target

train_targets = []
test_targets = []

for image_file in image_files:
    train = os.path.dirname(image_file).endswith('trainval')
    fname = os.path.basename(image_file)
    if fname not in annot_dict:
        continue
    objects = annot_dict[fname]
    for obj in objects:
        data = object_to_data(image_file, obj)
        if train:
            train_targets.append(data[1])
            obj_name = f'{train_idx:0>5}.jpg'
            obj_path = os.path.join(train_path, obj_name)
            cv2.imwrite(obj_path, data[0])
            train_idx += 1
        else:
            test_targets.append(data[1])
            obj_name = f'{test_idx:0>5}.jpg'
            obj_path = os.path.join(test_path, obj_name)
            cv2.imwrite(obj_path, data[0])
            test_idx += 1
        

In [None]:
import pickle

with open('train_targets.pkl', 'wb') as f:
    pickle.dump(train_targets, f)
with open('test_targets.pkl', 'wb') as f:
    pickle.dump(test_targets, f)
with open('class_names.pkl', 'wb') as f:
    pickle.dump(class_names, f)

In [None]:
print(f"Processed train len: {len(glob(os.path.join('train', '*')))}")
print(f"Processed test len: {len(glob(os.path.join('test', '*')))}")

In [None]:
%rm -rf JPEGImages*
%rm -rf Annotations

In [None]:
# Visualize some data points
import numpy as np


def random_sampler(root, targets, size=64):
    rand_idx = np.random.choice(range(len(targets)), size=size, replace=False)
    image_names = [os.path.join(root, f'{idx:0>5}.jpg') for idx in rand_idx]
    images = [cv2.imread(image_name) for image_name in image_names]
    targets = [targets[idx] for idx in rand_idx]
    return images, targets

import matplotlib.pyplot as plt

fig, axs = plt.subplots(4, 4, figsize=(16, 16))

samples = random_sampler('train', train_targets, size=16)
for i in range(4):
    for j in range(4):
        image, target = samples[0][i*4+j], samples[1][i*4+j]
        axs[i, j].imshow(image)
        axs[i, j].title.set_text(class_names[target])
plt.show()

In [None]:
!zip -qr train.zip train train_targets.pkl class_names.pkl
!zip -qr test.zip test test_targets.pkl class_names.pkl


In [None]:
from IPython.display import FileLink
FileLink('train.zip')

In [None]:
FileLink('test.zip')