In [1]:
from pathlib import Path
from tqdm import tqdm
import shutil
import xml.etree.ElementTree as ET
import yaml

In [2]:
# Every path below is relative to the notebook's working directory.
cur_path = Path()

In [3]:
# Display the absolute form of the working directory as the cell output.
cur_path.absolute()

PosixPath('/home/p/datasets/VOC')

In [4]:
# Output roots: copied JPEGs go under images/, generated label files under labels/.
images_path, labels_path = (cur_path / folder for folder in ('images', 'labels'))

In [5]:
# Image destination folders, one per split (no test2012 folder is defined).
images_train2012, images_val2012 = (
    images_path / name for name in ('train2012', 'val2012')
)
images_train2007, images_val2007, images_test2007 = (
    images_path / name for name in ('train2007', 'val2007', 'test2007')
)

# Create every folder up front; exist_ok makes re-running the cell harmless.
for image_dir in (
    images_train2012, images_val2012,
    images_train2007, images_val2007, images_test2007,
):
    image_dir.mkdir(parents=True, exist_ok=True)

In [6]:
# Label destination folders, one per split (no test2012 folder is defined).
labels_train2012, labels_val2012 = (
    labels_path / name for name in ('train2012', 'val2012')
)
labels_train2007, labels_val2007, labels_test2007 = (
    labels_path / name for name in ('train2007', 'val2007', 'test2007')
)

# Create every folder up front; exist_ok makes re-running the cell harmless.
for label_dir in (
    labels_train2012, labels_val2012,
    labels_train2007, labels_val2007, labels_test2007,
):
    label_dir.mkdir(parents=True, exist_ok=True)

In [7]:
# Root folder of each extracted VOC archive, relative to the working directory.
# Keeping them in one place avoids typos in the repeated prefixes.
voc2007_test_root = 'VOCtest_06-Nov-2007/VOCdevkit/VOC2007'
voc2007_trainval_root = 'VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007'
voc2012_trainval_root = 'VOCtrainval_11-May-2012/VOCdevkit/VOC2012'

# Image-id list files. Order matters, later cells index into this list:
# [0] 2007 test, [1] 2007 train, [2] 2007 val, [3] 2012 train, [4] 2012 val.
txt_paths = [
    f'{voc2007_test_root}/ImageSets/Main/test.txt',

    f'{voc2007_trainval_root}/ImageSets/Main/train.txt',
    f'{voc2007_trainval_root}/ImageSets/Main/val.txt',

    f'{voc2012_trainval_root}/ImageSets/Main/train.txt',
    f'{voc2012_trainval_root}/ImageSets/Main/val.txt',
]

# Source JPEG folders: [0] 2007 test, [1] 2007 trainval, [2] 2012 trainval.
image_paths = [
    f'{voc2007_test_root}/JPEGImages',
    f'{voc2007_trainval_root}/JPEGImages',
    f'{voc2012_trainval_root}/JPEGImages',
]

# Source XML annotation folders, same ordering as image_paths.
annotations = [
    f'{voc2007_test_root}/Annotations',
    f'{voc2007_trainval_root}/Annotations',
    f'{voc2012_trainval_root}/Annotations',
]

In [8]:
# Per-split configuration for VOC2007.
# train and val both read from the trainval archive (index 1 of the
# image and annotation lists); only the id list and destinations differ.
voc2007 = dict(
    txt_paths=dict(test=txt_paths[0], train=txt_paths[1], val=txt_paths[2]),
    image_paths=dict(test=image_paths[0], train=image_paths[1], val=image_paths[1]),
    dest_path_img=dict(test=images_test2007, train=images_train2007, val=images_val2007),
    dest_path_txt=dict(test=labels_test2007, train=labels_train2007, val=labels_val2007),
    annotations=dict(test=annotations[0], train=annotations[1], val=annotations[1]),
)

In [9]:
# Per-split configuration for VOC2012.
# The test entries are empty-string placeholders so the dict has the same
# shape as the 2007 one; train and val share the trainval archive (index 2).
voc2012 = dict(
    txt_paths=dict(test='', train=txt_paths[3], val=txt_paths[4]),
    image_paths=dict(test='', train=image_paths[2], val=image_paths[2]),
    dest_path_img=dict(test='', train=images_train2012, val=images_val2012),
    dest_path_txt=dict(test='', train=labels_train2012, val=labels_val2012),
    annotations=dict(test='', train=annotations[2], val=annotations[2]),
)

In [10]:
# Lookup from dataset year to its per-split configuration.
voc = {}
voc['2007'] = voc2007
voc['2012'] = voc2012

In [11]:
# Every (year, split) pair to convert, skipping the 2012 test split.
missions = [
    (year, task)
    for year in ['2007', '2012']
    for task in ['test', 'train', 'val']
    if not (year == '2012' and task == 'test')
]

# Echo the plan so the cell output documents what will be processed.
for year, task in missions:
    print(year, task)

2007 test
2007 train
2007 val
2012 train
2012 val


In [12]:
# Load the class table from classes.yaml in the working directory.
# safe_load is used so the YAML cannot construct arbitrary Python objects.
classes = yaml.safe_load(Path('classes.yaml').read_text(encoding='utf-8'))

In [13]:
# Invert the loaded table so a class name can be looked up to get its index.
classes_new = {name: index for index, name in classes.items()}

In [14]:
# Display the class-name -> class-index mapping as the cell output.
classes_new

{'aeroplane': 0,
 'bicycle': 1,
 'bird': 2,
 'boat': 3,
 'bottle': 4,
 'bus': 5,
 'car': 6,
 'cat': 7,
 'chair': 8,
 'cow': 9,
 'diningtable': 10,
 'dog': 11,
 'horse': 12,
 'motorbike': 13,
 'person': 14,
 'pottedplant': 15,
 'sheep': 16,
 'sofa': 17,
 'train': 18,
 'tvmonitor': 19}

In [15]:
def _voc_to_yolo_labels(root, class_to_index):
    """Convert one parsed VOC annotation into YOLO label text.

    Args:
        root: Root element of a VOC annotation XML. It must contain a
            ``size`` element with ``width``/``height`` and zero or more
            ``object`` elements, each with ``name`` and ``bndbox``.
        class_to_index: Mapping from class name to integer class index.

    Returns:
        One line per object, ``"<class> <cx> <cy> <w> <h>\\n"``, where the box
        centre and size are divided by the image width/height and printed
        with six decimals. An empty string when there are no objects.

    Raises:
        KeyError: If an object's class name is not in ``class_to_index``.
        ValueError: If a size or coordinate value is not numeric.
    """
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    lines = []
    for object_ in root.findall('object'):
        class_index = class_to_index[object_.find('name').text.strip()]

        bndbox = object_.find('bndbox')
        # float() rather than int(): a coordinate written with a decimal
        # point would otherwise abort the whole conversion with ValueError.
        xmin, ymin, xmax, ymax = (
            float(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )

        # NOTE(review): coordinates are used as-is (no 1-based -> 0-based
        # shift), matching the previous behaviour; confirm this is intended.
        cx = (xmin + xmax) / 2 / width
        cy = (ymin + ymax) / 2 / height
        w = (xmax - xmin) / width
        h = (ymax - ymin) / height

        # The .6f format already rounds, so no separate round() is needed.
        lines.append(f"{class_index} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}\n")

    return ''.join(lines)


# For every (year, split): copy each listed image into its destination folder
# and write a same-named .txt label file converted from the XML annotation.
for year, task in missions:
    # Flatten the per-year config to just this split's entries.
    split_cfg = {key: per_task[task] for key, per_task in voc[year].items()}

    # The id list is whitespace-separated, one image id per line.
    image_ids = Path(split_cfg['txt_paths']).read_text(encoding='utf-8').split()

    for id_ in tqdm(image_ids, desc=f'{task}{year}'):
        src_img_path = Path(split_cfg['image_paths']) / (id_ + '.jpg')
        shutil.copy(src_img_path, split_cfg['dest_path_img'])

        # Pass the path itself so the XML parser opens the file in binary
        # mode and honours any encoding declared inside the document.
        src_path_xml = Path(split_cfg['annotations']) / (id_ + '.xml')
        root = ET.parse(src_path_xml).getroot()

        # A label file is written even when the image has no objects.
        dst_path_txt = Path(split_cfg['dest_path_txt']) / (id_ + '.txt')
        dst_path_txt.write_text(_voc_to_yolo_labels(root, classes_new), encoding='utf-8')


test2007: 100%|███████████████████████████| 4952/4952 [00:01<00:00, 4032.29it/s]
train2007: 100%|██████████████████████████| 2501/2501 [00:00<00:00, 3929.29it/s]
val2007: 100%|████████████████████████████| 2510/2510 [00:00<00:00, 3934.26it/s]
train2012: 100%|██████████████████████████| 5717/5717 [00:03<00:00, 1560.52it/s]
val2012: 100%|████████████████████████████| 5823/5823 [00:03<00:00, 1885.92it/s]
