In [1]:
import os
import glob
import matplotlib.pyplot as plt
import json
from PIL import Image
import cv2
import shutil as sh
from tqdm import tqdm as tqdm
from xml.etree import ElementTree

In [15]:
class PascalConverter:
    """Convert a synthetic text-annotated dataset into a Pascal-VOC-style layout.

    The source directory is expected to hold ``<name>.txt`` annotation files
    next to ``<name>.webp`` images. Each annotation file is a sequence of
    blocks separated by empty lines; every line of a block looks like
    ``x1 y1 x2 y2 x3 y3 x4 y4 c`` (eight integer coordinates, a space, then a
    single character). One block becomes one ``<object>`` whose bounding box
    encloses all points of the block and whose value is the concatenation of
    the per-line characters.

    Output is written to ``<output_pascal_dir>/Annotations`` (XML files) and
    ``<output_pascal_dir>/JPEGImages`` (copies of the source images).

    Args:
        synth_dataset_dir: directory with the ``.txt`` / ``.webp`` pairs.
        output_pascal_dir: root of the generated dataset. The default ``'/'``
            is kept for backward compatibility; pass an explicit directory.
    """

    def __init__(self, synth_dataset_dir, output_pascal_dir = '/'):
        self.synth_dataset_dir = synth_dataset_dir
        # sorted() gives a reproducible processing order (glob order is arbitrary).
        self.annotations = sorted(glob.glob(os.path.join(synth_dataset_dir, '*.txt')))
        # '*.webp' (with the dot) so only real .webp files are picked up.
        self.images = sorted(glob.glob(os.path.join(synth_dataset_dir, '*.webp')))
        self.output_pascal_dir = output_pascal_dir
        self.pascal_annotation_dir = os.path.join(self.output_pascal_dir,
                                                  "Annotations")
        self.pascal_images_dir = os.path.join(self.output_pascal_dir,
                                              "JPEGImages")

    def _indent(self, elem, level=0):
        """Pretty-print ``elem`` in place by filling whitespace-only text/tail.

        Each nesting level is indented by two spaces. Existing non-blank
        text and tails are left untouched.
        """
        i = "\n" + level*"  "
        if len(elem):
            if not elem.text or not elem.text.strip():
                elem.text = i + "  "
            if not elem.tail or not elem.tail.strip():
                elem.tail = i
            for child in elem:
                self._indent(child, level+1)
            # The last child's tail must dedent back to the parent's level so
            # that the parent's closing tag lines up with its opening tag.
            if not child.tail or not child.tail.strip():
                child.tail = i
        else:
            if level and (not elem.tail or not elem.tail.strip()):
                elem.tail = i

    @staticmethod
    def _get_info_from_txt(xml_name):
        """Return filename, width and height of the image paired with an annotation.

        Args:
            xml_name: path of the ``.txt`` annotation file; the image is
                looked up under the same path with a ``.webp`` extension.

        Returns:
            dict with keys ``'filename'`` (base name of the image),
            ``'width'`` and ``'height'`` (pixels).

        Raises:
            FileNotFoundError: if the image is missing or cannot be decoded.
        """
        # splitext only strips the final extension, so dots elsewhere in the
        # path (e.g. '/data/v1.2/img.txt') do not truncate the name.
        image_name = os.path.splitext(xml_name)[0] + '.webp'
        img = cv2.imread(image_name)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(
                "Image for annotation %r is missing or unreadable: %r"
                % (xml_name, image_name))
        return {'filename': os.path.basename(image_name),
                'width': img.shape[1],
                'height': img.shape[0]}

    @staticmethod
    def _get_info_from_block(anno_block):
        """Collapse one annotation block into a bounding box and a text value.

        Args:
            anno_block: list of lines ``'x1 y1 x2 y2 x3 y3 x4 y4 c'``.

        Returns:
            dict with ``'x_min'``, ``'y_min'``, ``'x_max'``, ``'y_max'``
            (extremes over every point of every line) and ``'value'`` (the
            last character of each line, concatenated in order).

        Raises:
            ValueError: if the block is empty, a coordinate is not an
                integer, or a line carries fewer than eight coordinates.
        """
        xs = []
        ys = []
        chars = []
        for line in anno_block:
            chars.append(line[-1])
            # Drop the trailing "<space><char>" before parsing the numbers.
            coord = [int(j) for j in line[:-2].split(' ')]
            if len(coord) < 8:
                raise ValueError(
                    "Expected 8 coordinates, got %d in line %r"
                    % (len(coord), line))
            xs.extend(coord[0:8:2])  # x1, x2, x3, x4
            ys.extend(coord[1:8:2])  # y1, y2, y3, y4

        if not xs:
            raise ValueError("Annotation block is empty")

        return {'x_min': min(xs), 'y_min': min(ys),
                'x_max': max(xs), 'y_max': max(ys),
                'value': ''.join(chars)}

    @staticmethod
    def _read_annotation_blocks(synth_anno):
        """Read an annotation file and split it into blocks on empty lines.

        Runs of empty lines never produce empty blocks, and a final block
        that is not followed by an empty line is still returned.
        """
        blocks = []
        current = []
        with open(synth_anno, "r", encoding="utf-8") as text_file:
            for raw_line in text_file:
                # Strip only the newline: the last character of the line is
                # payload and may legitimately be any symbol.
                line = raw_line.rstrip('\n')
                if line:
                    current.append(line)
                elif current:
                    blocks.append(current)
                    current = []
        if current:
            blocks.append(current)
        return blocks

    @staticmethod
    def _add_child(parent, tag, text=None):
        """Append ``<tag>`` to ``parent``, optionally set its text, and return it."""
        child = ElementTree.SubElement(parent, tag)
        if text is not None:
            child.text = text
        return child

    def _build_annotation_tree(self, image_info, blocks):
        """Build the ``<annotation>`` element for one image and its blocks."""
        add = self._add_child
        root = ElementTree.Element('annotation')

        # top-level elements
        add(root, 'folder', ' ')
        add(root, 'filename', image_info['filename'])
        source = add(root, 'source', ' ')
        size = add(root, 'size', '')
        add(root, 'segmented', '0')

        # <source> children
        add(source, 'database', 'Unknown')
        add(source, 'annotation', 'Unknown')
        add(source, 'image', 'Unknown')

        # <size> children
        add(size, 'width', str(image_info['width']))
        add(size, 'height', str(image_info['height']))
        add(size, 'depth', ' ')

        # one <object> per annotation block
        for block in blocks:
            box_info = self._get_info_from_block(block)

            obj = add(root, 'object')
            add(obj, 'name', 'data')
            add(obj, 'occluded', '0')
            bndbox = add(obj, 'bndbox')
            attributes = add(obj, 'attributes')

            add(bndbox, 'xmin', str(box_info['x_min']))
            add(bndbox, 'ymin', str(box_info['y_min']))
            add(bndbox, 'xmax', str(box_info['x_max']))
            add(bndbox, 'ymax', str(box_info['y_max']))

            # nested <attributes><attributes> holding a name/value pair
            sub_attributes = add(attributes, 'attributes')
            add(sub_attributes, 'name', 'value')
            add(sub_attributes, 'value', box_info['value'])

        return root

    def _convert_annotations(self, synth_anno, save_dir_name):
        """Convert one ``.txt`` annotation file into an XML file.

        Args:
            synth_anno: path of the source annotation file.
            save_dir_name: existing directory that receives ``<stem>.xml``.
        """
        blocks = self._read_annotation_blocks(synth_anno)
        image_info = self._get_info_from_txt(synth_anno)

        root = self._build_annotation_tree(image_info, blocks)
        tree = ElementTree.ElementTree(root)
        self._indent(root)  # formatting and indentation

        # splitext keeps dots inside the stem ('card.001.txt' -> 'card.001.xml').
        stem = os.path.splitext(os.path.basename(synth_anno))[0]
        save_name = os.path.join(save_dir_name, stem + '.xml')
        tree.write(save_name, encoding='utf-8', method='xml')

    def _main_dir_create(self):
        """Create the Annotations and JPEGImages directories if needed."""
        # makedirs also creates a missing output root and tolerates reruns.
        os.makedirs(self.pascal_annotation_dir, exist_ok=True)
        os.makedirs(self.pascal_images_dir, exist_ok=True)

    def _copy_images(self):
        """Copy every discovered source image into the images directory."""
        print ('copying images...')
        for image_path in tqdm(self.images):
            sh.copy(image_path, self.pascal_images_dir)

    def start_convertion(self):
        """Run the whole pipeline: create folders, copy images, write XMLs."""
        self._main_dir_create()
        self._copy_images()
        print ('converting annotations...')
        for anno_path in tqdm(self.annotations):
            self._convert_annotations(anno_path, self.pascal_annotation_dir)

In [13]:
# Usage: point the converter at the synthetic dataset and at the output root.
converter = PascalConverter(
    synth_dataset_dir='/home/alex/PycharmProjects/Playground/save_path',
    output_pascal_dir='/home/alex/poker/annotation_to_pascal/test_pascal',
)

In [14]:
# Run the full conversion: create the output folders, copy the images,
# then write one XML annotation per source .txt file.
converter.start_convertion()

100%|██████████| 10/10 [00:00<00:00, 1560.26it/s]
100%|██████████| 10/10 [00:00<00:00, 83.29it/s]

copying images...
converting annotations...



