In [30]:
import os
import glob
import json
import shutil as sh
from tqdm import tqdm as tqdm
from xml.etree import ElementTree

In [22]:
class FacebookConverter:
    """Convert a JSON text-annotation file into Pascal VOC style XML files.

    The annotation JSON is expected to provide three top-level mappings:
    ``imgs`` (image id -> dict with ``file_name``, ``width``, ``height``),
    ``anns`` (annotation id -> dict with ``bbox`` and ``utf8_string``) and
    ``imgToAnns`` (image id -> list of annotation ids).

    Output layout under ``output_pascal_dir``:

    * ``Annotations/`` -- one XML file per entry of ``imgToAnns``
    * ``JPEGImages/``  -- copies of the ``*.jpg`` files found in the image
      directory
    """

    def __init__(self, facebook_anno_path, facebook_img_path,
                 output_pascal_dir):
        """Store the paths and collect the images to copy.

        Args:
            facebook_anno_path: path of the annotation JSON file.
            facebook_img_path: directory that holds the source ``*.jpg`` images.
            output_pascal_dir: root directory of the generated layout.
        """
        self.anno_path = facebook_anno_path
        self.imgs_path = facebook_img_path

        # Only ``*.jpg`` files directly inside the image directory are used.
        self.images = glob.glob(os.path.join(self.imgs_path, '*.jpg'))

        self.output_pascal_dir = output_pascal_dir
        self.pascal_annotation_dir = os.path.join(self.output_pascal_dir,
                                                  "Annotations")
        self.pascal_images_dir = os.path.join(self.output_pascal_dir,
                                              "JPEGImages")

    def _indent(self, elem, level=0):
        """Pretty-print ``elem`` in place by rewriting whitespace text/tails.

        Each nesting level is indented by two spaces. Existing non-blank
        ``text``/``tail`` values are left untouched.
        """
        i = "\n" + level * "  "
        if len(elem):
            if not elem.text or not elem.text.strip():
                elem.text = i + "  "
            if not elem.tail or not elem.tail.strip():
                elem.tail = i
            for child in elem:
                self._indent(child, level + 1)
            # ``child`` is now the last child: its tail precedes the parent's
            # closing tag, so it is dedented back to the parent's level.
            if not child.tail or not child.tail.strip():
                child.tail = i
        else:
            if level and (not elem.tail or not elem.tail.strip()):
                elem.tail = i

    def _main_dir_create(self):
        """Create the ``Annotations`` and ``JPEGImages`` output directories.

        ``os.makedirs`` is used so that a missing ``output_pascal_dir`` is
        created as well; calling this repeatedly is harmless.
        """
        os.makedirs(self.pascal_annotation_dir, exist_ok=True)
        os.makedirs(self.pascal_images_dir, exist_ok=True)

    def _copy_images(self):
        """Copy every collected ``*.jpg`` into the ``JPEGImages`` directory."""
        print ('copying images...')
        for i in tqdm(self.images):
            sh.copy(i, self.pascal_images_dir)

    @staticmethod
    def _bbox_to_corners(bbox):
        """Return ``(x_min, y_min, x_max, y_max)`` for an ``[x, y, w, h]`` box.

        NOTE(review): assumes ``bbox`` is ``[x, y, width, height]`` as
        described for the TextOCR dataset -- confirm against the input file.
        """
        x, y, w, h = bbox[0], bbox[1], bbox[2], bbox[3]
        return x, y, x + w, y + h

    def _build_annotation_root(self, image_info, annotations):
        """Build the ``<annotation>`` element for one image.

        Args:
            image_info: dict with ``file_name``, ``width`` and ``height``.
            annotations: iterable of dicts with ``bbox`` and ``utf8_string``.

        Returns:
            The (not yet indented) root ``Element``.
        """
        file_name_a = image_info['file_name'].split('/')[-1]

        root = ElementTree.Element('annotation')

        # main subelements
        folder = ElementTree.SubElement(root, 'folder')
        folder.text = ' '
        filename = ElementTree.SubElement(root, 'filename')
        filename.text = str(file_name_a)
        source = ElementTree.SubElement(root, 'source')
        source.text = ' '
        size = ElementTree.SubElement(root, 'size')
        size.text = ''
        segmented = ElementTree.SubElement(root, 'segmented')
        segmented.text = '0'

        # source subelements
        for tag in ('database', 'annotation', 'image'):
            ElementTree.SubElement(source, tag).text = 'Unknown'

        # size subelements
        width = ElementTree.SubElement(size, 'width')
        width.text = str(image_info['width'])
        height = ElementTree.SubElement(size, 'height')
        height.text = str(image_info['height'])
        depth = ElementTree.SubElement(size, 'depth')
        depth.text = ' '

        for anno in annotations:
            x_min, y_min, x_max, y_max = self._bbox_to_corners(anno['bbox'])
            value_a = anno['utf8_string']

            # one <object> per annotation, attached directly to the root
            obj = ElementTree.SubElement(root, 'object')
            obj_name = ElementTree.SubElement(obj, 'name')
            obj_name.text = 'data'
            occluded = ElementTree.SubElement(obj, 'occluded')
            occluded.text = '0'
            bndbox = ElementTree.SubElement(obj, 'bndbox')
            attributes = ElementTree.SubElement(obj, 'attributes')

            # bounding box corners
            for tag, coord in (('xmin', x_min), ('ymin', y_min),
                               ('xmax', x_max), ('ymax', y_max)):
                ElementTree.SubElement(bndbox, tag).text = str(coord)

            # the transcription is stored as a nested name/value attribute
            sub_attributes = ElementTree.SubElement(attributes, 'attributes')
            attr_name = ElementTree.SubElement(sub_attributes, 'name')
            attr_name.text = 'value'
            value = ElementTree.SubElement(sub_attributes, 'value')
            value.text = str(value_a)

        return root

    def _annotate_convertion(self):
        """Write one XML file per ``imgToAnns`` entry into ``Annotations``."""
        # Transcriptions may contain non-ASCII text, so do not rely on the
        # platform default encoding when reading the JSON.
        with open(self.anno_path, encoding='utf-8') as f:
            d = json.load(f)

        print ('converting annotations...')

        for img_id, ann_ids in tqdm(d['imgToAnns'].items()):
            image_a = d['imgs'][img_id]
            annotations = [d['anns'][ann_id] for ann_id in ann_ids]

            root = self._build_annotation_root(image_a, annotations)
            tree = ElementTree.ElementTree(root)
            self._indent(root)

            # splitext drops only the final extension, so file names that
            # contain extra dots keep a distinct XML name.
            file_name_a = image_a['file_name'].split('/')[-1]
            name = os.path.splitext(file_name_a)[0] + '.xml'
            save_name = os.path.join(self.pascal_annotation_dir, name)
            tree.write(save_name, encoding='utf-8', method='xml')

    def start_convertion(self):
        """Run the full pipeline: create directories, copy images, write XML."""
        self._main_dir_create()
        self._copy_images()
        self._annotate_convertion()

In [24]:
# Locations for this conversion run (absolute paths on the author's machine).
facebook_anno = '/home/alex/TextOCR_0.1_val.json'  # annotation JSON to convert
img_path = '/home/alex/poker/background_extend'  # directory with the source images
save_dir = '/home/alex/poker/facebook'  # root directory for the generated output

In [28]:
# Bind the configured paths to a converter instance; nothing is written to
# disk until start_convertion() is called.
facebook = FacebookConverter(
    facebook_anno_path=facebook_anno,
    facebook_img_path=img_path,
    output_pascal_dir=save_dir,
)

In [31]:
# Run the whole pipeline: create the output directories, copy the images,
# then write one XML annotation file per image entry of the JSON.
facebook.start_convertion()

100%|██████████| 37/37 [00:00<00:00, 902.67it/s]

copying images...



  1%|          | 34/3124 [00:00<00:09, 339.62it/s]

converting annotations...


100%|██████████| 3124/3124 [00:11<00:00, 276.98it/s]
