## Extract patch images from patch annotations
Read the XML label file of each image and extract the annotated patch images from the original image.

In [7]:
import cv2
import os
import json
import xml.etree.ElementTree as ET

# --- Run state -------------------------------------------------------------
size = 1024  # Side length, in pixels, of every extracted (square) patch
num_patch = 0  # Running count of patches written
empty_img = list()  # Names of images whose annotation file has no objects

# --- Input locations -------------------------------------------------------
# Dataset root; the KOMPSAT-5 root is kept commented out as an alternative.
# home = '/media/data1/Ace/11000_Marine_objects/11200_Dataset/11220_Images/11221_KOMPSAT-5'
home = '/media/data1/Ace/11000_Marine_objects/11200_Dataset/11220_Images/11222_Sentinel-1'
img_home = os.path.join(home, 'PNG')

# Annotation XML directory (alternative: the "Annotations" folder under home).
# ann_home = os.path.join(home, 'Annotations')
ann_home = '/home/sjhong/work/script/work/Annotations'

# --- Output location -------------------------------------------------------
# Patch output directory (alternative: a "patch" folder beside the PNGs).
# save_home = os.path.join(home, 'PNG', 'patch')
save_home = '/home/sjhong/work/script/work/patch'
os.makedirs(save_home, exist_ok=True)

def check_box(box, size=1024):
    """Tell whether a bounding box is exactly ``size`` x ``size``.

    Args:
        box(list[int,]): bounding box. [xmin, ymin, xmax, ymax]
        size(int): expected side length of the bounding box. default: 1024

    Returns:
        (bool): True only if both the width and the height equal "size"
    """
    # Compare (width, height) against the expected square in one step.
    extent = (box[2] - box[0], box[3] - box[1])
    return extent == (size, size)

def correct_box(box, size=1024):
    """Correct box coordinates so the box is exactly ``size`` x ``size``.

    In labelImg, if xmin/ymin is 0, it is recorded as 1.

    For example, if the size of a box is 1024, labelImg stores
    [0, 0, 1024, 1024] as [1, 1, 1024, 1024]
    [1, 1, 1025, 1025] as [1, 1, 1025, 1025]
    [2, 2, 1026, 1026] as [2, 2, 1026, 1026]

    So a box whose side is (size - 1) and whose min coordinate is 1 has to be
    moved back to the origin:
    [1, 1, 1024, 1024] -> [0, 0, 1024, 1024] (changed)
    [1, 1, 1025, 1025] -> [1, 1, 1025, 1025]
    [2, 2, 1026, 1026] -> [2, 2, 1026, 1026]

    Sometimes a box at the corner is even smaller than (size - 1). Pulling
    the min coordinate back by "size" would then make it negative, so the box
    is anchored at 0 instead:
    [1, 1, 1023, 1023] -> [0, 0, 1024, 1024]

    For any other box of the wrong size (min coordinate > 1) the min
    coordinate is kept and the max coordinate is recomputed.

    The list is modified in place and also returned.

    Args:
        box(list[int,]): bounding box. [xmin, ymin, xmax, ymax]
        size(int): size of bounding box. default: 1024
    Returns:
        box(list[int,]): bounding box. [xmin, ymin, xmax, ymax]
    """
    # Treat x (indices 0/2) and y (indices 1/3) identically.
    for lo, hi in ((0, 2), (1, 3)):
        if box[hi] - box[lo] == size:
            continue

        if box[lo] > 1:
            # Box is away from the origin: trust the min, recompute the max.
            box[hi] = box[lo] + size
        else:
            # Box touches the origin: trust the max, but never let the min
            # go negative (a negative index would wrap around when slicing).
            box[lo] = max(box[hi] - size, 0)
            box[hi] = box[lo] + size

    return box
    
def correct_box2(box, size=1024):
    """Force a box to be ``size`` x ``size``, anchored at its min corner.

    Negative xmin/ymin are raised to 0, then xmax/ymax are overwritten with
    min + size. The original xmax/ymax values are ignored.

    The list is modified in place and also returned.

    Args:
        box(list[int,]): bounding box. [xmin, ymin, xmax, ymax]
        size(int): size of bounding box. default: 1024
    Returns:
        box(list[int,]): bounding box. [xmin, ymin, xmax, ymax]
    """
    # Index 0/1 hold the min corner; the matching max sits two slots later.
    for lo in (0, 1):
        box[lo] = max(box[lo], 0)
        box[lo + 2] = box[lo] + size

    return box

# Walk every image, read its annotation XML, and save one patch per box.
for img_name in os.listdir(img_home):
    print('Processing {}'.format(img_name))
    # splitext instead of fixed-width slicing, so the result does not depend
    # on the extension being exactly three characters long.
    img_stem = os.path.splitext(img_name)[0]
    img_path = os.path.join(img_home, img_name)
    ann_path = os.path.join(ann_home, img_stem + '.xml')

    objs = ET.parse(ann_path).getroot().findall('object')
    if not objs:
        # Annotation file exists but contains no objects.
        empty_img.append(img_name)
        continue

    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        print('Failed to read image: ', img_path)
        continue

    for obj in objs:
        # Coord of labelImg starts from 1
        # Iterate the element directly: Element.getchildren() was removed in
        # Python 3.9. Children are read in document order, which is assumed
        # to be xmin, ymin, xmax, ymax.
        bndbox = [int(coord.text) for coord in obj.find('bndbox')]
        # correct_box(bndbox, size) is the alternative correction strategy.
        bndbox = correct_box2(bndbox, size)

        if not check_box(bndbox, size):
            print('Wrong boxes: ', bndbox)
            continue

        xmin, ymin, xmax, ymax = bndbox[:4]
        patch_img = img[ymin:ymin + size, xmin:xmin + size]
        # Encode all four coordinates in the name. The earlier name was built
        # from x values only, so boxes sharing an xmin overwrote each other.
        patch_name = '{}_{}_{}_{}_{}.png'.format(img_stem, xmin, ymin, xmax, ymax)
        # Count only patches that were actually written.
        if cv2.imwrite(os.path.join(save_home, patch_name), patch_img):
            num_patch += 1
        else:
            print('Failed to write patch: ', patch_name)

if empty_img:
    print('Annotations not eixst: ', empty_img)
print('Total number of patches: ', num_patch)
print('Done')

Processing s1a-iw-grd-vv-20230510t215257-20230510t215322-048477-05d4d9-001.png




Processing s1a-iw-grd-vv-20230705t204355-20230705t204420-049293-05ed64-001.png
Processing s1a-iw-grd-vv-20230520t100540-20230520t100605-048616-05d8e5-001.png
Processing s1a-iw-grd-vv-20230213t015011-20230213t015040-047211-05aa2e-001.png
Processing s1a-iw-grd-vv-20230410t103340-20230410t103409-048033-05c60d-001.png
Processing s1a-iw-grd-vv-20220528t205209-20220528t205234-043416-052f37-001.png
Processing s1a-iw-grd-vv-20230531t174140-20230531t174205-048781-05ddd1-001.png
Processing s1a-iw-grd-vv-20230605t174943-20230605t175008-048854-05e008-001.png
Processing s1a-iw-grd-vv-20230709t180500-20230709t180525-049350-05ef34-001.png
Processing s1a-iw-grd-vv-20230404t180455-20230404t180520-047950-05c33c-001.png
Processing s1a-iw-grd-vv-20230615t111535-20230615t111600-048995-05e45e-001.png
Processing s1a-iw-grd-vv-20230527t095533-20230527t095558-048718-05dbf4-001.png
Processing s1a-iw-grd-vv-20230412t204351-20230412t204416-048068-05c738-001.png
Processing s1a-iw-grd-vv-20230215t215255-20230215t21