In [6]:
import cv2
from pathlib import Path
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET 
import numpy as np
import os
import random

In [7]:
# Source datasets: every entry is a dataset root that contains the image and
# annotation sub-folders named below. Comment a line in/out to (de)select it.
dataset_paths      = []
# dataset_paths.append(Path('LADD','LADD_V4_spring', 'LizaAlertDroneDatasetV4_Spring'))
dataset_paths.append(Path('LADD','LADD_V4_summer', 'LizaAlertDroneDatasetV4_Summer'))
dataset_paths.append(Path('LADD','LADD_V4_winter', 'LizaAlertDroneDatasetV4_Winter'))

# names of source folders in datasets
images_folder      =         'JPEGImages'
annotations_folder =         'Annotations'

# target folder - keeps the degraded inputs (X) and the matching targets (y)
crops_folder       =         'dataset'

# common crops details
initial_crop_size  =         512   # side of the square region cut out of a source image
crop_size          =         512   # side the cut region is resized to (target y)
output_size        =         512   # side the degraded image is scaled back up to (input X)

# random cropping details
r_files_per_dataset=         10000 # max. number of images read per dataset
crops_per_image    =         1     # random crops taken from each image

# bbox cropping details
b_files_per_dataset=         10000 # max. number of annotation files read per dataset

# downscale factors used to degrade X images (crop_size is divided by each one)
compress_ratios    =         [2,4,6]

# Create the output tree idempotently: a previous partial run may have left
# `crops_folder` without one of its sub-folders, and cv2.imwrite does not
# raise when the destination folder is missing - it just writes nothing.
for _subfolder in ('X', 'y'):
    Path(crops_folder, _subfolder).mkdir(parents=True, exist_ok=True)

In [10]:
def get_random_crops():
    """Write randomly positioned high-res / low-res training pairs to disk.

    For each configured dataset, at most ``r_files_per_dataset`` files from its
    ``images_folder`` are read. From every usable image, ``crops_per_image``
    square regions of side ``initial_crop_size`` are cut at random positions
    and resized to ``crop_size`` (the target ``y``). For every entry of
    ``compress_ratios`` the target is shrunk by that factor and scaled back up
    to ``output_size`` (the degraded input ``X``). Each pair is saved under the
    same file name in ``crops_folder/X`` and ``crops_folder/y``; names start
    with ``'0'`` followed by a running counter, the ratio and the initial crop
    size.

    Files that OpenCV cannot decode and images smaller than
    ``initial_crop_size`` in either dimension are skipped (they still count
    towards the progress report and the per-dataset limit).

    All settings are read from the module-level configuration. Returns nothing;
    progress and the final crop count are reported with ``print``.
    """
    # List every dataset once; the slice below enforces the per-dataset limit
    # exactly (the former `ds_files > limit` test let one extra file through).
    image_lists = [(path, os.listdir(Path(path, images_folder))) for path in dataset_paths]
    total_files = sum(min(len(names), r_files_per_dataset) for _, names in image_lists)

    print('Generate random crops from %s files...' % total_files)

    # File counts at which a progress line is printed (10 evenly spaced marks).
    checkpoints = set(np.floor(np.linspace(0, total_files, 10)).astype(int).tolist())

    n_crops = 0
    n_files = 0

    for dataset_path, filenames in image_lists:

        for filename in filenames[:r_files_per_dataset]:

            img = cv2.imread(str(Path(dataset_path, images_folder, filename)))

            # imread returns None for unreadable / non-image files; an image
            # smaller than the crop would produce negative offsets and an
            # empty or misplaced slice.
            if img is not None and min(img.shape[:2]) >= initial_crop_size:
                height, width = img.shape[:2]

                for _ in range(crops_per_image):

                    h0 = int(random.random()*(height - initial_crop_size))
                    w0 = int(random.random()*(width - initial_crop_size))

                    y_init = img[h0:h0+initial_crop_size, w0:w0+initial_crop_size, :]
                    y = cv2.resize(y_init, (crop_size, crop_size))

                    for compress_ratio in compress_ratios:

                        low_res = int(crop_size/compress_ratio)
                        X = cv2.resize(y, (low_res, low_res))
                        X = cv2.resize(X, (output_size, output_size)) #lo-res

                        # X and y share one name so the pair can be matched later.
                        pair_name = '0{:05}c{}i{}.jpg'.format(n_crops, compress_ratio, initial_crop_size)
                        cv2.imwrite(str(Path(crops_folder, 'X', pair_name)), X)
                        cv2.imwrite(str(Path(crops_folder, 'y', pair_name)), y)

                        n_crops+=1

            n_files+=1

            # built-in int: the `np.int` alias was removed from NumPy
            if n_files in checkpoints:
                print(int(n_files/total_files*100),'% passed...')

    print(n_crops, 'random crops created.')
    
# Run the random-crop generation; image pairs are written under crops_folder/X and crops_folder/y.
get_random_crops()

Generate random crops from 268 files...
10 % passed...
22 % passed...
33 % passed...
44 % passed...
55 % passed...
66 % passed...
77 % passed...
88 % passed...
100 % passed...
2680 random crops created.


In [8]:
def _target_crop_window(xmin, ymin, xmax, ymax, width, height):
    """Return a jittered crop window ``(x0, y0, x1, y1)`` around a bbox, or ``None``.

    The bbox is padded symmetrically towards a square of side
    ``initial_crop_size`` and shifted by a random offset of up to 25% of that
    size along each axis. ``None`` is returned when the shifted window would
    touch or leave the image border.
    """
    # padding needed on each side to reach initial_crop_size
    padding_w = int((initial_crop_size - (xmax - xmin))/2.)
    padding_h = int((initial_crop_size - (ymax - ymin))/2.)

    # random shift within 25% of the crop size from the bbox center
    random_dx = int((random.random()-.5)*.5*initial_crop_size)
    random_dy = int((random.random()-.5)*.5*initial_crop_size)

    x0 = xmin - padding_w + random_dx
    x1 = xmax + padding_w + random_dx
    y0 = ymin - padding_h + random_dy
    y1 = ymax + padding_h + random_dy

    # do not proceed if crop is outside of image
    if x0 < 1 or x1 > width-1 or y0 < 1 or y1 > height-1:
        return None

    # Integer padding can leave the window one pixel short; grow it by one
    # pixel towards the image center side that has more room.
    if x1 - x0 < initial_crop_size:
        if ((x1+x0)/2.) < (width/2.):
            x1 += 1
        else:
            x0 -= 1
    if y1 - y0 < initial_crop_size:
        if ((y1+y0)/2.) < (height/2.):
            y1 += 1
        else:
            y0 -= 1

    return x0, y0, x1, y1


def get_target_crops():
    """Write training pairs cropped around annotated bounding boxes to disk.

    For each configured dataset, at most ``b_files_per_dataset`` ``.xml`` files
    from its ``annotations_folder`` are parsed, and the image with the same
    base name and a ``jpg`` extension is read from ``images_folder``. For every
    ``object/bndbox`` element a randomly shifted window around the box is cut
    from the image and resized to ``crop_size`` (the target ``y``). For every
    entry of ``compress_ratios`` the target is shrunk by that factor and scaled
    back up to ``output_size`` (the degraded input ``X``). Each pair is saved
    under the same file name in ``crops_folder/X`` and ``crops_folder/y``;
    names start with ``'1'`` followed by a running counter, the ratio and the
    initial crop size.

    Image bounds are taken from the decoded image rather than from the XML
    ``<size>`` element, so a missing or inconsistent ``<size>`` cannot lead to
    an out-of-range slice. Annotations whose image cannot be read are skipped
    (they still count towards the progress report and the per-dataset limit).

    All settings are read from the module-level configuration. Returns nothing;
    progress and the final crop count are reported with ``print``.
    """
    # Collect the annotation files actually iterated, so the progress total
    # matches the loop; the slice below enforces the per-dataset limit exactly.
    annotation_lists = []
    for path in dataset_paths:
        xml_names = [name for name in os.listdir(Path(path, annotations_folder))
                     if name.endswith('.xml')]
        annotation_lists.append((path, xml_names))
    total_files = sum(min(len(names), b_files_per_dataset) for _, names in annotation_lists)

    print('Generate target crops from %s files...' % total_files)

    # File counts at which a progress line is printed (10 evenly spaced marks).
    checkpoints = set(np.floor(np.linspace(0, total_files, 10)).astype(int).tolist())

    n_crops = 0
    n_files = 0

    for dataset_path, xml_names in annotation_lists:

        for filename in xml_names[:b_files_per_dataset]:

            root = ET.parse(str(Path(dataset_path, annotations_folder, filename))).getroot()
            img = cv2.imread(str(Path(dataset_path, images_folder, filename[:-3]+'jpg')))

            # imread returns None when the image is missing or unreadable
            if img is not None:
                height, width = img.shape[:2]

                # bndbox elements that are direct children of top-level objects
                for box in root.findall('object/bndbox'):

                    window = _target_crop_window(
                        int(box.findtext('xmin')), int(box.findtext('ymin')),
                        int(box.findtext('xmax')), int(box.findtext('ymax')),
                        width, height)
                    if window is None:
                        continue
                    new_xmin, new_ymin, new_xmax, new_ymax = window

                    # create crop
                    y_init = img[new_ymin:new_ymax, new_xmin:new_xmax]
                    y = cv2.resize(y_init, (crop_size, crop_size))

                    # generate X-y
                    for compress_ratio in compress_ratios:

                        low_res = int(crop_size/compress_ratio)
                        X = cv2.resize(y, (low_res, low_res))
                        X = cv2.resize(X, (output_size, output_size)) #lo-res

                        # X and y share one name so the pair can be matched later.
                        pair_name = '1{:05}c{}i{}.jpg'.format(n_crops, compress_ratio, initial_crop_size)
                        cv2.imwrite(str(Path(crops_folder, 'X', pair_name)), X)
                        cv2.imwrite(str(Path(crops_folder, 'y', pair_name)), y)
                        n_crops+=1

            n_files+=1

            # built-in int: the `np.int` alias was removed from NumPy
            if n_files in checkpoints:
                print(int(n_files/total_files*100),'% passed...')

    print(n_crops, 'crops created.')

# Run the bounding-box crop generation; image pairs are written under crops_folder/X and crops_folder/y.
get_target_crops()

Generate target crops from 769 files...
11 % passed...
22 % passed...
33 % passed...
44 % passed...
55 % passed...
66 % passed...
77 % passed...
88 % passed...
100 % passed...
5841 crops created.
