In [3]:
import os
import glob
import numpy as np
from PIL import Image

In [1]:
def DataIterator(test_path, gt_path, test_prefix='', gt_prefix='',
              test_format='png', gt_format='png', start = 0, end = -1, 
         process_gt = lambda img: img, id_format = lambda form: form, im_proc = lambda proc : proc):
    """
    Iterator over the desired slice of the data
    :param test_path: (str) relative or absolute path to the test results images
    :param gt_path: (str) relative or absolute path to the ground truth images
    :param test_prefix: (str) prefix of the test files before their ID (e.g.
    test_A_001235.png has test_A_ as prefix)
    :param gt_prefix: (str) prefix of the ground truth files before their ID
    (e.g. gt001235.png has gt as prefix)
    :param test_format: (str) format of the test images
    :param gt_format: (str) format of the ground truth images
    :param start: (int) Id of the first element of the sequence
    :param start: (int) Id of the last element of the sequence
    :param process_gt: function to be applied to the grountruth for obtaining a specific format
    :param id_format: function to be applied to the start and end in order to find them on the dataset
    :yields: (tuple) Pair of Image - GrounTruth
        -Image
        -Ground Truth
    """    
    #We are suposing that the images and the gt's have a sortable common part in their names
    
    #Get all files in each directory
    test_files = glob.glob(os.path.join(test_path, test_prefix + '*.' + test_format))
    gt_files = glob.glob(os.path.join(gt_path, gt_prefix + '*.' + gt_format))

    if len(test_files) == 0:
        print ("No images found!")
        return
    
    if len(gt_files) == 0:
        print ("No GT found!")
        return    

    #We can avoid the suposition that images and gt are ordered or that all files can be found
    #in each folder by taking the intersection of their ids
    pre_id_imgs = os.path.join(test_path, test_prefix)
    pre_id_gt = os.path.join(gt_path, gt_prefix)

    indices_im = np.array([filename.replace(pre_id_imgs, '').replace('.' + test_format,'') for filename in test_files])
    indices_gt = np.array([filename.replace(pre_id_gt, '').replace('.' + gt_format,'') for filename in gt_files])

    common_id = np.in1d(indices_im , indices_gt)

    #Get common indices in image folder and GT folder
    indices_im = indices_im[common_id]

    #filter indices between init and end 
    ini = np.where(indices_im == id_format(start))
    end = np.where(indices_im == id_format(end))

    if(len(ini[0])):
        ini = ini[0][0]
    else:
        print ("Couldn't find first element in the dataset. Starting from the beggining")
        ini = 0

    if(len(end[0])):
        end = end[0][0]
    else:
        print ("Couldn't find last element of the sequence in the dataset. Ending at the last element")
        end = -1
    
    if end < ini:
        print("last element comes before than the first. Inverting them. Possible corruption?")
        ini, end = end, ini
    
    #Get target elements               
    indices_im = indices_im[ini:end]

    #Reform path's with common indices
    for Im_p, gt_p in [(pre_id_imgs + ind + '.' + test_format, pre_id_gt + ind + '.' + gt_format) for ind in indices_im]:
            
            #These prints are only in the notebook!
            print (Im_p)
            print(gt_p)
            
            pil_img_test = Image.open(Im_p)
            img_test = proc(np.array(pil_img_test))
            
            pil_img_gt = Image.open(gt_p)
            real_img_gt = process_gt(np.array(pil_img_gt))
            
            yield(img_test, real_img_gt)



In [7]:
def id_format(x):
    return str(int(x)).zfill(6)

dataset = 'fall'

gt_folder = os.path.join('datasets', dataset,'groundtruth')

tests_folder = os.path.join('datasets', dataset,
                                'input')  


x = DataIterator(tests_folder, gt_folder, test_prefix='in',
                               gt_prefix='gt', test_format='jpg',
                               gt_format='png',start = 1, end = 3, id_format = id_format)

In [8]:
next(x)

datasets\fall\input\in000001.jpg
datasets\fall\groundtruth\gt000001.png


(array([[[180, 191, 209],
         [180, 191, 209],
         [180, 191, 209],
         ..., 
         [247, 255, 255],
         [245, 254, 253],
         [245, 254, 253]],
 
        [[181, 192, 210],
         [181, 192, 210],
         [180, 191, 209],
         ..., 
         [246, 255, 254],
         [246, 255, 254],
         [248, 255, 255]],
 
        [[181, 192, 210],
         [181, 192, 210],
         [181, 192, 210],
         ..., 
         [246, 255, 254],
         [248, 255, 255],
         [249, 255, 255]],
 
        ..., 
        [[222, 213, 184],
         [223, 214, 185],
         [224, 215, 186],
         ..., 
         [186, 190, 189],
         [185, 189, 188],
         [185, 189, 188]],
 
        [[207, 198, 169],
         [214, 205, 176],
         [216, 207, 178],
         ..., 
         [189, 193, 192],
         [188, 192, 191],
         [188, 192, 191]],
 
        [[204, 195, 166],
         [217, 208, 179],
         [224, 215, 186],
         ..., 
         [189, 193, 192

In [9]:
next(x)

datasets\fall\input\in000002.jpg
datasets\fall\groundtruth\gt000002.png


(array([[[181, 192, 210],
         [181, 192, 210],
         [180, 191, 209],
         ..., 
         [246, 254, 255],
         [246, 254, 255],
         [248, 255, 255]],
 
        [[181, 192, 210],
         [181, 192, 210],
         [180, 191, 209],
         ..., 
         [248, 255, 255],
         [247, 255, 255],
         [247, 255, 255]],
 
        [[182, 193, 211],
         [181, 192, 210],
         [180, 191, 209],
         ..., 
         [249, 255, 255],
         [248, 255, 255],
         [246, 254, 255]],
 
        ..., 
        [[231, 217, 191],
         [233, 219, 193],
         [233, 219, 193],
         ..., 
         [185, 189, 188],
         [185, 189, 188],
         [185, 189, 188]],
 
        [[219, 205, 179],
         [224, 210, 184],
         [224, 210, 184],
         ..., 
         [190, 194, 193],
         [189, 193, 192],
         [189, 193, 192]],
 
        [[220, 206, 180],
         [228, 214, 188],
         [229, 215, 189],
         ..., 
         [188, 192, 191

In [10]:
next(x)

StopIteration: 