In [1]:
import multiprocessing
import multiprocessing.pool
import os
from functools import partial

import numpy as np

from keras_video_datagen import ImageDataGenerator

In [2]:
datagen = ImageDataGenerator()

# Keyword arguments common to both directory iterators.
shared_flow_kwargs = dict(batch_size=2,
                          frames_per_step=12,
                          shuffle=False,
                          color_mode='rgb')

train_data = datagen.flow_from_directory('data/train',
                                         target_size=(70, 140),
                                         **shared_flow_kwargs)

# NOTE(review): the validation target_size is (1, 140) while training uses
# (70, 140) — confirm this difference is intentional.
val_data = datagen.flow_from_directory('data/validation',
                                       target_size=(1, 140),
                                       **shared_flow_kwargs)

Found 480 images belonging to 2 classes.
Found 480 images belonging to 2 classes.


In [3]:
# Pull one batch from the training iterator. The result is unpacked into three
# values; the third (`kek`) is not used by any cell shown in this notebook.
# The debug trace printed under this cell reports
# batch_x.shape == (2, 12, 70, 140, 3) for this call.
x, y, kek = train_data.next()

# Optional inspection of the training batch (disabled):
# print(x.shape)
# print(y.shape)
#
# print(y, end='\n\n')

# Same inspection for one validation batch (disabled):
# x, y, kek = val_data.next()
#
# print(x.shape)
# print(y.shape)
#
# print(y)

n: 480
self.batch_index: 0
index_array: [0, 1, 2, 3 ... 476, 477, 478, 479]
++current_index = (0 * 2 * 12) % 480 = 0
current_index + batch_size * frames_per_step = 0 + 2 * 12 = 24
n > 24
++current_batch_size =  2
++index_array[0:0+12*2]: [0, 1, 2, 3 ... 20, 21, 22, 23]


index_array: [0, 1, 2, 3 ... 20, 21, 22, 23]
current_index: 0
current_batch_size: 2
batch_x.shape:  (2, 12, 70, 140, 3)


In [4]:
directory = 'data/train/'

# Every immediate sub-directory of `directory` is one class; the names are
# taken in sorted order so the name -> index mapping below is deterministic.
classes = [entry
           for entry in sorted(os.listdir(directory))
           if os.path.isdir(os.path.join(directory, entry))]
print(classes)

# Map each class name to its position in the sorted list.
class_indices = {name: position for position, name in enumerate(classes)}
print(class_indices)


['Begin', 'Web']
{'Begin': 0, 'Web': 1}


In [5]:
def _count_valid_files_in_directory(directory, white_list_formats, follow_links):
    def _recursive_list(subpath):
        return sorted(os.walk(subpath, followlinks=follow_links), key=lambda tpl: tpl[0])

    samples = 0
    for _, _, files in _recursive_list(directory):
        for fname in sorted(files):
            is_valid = False
            for extension in white_list_formats:
                if fname.lower().endswith('.' + extension):
                    is_valid = True
                    break
            if is_valid:
                samples += 1
    return samples

# Image file extensions (lower-case, without the leading dot) that are passed
# to the directory helpers as `white_list_formats`.
white_list_formats = {
    'png',
    'jpg',
    'jpeg',
    'bmp',
    'ppm',
}

# Passed to the directory helpers, which forward it to os.walk(followlinks=...).
follow_links = False

In [6]:
def _list_valid_filenames_in_directory(directory, white_list_formats,
                                       class_indices, follow_links):

    def _recursive_list(subpath):
        return sorted(os.walk(subpath, followlinks=follow_links), key=lambda tpl: tpl[0])

    classes = []
    filenames = []
    subdir = os.path.basename(directory)
    basedir = os.path.dirname(directory)

    for root, _, files in _recursive_list(directory):
        for fname in sorted(files):
            is_valid = False
            for extension in white_list_formats:
                if fname.lower().endswith('.' + extension):
                    is_valid = True
                    break
            if is_valid:
                classes.append(class_indices[subdir])
                # add filename relative to directory
                absolute_path = os.path.join(root, fname)
                filenames.append(os.path.relpath(absolute_path, basedir))

    return classes, filenames

In [7]:
# Count every valid image under the class sub-directories, one pool task per
# class directory.
# NOTE: ThreadPool lives in the `multiprocessing.pool` submodule, which a bare
# `import multiprocessing` does not load; it must be imported explicitly,
# otherwise this line only works when another library happened to import it.
# `pool` is left open on purpose: the indexing cell that follows reuses it and
# is the one that closes it.
pool = multiprocessing.pool.ThreadPool()
function_partial = partial(_count_valid_files_in_directory,
                           white_list_formats=white_list_formats,
                           follow_links=follow_links)
class_dirs = [os.path.join(directory, subdir) for subdir in classes]
samples = sum(pool.map(function_partial, class_dirs))

print(int(samples))

480


In [8]:
# second, build an index of the images in the different class subfolders
# One async task per class directory; each returns (class indices, filenames).
results = []

filenames = []
self_classes = np.zeros((samples,), dtype='int32')
i = 0
k = 0  # not read in this cell; kept in case a later cell relies on it
for dirpath in (os.path.join(directory, subdir) for subdir in classes):
    # print(dirpath)
    results.append(pool.apply_async(_list_valid_filenames_in_directory,
                                    (dirpath, white_list_formats,
                                     class_indices, follow_links)))

for res in results:
    # Unpack into per-directory names: reusing `classes` / `filenames` here
    # would overwrite the class-name list and the accumulator itself.
    dir_classes, dir_filenames = res.get()
    self_classes[i:i + len(dir_classes)] = dir_classes
    filenames += dir_filenames
    print('self_classes[%d:%d + %d]: %s' % (i, i, len(dir_classes), self_classes[i:i + len(dir_classes)]))
    i += len(dir_classes)
# The pool was created in the counting cell; after this point it cannot be
# reused, so re-running this cell requires re-running that one first.
pool.close()
pool.join()

print('\n\nlen(results)', len(results))
print('len(results[0].get())):', len(results[0].get()))
print('results[0].get():', results[0].get())
print('len(results[1].get())):', len(results[1].get()))
print('results[1].get():', results[1].get(), end='\n\n')

# print('len classes:', len(classes))
# print('classes:', classes, end='\n\n')
# print('len(filenames)', len(filenames))
# print('filenames:', filenames, end='\n\n')

print('len(self_classes):', len(self_classes))
print('self_classes:', self_classes, end='\n\n')


self_classes[0:0 + 240]: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
self_classes[240:240 + 240]: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1