In [1]:
import json
import tensorflow as tf
import numpy as np

In [2]:
import PIL
from PIL import Image

In [3]:
import random

In [4]:
import os
# NOTE: this is a relative chdir, so it is not idempotent -- re-running the
# cell moves the working directory up one more level each time.
os.chdir("../")

In [5]:
# Class-label string -> integer id. annotation_generator looks every entry of
# an annotation's "classes" list up in this mapping.
class_map = {"Dog": 0}

In [10]:
def read_label_json(filepath):
    """
    Load a label/annotation file stored as JSON.

    Parameters
    ----------
    filepath : str or os.PathLike
        Location of the JSON file to read.

    Returns
    -------
    object
        Whatever ``json.load`` produces for the file's top-level value
        (``tf_dataloader`` indexes the result as a nested dict).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # text encoding, which varies between machines.
    with open(filepath, "r", encoding="utf-8") as filereader:
        return json.load(filereader)


def annotation_generator(annotations, image_width, image_height, batch_size):
    """
    Yield shuffled mini-batches of resized images together with their box labels.

    Parameters
    ----------
    annotations : list of dict
        Each entry is read through the keys ``"image_path"``, ``"bboxes"``
        (a sequence of boxes, of which elements 0-3 are used) and
        ``"classes"`` (labels looked up in the module-level ``class_map``).
        The list itself is left untouched; a copy is shuffled.
    image_width, image_height : int
        Size every image is resized to; also the divisors applied to the box
        values (elements 0 and 2 by width, 1 and 3 by height).
    batch_size : int
        Maximum number of images per batch; the final batch may be smaller.

    Yields
    ------
    batch_images : numpy.ndarray
        The batch's resized images stacked along a new leading axis.
    batch_labels : numpy.ndarray
        One row per box, rows of all images in the batch concatenated:
        ``[index_in_batch, b0/W, b1/H, b2/W, b3/H, class_id]``.
        Images without boxes contribute zero rows.
    """
    # Shuffle a private copy so the caller's list keeps its original order.
    shuffled = list(annotations)
    random.shuffle(shuffled)
    total = len(shuffled)

    for start in range(0, total, batch_size):
        batched_images_as_list = []
        batched_labels_as_list = []
        for batch_index, annotation in enumerate(shuffled[start:start + batch_size]):
            # Image.open is lazy; the context manager releases the underlying
            # file once the resized copy has been produced.
            with Image.open(annotation["image_path"]) as source_image:
                image = source_image.resize(
                    size=(image_width, image_height), resample=PIL.Image.LANCZOS
                )

            # NOTE(review): boxes are scaled by the *target* size, which is only
            # a true normalisation if the annotations are already expressed in
            # target-size pixel coordinates -- confirm against the label files.
            bboxes = np.array(
                [
                    [
                        bbox[0] / image_width,
                        bbox[1] / image_height,
                        bbox[2] / image_width,
                        bbox[3] / image_height,
                    ]
                    for bbox in annotation["bboxes"]
                ],
                dtype=float,
            ).reshape(-1, 4)  # keeps a (0, 4) shape when the image has no boxes
            classes = np.array(
                [class_map[class_label] for class_label in annotation["classes"]],
                dtype=float,
            ).reshape(-1, 1)

            # Leading column records which image of the batch each row belongs to.
            image_index = np.full((bboxes.shape[0], 1), float(batch_index))
            batched_labels_as_list.append(
                np.concatenate([image_index, bboxes, classes], axis=-1)
            )
            batched_images_as_list.append(np.asarray(image))

        # combine them
        batch_images = np.stack(batched_images_as_list, axis=0)
        batch_labels = np.concatenate(batched_labels_as_list, axis=0)
        yield batch_images, batch_labels


#     images, labels = [], []
#     for annotation in annotations:
#         image = Image.open(annotation["image_path"])  # read image
#         image = image.resize(
#             size=(image_width, image_height), resample=PIL.Image.LANCZOS
#         )
#         bboxes = np.array(
#             [
#                 [
#                     bbox[0] / image_width,
#                     bbox[1] / image_height,
#                     bbox[2] / image_width,
#                     bbox[3] / image_height,
#                 ]
#                 # [bbox[0]/640, bbox[1]/640, bbox[2]/640, bbox[3]/640]
#                 for bbox in annotation["bboxes"]
#             ]
#         )
#         classes = np.array(
#             [[class_map[class_label]] for class_label in annotation["classes"]]
#         )
#         label = np.concatenate([bboxes, classes], axis=-1)
#         images.append(image)
#         labels.append(label)
#     images = images * 10
#     labels = labels * 10
    
#     # batch generator
#     l = len(images)
#     for ndx in range(0, l, batch_size):
#         batch_images = np.stack(images[ndx:min(ndx + batch_size, l)], axis=0)
#         labels_stacked = []
#         for batch_index, label in enumerate(labels[ndx:min(ndx + batch_size, l)]):
#             labels_stacked.append(
#                 np.concatenate(
#                     [
#                         np.expand_dims(np.ones(label.shape[0]) * batch_index, axis=-1),
#                         label
#                     ],
#                     axis=-1
#                 )
#             )
#         batch_labels = np.concatenate(labels_stacked, axis=0)
#         yield np.cast[np.float32](batch_images), batch_labels


        
def combine(images, labels):
    """
    Merge per-image tensors and per-image label arrays into batch tensors.

    ``images`` is stacked along a new leading axis and divided by 255.0.
    Every element of ``labels`` (which must expose ``.shape``) gets an extra
    leading column holding its position in ``labels``; the results are then
    concatenated along axis 0.

    Returns a ``(images, labels)`` tuple of tensors.
    """
    stacked_images = tf.stack(images, axis=0) / 255.0

    def _with_index_column(position, boxes):
        # Column filled with the image's position, one entry per label row.
        index_column = tf.expand_dims(tf.ones(boxes.shape[0]) * position, axis=-1)
        return tf.concat([index_column, boxes], axis=-1)

    tagged_labels = [
        _with_index_column(position, boxes) for position, boxes in enumerate(labels)
    ]
    return stacked_images, tf.concat(tagged_labels, axis=0)


def batch(dataset, batch_size):
    """
    Yield lists of consecutive item positions of ``dataset``, ``batch_size`` at a time.

    Every item must unpack into exactly two values, but only the item's
    running index (from ``enumerate``) is collected -- the values themselves
    are not part of the output. A shorter final list is yielded when the
    number of items is not a multiple of ``batch_size``.

    NOTE(review): yielding indices rather than the items looks intentional
    for an index sampler, but confirm with the caller.
    """
    pending = []
    for position, (_image, _label) in enumerate(dataset):
        pending.append(position)
        if len(pending) != batch_size:
            continue
        yield pending
        pending = []
    if len(pending) > 0:
        yield pending


def tf_dataloader(config_filepath, batch_size=2, train_repeats=100, validation_repeats=10):
    """
    Build the training and validation ``tf.data`` datasets described by a config.

    Parameters
    ----------
    config_filepath : mapping
        Despite the name, this is indexed like a dict and must provide
        ``"INPUT_H"``, ``"INPUT_W"`` and ``"annotation_path"`` (the JSON label
        file handed to ``read_label_json``).
    batch_size : int, optional
        Images per batch passed to ``annotation_generator`` (default 2).
    train_repeats : int, optional
        How many times the training annotation list is repeated before being
        fed to the generator (default 100).
    validation_repeats : int, optional
        Same for the validation annotation list (default 10).

    Returns
    -------
    tuple
        ``(train_dataset, validation_dataset)``, each created with
        ``tf.data.Dataset.from_generator`` and declared as a pair of
        ``tf.float32`` tensors.
    """
    # Get values from the config
    input_height = config_filepath["INPUT_H"]
    input_width = config_filepath["INPUT_W"]
    annotations = read_label_json(config_filepath["annotation_path"])

    def _make_dataset(split_name, repeats):
        # The lambda is re-invoked on every pass over the dataset, so each
        # pass receives a freshly repeated (and then reshuffled) list.
        return tf.data.Dataset.from_generator(
            lambda: annotation_generator(
                annotations["annotations"][split_name] * repeats,
                input_width,
                input_height,
                batch_size,
            ),
            (tf.float32, tf.float32),
        )

    train_dataset = _make_dataset("train", train_repeats)
    validation_dataset = _make_dataset("validation", validation_repeats)
    return train_dataset, validation_dataset


def tf_dataloader_v2(label_file_path, image_height, image_width, batch_size=2):
    """
    Build a ``tf.data`` dataset from a label file, or return ``None`` without one.

    Parameters
    ----------
    label_file_path : str or None
        JSON label file read with ``read_label_json``. A falsy value
        (``None``, ``""``) disables loading.
    image_height, image_width : int
        Target size forwarded to ``annotation_generator``.
    batch_size : int, optional
        Images per batch forwarded to ``annotation_generator`` (default 2,
        the value ``tf_dataloader`` uses).

    Returns
    -------
    tf.data.Dataset or None
        Dataset declared as a pair of ``tf.float32`` tensors, or ``None`` when
        ``label_file_path`` is falsy.
    """
    if not label_file_path:
        return None

    annotations = read_label_json(label_file_path)
    # NOTE(review): the loaded JSON is handed to the generator as-is, so the
    # file is assumed to hold a flat list of annotation records -- confirm.
    return tf.data.Dataset.from_generator(
        # annotation_generator takes (annotations, width, height, batch_size).
        lambda: annotation_generator(annotations, image_width, image_height, batch_size),
        (tf.float32, tf.float32),
    )


In [11]:
# Load the run configuration. The path is relative, so it resolves against the
# working directory set by the os.chdir cell above.
with open('cfg/config.json', 'r') as f:
    cfg = json.load(f)

In [12]:
# Build the (train, validation) datasets from the loaded config dict.
t, v = tf_dataloader(cfg)

In [17]:
# Bare attribute reference: this displays the bound method, it does not call it.
t.__len__

<bound method DatasetV2.__len__ of <FlatMapDataset shapes: (<unknown>, <unknown>), types: (tf.float32, tf.float32)>>

In [206]:
# Walk the whole training dataset (no limit on the number of batches),
# printing each image batch's shape and the full label tensor.
for i, l in t:
    print(i.shape)
    print(l)

(2, 416, 416, 3)
tf.Tensor(
[[0.         0.50961536 0.5168269  0.46875    0.5769231  0.        ]
 [1.         0.50961536 0.5168269  0.46875    0.5769231  0.        ]], shape=(2, 6), dtype=float32)
(2, 416, 416, 3)
tf.Tensor(
[[0.         0.50961536 0.5168269  0.46875    0.5769231  0.        ]
 [1.         0.50961536 0.5168269  0.46875    0.5769231  0.        ]], shape=(2, 6), dtype=float32)
(2, 416, 416, 3)
tf.Tensor(
[[0.         0.37740386 0.5168269  0.3966346  0.5769231  0.        ]
 [0.         0.7139423  0.53365386 0.3846154  0.59134614 0.        ]
 [1.         0.37740386 0.5168269  0.3966346  0.5769231  0.        ]
 [1.         0.7139423  0.53365386 0.3846154  0.59134614 0.        ]], shape=(4, 6), dtype=float32)
(2, 416, 416, 3)
tf.Tensor(
[[0.         0.50961536 0.5168269  0.46875    0.5769231  0.        ]
 [1.         0.37740386 0.5168269  0.3966346  0.5769231  0.        ]
 [1.         0.7139423  0.53365386 0.3846154  0.59134614 0.        ]], shape=(3, 6), dtype=float32)
(2, 4

(2, 416, 416, 3)
tf.Tensor(
[[0.         0.50961536 0.5168269  0.46875    0.5769231  0.        ]
 [1.         0.37740386 0.5168269  0.3966346  0.5769231  0.        ]
 [1.         0.7139423  0.53365386 0.3846154  0.59134614 0.        ]], shape=(3, 6), dtype=float32)
(2, 416, 416, 3)
tf.Tensor(
[[0.         0.50961536 0.5168269  0.46875    0.5769231  0.        ]
 [1.         0.37740386 0.5168269  0.3966346  0.5769231  0.        ]
 [1.         0.7139423  0.53365386 0.3846154  0.59134614 0.        ]], shape=(3, 6), dtype=float32)
(2, 416, 416, 3)
tf.Tensor(
[[0.         0.50961536 0.5168269  0.46875    0.5769231  0.        ]
 [1.         0.37740386 0.5168269  0.3966346  0.5769231  0.        ]
 [1.         0.7139423  0.53365386 0.3846154  0.59134614 0.        ]], shape=(3, 6), dtype=float32)
(2, 416, 416, 3)
tf.Tensor(
[[0.         0.37740386 0.5168269  0.3966346  0.5769231  0.        ]
 [0.         0.7139423  0.53365386 0.3846154  0.59134614 0.        ]
 [1.         0.50961536 0.5168269  0

(2, 416, 416, 3)
tf.Tensor(
[[0.         0.50961536 0.5168269  0.46875    0.5769231  0.        ]
 [1.         0.37740386 0.5168269  0.3966346  0.5769231  0.        ]
 [1.         0.7139423  0.53365386 0.3846154  0.59134614 0.        ]], shape=(3, 6), dtype=float32)
(2, 416, 416, 3)
tf.Tensor(
[[0.         0.37740386 0.5168269  0.3966346  0.5769231  0.        ]
 [0.         0.7139423  0.53365386 0.3846154  0.59134614 0.        ]
 [1.         0.50961536 0.5168269  0.46875    0.5769231  0.        ]], shape=(3, 6), dtype=float32)
(2, 416, 416, 3)
tf.Tensor(
[[0.         0.50961536 0.5168269  0.46875    0.5769231  0.        ]
 [1.         0.50961536 0.5168269  0.46875    0.5769231  0.        ]], shape=(2, 6), dtype=float32)
(2, 416, 416, 3)
tf.Tensor(
[[0.         0.50961536 0.5168269  0.46875    0.5769231  0.        ]
 [1.         0.37740386 0.5168269  0.3966346  0.5769231  0.        ]
 [1.         0.7139423  0.53365386 0.3846154  0.59134614 0.        ]], shape=(3, 6), dtype=float32)
(2, 4