## Data Loader

In [4]:
import tensorflow as tf
import json
import os
import logging  # To log errors or missing files
from collections import namedtuple
import numpy

In [5]:
# configures basic logging to print informational messages
logging.basicConfig(level=logging.INFO)

In [6]:
annotation_file = "annotations.json"
image_folder = "coco-nyaptola/images"

# Convert to absolute paths for reliable file access
annotation_file = os.path.abspath(annotation_file)
image_folder = os.path.abspath(image_folder)


In [7]:
ImageData = namedtuple('ImageData',['image','target'])

In [8]:
img_path_str = ""

In [9]:
# Load annotations from a COCO JSON file
def load_annotations(annotation_file):
    with open(annotation_file, 'r') as f:
        annotations = json.load(f) #loaded annotations dictionary
    return annotations

In [10]:
def load_image_and_annotations(image_path, annotations):
    # Initialize an empty image path string
    image_path_str = ""

    try:
        # Ensure that the input is a TensorFlow tensor
        if isinstance(image_path, tf.Tensor):
            # Convert Tensor to Python string for file operations
            image_path_str = image_path.numpy().decode("utf-8")
        else:
            raise ValueError("Expected TensorFlow tensor for image path.")

        # Read the image from the file path
        image = tf.io.decode_png(tf.io.read_file(image_path_str), channels=3)

        # Get the image information from the annotations
        image_info = next(
            (img for img in annotations['images'] if os.path.basename(image_path_str) in img.values()),
            None
        )

        if not image_info:
            raise ValueError(f"Image '{image_path_str}' not found in annotations.")

        image_id = image_info['id']

        # Get bounding boxes and classes for the image
        bboxes = [
            ann['bbox'] for ann in annotations['annotations']
            if ann['image_id'] == image_id
        ]
        classes = [
            ann['category_id'] for ann in annotations['annotations']
            if ann['image_id'] == image_id
        ]

        return image, {'boxes': bboxes, 'classes': classes}

    except Exception as e:
        # Log the error and return default values
        logging.warning(f"Error loading image: {e}")
        return tf.zeros([1, 1, 3], dtype=tf.uint8), {'boxes': [], 'classes': []}

In [11]:
def data_loader(annotation_file, image_folder):

    # Create a tf.data.Dataset with images, bounding boxes, and classes from a COCO-format dataset.
    annotations = load_annotations(annotation_file)

    # Create a list of image file paths
    image_files = [
        tf.convert_to_tensor(os.path.join(image_folder, str(img['image_id'])), dtype=tf.string)
        for img in annotations
    ]

    # Convert image file paths to TensorFlow-compatible tensors
    image_files = [tf.convert_to_tensor(img, dtype=tf.string) for img in image_files]

    # Function to load image data along with annotations, with error handling
    def load_data(image_file):
        image, target = load_image_and_annotations(image_file, annotations)
        
        if image is None:  # If image loading failed
            return None, {'boxes': tf.zeros([0, 4], dtype=tf.float32), 'classes': tf.zeros([0], dtype=tf.int32)}

        # Convert bounding boxes and classes to TensorFlow-compatible formats
        bboxes = tf.convert_to_tensor(target['boxes'], dtype=tf.float32)
        classes = tf.convert_to_tensor(target['classes'], dtype=tf.int32)

        return image, {'boxes': bboxes, 'classes': classes}

    # Map function with error handling

    # def map_function(image_file):
    #     image, bboxes, classes = tf.py_function(
    #         load_data,
    #         [image_file],
    #         Tout=(tf.uint8, tf.float32, tf.int32),
    #     )

    #     return image, bboxes, classes

    dataset = tf.data.Dataset.from_tensor_slices(image_files).map(
        load_data)
        
    def filter_data(image, bounding_boxes, dataset):
      return image is not None

    dataset = dataset.filter(lambda image, bounding_boxes: filter_data(image, bounding_boxes, dataset))

    return dataset


In [12]:
train_dataset = data_loader(annotation_file, image_folder)

Cause: Unable to locate the source code of <function normcase at 0x000002786C04C680>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: could not get source code


Cause: Unable to locate the source code of <function normcase at 0x000002786C04C680>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: could not get source code


Cause: Unable to locate the source code of <function normcase at 0x000002786C04C680>. Note that functions defined in certain environments, like the interactive Python shell, do not expose their source code. If that is the case, you should define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.experimental.do_not_convert. Original error: could not get source code
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method


In [13]:
# prints the type of train_dataset. 
print(type(train_dataset))  # Should be <class 'tensorflow.python.data.ops.dataset_ops.DatasetV2'>
print(train_dataset.element_spec)  # Should reflect the expected structure of elements


<class 'tensorflow.python.data.ops.filter_op._FilterDataset'>
(TensorSpec(shape=(1, 1, 3), dtype=tf.uint8, name=None), {'boxes': TensorSpec(shape=(0,), dtype=tf.float32, name=None), 'classes': TensorSpec(shape=(0,), dtype=tf.int32, name=None)})


In [14]:
for data in train_dataset.take(5):  # Adjust the number of samples
    image, target = data
    print("Image shape:", image.shape)  # Check the image shape and other attributes
    print("target:", target)  # Check bounding boxes and classes


Image shape: (1, 1, 3)
target: {'boxes': <tf.Tensor: shape=(0,), dtype=float32, numpy=array([], dtype=float32)>, 'classes': <tf.Tensor: shape=(0,), dtype=int32, numpy=array([], dtype=int32)>}
Image shape: (1, 1, 3)
target: {'boxes': <tf.Tensor: shape=(0,), dtype=float32, numpy=array([], dtype=float32)>, 'classes': <tf.Tensor: shape=(0,), dtype=int32, numpy=array([], dtype=int32)>}
Image shape: (1, 1, 3)
target: {'boxes': <tf.Tensor: shape=(0,), dtype=float32, numpy=array([], dtype=float32)>, 'classes': <tf.Tensor: shape=(0,), dtype=int32, numpy=array([], dtype=int32)>}
Image shape: (1, 1, 3)
target: {'boxes': <tf.Tensor: shape=(0,), dtype=float32, numpy=array([], dtype=float32)>, 'classes': <tf.Tensor: shape=(0,), dtype=int32, numpy=array([], dtype=int32)>}
Image shape: (1, 1, 3)
target: {'boxes': <tf.Tensor: shape=(0,), dtype=float32, numpy=array([], dtype=float32)>, 'classes': <tf.Tensor: shape=(0,), dtype=int32, numpy=array([], dtype=int32)>}


## Data Visualization

In [15]:
from keras_cv import visualization

In [22]:
type(train_dataset)

tensorflow.python.data.ops.filter_op._FilterDataset

In [16]:
def visualize_dataset(inputs, value_range, rows, cols, bounding_box_format):
    inputs = next(iter(inputs.take(1)))
    images, bounding_boxes = inputs["images"], inputs["bounding_boxes"]
    visualization.plot_bounding_box_gallery(
        images,
        value_range=value_range,
        rows=rows,
        cols=cols,
        y_true=bounding_boxes,
        scale=5,
        font_scale=0.7,
        bounding_box_format=bounding_box_format,
        class_mapping=class_mapping,
    )

In [21]:
def unpackage_raw_tfds_inputs(inputs, bounding_box_format):
    image = inputs["image"]
    type(inputs)
    boxes = keras_cv.bounding_box.convert_format(
        inputs["objects"]["bbox"],
        images=image,
        source="rel_yxyx",
        target=bounding_box_format,
    )
    bounding_boxes = {
        "classes": inputs["objects"]["label"],
        "boxes": boxes,
    }
    return {"images": image, "bounding_boxes": bounding_boxes}

In [18]:
BATCH_SIZE = 4

In [19]:
# Assuming your_data is your JSON list
visualized_data = visualize_dataset(train_dataset, value_range=None, rows=50, cols=5, bounding_box_format="xywh")

TypeError: tuple indices must be integers or slices, not str