## Data Loader

In [4]:
import tensorflow as tf
import json
import os
import logging  # To log errors or missing files
from collections import namedtuple
import numpy

In [5]:
# configures basic logging to print informational messages
logging.basicConfig(level=logging.INFO)

In [6]:
annotation_file = "annotations.json"
image_folder = "coco-nyaptola/images"

# Convert to absolute paths for reliable file access
annotation_file = os.path.abspath(annotation_file)
image_folder = os.path.abspath(image_folder)


In [7]:
ImageData = namedtuple('ImageData',['image','target'])

In [8]:
img_path_str = ""

In [9]:
# Load annotations from a COCO JSON file
def load_annotations(annotation_file):
    with open(annotation_file, 'r') as f:
        annotations = json.load(f) #loaded annotations dictionary
    return annotations

In [10]:
def load_image_and_annotations(image_path, annotations):
    # Initialize an empty image path string
    image_path_str = ""

    try:
        # Ensure that the input is a TensorFlow tensor
        if isinstance(image_path, tf.Tensor):
            # Convert Tensor to Python string for file operations
            image_path_str = image_path.numpy().decode("utf-8")
        else:
            raise ValueError("Expected TensorFlow tensor for image path.")

        # Read the image from the file path
        image = tf.io.decode_png(tf.io.read_file(image_path_str), channels=3)

        # Get the image information from the annotations
        image_info = next(
            (img for img in annotations['images'] if os.path.basename(image_path_str) in img.values()),
            None
        )

        if not image_info:
            raise ValueError(f"Image '{image_path_str}' not found in annotations.")

        image_id = image_info['id']

        # Get bounding boxes and classes for the image
        bboxes = [
            ann['bbox'] for ann in annotations['annotations']
            if ann['image_id'] == image_id
        ]
        classes = [
            ann['category_id'] for ann in annotations['annotations']
            if ann['image_id'] == image_id
        ]

        return image, {'boxes': bboxes, 'classes': classes}

    except Exception as e:
        # Log the error and return default values
        logging.warning(f"Error loading image: {e}")
        return tf.zeros([1, 1, 3], dtype=tf.uint8), {'boxes': [], 'classes': []}

In [77]:
def data_loader(annotation_file, image_folder):
    # Load annotations from COCO-format JSON file
    annotations = load_annotations(annotation_file)

    # List of image file paths from annotations
    image_files = [
        tf.convert_to_tensor(os.path.join(image_folder, img[0]), dtype=tf.string)
        for img in annotations[0]
    ]

    def load_data(image_file):
        image, target = load_image_and_annotations(image_file, annotations)

        # Ensure bounding boxes and classes have expected shapes
        bboxes = target['boxes']
        classes = target['classes']

        if len(bboxes) == 0:
            bboxes = tf.zeros((1, 4), dtype=tf.float32)  # Safe empty bounding box
        if len(classes) == 0:
            classes = tf.zeros((1,), dtype=tf.int32)  # Safe empty class array

        return image, {'boxes': bboxes, 'classes': classes}

    # Create a dataset with bounding boxes and classes
    dataset = tf.data.Dataset.from_tensor_slices(image_files).map(load_data)

    # Add debugging information to ensure dataset is correct
    for data in dataset.take(5):
        image, target = data
        print("Image shape:", image.shape)  # Verify image shapes
        print("Bounding boxes shape:", target['boxes'].shape)
        print("Classes shape:", target['classes'].shape)

    return dataset


In [78]:
annotations = load_annotations("annotations.json")

print("Images in annotations:", len(annotations[0]))  # Ensure expected number of images
print("Annotations count:", len(annotations[1]))  # Ensure expected number of annotations


Images in annotations: 3
Annotations count: 3


In [79]:
train_dataset = data_loader(annotation_file, image_folder)

TypeError: string indices must be integers, not 'str'

In [75]:
# prints the type of train_dataset. 
print(type(train_dataset))  # Should be <class 'tensorflow.python.data.ops.dataset_ops.DatasetV2'>
print(train_dataset.element_spec)  # Should reflect the expected structure of elements


<class 'tensorflow.python.data.ops.filter_op._FilterDataset'>
(TensorSpec(shape=(1, 1, 3), dtype=tf.uint8, name=None), {'boxes': TensorSpec(shape=(0, 4), dtype=tf.float32, name=None), 'classes': TensorSpec(shape=(0,), dtype=tf.int32, name=None)})


In [56]:
for data in train_dataset.take(5):  # Adjust the number of samples
    image, target = data
    print("Image shape:", image.shape)  # Check the image shape and other attributes
    print("target:", target)  # Check bounding boxes and classes


Image shape: (1, 1, 3)
target: {'boxes': <tf.Tensor: shape=(0, 4), dtype=float32, numpy=array([], shape=(0, 4), dtype=float32)>, 'classes': <tf.Tensor: shape=(0,), dtype=int32, numpy=array([], dtype=int32)>}
Image shape: (1, 1, 3)
target: {'boxes': <tf.Tensor: shape=(0, 4), dtype=float32, numpy=array([], shape=(0, 4), dtype=float32)>, 'classes': <tf.Tensor: shape=(0,), dtype=int32, numpy=array([], dtype=int32)>}
Image shape: (1, 1, 3)
target: {'boxes': <tf.Tensor: shape=(0, 4), dtype=float32, numpy=array([], shape=(0, 4), dtype=float32)>, 'classes': <tf.Tensor: shape=(0,), dtype=int32, numpy=array([], dtype=int32)>}
Image shape: (1, 1, 3)
target: {'boxes': <tf.Tensor: shape=(0, 4), dtype=float32, numpy=array([], shape=(0, 4), dtype=float32)>, 'classes': <tf.Tensor: shape=(0,), dtype=int32, numpy=array([], dtype=int32)>}
Image shape: (1, 1, 3)
target: {'boxes': <tf.Tensor: shape=(0, 4), dtype=float32, numpy=array([], shape=(0, 4), dtype=float32)>, 'classes': <tf.Tensor: shape=(0,), dtyp

## Data Visualization and Batching

In [57]:
import numpy
import keras
import keras_cv

In [58]:
type(train_dataset)

tensorflow.python.data.ops.filter_op._FilterDataset

In [59]:
# Length of json files, hardcoded for now
length = 100
SPLIT_RATIO = 0.8

In [60]:
# Determine the number of validation samples
num_val = int(length * SPLIT_RATIO)
 
# Split the dataset into train and validation sets
val_data = train_dataset.take(num_val)
train_data = train_dataset.skip(num_val)

In [64]:
def visualize_dataset(inputs, value_range, rows, cols, bounding_box_format, class_mapping):
    # Extract a single batch from the dataset
    inputs = next(iter(inputs.take(1)))

    image = inputs[0]
    bounding_box_info = inputs[1]

    # Convert to NumPy arrays
    bounding_boxes = bounding_box_info["boxes"].numpy()
    classes = bounding_box_info["classes"].numpy()

    # Ensure the image is a batch of images
    if len(image.shape) == 3:  # If it's a single image
        image = tf.expand_dims(image, axis=0)  # Convert to a batch of 1

    bounding_box_data = {"boxes": bounding_boxes, "classes": classes}

    # Visualization function call with the corrected shape
    visualization.plot_bounding_box_gallery(
        image.numpy(),  # Convert image to NumPy with proper shape
        value_range=value_range,
        rows=rows,
        cols=cols,
        y_true=bounding_box_data,  # Pass bounding box data
        scale=5,
        font_scale=0.7,
        bounding_box_format=bounding_box_format,
        class_mapping=class_mapping,
    )


In [65]:
class_mapping = { 0: "nyatapola" }

In [70]:
visualize_dataset(
    train_data, bounding_box_format="xyxy", value_range=(0, 255), rows=2, cols=2, class_mapping = class_mapping
)

IndexError: index 0 is out of bounds for axis 0 with size 0