## Data Loader

In [42]:
import tensorflow as tf
import json
import os
import logging  # To log errors or missing files
from collections import namedtuple

In [43]:
# configures basic logging to print informational messages
logging.basicConfig(level=logging.INFO)

In [44]:
annotation_file = "D:/Projects/DL/MonumentDetection/DL/keras_cv/coco-nyaptola/result.json"
image_folder = "D:/Projects/DL/MonumentDetection/DL/keras_cv/coco-nyaptola/images"

# Convert to absolute paths for reliable file access
annotation_file = os.path.abspath(annotation_file)
image_folder = os.path.abspath(image_folder)


In [45]:
ImageData = namedtuple('ImageData',['image','target'])

In [55]:
img_path_str = ""

In [56]:
# Load annotations from a COCO JSON file
def load_annotations(annotation_file):
    with open(annotation_file, 'r') as f:
        annotations = json.load(f) #loaded annotations dictionary
    return annotations

In [60]:
def load_image_and_annotations(image_path, annotations):
    # Load an image and its corresponding bounding boxes and classes from COCO annotations.
    # image_path is a tensor and annotations is a dictionary
    img_path_str = ""
    try:
        # Convert Tensor to Python string for file operations
        if isinstance(image_path, tf.Tensor):
            image_path_str = tf.strings.as_string(image_path).numpy().decode("utf-8")
        else:
            raise ValueError("Expected Tensorflow tensor for image path.")

        # Read the image file
        image = tf.io.decode_jpeg(tf.io.read_file(image_path_str), channels=3)

        # Get the image info from the annotations
        image_info = next(
            (img for img in annotations['images'] if img['file_name'] == os.path.basename(image_path_str)),
            None
        )

        if not image_info:
            raise ValueError(f"Image '{image_path_str}' not found in annotations.")

        image_id = image_info['id']

        # Get the bounding boxes and classes for this image
        bboxes = [
            ann['bbox'] for ann in annotations['annotations']
            if ann['image_id'] == image_id
        ]
        classes = [
            ann['category_id'] for ann in annotations['annotations']
            if ann['image_id'] == image_id
        ]

        return image, {'boxes': bboxes, 'classes': classes}

    except Exception as e:
        # Log the error and return None
        logging.warning(f"Error loading image ': {e}")
        return tf.zeros([1, 1, 3], dtype=tf.uint8), {'boxes': [], 'classes': []}  # Return default values



In [73]:
def data_loader(annotation_file, image_folder):

    # Create a tf.data.Dataset with images, bounding boxes, and classes from a COCO-format dataset.
    annotations = load_annotations(annotation_file)

    # Create a list of image file paths
    image_files = [
        tf.convert_to_tensor(os.path.join(image_folder, img['file_name']), dtype=tf.string)
        for img in annotations['images']
    ]

    # Convert image file paths to TensorFlow-compatible tensors
    image_files = [tf.convert_to_tensor(img, dtype=tf.string) for img in image_files]

    # Function to load image data along with annotations, with error handling
    def load_data(image_file):
        image, target = load_image_and_annotations(image_file, annotations)
        
        if image is None:  # If image loading failed
            return None, {'boxes': tf.zeros([0, 4], dtype=tf.float32), 'classes': tf.zeros([0], dtype=tf.int32)}

        # Convert bounding boxes and classes to TensorFlow-compatible formats
        bboxes = tf.convert_to_tensor(target['boxes'], dtype=tf.float32)
        classes = tf.convert_to_tensor(target['classes'], dtype=tf.int32)

        return image, {'boxes': bboxes, 'classes': classes}

    # Map function with error handling

    # def map_function(image_file):
    #     image, bboxes, classes = tf.py_function(
    #         load_data,
    #         [image_file],
    #         Tout=(tf.uint8, tf.float32, tf.int32),
    #     )

    #     return image, bboxes, classes

    dataset = tf.data.Dataset.from_tensor_slices(image_files).map(
        load_data)
        
    def filter_data(image, bounding_boxes, dataset):
      return image is not None

    dataset = dataset.filter(lambda image, bounding_boxes: filter_data(image, bounding_boxes, dataset))

    return dataset


In [74]:
train_dataset = data_loader(annotation_file, image_folder)



In [75]:
# prints the type of train_dataset. 
print(type(train_dataset))  # Should be <class 'tensorflow.python.data.ops.dataset_ops.DatasetV2'>
print(train_dataset.element_spec)  # Should reflect the expected structure of elements


<class 'tensorflow.python.data.ops.filter_op._FilterDataset'>
(TensorSpec(shape=(1, 1, 3), dtype=tf.uint8, name=None), {'boxes': TensorSpec(shape=(0,), dtype=tf.float32, name=None), 'classes': TensorSpec(shape=(0,), dtype=tf.int32, name=None)})


In [76]:
for data in train_dataset.take(5):  # Adjust the number of samples
    image, target = data
    print("Image shape:", image.shape)  # Check the image shape and other attributes
    print("target:", target)  # Check bounding boxes and classes


Image shape: (1, 1, 3)
target: {'boxes': <tf.Tensor: shape=(0,), dtype=float32, numpy=array([], dtype=float32)>, 'classes': <tf.Tensor: shape=(0,), dtype=int32, numpy=array([], dtype=int32)>}
Image shape: (1, 1, 3)
target: {'boxes': <tf.Tensor: shape=(0,), dtype=float32, numpy=array([], dtype=float32)>, 'classes': <tf.Tensor: shape=(0,), dtype=int32, numpy=array([], dtype=int32)>}
Image shape: (1, 1, 3)
target: {'boxes': <tf.Tensor: shape=(0,), dtype=float32, numpy=array([], dtype=float32)>, 'classes': <tf.Tensor: shape=(0,), dtype=int32, numpy=array([], dtype=int32)>}
Image shape: (1, 1, 3)
target: {'boxes': <tf.Tensor: shape=(0,), dtype=float32, numpy=array([], dtype=float32)>, 'classes': <tf.Tensor: shape=(0,), dtype=int32, numpy=array([], dtype=int32)>}
Image shape: (1, 1, 3)
target: {'boxes': <tf.Tensor: shape=(0,), dtype=float32, numpy=array([], dtype=float32)>, 'classes': <tf.Tensor: shape=(0,), dtype=int32, numpy=array([], dtype=int32)>}
