In [66]:
# Necessary imports for the EDA
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import seaborn as sns
import cv2
import plotly.express as px
import xml.etree.ElementTree as ET # Using this to parse the XML annotations file
from imblearn.over_sampling import RandomOverSampler # For balancing
from imblearn.under_sampling import RandomUnderSampler # For balancing

In [67]:
# Dataset locations used by the EDA. All four paths start with './', i.e. they
# are relative, so they only resolve when the notebook's working directory
# contains both dataset folders.
# NOTE(review): no existence check is done here -- confirm the datasets are
# checked out next to the notebook before running the cells that use these.

# BCCD dataset: image folder and the matching annotation folder.
bccd_images_dir = './BCCD_Dataset/BCCD/JPEGImages/'
bccd_annotations_dir = './BCCD_Dataset/BCCD/Annotations/'
# Complete-Blood-Cell-Count dataset, 'Training' folder: images and annotations.
cbc_images_dir = './Complete-Blood-Cell-Count-Dataset/Training/Images/'
cbc_annotations_dir = './Complete-Blood-Cell-Count-Dataset/Training/Annotations/'

In [79]:
def _require_child(parent, tag, annotation_path):
    '''Return the first `tag` element found under `parent`; raise ValueError if there is none.'''
    node = parent.find('.//' + tag)
    if node is None:
        raise ValueError(
            '{!r}: <{}> is missing its <{}> element'.format(annotation_path, parent.tag, tag)
        )
    return node


def _optional_text(parent, tag):
    '''Return the text of the first `tag` element under `parent`, or None if the element is absent.'''
    node = parent.find('.//' + tag)
    return None if node is None else node.text


def _to_int(text, default=None):
    '''Convert XML element text to int; blank/missing text gives `default`, float text is rounded.'''
    if text is None or not text.strip():
        return default
    try:
        return int(text)
    except ValueError:
        # Fall back for float-formatted values such as "12.0"; text that is not
        # numeric at all still raises ValueError from float().
        return int(round(float(text)))


def load_annotation(annotation_path):
    '''
    Load and parse an XML annotation file.

    This function reads an XML file, parses it, and extracts details about every
    <object> element found in it (at any depth below the root).

    Parameters:
    - annotation_path (str): The file path to the XML annotation file. Anything accepted by
      xml.etree.ElementTree.parse (a path or an open file object) works.

    Returns:
    - dict: A dictionary with a single key 'objects' holding a list with one dictionary per
      <object>, in document order. Each has the keys:
        - 'name' (str or None): text of <name>.
        - 'pose' (str or None): text of <pose>; None when the tag is absent or empty.
        - 'truncated' (int): value of <truncated>; 0 when the tag is absent or blank.
        - 'difficult' (int): value of <difficult>; 0 when the tag is absent or blank.
        - 'bbox' (dict): 'xmin', 'ymin', 'xmax', 'ymax' as ints. Float-formatted
          coordinates are rounded to the nearest integer.
      A file without any <object> yields {'objects': []}.

    Raises:
    - ValueError: If an <object> has no <name> or <bndbox>, if a bounding box lacks one of
      its four coordinates (or the coordinate is blank), or if a numeric field holds
      non-numeric text.
    - xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    '''

    tree = ET.parse(annotation_path)
    root = tree.getroot()

    annotations = {
        'objects': []
    }

    for obj in root.findall('.//object'):
        # <name> and <bndbox> are required: an object without a label or a box is
        # unusable, so fail loudly with the file and tag in the message.
        obj_name = _require_child(obj, 'name', annotation_path).text
        bndbox = _require_child(obj, 'bndbox', annotation_path)

        # <pose>, <truncated> and <difficult> are treated as optional metadata so
        # that annotation files omitting them can still be loaded.
        obj_pose = _optional_text(obj, 'pose')
        obj_truncated = _to_int(_optional_text(obj, 'truncated'), default=0)
        obj_difficult = _to_int(_optional_text(obj, 'difficult'), default=0)

        bbox = {}
        for key in ('xmin', 'ymin', 'xmax', 'ymax'):
            value = _to_int(_require_child(bndbox, key, annotation_path).text)
            if value is None:
                raise ValueError(
                    '{!r}: <{}> of a bounding box is empty'.format(annotation_path, key)
                )
            bbox[key] = value

        annotations['objects'].append({
            'name': obj_name,
            'pose': obj_pose,
            'truncated': obj_truncated,
            'difficult': obj_difficult,
            'bbox': bbox
        })

    return annotations