In [127]:
import os
import xmltodict
from dicttoxml import dicttoxml

In [179]:
import numpy as np
from scipy.optimize import linear_sum_assignment
from pathlib import Path

def euclidean_distance(point1, point2):
    """Calculate the Euclidean distance between two points."""
    return ((point1[0] - point2[0])**2 + (point1[1] - point2[1])**2)**0.5

def closest_pairs_hungarian(a, b):
    """Connect each body from list a to its closest tail from list b using the Hungarian algorithm."""
    num_bodies = len(a)
    num_tails = len(b)
    
    # Create a matrix to store the distances between every body and tail
    cost_matrix = np.zeros((num_bodies, num_tails))
    
    for i, body in enumerate(a):
        for j, tail in enumerate(b):
            cost_matrix[i][j] = euclidean_distance(body, tail)
    
    # Use the Hungarian algorithm to find the optimal assignment
    row_indices, col_indices = linear_sum_assignment(cost_matrix)
    
    # Convert the result to a list of pairs
    pairings = list(zip(row_indices, col_indices))
    
    return pairings

def merge_bounding_boxes(box1, box2):
    """Merge two bounding boxes into one."""
    # Extract coordinates
    x1a, y1a, x2a, y2a = box1
    x1b, y1b, x2b, y2b = box2
    
    # New top-left coordinates will be the minimum of the x1 and y1 coordinates of both boxes
    x1 = min(x1a, x1b)
    y1 = min(y1a, y1b)
    
    # New bottom-right coordinates will be the maximum of the x2 and y2 coordinates of both boxes
    x2 = max(x2a, x2b)
    y2 = max(y2a, y2b)
    
    return (x1, y1, x2, y2)

def voc_to_yolo(voc_dict, class_mapping):
    """Convert Pascal VOC format dictionary to YOLO format strings."""
    yolo_data = []
    
    img_width = int(voc_dict['annotation']['size']['width'])
    img_height = int(voc_dict['annotation']['size']['height'])

    for obj in voc_dict['annotation']['object']:
        # Extract coordinates from the VOC format
        xmin = float(obj['bndbox']['xmin'])
        ymin = float(obj['bndbox']['ymin'])
        xmax = float(obj['bndbox']['xmax'])
        ymax = float(obj['bndbox']['ymax'])

        # Convert to YOLO format
        x_center = (xmin + xmax) / 2
        y_center = (ymin + ymax) / 2
        width = xmax - xmin
        height = ymax - ymin

        # Normalize the coordinates
        x_center /= img_width
        y_center /= img_height
        width /= img_width
        height /= img_height

        class_id = class_mapping[obj['name']]
        yolo_data.append(f"{class_id} {x_center} {y_center} {width} {height}")

    return yolo_data

def merge_boxes(file_dir, filepath, save_dir):
	read_path = Path(file_dir) / filepath
	xml_file = open(read_path,"r")
	xml_string = xml_file.read()
	python_dict = xmltodict.parse(xml_string)

	tails = []
	bodies = []

	if type(python_dict['annotation']['object']) == dict: 
		python_dict['annotation']['object'] = list([python_dict['annotation']['object']])

	for obj_id, obj in enumerate(python_dict['annotation']['object']):
		center = [(int(obj['bndbox']['xmax']) + int(obj['bndbox']['xmin'])) / 2, (int(obj['bndbox']['ymax']) + int(obj['bndbox']['ymin'])) / 2]
		if obj['name'] == 'rat':
			bodies.append([obj_id, center])
		else:
			tails.append([obj_id, center])

	# Example
	bodies = np.array(bodies)
	tails = np.array(tails)

	used = []
	final = []
	if len(bodies) > 0 and len(tails) > 0:
		pairings = np.array(closest_pairs_hungarian(
			np.stack(bodies[:, 1]),
			np.stack(tails[:, 1])
		))

		for i, (a, b) in enumerate(zip(np.stack(bodies[pairings[:, 0], 0]), np.stack(tails[pairings[:, 1], 0]))):
			used.append(a)
			used.append(b)

			# bodies[i]

			a_bbox = list(map(float, python_dict['annotation']['object'][a]['bndbox'].values()))
			b_bbox = list(map(float, python_dict['annotation']['object'][b]['bndbox'].values()))

			c_bbox = merge_bounding_boxes(a_bbox, b_bbox)

			final.append({
				'name': 'rat',
				'pose': 'Unspecified',
				'truncated': '0',
				'difficult': '0',
				'bndbox': {'xmin': c_bbox[0], 'ymin': c_bbox[1], 'xmax': c_bbox[2], 'ymax': c_bbox[3]}
			})

		used = list(sorted(used))[::-1]

	for obj_id, obj in enumerate(python_dict['annotation']['object']):
		if len(used) > 0 and obj_id >= used[-1]:
			used.pop()
			continue
		
		final.append({**obj, 'name': 'rat'})
	python_dict['annotation']['object'] = final

	yolo_format = voc_to_yolo(python_dict, class_mapping = {'rat': 0})
	
	write_file_name = python_dict['annotation']['filename'].rstrip('.jpg') + '.txt'

	my_file = open(Path(save_dir) / write_file_name, 'w')
	my_file.write("\n".join(yolo_format))
	my_file.close()

In [180]:
for filepath in list(sorted(os.listdir('/app/data'))):
	if filepath[-3:] == 'xml':
		merge_boxes('/app/data', filepath, '/app/rats/labels/')

  bodies = np.array(bodies)
  tails = np.array(tails)
