In [1]:
import re
from functools import partial
from typing import Optional, Tuple

import xml.etree.ElementTree as ET
from zss import Node, simple_distance
from zss.compare import Operation


def strip_ns(tag: str) -> str:
    """Return ``tag`` without its leading ``{namespace}`` qualifier.

    Everything up to and including the last ``}`` is discarded; a tag that
    contains no ``}`` is returned unchanged.
    """
    _, _, local_name = tag.rpartition('}')
    return local_name


def xml_to_zss(xml: ET.Element, strip: bool = True) -> Node:
    """Convert an ElementTree element and its whole subtree into a zss ``Node`` tree.

    Every node is labelled ``"<tag>:<name>:<type>"`` where ``<name>`` is the
    element's ``name`` attribute and ``<type>`` its XMI ``type`` attribute
    (each an empty string when absent).

    Args:
        xml: Root element of the subtree to convert.
        strip: When true, the ``{namespace}`` qualifier is removed from every
            tag in the subtree; when false, tags are used as ElementTree
            reports them.

    Returns:
        A ``Node`` whose children mirror the element's children in document order.
    """
    tag = strip_ns(xml.tag) if strip else xml.tag

    # ElementTree stores a prefixed attribute such as xmi:type under its
    # expanded name '{namespace-uri}type', so the literal key 'xmi:type' is
    # absent from a parsed document. Try the literal key first (elements built
    # by hand), then fall back to a namespace-qualified 'type' attribute.
    # NOTE(review): the fallback assumes the XMI namespace URI contains "xmi"
    # (case-insensitive), as the OMG URIs do - confirm for your inputs.
    xmi_type = xml.attrib.get('xmi:type')
    if xmi_type is None:
        xmi_type = next(
            (value for key, value in xml.attrib.items()
             if key.endswith('}type') and 'xmi' in key.lower()),
            '',
        )

    label = ':'.join([tag, xml.attrib.get('name', ''), xmi_type])
    # Pass `strip` down explicitly; recursing with the default would silently
    # strip namespaces below the root even when the caller asked not to.
    return Node(label, [xml_to_zss(child, strip=strip) for child in xml])


def xml_comp_metrics(
    file1: str,
    file2: str,
    strip: bool = True,
    ) -> Tuple[float, float, float, float]:
    """Compare two XML files by tree edit distance and tally the edit operations.

    Both files are parsed, converted to zss trees with ``xml_to_zss`` and
    compared with ``simple_distance``.

    Args:
        file1: Path of the first XML file.
        file2: Path of the second XML file.
        strip: Forwarded to ``xml_to_zss``.

    Returns:
        ``(dist, tp, fp, fn)`` where ``dist`` is the edit distance, ``tp`` the
        number of match operations, ``fp`` the number of insert plus update
        operations, and ``fn`` the number of remove plus update operations.
    """
    first_tree = ET.parse(file1)
    second_tree = ET.parse(file2)
    first_root, second_root = first_tree.getroot(), second_tree.getroot()

    to_node = partial(xml_to_zss, strip=strip)
    first_zss = to_node(first_root)
    second_zss = to_node(second_root)
    dist, ops = simple_distance(first_zss, second_zss, return_operations=True)

    tp = fp = fn = 0
    for op in ops:
        kind = op.type
        if kind == Operation.match:
            tp += 1
        elif kind == Operation.update:
            # An update is counted on both sides: once as a false positive
            # and once as a false negative.
            fp += 1
            fn += 1
        elif kind == Operation.insert:
            fp += 1
        elif kind == Operation.remove:
            fn += 1
    return dist, tp, fp, fn

# Example usage:
# dist, tp, fp, fn = xml_comp_metrics('diagram1.xmi', 'diagram2.xmi')
# print(f'Distance, true positives, false positives, and false negatives: {dist, tp, fp, fn}')

In [2]:
import glob
import os
import subprocess

In [3]:
# Convert every PlantUML source in the working directory to XMI, skipping
# files whose .xmi counterpart already exists.
for file in glob.glob('*.puml'):
    # Swap only the real extension; str.replace would also rewrite any
    # earlier '.puml' occurring inside the file name.
    xmi_file = os.path.splitext(file)[0] + '.xmi'
    if os.path.exists(xmi_file):
        continue
    result = subprocess.run(['java', '-jar', 'plantuml.jar', '-txmi', file])
    # Surface converter failures instead of discarding the exit status; a
    # missing .xmi would otherwise just vanish from the later comparison.
    if result.returncode != 0:
        print(f'PlantUML failed for {file} (exit code {result.returncode})')

In [8]:
# Compare every generated .xmi in the working directory against Benchmark.xmi.
# Sorting makes the report order reproducible (os.listdir order is arbitrary).
for name in sorted(os.listdir('.')):
    if name.endswith('.xmi') and name != 'Benchmark.xmi':
        # splitext removes only the final extension, so labels that contain
        # dots are not truncated at the first one.
        print(f'Distance, true positives, false positives, and false negatives between benchmark and {os.path.splitext(name)[0]}: ',
              xml_comp_metrics('Benchmark.xmi', name))

Distance, true positives, false positives, and false negatives between benchmark and o3-Zero-Shot:  (49.0, 123, 45, 13)
Distance, true positives, false positives, and false negatives between benchmark and flash-Chain-of-thought:  (47.0, 92, 28, 44)
Distance, true positives, false positives, and false negatives between benchmark and o3-Chain-of-thought:  (30.0, 124, 28, 12)
Distance, true positives, false positives, and false negatives between benchmark and o3-In-Context:  (11.0, 125, 11, 11)
Distance, true positives, false positives, and false negatives between benchmark and maverick-In-Context:  (86.0, 50, 10, 86)
Distance, true positives, false positives, and false negatives between benchmark and maverick-Zero-Shot:  (79.0, 57, 3, 79)
Distance, true positives, false positives, and false negatives between benchmark and maverick-Chain-of-thought:  (81.0, 55, 11, 81)
Distance, true positives, false positives, and false negatives between benchmark and flash-In-Context:  (58.0, 78, 16, 58