You are a specialised circuit-diagram interpreter. Given the image of an electrical circuit diagram, identify which symbol classes from the list below appear in the image. IMPORTANT RULES: - Only list symbols you can clearly see and identify in the image - Do not guess or assume symbols are present - If you're uncertain about a symbol, do not include it - Look for distinctive visual features of each symbol type - A typical simple circuit might only contain 2-5 different symbol types. List only the classes you find, in a python list, and nothing else. Symbol classes: ["terminal", "gnd", "vss", "voltage.dc", "voltage.ac", "voltage.battery", "resistor", "resistor.adjustable", "resistor.photo", "capacitor.unpolarized", "capacitor.polarized", "capacitor.adjustable", "inductor", "inductor.ferrite", "inductor.coupled", "transformer", "diode", "diode.light_emitting", "diode.thyrector", "diode.zener", "diac", "triac", "thyristor", "varistor", "transistor.bjt", "transistor.fet", "transistor.photo", "operational_amplifier", "operational_amplifier.schmitt_trigger", "optocoupler", "integrated_circuit", "integrated_circuit.ne555", "integrated_circuit.voltage_regulator", "xor", "and", "or", "not", "nand", "nor", "probe", "probe.current", "probe.voltage", "switch", "relay", "socket", "fuse", "speaker", "motor", "lamp", "microphone", "antenna", "crystal", "mechanical", "magnetic", "optical", "block"]

In [1]:
from textwrap import dedent

def get_vlm_resistor_xml_prompt(cls_name:str) -> str:
    """Return a zero-shot VLM prompt asking for the centres of all ``cls_name`` symbols.

    Despite the historical "resistor" in the name, the prompt is built for
    whatever symbol class is passed in.

    Args:
        cls_name: Symbol class label to detect (e.g. ``"resistor"``, ``"gnd"``).
            It is interpolated verbatim into the instruction text and into the
            ``alt`` attribute / tag body of the ``<points>`` examples.

    Returns:
        The prompt as a single string: one instruction paragraph, a newline,
        then a worked example with three coordinate pairs.
    """
    # Split into adjacent literals purely for readability; they concatenate to
    # one string. Every attribute value in the example is quoted so the model
    # is shown well-formed XML that matches the "exact format" stated above it.
    return (
        'You are a specialised circuit-diagram interpreter performing zero-shot inference. '
        'Given the image of an electrical circuit diagram, identify all symbols of class '
        f'"{cls_name}". '
        'For each symbol from that class, output one <points> tag and nothing else, '
        'in the exact format: '
        f'<points x1="…" y1="…" x2="…" y2="…" x3="…" y3="…" alt="{cls_name}">{cls_name}</points> '
        'Use the coordinates of the center of the bounding box of each symbol. '
        'Do not include any additional text or tags.\n'
        'Example for 3 different locations: '
        f'<points x1="20.8" y1="42.4" x2="21.0" y2="53.6" x3="22.3" y3="54.8" alt="{cls_name}">{cls_name}</points>'
    )

# def get_vlm_symbol_centres_prompt(cls_name: str) -> str:
#     """Return prompt for zero-shot symbol-centre extraction."""
#     return dedent(f'''
#         You are a specialised circuit-diagram interpreter performing zero-shot inference.
#         Given the image of an electrical circuit diagram, identify **all** symbols of class
#         "{cls_name}".

#         - Output exactly one <points> tag and *nothing else*.
#         - Inside that tag, list one coordinate pair per detected symbol, in order:
#           x1="…" y1="…" x2="…" y2="…" x3="…" y3="…" … continuing as needed.
#           Use the centre of each symbol's bounding box and the number being the percentage of the image width and height.
#         - If the image contains only one symbol, output only x1 and y1; if two, add x2 y2; and so on.
#         - Do **not** invent coordinates; only include pairs for symbols that truly exist.
        
#         Example for a single symbol:
#         <points x1="41.0" y1="58.3" alt="{cls_name}">{cls_name}</points>
        
#         Example for four symbols:
#         <points x1="12.1" y1="23.4" x2="45.0" y2="67.2" x3="78.8" y3="11.9" x4="102.5" y4="53.0"
#         alt="{cls_name}">{cls_name}</points>
#     ''').strip()
    
def get_vlm_symbol_centres_prompt(cls_name: str) -> str:
    """Return a zero-shot prompt asking for the centres of all ``cls_name`` symbols.

    The prompt instructs the model to answer with exactly one ``<points>`` tag
    holding one ``xN``/``yN`` attribute pair per detected symbol, expressed as
    percentages of the image width and height, followed by two worked examples.

    Args:
        cls_name: Symbol class label to detect; interpolated verbatim into the
            instructions and into the ``alt`` attribute / tag body of the
            examples.

    Returns:
        The multi-line prompt string (lines joined with ``\\n``, no trailing
        newline).
    """
    prompt_lines = [
        "You are a specialised circuit-diagram interpreter performing zero-shot inference.",
        "Given the image of an electrical circuit diagram, identify **all** symbols of class",
        f'"{cls_name}".',
        "",
        "- Output exactly one <points> tag and *nothing else*.",
        "- Inside that tag, list one coordinate pair per detected symbol, in order:",
        '  x1="…" y1="…" x2="…" y2="…" x3="…" y3="…" … continuing as needed.',
        "  Use the centre of each symbol's bounding box and the number being the percentage of the image width and height.",
        "- If the image contains only one symbol, output only x1 and y1; if two, add x2 y2; and so on.",
        "- Do **not** invent coordinates; only include pairs for symbols that truly exist.",
        "",
        "Example for a single symbol:",
        f'<points x1="41.0" y1="58.3" alt="{cls_name}">{cls_name}</points>',
        "",
        "Example for four symbols:",
        # Coordinates are percentages, so every example value stays within
        # 0-100; an out-of-range example would contradict the instructions.
        '<points x1="12.1" y1="23.4" x2="45.0" y2="67.2" x3="78.8" y3="11.9" x4="92.5" y4="53.0"',
        f'alt="{cls_name}">{cls_name}</points>',
    ]
    return "\n".join(prompt_lines)


In [29]:
# Build the prompt for a handful of symbol classes. The return values of all
# but the final call are discarded; as the last expression in the cell, only
# the "voltage.dc" prompt is echoed as the cell output.
get_vlm_resistor_xml_prompt("capacitor.unpolarized")
get_vlm_resistor_xml_prompt("diode.zener")
get_vlm_resistor_xml_prompt("gnd")
get_vlm_resistor_xml_prompt("inductor")
get_vlm_resistor_xml_prompt("integrated_circuit")
get_vlm_resistor_xml_prompt("probe")
get_vlm_resistor_xml_prompt("resistor")
get_vlm_resistor_xml_prompt("socket")
get_vlm_resistor_xml_prompt("terminal")
get_vlm_resistor_xml_prompt("voltage.dc")


'You are a specialised circuit-diagram interpreter performing zero-shot inference. Given the image of an electrical circuit diagram, identify all symbols of class "voltage.dc". For each symbol from that class, output one <points> tag and nothing else, in the exact format: <points x1="…" y1="…" x2="…" y2="…" x3="…" y3="…" alt="voltage.dc">voltage.dc</points> Use the coordinates of the center of the bounding box of each symbol. Do not include any additional text or tags.\nExample for 3 different locations: <points x1="20.8" y1="42.4" x2="21.0" y2="53.6" x3="22.3" y3=54.8 alt="voltage.dc">voltage.dc</points>'

In [54]:
from typing import List

intersection=None

def jaccard_accuracy(predictions: List[str], targets: List[str]) -> float:
    """Compute the set-based Jaccard index (IoU) of two label lists, in percent.

    Duplicates are ignored because both inputs are converted to sets. Missed
    targets and spurious predictions both enlarge the union and therefore
    lower the score.

    Args:
        predictions: Predicted class labels.
        targets: Ground-truth class labels.

    Returns:
        ``|predictions ∩ targets| / |predictions ∪ targets| * 100``, a value in
        the range 0-100. Two empty inputs count as a perfect match (100.0).

    Side effects:
        Stores the computed intersection in the module-level ``intersection``
        variable so that it can be inspected after the call.
    """
    global intersection
    pred_set = set(predictions)
    target_set = set(targets)
    intersection = pred_set & target_set
    union = pred_set | target_set
    if not union:
        # Nothing predicted and nothing expected: perfect agreement. Report it
        # on the same 0-100 scale as every other result.
        return 100.0
    return (len(intersection) / len(union)) * 100

# Example evaluation: predicted vs. ground-truth symbol classes for one diagram.
# Labels are compared by exact string equality, so they must be spelled exactly
# like the class names (no stray whitespace, no truncated names) or they can
# never match.
preds = ["terminal", "gnd", "resistor", "diode.zener", "voltage.battery", "socket", "fuse", "switch"]

targets = ["capacitor.unpolarized", "gnd", "integrated_circuit.voltage_regulator"]

# Last expression of the cell, so the score is echoed as the cell output.
jaccard_accuracy(
    predictions=preds,
    targets=targets)

10.0

In [30]:
# Print the detection prompt for every class that was both predicted and
# expected. `intersection` is None until jaccard_accuracy() has been called,
# and set iteration order is arbitrary, so fall back to an empty sequence and
# sort the labels to get reproducible output.
for cls_name in sorted(intersection or ()):
    print(get_vlm_resistor_xml_prompt(cls_name))

You are a specialised circuit-diagram interpreter performing zero-shot inference. Given the image of an electrical circuit diagram, identify all symbols of class "terminal". For each symbol from that class, output one <points> tag and nothing else, in the exact format: <points x1="…" y1="…" x2="…" y2="…" alt="terminal">terminal</points> Use the coordinates of the bounding polygon of each symbol. Do not include any additional text or tags.Example: <points x1="20.8" y1="42.4" x2="21.0" y2="53.6" alt="terminal">terminal</points>
You are a specialised circuit-diagram interpreter performing zero-shot inference. Given the image of an electrical circuit diagram, identify all symbols of class "resistor". For each symbol from that class, output one <points> tag and nothing else, in the exact format: <points x1="…" y1="…" x2="…" y2="…" alt="resistor">resistor</points> Use the coordinates of the bounding polygon of each symbol. Do not include any additional text or tags.Example: <points x1="20.8"

# Chain Of Thought