In [2]:
from lxml import etree as et
import pathlib

In [None]:
# Locate the sibling "work" and "doc" directories relative to the directory
# the notebook is executed from, and the encoding table inside "doc".
here = pathlib.Path(".").absolute()
workdir, docdir = (here.parent / sub for sub in ("work", "doc"))
encfile = docdir / "encoding_out.xml"

In [12]:
# Parse the XML file at `encfile` with lxml; `doc` is queried for its <tr>
# elements further down in this notebook.
doc = et.parse(encfile)

In [41]:
from dataclasses import dataclass
@dataclass
class SwapLine:
    """One table row: a font name, an input string, a CID and an output string.

    Instances compare equal field-by-field (dataclass default) and define an
    explicit hash over the same four fields so they can be stored in a set.
    """

    # Field order matters: instances are built positionally elsewhere.
    name: str
    input: str
    cid: int
    output: str

    def __str__(self):
        # Pipe-separated rendering of the four fields, in declaration order.
        parts = (self.name, self.input, self.cid, self.output)
        return "|".join(str(part) for part in parts)

    def __hash__(self):
        # Hash the field tuple so equal rows collapse to one set entry.
        key = (self.name, self.input, self.cid, self.output)
        return hash(key)

In [42]:
def get_swapline(tr: et._Element) -> SwapLine:
    """Build a SwapLine from the text of the first four children of `tr`.

    Children 0, 1 and 3 supply name, input and output as-is (their `.text`
    may be None). Child 2 supplies the CID; a missing or empty text there is
    converted to -1.
    """
    name = tr[0].text
    source = tr[1].text
    # `or "-1"` covers both None and "" before the int conversion.
    cid = int(tr[2].text or "-1")
    target = tr[3].text
    return SwapLine(name=name, input=source, cid=cid, output=target)

In [43]:
# Walk every <tr> in the document and keep the rows whose input and output
# differ. `cnt` is the total row count, `outcnt` the number of differing rows,
# and `swapset` the de-duplicated differing rows.
swapset, outcnt, cnt = set(), 0, 0
for cnt, row in enumerate(doc.xpath("//tr"), start=1):
    line = get_swapline(row)
    if line.input == line.output:
        continue
    outcnt += 1
    swapset.add(line)
length = len(swapset)
print(f"{length}/{outcnt}/{cnt}")

135/162/5008


In [44]:
# Group the differing rows by font name: each value is a list of
# (input, output, cid) tuples for that font.
ret = {}
for line in swapset:
    font = ret.setdefault(line.name, [])
    font.append((line.input, line.output, line.cid))

In [50]:
def mk_name(s: str) -> str:
    """Return `s` lower-cased with every hyphen replaced by an underscore."""
    return s.lower().replace("-", "_")

In [74]:
def sanity_check(swaps):
    """Report repeated first elements in a swap table.

    `swaps` is an iterable of indexable entries. Every entry whose first
    element has already been seen earlier in the iteration is reported on
    stdout as a `\\uXXXX` code point, once per repeated occurrence.
    Nothing is returned.
    """
    seen = set()
    dupes = []
    # Collect all repeats first, then print, so nothing is printed if the
    # scan itself fails part-way through.
    for entry in swaps:
        key = entry[0]
        if key in seen:
            dupes.append(key)
        else:
            seen.add(key)
    for d in dupes:
        print(f"    Duplicate: \\u{ord(d):04x}")

In [100]:
def _escape_codepoint(char: str) -> str:
    """Return a Python string-literal escape for the single character `char`.

    Code points up to U+FFFF use the 4-digit ``\\uXXXX`` form; anything above
    needs the 8-digit ``\\UXXXXXXXX`` form, because ``\\u`` consumes exactly
    four hex digits and would otherwise decode to a different character
    followed by a stray digit.
    """
    codepoint = ord(char)
    if codepoint > 0xFFFF:
        return f"\\U{codepoint:08x}"
    return f"\\u{codepoint:04x}"


def _escape_raw(text: str) -> str:
    """Return `text` safe for embedding between double quotes in Python source.

    Printable characters are kept verbatim; double quotes, backslashes and
    non-printable characters are replaced by their escape sequence so they
    cannot terminate or corrupt the generated literal.
    """
    return "".join(
        _escape_codepoint(ch) if ch in "\"\\" or not ch.isprintable() else ch
        for ch in text
    )


# Write the swap tables as an importable Python module of the form
#     swaptables = {"<font>": {u"<input>": (u"<output>", <cid>), ...}, ...}
with open(workdir / "swaps.py", "w", encoding="UTF-8") as outfile:
    outfile.write("swaptables = {\n")
    for fontname in sorted(ret.keys()):
        outfile.write(f"    \"{_escape_raw(fontname)}\": {{\n")
        swaps = ret[fontname]
        # NOTE: the same input may occur with several CIDs; in the generated
        # dict the last entry written for a key wins. Enable the check below
        # to list such repeats.
        # sanity_check(swaps)
        swaps.sort(key=lambda x: (ord(x[0]), x[2]))
        for swap in swaps:
            source_char, target, cid = swap
            # An empty table cell yields None; emit it as an empty string.
            target = target or ""
            key = _escape_codepoint(source_char)
            if target:
                # Leading characters stay readable; the final one is always
                # written as an escape sequence.
                value = _escape_raw(target[:-1]) + _escape_codepoint(target[-1])
            else:
                value = ""
            outfile.write(f"        u\"{key}\": (u\"{value}\", {cid}),\n")
        outfile.write("    },\n")
    outfile.write("}\n")