In [None]:
import re

In [None]:
class PropertyInfo:
    """A C# auto-property described by its access modifier, type and name.

    ``str()`` renders it as ``<access> <type> <name> { get; set; }``.
    """

    def __init__(self, access, type, name):
        # ``type`` shadows the builtin, but it is part of the constructor's
        # keyword interface and therefore kept.
        self.access, self.name, self.type = access, name, type

    def __str__(self):
        pieces = (self.access, self.type, self.name)
        return "%s %s %s { get; set; }" % pieces
class FieldInfo:
    """A C# field described by its access modifier, type and name.

    ``str()`` renders it as the declaration ``<access> <type> <name>;``.
    """

    def __init__(self, access, type, name):
        # ``type`` shadows the builtin; the parameter name is kept because it
        # is part of the constructor's interface.
        self.access = access
        self.name = name
        self.type = type

    def __str__(self):
        declaration = (self.access, self.type, self.name)
        return "%s %s %s;" % declaration
class EnumValueInfo:
    """One member of a C# enum: its name and the value assigned to it.

    ``str()`` renders it as ``<name> = <value>,`` (with the trailing comma).
    """

    def __init__(self, name, value):
        self.name, self.value = name, value

    def __str__(self):
        rendered = f"{self.name} = {self.value},"
        return rendered
class ClassInfo:
    """A C# type declaration (class, struct, enum or interface).

    Attributes:
        namespace: Namespace the type lives in; the empty string means none.
        name: Type name as it is rendered in the declaration header.
        fullname: ``namespace.name``, or just ``name`` for an empty namespace.
        base_classes: Base list as a single string; empty when there is none.
        type: The declaration keyword, e.g. ``"class"`` or ``"enum"``.
        is_enum / is_interface: Whether ``type`` is ``"enum"`` / ``"interface"``.
        properties: Members; each is rendered with ``str()``.
        subclasses: Nested ``ClassInfo`` objects rendered inside this type.
    """

    def __init__(self, namespace, name, base_classes, type):
        assert namespace is not None
        if len(namespace) > 0:
            qualified = f"{namespace}.{name}"
        else:
            qualified = name
        self.namespace = namespace
        self.name = name
        self.fullname = qualified
        self.base_classes = base_classes
        self.type = type
        self.is_enum = type == "enum"
        self.is_interface = type == "interface"
        self.properties = []
        self.subclasses = []

    def toLines(self):
        """Return the declaration as a list of source lines (no newlines)."""
        declaration = f"public {self.type} {self.name}"
        if len(self.base_classes) > 0:
            declaration += f": {self.base_classes}"
        out = [declaration + " {"]
        # Members first, each indented one level.
        out.extend("    " + str(member) for member in self.properties)
        # Then every nested type, separated by a blank line and indented.
        for nested in self.subclasses:
            out.append("")
            out.extend("    " + nested_line for nested_line in nested.toLines())
        out.append("}")
        return out

    def __str__(self):
        return "\n".join(self.toLines())

In [None]:
def process(path):
    """Parse a C#-style type dump and collect the types declared in it.

    The file is scanned line by line. A ``// Namespace: X`` comment sets the
    namespace of the next type declaration. Inside a class or struct, fields
    that are neither ``static`` nor ``readonly`` and non-static properties
    that have a setter are recorded; inside an enum, the ``public const``
    members are recorded; interface bodies, constants and methods are skipped.
    Afterwards, a type named ``Outer.Inner`` is moved into the ``subclasses``
    of the type named ``Outer`` when that parent is unambiguous.

    Args:
        path: Path of the UTF-8 encoded dump file.

    Returns:
        A tuple ``(classes, by_names, debug)`` where

        * ``classes`` maps full names to the remaining top-level ``ClassInfo``
          objects (nested types have been moved into their parent),
        * ``by_names`` maps each declared type name to the list of
          ``ClassInfo`` objects carrying it (nested types stay listed under
          their dotted name),
        * ``debug`` is ``[unrecognized, outside_class, conflict_classes,
          failed_removal]``.
    """
    with open(path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    classes = {}

    # stats
    cnt_important = 0
    unrecognized = []
    outside_class = []
    conflict_classes = {}

    in_comment_block = False
    current_ns = None
    current_class = None
    enum_re = None
    # Raw strings: the patterns contain regex escapes such as \w and \{ that
    # are invalid *string* escapes and trigger warnings in non-raw literals.
    accessors = r"^(public|private|protected|internal|protected internal)"
    types = r"([\w\[\]\.<>\*,\$]+)"
    identifiers = r"([\w\.<>,\*\$]+)"
    re_field = re.compile(r"%s( static)?( readonly)? %s %s;" % (accessors, types, identifiers))
    re_method = re.compile(accessors + r".+\{ \}$")
    re_property = re.compile(
        r"%s( virtual)?( abstract)?( sealed)?( override)?( static)? %s %s \{( get;)?( set;)? \}"
        % (accessors, types, identifiers)
    )

    def tokenAt(text, start):
        # Return the space-delimited token beginning at ``start``. When no
        # space follows, the token runs to the end of the text (slicing with
        # find()'s -1 would silently drop the last character).
        end = text.find(" ", start + 1)
        return text[start:] if end < 0 else text[start:end]

    def addClass(cls):
        # Register ``cls`` under its full name; duplicates are only counted.
        name = cls.fullname
        if name in classes:
            conflict_classes[name] = conflict_classes.get(name, 0) + 1
        else:
            classes[name] = cls
        return cls

    def checkClass(line):
        # If ``line`` declares a type, make it the current class and return True.
        nonlocal current_class, current_ns
        # Strip the trailing comment first so that a keyword appearing only
        # inside the comment is not mistaken for a declaration.
        comment_at = line.find("//")
        if comment_at >= 0:
            line = line[:comment_at]
        for key in (" class ", " struct ", " enum ", " interface "):
            key_at = line.find(key)
            if key_at < 0:
                continue
            name = tokenAt(line, key_at + len(key))
            base_at = line.find(": ")
            if base_at >= 0:
                base_classes = tokenAt(line, base_at + 2).replace(", ", ",")
            else:
                base_classes = ""
            if current_ns is None:
                # No "// Namespace:" comment preceded this declaration; show
                # where, since ClassInfo rejects a None namespace.
                print(i, line)
            current_class = addClass(ClassInfo(current_ns, name, base_classes, key.replace(" ", "")))
            current_ns = None
            return True
        return False

    for i, raw_line in enumerate(lines):
        line = raw_line.strip()
        # comments
        if line.startswith("*/"):
            in_comment_block = False
            continue
        if in_comment_block:
            continue
        if line.startswith("//"):
            if line.startswith("// Namespace:"):
                current_ns = line[len("// Namespace: "):]
                cnt_important += 1
            continue
        if line.startswith("/*"):
            # A comment that also closes on this line must not swallow the
            # lines that follow it.
            in_comment_block = "*/" not in line
            continue
        # empty
        if len(line) == 0:
            continue
        # attributes
        if line.startswith("["):
            continue

        cnt_important += 1
        line = line.replace(", ", ",")
        if line == "}" or line == "{}":
            current_class = None
            continue
        if line == "{":
            continue
        if checkClass(line):
            if current_class.is_enum:
                # Escape the name: it may contain regex metacharacters.
                enum_re = re.compile(
                    r"^public const %s (\w+) = (.+);$" % re.escape(current_class.name)
                )
            continue
        if current_class is None:
            outside_class.append([i, raw_line])
            continue
        if current_class.is_interface:
            continue
        if current_class.is_enum:
            m = enum_re.match(line)
            if m is not None:
                current_class.properties.append(EnumValueInfo(*m.groups()))
            continue
        if " const " in line:
            continue
        if "/*Metadata offset" in line:
            continue

        # property
        m = re_property.match(line)
        if m is not None:
            # Only properties with a setter are kept.
            if "set;" in line:
                access, _, _, _, _, static, prop_type, name, _, _ = m.groups()
                if static is None:
                    current_class.properties.append(PropertyInfo(access, prop_type, name))
            continue
        # field
        m = re_field.match(line)
        if m is not None:
            access, static, readonly, field_type, name = m.groups()
            if static is None and readonly is None:
                current_class.properties.append(FieldInfo(access, field_type, name))
            continue
        # method
        if re_method.match(line) is not None:
            continue

        unrecognized.append([i, line])

    print(f"processed {len(lines)} lines, {cnt_important} important")
    if len(unrecognized) > 0:
        print(f"[WARN] {len(unrecognized)} lines were not recognized")
    if len(outside_class) > 0:
        print(f"[WARN] {len(outside_class)} lines were outside class")
    if len(conflict_classes) > 0:
        print(f"[WARN] {len(conflict_classes)} fullnames are used by multiple classes")

    # Index by declared (possibly dotted) name, before nested types are renamed.
    by_names = {}
    for cls in classes.values():
        by_names.setdefault(cls.name, []).append(cls)

    # Attach "Outer.Inner" types to "Outer" and drop them from the top level.
    removal = []
    failed_removal = []
    for fullname, cls in classes.items():
        name = cls.name
        idx = name.rfind(".")
        if idx < 0:
            continue
        sc_name = name[:idx]
        if sc_name not in by_names:
            continue
        if len(by_names[sc_name]) > 1:
            failed_removal.append([sc_name, fullname])
            continue
        sc_cls = by_names[sc_name][0]
        sc_cls.subclasses.append(cls)
        cls.name = name[idx + 1:]
        removal.append(fullname)
    # Deleted after the loop so the dict is not modified while iterating it.
    for fullname in removal:
        del classes[fullname]
    if len(failed_removal) > 0:
        print(f"[WARN] {len(failed_removal)} subclasses failed to find unambiguous parent class")

    return classes, by_names, [unrecognized, outside_class, conflict_classes, failed_removal]

In [None]:
def find(classes, name):
    """Return the keys of ``classes`` that contain ``name``, ignoring case.

    Keys are returned in iteration order of ``classes``.
    """
    return [key for key in classes if name.lower() in key.lower()]

def _splitTopLevel(text):
    """Split ``text`` on commas that are not inside ``<...>``; drop empty parts."""
    parts = []
    depth = 0
    current = ""
    for ch in text:
        if ch == "," and depth == 0:
            parts.append(current)
            current = ""
            continue
        if ch == "<":
            depth += 1
        elif ch == ">":
            depth -= 1
        current += ch
    parts.append(current)
    return [part for part in parts if part]


def addDependencies(cls, by_names, processed):
    """Collect ``cls`` and the known types it depends on.

    The types of ``cls.properties`` and every entry of the comma-separated
    ``cls.base_classes`` are looked up in ``by_names`` and followed
    recursively. Array suffixes (``[]``) are ignored and a non-nested generic
    ``Name<A,B>`` contributes ``Name``, ``A`` and ``B``. Built-in C# types are
    skipped. Unknown or ambiguous names are reported with ``print`` and left
    out. Enums have no dependencies.

    Args:
        cls: The class to start from.
        by_names: Mapping of type name to the list of classes with that name.
        processed: Dict of type expressions already visited. It is shared
            across the recursion and updated in place; pass ``{}`` to start.

    Returns:
        A list beginning with ``cls`` followed by its dependencies, without
        duplicates and without classes already nested in ``cls.subclasses``.
    """
    builtin_types = ("bool", "byte", "sbyte", "char", "decimal", "double",
                     "float", "int", "uint", "nint", "nuint", "long", "ulong",
                     "short", "ushort", "object", "string", "dynamic")
    lst = [cls]
    if cls.is_enum:
        return lst
    re_generics = re.compile(r"^([^<]+)<([^>]+)>$")

    # Each base class/interface is resolved on its own; looking up the whole
    # "A,B" list (or an empty list) as one type name can never succeed.
    type_exprs = [p.type for p in cls.properties]
    type_exprs += _splitTopLevel(cls.base_classes)

    for expr in type_exprs:
        if expr in processed:
            continue
        processed[expr] = True

        expr = expr.replace("[]", "")
        if expr in builtin_types:
            continue
        m = re_generics.match(expr)
        if m is None:
            candidates = [expr]
        else:
            generic_name, params = m.groups()
            candidates = [generic_name, *params.split(",")]
        for t in candidates:
            # Built-ins also appear as generic arguments, e.g. List<int>.
            if t in builtin_types:
                continue
            if t not in by_names:
                print(f"not found: {t}")
                continue
            matches = by_names[t]
            if len(matches) > 1:
                print(f"could not determine {t}")
                for candidate in matches:
                    print(candidate.fullname)
                continue
            for dep in addDependencies(matches[0], by_names, processed):
                if dep in lst or dep in cls.subclasses:
                    continue
                lst.append(dep)
    return lst

def save(lst, path):
    """Write the classes in ``lst`` to ``path`` as source text.

    Each class contributes the lines returned by its ``toLines()`` followed by
    one empty line. The file is written as UTF-8 so that non-ASCII identifiers
    are stored the same way regardless of the platform's default encoding.

    Args:
        lst: Iterable of objects providing ``toLines()``.
        path: Destination file path; an existing file is overwritten.
    """
    lines = []
    for cls in lst:
        lines.extend(cls.toLines())
        lines.append("")
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))

In [None]:
# Parse the dump. ``dbg`` receives the diagnostic lists returned by process():
# [unrecognized, outside_class, conflict_classes, failed_removal].
classes, by_names, dbg = process("R:/dump.cs")

In [None]:
# Show every class whose full name contains "characterdata" (case-insensitive),
# one per line, prefixed with its index in the result list.
lst = find(classes, "characterdata")
for i, fullname in enumerate(lst):
    print(i, fullname, sep="\t")

In [None]:
# Take the third match of the previous search (hard-coded index 2, so it
# depends on that cell's output) and collect it together with its dependencies.
# ``lst`` is rebound from a list of names to a list of class objects here.
lst = addDependencies(classes[lst[2]], by_names, {})
print([c.fullname for c in lst])

In [None]:
# Write the collected classes to R:/1.cs.
save(lst, "R:/1.cs")

In [None]:
# Search for classes whose full name contains "keyframes" (case-insensitive).
lst = find(classes, "keyframes")
print(lst)

In [None]:
# Export only the first three matches of the search above to R:/2.cs.
save([classes[k] for k in lst[:3]], "R:/2.cs")