In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("../src")

In [3]:
from CompoTree import ComponentTree, Radicals, TSVariants, CharLexicon, CTFounds
from gensim.models.keyedvectors import KeyedVectors
import igraph as ig

In [6]:
ctree = ComponentTree.load()
radicals = Radicals.load()
tsvars = TSVariants.load()
lexicon = CharLexicon.load()

In [12]:
trad_chars = [x for x in lexicon if not tsvars.is_simplified(x)]

In [135]:
char_pools = [x for x in trad_chars if 
              x != tsvars.convert(x)[0]]
char_pools = [tsvars.convert(x)[0] for x in char_pools]


In [140]:
char_decomp = []
for x in set(char_pools):
    try:
        decomp_x = (x, ctree.query(x, max_depth=1, use_flag="shortest")[0])
        char_decomp.append(decomp_x)
    except:
        pass

char_decomp = [x for x in char_decomp if not isinstance(x[1], str) and (len(x[1].components())==2)]


In [141]:
compo_dict = {}
edge_dict = {}
for char_x, decomp_x in char_decomp:
    compos = decomp_x.components()
    compo_dict[compos[0]] = compo_dict.get(compos[0], len(compo_dict))
    compo_dict[compos[1]] = compo_dict.get(compos[1], len(compo_dict))
    
    # edges
    eid = (compo_dict[compos[0]], compo_dict[compos[1]])
    if eid in edge_dict:
        print("duplicated decompositions: ", char_x, decomp_x)
    edge_dict[eid] = edge_dict.get(eid, len(edge_dict))

duplicated decompositions:  晖 ⿰['日']['军']


In [142]:
len(char_pools), len(char_decomp), len(compo_dict), len(edge_dict)

(1673, 1573, 812, 1572)

In [143]:
G = ig.Graph()
for compo_x, compo_id in compo_dict.items():
    G.add_vertex(compo_id, compo=compo_x)
for edge_x, edge_id in edge_dict.items():
    src, tgt = edge_x
    G.add_edge(src, tgt)

In [144]:
compo_rev = {v: k for k, v in compo_dict.items()}
[(compo_rev[x.source], compo_rev[x.target]) for x in G.es[:10]]

[('冫', '中'),
 ('㐅', '朩'),
 ('广', '由'),
 ('口', '屯'),
 ('讠', '川'),
 ('阝', '人'),
 ('犭', '争'),
 ('分', '页'),
 ('讠', '平'),
 ('歹', '戋')]

In [145]:
G.summary()

'IGRAPH UN-- 812 1572 -- \n+ attr: compo (v), name (v)'

In [148]:
[len(x) for x in G.components()]

[782, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2]

In [146]:
from graphviz import Graph

In [147]:
vG = Graph(comment="Component Graph", format="svg", engine='sfdp')
# G.attr('node', fontname="HanaMinA")

for compo_x, compo_id in compo_dict.items():
    # vG.node(str(compo_id), str(compo_x))
    vG.node(str(compo_id), "")
vG.edges([(str(src), str(tgt)) for src, tgt in edge_dict.keys()])

In [150]:
vG.render("Graph.simp.gv.svg", view=True)

'Graph.simp.gv.svg.svg'