In [157]:
import operator
import collections
import nntraining.resources.definition as nntrain_def
import networkx
import matplotlib.pyplot as plt
%matplotlib widget

In [69]:
G = networkx.DiGraph()

ALL_WORDS = nntrain_def.read_refined(nntrain_def.refined_path)
ALL_NOUNS = dict(filter(lambda k_v: k_v[0][1] in ("nom", ), ALL_WORDS.items()))
noun_list = set(map(operator.itemgetter(0), ALL_NOUNS))

print(len(ALL_WORDS))
print(len(ALL_NOUNS))

1763212
361332


In [191]:
def entries(nom, dic=ALL_NOUNS):
    return dict(filter(lambda k_v: k_v[0][0] == nom, dic.items()))

def _iterated_relations(relation_fn, G, nodes, n_iteration):
    with_relations_added = set()
    added = set(nodes) if any(isinstance(nodes, t) for t in [list, set]) else {nodes}
    for i in range(n_iteration):
        for n in list(added - with_relations_added):
            added.update(relation_fn(G, n))
            with_relations_added.add(n)
    return added
    
def iterated_successors(G, nodes, n_iteration=1):
    return _iterated_relations(lambda G, n: G.successors(n), G, nodes, n_iteration)

def iterated_predecessors(G, nodes, n_iteration=1):
    return _iterated_relations(lambda G, n: G.predecessors(n), G, nodes, n_iteration)

def iterated_neighbors(G, nodes, n_iteration=1):
    if isinstance(G, networkx.DiGraph):
        G = G.to_undirected()
    return _iterated_relations(networkx.neighbors, G, nodes, n_iteration)

In [125]:
words = dict(list(ALL_NOUNS.items()))
relations = []
for (word, gram_type), definitions in words.items():
    for definition in definitions:
        for link in definition["links"]:
            if link in noun_list:
                relations.append((word, link))
G.add_nodes_from(map(operator.itemgetter(0), words.keys()))
G.add_edges_from(relations)

In [204]:
nodes = iterated_successors(G, ["souris"], 32)
G_small = G.subgraph(nodes)

In [230]:
print(len(G))
for i in range(32):
    print(len(iterated_successors(G, ["français"], i)))

361332
1
6
66
707
3528
8396
12461
14810
15903
16407
16646
16772
16817
16840
16846
16851
16855
16857
16859
16860
16861
16862
16863
16863
16863
16863
16863
16863
16863
16863
16863
16863


In [111]:
fig, ax = plt.subplots()
networkx.draw(G_small, ax=ax, with_labels=True, font_size=8, node_size=0, font_weight="ultralight", width=0.25)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [114]:
plt.close("all")

In [202]:
mot = "molette"
for i in G.successors(mot): print(f"{mot} -> {i}")
print()
for i in G.predecessors(mot): print(f"{i} -> {mot}")

molette -> éperon
molette -> étoile
molette -> cheval
molette -> roue
molette -> sorte
molette -> roulette
molette -> meule
molette -> disque
molette -> grès
molette -> faïence
molette -> plomb
molette -> cône
molette -> marbre
molette -> sommet
molette -> souris
molette -> ordinateur
molette -> maladie
molette -> chevaux
molette -> tumeur
molette -> molle
molette -> jambe
molette -> cépage
molette -> chevalement
molette -> haut fourneau
molette -> skip

éperon -> molette
vernis -> molette
clé à molette -> molette
broyon -> molette
croisille -> molette
moleté -> molette
molettage -> molette
faux-carré -> molette
bâton de colle -> molette
tricône -> molette


In [74]:
entries("souris")

{('souris',
  'nom'): [{'definition': "Petit rongeur de la famille des muridés, du genre ''Mus'', en général la souris commune ''Mus musculus''.",
   'links': ['rongeur', 'muridé'],
   'categories': ['zoologie|nocat=1']}, {'definition': 'Dispositif de commande tenu à la main, connecté à un ordinateur, et dont le déplacement sur une surface entraîne le déplacement d’un repère sur l’écran.',
   'links': [],
   'categories': ['info|fr']}, {'definition': 'Partie du gigot de mouton.',
   'links': ['gigot', 'mouton'],
   'categories': ['boucherie|fr']}, {'definition': 'Jeune fille ou femme.',
   'links': [],
   'categories': ['familier|fr']}, {'definition': 'Terme affectueux utilisé pour s’adresser à un petit enfant.',
   'links': [],
   'categories': ['familier|fr']}, {'definition': 'Tampon hygiénique.',
   'links': ['tampon hygiénique'],
   'categories': ['familier|fr']}, {'definition': 'Phalène de l’euphorbe (papillon).',
   'links': ['phalène de l’euphorbe'],
   'categories': ['zoologie|

In [205]:
collections.Counter(map(len, networkx.connected_components(G.to_undirected())))

Counter({16863: 1})

In [207]:
collections.Counter(map(len, networkx.strongly_connected_components(G_small)))

Counter({1: 1475, 2: 11, 4: 1, 15362: 1})

In [222]:
def boom():
    c = 0
    for node in G_small:
        for n in filter(lambda n: n not in G_small, G.predecessors(node)):
            print(n, len(iterated_successors(G, n, 16)))
            if c > 10:
                return
            c += 1
boom()

ligature 16860
harmonica 16859
cromorne 16861
régale 16862
sans-gêne 16860
cuivrette 16859
zourna 16857
sarrusophone 16859
cialamella 16859
cor de basset 16858
clarinette de basset 16857
suona 16858


In [254]:
degrees = G.degree()
print(list(sorted(degrees, key=operator.itemgetter(1), reverse=True))[:10])

fig, ax = plt.subplots()
cts = list(map(operator.itemgetter(1), list(degrees)))
plt.hist(cts, bins=40, range=(0, 40))

[('français', 42529), ('action', 3554), ('habitant', 3069), ('personne', 2034), ('espèce', 1668), ('québécois', 1556), ('partie', 1494), ('oiseau', 1467), ('langue', 1407), ('substance', 1263)]


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

(array([1.05452e+05, 1.41996e+05, 3.21100e+04, 2.05760e+04, 1.47870e+04,
        1.03860e+04, 7.12700e+03, 5.22500e+03, 3.86800e+03, 2.88600e+03,
        2.22200e+03, 1.66600e+03, 1.37300e+03, 1.14400e+03, 9.23000e+02,
        7.65000e+02, 6.14000e+02, 5.47000e+02, 4.78000e+02, 4.41000e+02,
        3.80000e+02, 3.25000e+02, 2.95000e+02, 2.74000e+02, 2.63000e+02,
        2.19000e+02, 2.33000e+02, 2.09000e+02, 1.75000e+02, 1.50000e+02,
        1.50000e+02, 1.33000e+02, 1.30000e+02, 1.19000e+02, 1.07000e+02,
        1.19000e+02, 1.12000e+02, 8.90000e+01, 8.70000e+01, 1.74000e+02]),
 array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
        13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
        26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38.,
        39., 40.]),
 <a list of 40 Patch objects>)