In [1]:
import os
import string
from collections import Counter
from collections import defaultdict
from itertools import product

import networkx as nx
import nltk
from nltk.util import ngrams

In [2]:
# Load the ingredients lexicon graph from GEXF; it is queried further down for
# the connected component of a given component name.
ingredients_graph = nx.read_gexf('data/spanish_ingredients_lexicon_5.gexf')

In [3]:
# Load the 'nlg' variant of the elBulli graph; its preparations are matched
# against those of g_dat in the cells below.
g_nlg = nx.read_gexf('out/elbulli_nlg.gexf')

In [4]:
# Load the 'dat' variant of the elBulli graph; recipes are enumerated from this
# graph when looking for matching preparations in g_nlg.
g_dat = nx.read_gexf('out/elbulli_dat.gexf')

In [5]:
# Values of the 'edgetype' edge attribute used to classify what a preparation
# node is connected to. The get_prep_* helpers filter neighbours with them.
# Edge types that link a preparation to a product.
prep_prod_rels = ['bañado', 'alcohol', 'chocolate', 'lacteo', 'nuevaPasta', 'producto', 'relleno']
# Edge types that link a preparation to an ingredient.
prep_ingr_rels = ['composicion']
# Edge types that link a preparation to a flavor.
prep_flav_rels = ['sabor']
# Edge types that link a preparation to a technique.
prep_tech_rels = ['tecnica']

def get_prep_products(g, prep):
    """Return the set of neighbours of `prep` reached through a product edge.

    An edge counts as a product edge when its 'edgetype' attribute is one of
    the labels in `prep_prod_rels`.
    """
    return {
        neighbour
        for neighbour, edge in g[prep].items()
        if edge['edgetype'] in prep_prod_rels
    }

def get_prep_ingredients(g, prep):
    """Return the set of neighbours of `prep` reached through an ingredient edge.

    An edge counts as an ingredient edge when its 'edgetype' attribute is one
    of the labels in `prep_ingr_rels`.
    """
    return {
        neighbour
        for neighbour, edge in g[prep].items()
        if edge['edgetype'] in prep_ingr_rels
    }

def get_prep_flavors(g, prep):
    """Return the set of neighbours of `prep` reached through a flavor edge.

    An edge counts as a flavor edge when its 'edgetype' attribute is one of
    the labels in `prep_flav_rels`.
    """
    return {
        neighbour
        for neighbour, edge in g[prep].items()
        if edge['edgetype'] in prep_flav_rels
    }

def get_prep_components(g, prep):
    """Return every component of `prep`: products, ingredients and flavors.

    Product names have the text 'Producto:' removed and flavor names have
    'sabor:' removed (every occurrence, via str.replace), so the same
    component compares equal regardless of which kind of edge it came from.
    """
    products = {name.replace('Producto:', '') for name in get_prep_products(g, prep)}
    ingredients = get_prep_ingredients(g, prep)
    flavors = {name.replace('sabor:', '') for name in get_prep_flavors(g, prep)}
    return products | ingredients | flavors

def get_prep_techniques(g, prep):
    """Return the set of neighbours of `prep` reached through a technique edge.

    An edge counts as a technique edge when its 'edgetype' attribute is one
    of the labels in `prep_tech_rels`.
    """
    return {
        neighbour
        for neighbour, edge in g[prep].items()
        if edge['edgetype'] in prep_tech_rels
    }

def get_recip_preparations(g, recip):
    """Return the list of neighbours of `recip` linked by an 'elaboracion' edge.

    The result keeps the iteration order of the recipe's adjacency mapping.
    """
    preparations = []
    for neighbour, edge in g[recip].items():
        if edge['edgetype'] == 'elaboracion':
            preparations.append(neighbour)
    return preparations

def get_recip_products(g, recip):
    """Return the union of the products of every preparation of `recip`."""
    products = set()
    for prep in get_recip_preparations(g, recip):
        products.update(get_prep_products(g, prep))
    return products

def get_recip_ingredients(g, recip):
    """Return the union of the ingredients of every preparation of `recip`."""
    ingredients = set()
    for prep in get_recip_preparations(g, recip):
        ingredients.update(get_prep_ingredients(g, prep))
    return ingredients

def get_recip_flavors(g, recip):
    """Return the union of the flavors of every preparation of `recip`."""
    flavors = set()
    for prep in get_recip_preparations(g, recip):
        flavors.update(get_prep_flavors(g, prep))
    return flavors

def get_recip_components(g, recip):
    """Return the union of the components of every preparation of `recip`."""
    components = set()
    for prep in get_recip_preparations(g, recip):
        components.update(get_prep_components(g, prep))
    return components

def get_recip_techniques(g, recip):
    """Return the union of the techniques of every preparation of `recip`."""
    techniques = set()
    for prep in get_recip_preparations(g, recip):
        techniques.update(get_prep_techniques(g, prep))
    return techniques

def get_nodes_by_type(g, typ):
    """Return the list of nodes of `g` whose 'nodetype' attribute equals `typ`.

    Uses `g.nodes(data=True)`, the same call other cells of this notebook
    already use; `nodes_iter` only exists in networkx 1.x. Nodes that carry no
    'nodetype' attribute are skipped instead of raising KeyError.
    """
    return [n for n, data in g.nodes(data=True) if data.get('nodetype') == typ]

def get_recipes(g):
    """Return the nodes of `g` whose node type is 'Receta'."""
    recipe_nodes = get_nodes_by_type(g, typ='Receta')
    return recipe_nodes

def get_preparations(g):
    """Return the nodes of `g` whose node type is 'Elaboracion'."""
    preparation_nodes = get_nodes_by_type(g, typ='Elaboracion')
    return preparation_nodes

In [6]:
def all_one_value(d):
    """Return True when every value of mapping `d` has exactly one element.

    An empty mapping yields True because no value breaks the rule.
    """
    for candidates in d.values():
        if len(candidates) != 1:
            return False
    return True

def my_node_connected_component(g, n):
    """Return the connected component of `n` in `g`, or `{n}` if `n` is not in `g`.

    The graph passed as `g` is the one that is queried. The previous version
    ignored the argument and always used the global `ingredients_graph`, which
    only worked because every caller passes that same graph.
    """
    if n in g:
        return nx.node_connected_component(g, n)
    # Unknown nodes are only equivalent to themselves.
    return {n}

def equivalent_components(comps_dat, comps_nlg):
    """Return True when the two component sets are equal up to lexicon equivalence.

    Two components are equivalent when they lie in the same connected
    component of `ingredients_graph`; a component that is not in the graph is
    only equivalent to itself. The sets match when they have the same size and
    each element of `comps_dat` can be replaced by an equivalent one so that
    the result is exactly `comps_nlg`.

    Connected components partition the nodes, so such a replacement exists
    exactly when both sets contain the same number of members from every
    equivalence class. Comparing those per-class counts gives the same answer
    as enumerating the cartesian product of the classes, without the
    exponential blow-up that enumeration has for sets with several
    multi-member classes.
    """
    if comps_dat == comps_nlg:
        return True
    if len(comps_dat) != len(comps_nlg):
        return False

    def class_counts(comps):
        # One hashable key per equivalence class, counted per set member.
        return Counter(
            frozenset(my_node_connected_component(ingredients_graph, c))
            for c in comps
        )

    return class_counts(comps_dat) == class_counts(comps_nlg)

In [7]:
# Totals used as denominators in the progress reports below.
# Recipes are counted on g_dat only; preparations are counted on both graphs.
n_recipes = len(get_recipes(g_dat))
n_preparations_dat = len(get_preparations(g_dat))
n_preparations_nlg = len(get_preparations(g_nlg))

In [8]:
# Show the totals computed in the previous cell.
print('Recipes:', n_recipes)
print('Preparations nlg:', n_preparations_nlg)
print('Preparations dat:', n_preparations_dat)

Recipes: 1214
Preparations nlg: 4636
Preparations dat: 7052


In [9]:
# Accumulators filled by the mapping iterations below.
# Recipes for which every preparation was mapped.
mapped_recipes = set()
# One node per mapped preparation (tagged 'dat' or 'nlg' through its
# 'nodetype' attribute) and one edge per dat/nlg pairing.
bipartite_graph = nx.Graph()

In [11]:
# Progress report: share of recipes fully mapped so far, and share of g_dat
# preparations that already appear as 'dat' nodes in bipartite_graph.
n1 = len(mapped_recipes)
n2 = n_recipes
print('Mapped recipes: %d/%d - %.2f%%' % (n1, n2, n1 / n2 * 100))
m1 = len(get_nodes_by_type(bipartite_graph, 'dat'))
m2 = len(get_preparations(g_dat))
print('Mapped preparations: %d/%d - %.2f%%' % (m1, m2, m1 / m2 * 100))

Mapped recipes: 0/1214 - 0.00%
Mapped preparations: 0/7052 - 0.00%


In [12]:
# 1st iteration: recipes with the same number of preparations in both graphs, where every preparation has exactly one counterpart with identical components

In [13]:
# 1st part: map preparations
# For every recipe not mapped yet whose 'dat' and 'nlg' versions have the same
# number of preparations, pair each dat preparation with the nlg preparations
# that have exactly the same component set. The recipe is accepted only when
# every dat preparation has exactly one candidate.
# NOTE(review): all_one_value does not check that the chosen nlg preparations
# are distinct, so two dat preparations may map to the same nlg one - confirm
# whether that is intended.
for n in get_recipes(g_dat):
    if n in mapped_recipes:
        continue
    preps_dat = get_recip_preparations(g_dat, n)
    preps_nlg = get_recip_preparations(g_nlg, n)
    if len(preps_dat) != len(preps_nlg):
        continue
    # The nlg component sets do not depend on the dat preparation being
    # examined, so they are computed once per recipe.
    nlg_components = [(p, get_prep_components(g_nlg, p)) for p in preps_nlg]
    mappings = {}
    for prep_dat in preps_dat:
        comps_dat = get_prep_components(g_dat, prep_dat)
        mappings[prep_dat] = [
            prep_nlg
            for prep_nlg, comps_nlg in nlg_components
            if comps_dat == comps_nlg
        ]
    if all_one_value(mappings):
        for k in mappings:
            v = mappings[k][0]
            # Keyword attributes work on networkx 1.x and 2.x; the positional
            # attribute dict is rejected by networkx >= 2.
            bipartite_graph.add_node(k, nodetype='dat')
            bipartite_graph.add_node(v, nodetype='nlg')
            bipartite_graph.add_edge(k, v)
        mapped_recipes.add(n)

In [14]:
# Progress report: share of recipes fully mapped so far, and share of g_dat
# preparations that already appear as 'dat' nodes in bipartite_graph.
n1 = len(mapped_recipes)
n2 = n_recipes
print('Mapped recipes: %d/%d - %.2f%%' % (n1, n2, n1 / n2 * 100))
m1 = len(get_nodes_by_type(bipartite_graph, 'dat'))
m2 = len(get_preparations(g_dat))
print('Mapped preparations: %d/%d - %.2f%%' % (m1, m2, m1 / m2 * 100))

Mapped recipes: 2/1214 - 0.16%
Mapped preparations: 2/7052 - 0.03%


In [15]:
# 2nd part: identify other recipes that contain mapped preparations
# Diagnostic only: prints 'HIT' once per unmapped recipe whose nlg
# preparations include one that is already an 'nlg' node of bipartite_graph.
# Nothing is stored, and the output does not say which recipe matched.
mapped_preparations = get_nodes_by_type(bipartite_graph, 'nlg')
for n in get_recipes(g_dat):
    if n not in mapped_recipes:
        preps_nlg = get_recip_preparations(g_nlg, n)
        if set(preps_nlg).intersection(mapped_preparations):
            print('HIT')

In [16]:
# Progress report: share of recipes fully mapped so far, and share of g_dat
# preparations that already appear as 'dat' nodes in bipartite_graph.
n1 = len(mapped_recipes)
n2 = n_recipes
print('Mapped recipes: %d/%d - %.2f%%' % (n1, n2, n1 / n2 * 100))
m1 = len(get_nodes_by_type(bipartite_graph, 'dat'))
m2 = len(get_preparations(g_dat))
print('Mapped preparations: %d/%d - %.2f%%' % (m1, m2, m1 / m2 * 100))

Mapped recipes: 2/1214 - 0.16%
Mapped preparations: 2/7052 - 0.03%


In [17]:
# 2nd iteration: recipes with the same number of preparations in both graphs, where every preparation has exactly one counterpart with equivalent components

In [18]:
# 1st part: map preparations
# Same procedure as the first iteration, but component sets are compared with
# equivalent_components (equality up to lexicon equivalence) instead of strict
# set equality. The recipe is accepted only when every dat preparation has
# exactly one candidate.
# NOTE(review): all_one_value does not check that the chosen nlg preparations
# are distinct, so two dat preparations may map to the same nlg one - confirm
# whether that is intended.
for n in get_recipes(g_dat):
    if n in mapped_recipes:
        continue
    preps_dat = get_recip_preparations(g_dat, n)
    preps_nlg = get_recip_preparations(g_nlg, n)
    if len(preps_dat) != len(preps_nlg):
        continue
    # The nlg component sets do not depend on the dat preparation being
    # examined, so they are computed once per recipe.
    nlg_components = [(p, get_prep_components(g_nlg, p)) for p in preps_nlg]
    mappings = {}
    for prep_dat in preps_dat:
        comps_dat = get_prep_components(g_dat, prep_dat)
        mappings[prep_dat] = [
            prep_nlg
            for prep_nlg, comps_nlg in nlg_components
            if equivalent_components(comps_dat, comps_nlg)
        ]
    if all_one_value(mappings):
        for k in mappings:
            v = mappings[k][0]
            # Keyword attributes work on networkx 1.x and 2.x; the positional
            # attribute dict is rejected by networkx >= 2.
            bipartite_graph.add_node(k, nodetype='dat')
            bipartite_graph.add_node(v, nodetype='nlg')
            bipartite_graph.add_edge(k, v)
        mapped_recipes.add(n)

In [19]:
# Progress report: share of recipes fully mapped so far, and share of g_dat
# preparations that already appear as 'dat' nodes in bipartite_graph.
n1 = len(mapped_recipes)
n2 = n_recipes
print('Mapped recipes: %d/%d - %.2f%%' % (n1, n2, n1 / n2 * 100))
m1 = len(get_nodes_by_type(bipartite_graph, 'dat'))
m2 = len(get_preparations(g_dat))
print('Mapped preparations: %d/%d - %.2f%%' % (m1, m2, m1 / m2 * 100))

Mapped recipes: 3/1214 - 0.25%
Mapped preparations: 3/7052 - 0.04%


In [20]:
# 2nd part: identify other recipes that contain mapped preparations
# Diagnostic only: prints 'HIT' once per unmapped recipe whose nlg
# preparations include one that is already an 'nlg' node of bipartite_graph.
# Nothing is stored, and the output does not say which recipe matched.
mapped_preparations = get_nodes_by_type(bipartite_graph, 'nlg')
for n in get_recipes(g_dat):
    if n not in mapped_recipes:
        preps_nlg = get_recip_preparations(g_nlg, n)
        if set(preps_nlg).intersection(mapped_preparations):
            print('HIT')

In [21]:
# Progress report: share of recipes fully mapped so far, and share of g_dat
# preparations that already appear as 'dat' nodes in bipartite_graph.
n1 = len(mapped_recipes)
n2 = n_recipes
print('Mapped recipes: %d/%d - %.2f%%' % (n1, n2, n1 / n2 * 100))
m1 = len(get_nodes_by_type(bipartite_graph, 'dat'))
m2 = len(get_preparations(g_dat))
print('Mapped preparations: %d/%d - %.2f%%' % (m1, m2, m1 / m2 * 100))

Mapped recipes: 3/1214 - 0.00%
Mapped preparations: 3/7052 - 0.00%


In [22]:
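# 3rd iteration: recipes with the same number of preparations and equivalent components, mapping each preparation that has exactly one counterpart even when other preparations of the recipe do not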

In [23]:
# 1st part: map preparations
# Same candidate search as the second iteration, but without the
# all-or-nothing requirement: every dat preparation that has exactly one
# equivalent nlg candidate is mapped, even if other preparations of the same
# recipe have zero or several candidates. The recipe itself is therefore not
# added to mapped_recipes here.
for n in get_recipes(g_dat):
    if n in mapped_recipes:
        continue
    preps_dat = get_recip_preparations(g_dat, n)
    preps_nlg = get_recip_preparations(g_nlg, n)
    if len(preps_dat) != len(preps_nlg):
        continue
    # The nlg component sets do not depend on the dat preparation being
    # examined, so they are computed once per recipe.
    nlg_components = [(p, get_prep_components(g_nlg, p)) for p in preps_nlg]
    mappings = {}
    for prep_dat in preps_dat:
        comps_dat = get_prep_components(g_dat, prep_dat)
        mappings[prep_dat] = [
            prep_nlg
            for prep_nlg, comps_nlg in nlg_components
            if equivalent_components(comps_dat, comps_nlg)
        ]
    for k in mappings:
        if len(mappings[k]) == 1:
            v = mappings[k][0]
            # Keyword attributes work on networkx 1.x and 2.x; the positional
            # attribute dict is rejected by networkx >= 2.
            bipartite_graph.add_node(k, nodetype='dat')
            bipartite_graph.add_node(v, nodetype='nlg')
            bipartite_graph.add_edge(k, v)

In [24]:
# Progress report: share of recipes fully mapped so far, and share of g_dat
# preparations that already appear as 'dat' nodes in bipartite_graph.
n1 = len(mapped_recipes)
n2 = n_recipes
print('Mapped recipes: %d/%d - %.2f%%' % (n1, n2, n1 / n2 * 100))
m1 = len(get_nodes_by_type(bipartite_graph, 'dat'))
m2 = len(get_preparations(g_dat))
print('Mapped preparations: %d/%d - %.2f%%' % (m1, m2, m1 / m2 * 100))

Mapped recipes: 3/1214 - 0.25%
Mapped preparations: 71/7052 - 1.01%


In [25]:
mappings

{'163-1': [],
 '163-2': [],
 '163-3': [],
 '163-4': ['Elaboracion19871997-512'],
 '163-5': []}

In [25]:
# 2nd part: identify other recipes that contain mapped preparations
# Diagnostic only: prints 'HIT' once per unmapped recipe whose nlg
# preparations include one that is already an 'nlg' node of bipartite_graph.
# Nothing is stored, and the output does not say which recipe matched.
mapped_preparations = get_nodes_by_type(bipartite_graph, 'nlg')
for n in get_recipes(g_dat):
    if n not in mapped_recipes:
        preps_nlg = get_recip_preparations(g_nlg, n)
        if set(preps_nlg).intersection(mapped_preparations):
            print('HIT')

HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT
HIT


In [20]:
# Progress report: share of recipes fully mapped so far, and share of g_dat
# preparations that already appear as 'dat' nodes in bipartite_graph.
n1 = len(mapped_recipes)
n2 = n_recipes
print('Mapped recipes: %d/%d - %.2f%%' % (n1, n2, n1 / n2 * 100))
m1 = len(get_nodes_by_type(bipartite_graph, 'dat'))
m2 = len(get_preparations(g_dat))
print('Mapped preparations: %d/%d - %.2f%%' % (m1, m2, m1 / m2 * 100))

Mapped recipes: 3/1214 - 0.00%
Mapped preparations: 3/7052 - 0.00%


In [22]:
# Print the id of each nlg recipe once for every one of its preparations that
# is a node of bipartite_graph (so a recipe may be printed several times).
for r in get_recipes(g_nlg):
    for p in get_recip_preparations(g_nlg, r):
        if p in bipartite_graph:
            print(r)

39
543
196


In [16]:
bipartite_graph.edges()

[('543-1', 'Elaboracion19982001-942'),
 ('39-1', 'Elaboracion19871997-111'),
 ('Elaboracion19871997-129', '196-1')]

In [18]:
bipartite_graph.has_edge('Elaboracion19982001-942', '543-1') 

True

In [26]:
get_prep_components(g_dat, '932-8')

{'naranja'}

In [27]:
get_prep_components(g_nlg, 'Elaboracion20032004-733')

{'naranja'}

In [28]:
get_prep_components(g_nlg, 'Elaboracion20032004-736')

{'naranja'}

In [29]:
g_nlg['Elaboracion20032004-736']

{'DULCE': {'edgetype': 'mundo', 'id': '31301'},
 'TOQUES': {'edgetype': 'se clasifica', 'id': '15801'},
 'sabor:naranja': {'edgetype': 'sabor', 'id': '15800'}}

In [30]:
g_nlg['Elaboracion20032004-733']

{'DULCE': {'edgetype': 'mundo', 'id': '30878'},
 'TOQUES': {'edgetype': 'se clasifica', 'id': '21616'},
 'sabor:naranja': {'edgetype': 'sabor', 'id': '21615'}}

In [31]:
g_nlg['932']

{'2003': {'edgetype': 'publicado en', 'id': '24013'},
 'AMBIENTE': {'edgetype': 'temperatura', 'id': '21917'},
 'Elaboracion20032004-307': {'edgetype': 'elaboracion', 'id': '21927'},
 'Elaboracion20032004-571': {'edgetype': 'elaboracion', 'id': '21923'},
 'Elaboracion20032004-661': {'edgetype': 'elaboracion', 'id': '21929'},
 'Elaboracion20032004-671': {'edgetype': 'elaboracion', 'id': '21925'},
 'Elaboracion20032004-715': {'edgetype': 'elaboracion', 'id': '21924'},
 'Elaboracion20032004-733': {'edgetype': 'elaboracion', 'id': '21919'},
 'Elaboracion20032004-736': {'edgetype': 'elaboracion', 'id': '21918'},
 'Elaboracion20032004-751': {'edgetype': 'elaboracion', 'id': '21928'},
 'Elaboracion20032004-766': {'edgetype': 'elaboracion', 'id': '21921'},
 'Estilo20032004-14': {'edgetype': 'estilo', 'id': '21926'},
 'Estilo20032004-9': {'edgetype': 'estilo', 'id': '21922'},
 'POSTRES': {'edgetype': 'se clasifica', 'id': '21920'}}

In [87]:
# For every preparation node of g_dat, collect the nlg preparations whose
# cleaned component set equals the dat preparation's component set.
# NOTE(review): get_nlg_preparations and clean_producto_sabor are not defined
# in any cell of this notebook, so this cell raises NameError on a fresh
# kernel. The outputs shown further down came from an earlier session.
# Restore or re-define both helpers before relying on this cell.
preps_dat_nlg_dict = defaultdict(list)
for n, data in g_dat.nodes(data=True):
    if data['nodetype'] == 'Elaboracion':
        comps = get_prep_components(g_dat, n)
        nlg_preps = get_nlg_preparations(n)
        for nlg_prep in nlg_preps:
            nlg_comps = get_prep_components(g_nlg, nlg_prep)
            nlg_comps = set(map(clean_producto_sabor, nlg_comps))
            if nlg_comps == comps:
                preps_dat_nlg_dict[n].append(nlg_prep)
# Freeze into a plain dict so later lookups of missing keys do not create
# empty entries.
preps_dat_nlg_dict = dict(preps_dat_nlg_dict)

NameError: name 'get_nlg_preparations' is not defined

In [10]:
len(preps_dat_nlg_dict)

470

In [11]:
preps_dat_nlg_dict

{'1-2': ['Elaboracion19871997-523'],
 '1-3': ['Elaboracion19871997-542'],
 '1000-6': ['Elaboracion20032004-1267'],
 '1002-1': ['Elaboracion20032004-1439'],
 '1003-5': ['Elaboracion20032004-1474'],
 '1004-1': ['Elaboracion20032004-1270'],
 '1006-1': ['Elaboracion20032004-518'],
 '1007-1': ['Elaboracion20032004-1409'],
 '1008-2': ['Elaboracion20032004-260'],
 '1008-4': ['Elaboracion20032004-1274',
  'Elaboracion20032004-1352',
  'Elaboracion20032004-344',
  'Elaboracion20032004-1271',
  'Elaboracion20032004-1272'],
 '1008-5': ['Elaboracion20032004-1274',
  'Elaboracion20032004-1352',
  'Elaboracion20032004-344',
  'Elaboracion20032004-1271',
  'Elaboracion20032004-1272'],
 '1008-6': ['Elaboracion20032004-1274',
  'Elaboracion20032004-1352',
  'Elaboracion20032004-344',
  'Elaboracion20032004-1271',
  'Elaboracion20032004-1272'],
 '1013-1': ['Elaboracion20032004-1415'],
 '1014-5': ['Elaboracion20032004-1499'],
 '1016-1': ['Elaboracion20032004-1236'],
 '1018-10': ['Elaboracion20032004-349'

In [12]:
# Debug tallies incremented by equals_preparations:
#   a  - calls
#   b  - pairs with the same title
#   c  - same title and same component set (reported as equal)
#   d  - same title 'Otros' but different component sets
#   e  - same title (not 'Otros') but different component sets
#   e1 - subset of e where each side has exactly one component the other lacks
#   f  - pairs with different titles
#   w, x, y, z - only referenced from commented-out code
ddd={'a':0,'b':0,'c':0, 'd':0, 'e':0, 'e1':0, 'f':0,     'w':0, 'x':0, 'y':0, 'z':0}


def equals_preparations(n1, n2):
    """Return True when two g_dat preparations have the same title and components.

    Each call also updates the global debug tallies in `ddd`: 'a' on every
    call, 'f' for different titles, 'b' for equal titles, then exactly one of
    'c' (equal component sets), 'd' (title 'Otros', different sets) or
    'e' (any other title, different sets). 'e1' additionally counts the 'e'
    pairs where each side has exactly one component missing from the other.

    NOTE(review): two preparations that both have an empty component set and
    share a title compare as equal - confirm that this is intended.
    TODO (from the original author's notes): for near-miss pairs, check whether
    the matching nlg preparations of both sides are the same.
    """
    ddd['a'] += 1
    node1, node2 = g_dat.node[n1], g_dat.node[n2]

    if node1['title'] != node2['title']:
        ddd['f'] += 1
        return False

    ddd['b'] += 1
    comps1 = get_prep_components(g_dat, n1)
    comps2 = get_prep_components(g_dat, n2)

    if comps1 == comps2:
        ddd['c'] += 1
        return True

    if node1['title'] == 'Otros':
        ddd['d'] += 1
        return False

    ddd['e'] += 1
    only_in_1 = comps1 - comps2
    only_in_2 = comps2 - comps1
    if len(only_in_1) == 1 and len(only_in_2) == 1:
        ddd['e1'] += 1
    return False

In [11]:
ddd

{'a': 49723652,
 'b': 909082,
 'c': 29426,
 'd': 873454,
 'e': 6202,
 'f': 48814570,
 'w': 287,
 'x': 116,
 'y': 116,
 'z': 287}

In [6]:
ddd

{'a': 0,
 'b': 0,
 'c': 0,
 'd': 0,
 'e': 0,
 'f': 0,
 'w': 0,
 'x': 0,
 'y': 0,
 'z': 0}

In [7]:
# Print the first preparation node of g_dat titled 'Otros' and stop, to get a
# sample id for manual inspection.
for n, data in g_dat.nodes(data=True):
    if data['nodetype'] == 'Elaboracion':
        if data['title'] == 'Otros':
            print(n, data['title'])
            break

205-6 Otros


In [13]:
%%time

# Collect every unordered pair of distinct g_dat preparations that
# equals_preparations reports as equal.
# The preparation nodes are selected once up front. The previous version
# re-scanned every node of the graph, and re-checked its nodetype, inside the
# inner loop for each outer preparation.
# Ordered pairs are still visited in the same order, so the `ddd` tallies
# updated by equals_preparations are unchanged. Each unordered pair is thus
# compared twice; switching to unordered pairs would halve the work but also
# halve those tallies.
preparation_nodes = [
    node
    for node, data in g_dat.nodes(data=True)
    if data['nodetype'] == 'Elaboracion'
]
preps = set()
for n1 in preparation_nodes:
    for n2 in preparation_nodes:
        if n1 != n2 and equals_preparations(n1, n2):
            # Sort so that (a, b) and (b, a) collapse into a single entry.
            preps.add(tuple(sorted([n1, n2])))

CPU times: user 1min 55s, sys: 44 ms, total: 1min 55s
Wall time: 1min 55s


In [9]:
# Sanity check: every call ends in exactly one of "same title" / "different title".
assert(ddd['a']==ddd['b']+ddd['f'])

In [10]:
# Sanity check: every same-title pair falls in exactly one of the c / d / e buckets.
assert(ddd['b']==ddd['c']+ddd['d']+ddd['e'])

In [11]:
ddd

{'a': 49723652,
 'b': 909082,
 'c': 29426,
 'd': 873454,
 'e': 6202,
 'f': 48814570,
 'w': 287,
 'x': 116,
 'y': 116,
 'z': 287}

In [99]:
ddd

{'a': 49723652,
 'b': 907134,
 'c': 29388,
 'd': 871580,
 'e': 6166,
 'f': 48816518,
 'w': 287,
 'x': 116,
 'y': 116,
 'z': 287}

In [99]:
ddd

{'a': 49723652,
 'b': 907134,
 'c': 29388,
 'd': 871580,
 'e': 6166,
 'f': 48816518,
 'w': 287,
 'x': 116,
 'y': 116,
 'z': 287}

In [99]:
ddd

{'a': 49723652,
 'b': 907134,
 'c': 29388,
 'd': 871580,
 'e': 6166,
 'f': 48816518,
 'w': 287,
 'x': 116,
 'y': 116,
 'z': 287}

In [99]:
ddd

{'a': 49723652,
 'b': 907134,
 'c': 29388,
 'd': 871580,
 'e': 6166,
 'f': 48816518,
 'w': 287,
 'x': 116,
 'y': 116,
 'z': 287}

In [9]:
len(preps)

11298

In [20]:
# es_test = [
#     (1,2),(1,3),(2,3),(3,4),
#     (5,6),(6,7),(7,8),(7,9),
#     (10,11),(11,12),(12,13),
# #     (1,13),
#     (6,12),(1, 14)
# ]

In [69]:
%%time

# Group the endpoints of the pairs in `es` into clusters: two nodes end up in
# the same set when a chain of pairs connects them. These are the connected
# components of the graph whose edges are `es`, so networkx (already imported
# by this notebook) computes them directly, replacing the hand-rolled merge
# that re-scanned the whole cluster list for every pair.
# NOTE(review): `es` is not defined in any visible cell (only a commented-out
# `es_test` exists); it presumably holds the pairs collected in `preps` -
# confirm and define it explicitly before this cell.
edge_graph = nx.Graph()
edge_graph.add_edges_from(es)
# `ls` stays a list of sets, as the following cells expect.
ls = [set(component) for component in nx.connected_components(edge_graph)]

In [70]:
len(ls)

821

In [71]:
# Check whether all preparations of a cluster share the same title; prints
# 'AAA' when they do not.
# NOTE(review): the unconditional `break` stops after the first cluster, so
# only ls[0] is checked. `graph` is not defined in any visible cell
# (presumably an alias of g_dat - confirm).
for x in ls:
    ts = set()
    ingrs = set()
    descs = set()
    for y in x:
        ts.add(graph.node[y]['title'])
        ingrs.add(graph.node[y]['ingrs'])
        descs.add(graph.node[y]['desc'])
    if len(ts) != 1:
        print('AAA')
    break
# Disabled checks on the collected ingredient texts and descriptions:
#     if len(ingrs) != 1:
#         print('BBB')
#     if len(descs) != 1:
#         print('CCC')

In [72]:
list(x)[:4]

['799-8', '1003-5', '999-6', '262-9']

In [73]:
# Show title, ingredient text and description for four members of the first
# cluster. Set iteration order is arbitrary, so the sample may vary.
# NOTE(review): `graph` is not defined in any visible cell - confirm its origin.
for y in list(ls[0])[:4]:
    print(graph.node[y]['title'])
    print(graph.node[y]['ingrs'])
    print(graph.node[y]['desc'])

Otros
          sal Maldon#          cacao en un espolvoreador#          la piel rallada de una mandarina#          la piel rallada de una lima#          la piel rallada de una naranja

Otros
     10 g de vinagre balsámico tradicional #          introducido en pipetas

Otros
   120 hojas de albahaca fresca de 0,5 cm #          de longitud#   100 g de azúcar

Otros
        4 brotes de romero fresco
######


In [74]:
equals_preparations('799-8', '1003-5')

True

In [75]:
get_prep_components(graph, '799-8')

set()

In [77]:
get_prep_products(graph, '799-8')

set()

In [81]:
{k for k in graph['799-8'] if graph['799-8'][k]['edgetype'] in prep_prod_rels}

set()

In [82]:
for k in graph['799-8']:
    print(k)

In [83]:
graph.node['799-8']

{'desc': '',
 'ingrs': '          sal Maldon#          cacao en un espolvoreador#          la piel rallada de una mandarina#          la piel rallada de una lima#          la piel rallada de una naranja',
 'label': '799-8',
 'nodetype': 'Elaboracion',
 'title': 'Otros'}

In [78]:
get_prep_ingredients(graph, '799-8')

set()

In [79]:
get_prep_flavors(graph, '799-8')

set()

In [76]:
get_prep_components(graph, '1003-5')

set()

In [68]:
# Count preparation nodes: c for g_nlg, d for `graph`.
# NOTE(review): `graph` is not defined in any visible cell - confirm its origin.
c=d=0
for n, data in g_nlg.nodes(data=True):
    if data['nodetype'] == 'Elaboracion':
        c+=1
for n, data in graph.nodes(data=True):
    if data['nodetype'] == 'Elaboracion':
        d+=1

In [69]:
c

4636

In [70]:
d

7052