In [1]:
import os
import string
from collections import Counter
from collections import defaultdict
from itertools import product

import networkx as nx
import nltk
from nltk.util import ngrams

In [2]:
# Ingredients lexicon graph (per the file name). Its connected components are
# looked up in my_node_connected_component when two component sets are compared.
ingredients_graph = nx.read_gexf('data/spanish_ingredients_lexicon_5.gexf')

In [3]:
# Recipe graph referred to as the "nlg" source in the rest of this notebook.
g_nlg = nx.read_gexf('out/elbulli_nlg.gexf')

In [4]:
# Recipe graph referred to as the "dat" source; the recipe list iterated by the
# mapping cell is taken from this graph.
g_dat = nx.read_gexf('out/elbulli_dat.gexf')

In [5]:
# Values of the 'edgetype' edge attribute, grouped by the kind of neighbour the
# edge leads to from a preparation node. Each list is consumed by the
# get_prep_* helper of the same family defined below.
prep_prod_rels = ['bañado', 'alcohol', 'chocolate', 'lacteo', 'nuevaPasta', 'producto', 'relleno']  # products
prep_ingr_rels = ['composicion']  # ingredients
prep_flav_rels = ['sabor']  # flavors
prep_tech_rels = ['tecnica']  # techniques

def get_prep_products(g, prep):
    """Return the set of neighbours of ``prep`` joined by a product-type edge.

    An edge counts as product-type when its 'edgetype' attribute is one of
    the values listed in ``prep_prod_rels``.
    """
    neighbours = g[prep]
    products = set()
    for node in neighbours:
        if neighbours[node]['edgetype'] in prep_prod_rels:
            products.add(node)
    return products

def get_prep_ingredients(g, prep):
    """Return the set of neighbours of ``prep`` joined by an ingredient-type edge.

    An edge counts as ingredient-type when its 'edgetype' attribute is one of
    the values listed in ``prep_ingr_rels``.
    """
    neighbours = g[prep]
    ingredients = set()
    for node in neighbours:
        if neighbours[node]['edgetype'] in prep_ingr_rels:
            ingredients.add(node)
    return ingredients

def get_prep_flavors(g, prep):
    """Return the set of neighbours of ``prep`` joined by a flavor-type edge.

    An edge counts as flavor-type when its 'edgetype' attribute is one of
    the values listed in ``prep_flav_rels``.
    """
    neighbours = g[prep]
    flavors = set()
    for node in neighbours:
        if neighbours[node]['edgetype'] in prep_flav_rels:
            flavors.add(node)
    return flavors

def get_prep_components(g, prep):
    """Return every component of ``prep``: products, ingredients and flavors.

    Product names have each 'Producto:' occurrence removed and flavor names
    have each 'sabor:' occurrence removed before the three sets are merged;
    ingredient names are used as they are.
    """
    products = {name.replace('Producto:', '') for name in get_prep_products(g, prep)}
    ingredients = get_prep_ingredients(g, prep)
    flavors = {name.replace('sabor:', '') for name in get_prep_flavors(g, prep)}
    return products | ingredients | flavors

def get_prep_techniques(g, prep):
    """Return the set of neighbours of ``prep`` joined by a technique-type edge.

    An edge counts as technique-type when its 'edgetype' attribute is one of
    the values listed in ``prep_tech_rels``.
    """
    neighbours = g[prep]
    techniques = set()
    for node in neighbours:
        if neighbours[node]['edgetype'] in prep_tech_rels:
            techniques.add(node)
    return techniques

def get_recip_preparations(g, recip):
    """Return the list of neighbours of ``recip`` joined by an 'elaboracion' edge.

    The list follows the iteration order of ``g[recip]``.
    """
    neighbours = g[recip]
    preparations = []
    for node in neighbours:
        if neighbours[node]['edgetype'] == 'elaboracion':
            preparations.append(node)
    return preparations

def get_recip_products(g, recip):
    """Return the union of the product sets of all preparations of ``recip``."""
    products = set()
    for prep in get_recip_preparations(g, recip):
        products.update(get_prep_products(g, prep))
    return products

def get_recip_ingredients(g, recip):
    """Return the union of the ingredient sets of all preparations of ``recip``."""
    ingredients = set()
    for prep in get_recip_preparations(g, recip):
        ingredients.update(get_prep_ingredients(g, prep))
    return ingredients

def get_recip_flavors(g, recip):
    """Return the union of the flavor sets of all preparations of ``recip``."""
    flavors = set()
    for prep in get_recip_preparations(g, recip):
        flavors.update(get_prep_flavors(g, prep))
    return flavors

def get_recip_components(g, recip):
    """Return the union of the component sets of all preparations of ``recip``."""
    components = set()
    for prep in get_recip_preparations(g, recip):
        components.update(get_prep_components(g, prep))
    return components

def get_recip_techniques(g, recip):
    """Return the union of the technique sets of all preparations of ``recip``."""
    techniques = set()
    for prep in get_recip_preparations(g, recip):
        techniques.update(get_prep_techniques(g, prep))
    return techniques

def get_nodes_by_type(g, typ):
    """Return the list of nodes of ``g`` whose 'nodetype' attribute equals ``typ``.

    Parameters
    ----------
    g : graph exposing ``nodes(data=True)`` (e.g. a NetworkX graph)
    typ : value compared against each node's 'nodetype' attribute

    Returns
    -------
    list
        Matching node identifiers, in the graph's node iteration order.
    """
    # ``nodes(data=True)`` exists in both NetworkX 1.x and 2.x+, whereas
    # ``nodes_iter`` was removed in 2.0. ``.get`` lets nodes that carry no
    # 'nodetype' attribute be skipped instead of raising KeyError.
    return [node for node, data in g.nodes(data=True) if data.get('nodetype') == typ]

def get_recipes(g):
    """Return the nodes of ``g`` whose node type is 'Receta'."""
    recipe_nodes = get_nodes_by_type(g, 'Receta')
    return recipe_nodes

def get_preparations(g):
    """Return the nodes of ``g`` whose node type is 'Elaboracion'."""
    preparation_nodes = get_nodes_by_type(g, 'Elaboracion')
    return preparation_nodes

In [6]:
def my_node_connected_component(g, n):
    """Return the connected component of ``n`` in ``g``, or ``{n}`` if absent.

    Parameters
    ----------
    g : graph to search (any container supporting ``n in g`` for the
        absent-node case; a NetworkX graph otherwise)
    n : node identifier

    Returns
    -------
    set
        The nodes in the same connected component as ``n`` when ``n`` is a
        node of ``g``; otherwise the singleton ``{n}``.
    """
    # Use the graph that was passed in. The previous version ignored ``g`` and
    # always consulted the module-level ``ingredients_graph``.
    if n in g:
        return nx.node_connected_component(g, n)
    return {n}

def equivalent_components(comps_dat, comps_nlg):
    """Tell whether two component sets are equal up to lexicon equivalence.

    The sets are equivalent when they are equal, or when they have the same
    size and picking one member from the ``ingredients_graph`` connected
    component of each element of ``comps_dat`` can produce exactly
    ``comps_nlg``.
    """
    if comps_dat == comps_nlg:
        return True
    if len(comps_dat) != len(comps_nlg):
        return False
    # One pool of interchangeable names per dat component.
    alternatives = [
        my_node_connected_component(ingredients_graph, comp) for comp in comps_dat
    ]
    # Try every way of choosing one name from each pool.
    return any(set(choice) == comps_nlg for choice in product(*alternatives))

def reduce(s):
    """Return the elements of ``s`` that are not contained in another element.

    An element ``e1`` is dropped when some different element ``e2`` of ``s``
    satisfies ``e1 in e2`` (a substring test for strings), e.g.
    ``{'sal', 'sal marina'}`` becomes ``{'sal marina'}``.

    Parameters
    ----------
    s : iterable of elements supporting ``in`` against each other

    Returns
    -------
    set
        A new set; ``s`` itself is left untouched. The previous version
        removed elements from the caller's set in place.

    Note: the name shadows ``functools.reduce`` if that is ever imported into
    this namespace; it is kept for existing callers.
    """
    return {
        e1 for e1 in s
        if not any(e1 != e2 and e1 in e2 for e2 in s)
    }

In [7]:
# Headline counts: recipes are counted in g_dat only, preparations in both graphs.
n_recipes = len(get_recipes(g_dat))
n_preparations_dat = len(get_preparations(g_dat))
n_preparations_nlg = len(get_preparations(g_nlg))

In [8]:
# Display the counts computed in the previous cell.
print('Recipes:', n_recipes)
print('Preparations nlg:', n_preparations_nlg)
print('Preparations dat:', n_preparations_dat)

Recipes: 1214
Preparations nlg: 4636
Preparations dat: 7052


In [9]:
# Recipes recorded by the mapping loop below.
mapped_recipes = set()
# Undirected graph that receives one edge per matched (dat preparation,
# nlg preparation) pair; the mapping loop tags its nodes with a 'nodetype'
# attribute of 'dat' or 'nlg'.
bipartite_graph = nx.Graph()

In [10]:
# Coverage report taken right after the mapping state is initialised.
n1 = len(mapped_recipes)
n2 = n_recipes
print('Mapped recipes: %d/%d - %.2f%%' % (n1, n2, n1 / n2 * 100))
# Mapped preparations = nodes tagged 'dat' in bipartite_graph, measured against
# all preparations of g_dat.
m1 = len(get_nodes_by_type(bipartite_graph, 'dat'))
m2 = len(get_preparations(g_dat))
print('Mapped preparations: %d/%d - %.2f%%' % (m1, m2, m1 / m2 * 100))

Mapped recipes: 0/1214 - 0.00%
Mapped preparations: 0/7052 - 0.00%


In [11]:
# For every recipe, pair each 'dat' preparation with the 'nlg' preparations of
# the same recipe whose reduced component sets are equivalent, and record the
# pair in bipartite_graph only when the match is unambiguous (exactly one).
for n in get_recipes(g_dat):
    preps_dat = get_recip_preparations(g_dat, n)
    preps_nlg = get_recip_preparations(g_nlg, n)
    # Reduce each nlg component set once per recipe rather than once per
    # (dat, nlg) pair; the result does not depend on the dat preparation.
    reduced_nlg = [
        (prep_nlg, reduce(get_prep_components(g_nlg, prep_nlg)))
        for prep_nlg in preps_nlg
    ]
    mappings = {}
    for prep_dat in preps_dat:
        comps_dat = reduce(get_prep_components(g_dat, prep_dat))
        mappings[prep_dat] = [
            prep_nlg
            for prep_nlg, comps_nlg in reduced_nlg
            if equivalent_components(comps_dat, comps_nlg)
        ]
    for k, candidates in mappings.items():
        if len(candidates) == 1:
            v = candidates[0]
            # Keyword attributes work on NetworkX 1.x and 2.x+; a positional
            # attribute dict is only accepted by 1.x.
            # NOTE(review): if a preparation id can occur in both graphs, the
            # second add_node overwrites the 'nodetype' of the first - confirm.
            bipartite_graph.add_node(k, nodetype='dat')
            bipartite_graph.add_node(v, nodetype='nlg')
            bipartite_graph.add_edge(k, v)
        # NOTE(review): this runs for every dat preparation, matched or not, so
        # any recipe with at least one dat preparation is counted as "mapped".
        # Behaviour kept as-is; move it under the `if` above if only recipes
        # with an actual match should count.
        mapped_recipes.add(n)

In [12]:
# Coverage report after the mapping loop has run.
n1 = len(mapped_recipes)
n2 = n_recipes
print('Mapped recipes: %d/%d - %.2f%%' % (n1, n2, n1 / n2 * 100))
# Mapped preparations = nodes tagged 'dat' in bipartite_graph, measured against
# all preparations of g_dat.
m1 = len(get_nodes_by_type(bipartite_graph, 'dat'))
m2 = len(get_preparations(g_dat))
print('Mapped preparations: %d/%d - %.2f%%' % (m1, m2, m1 / m2 * 100))

Mapped recipes: 1214/1214 - 100.00%
Mapped preparations: 694/7052 - 9.84%
