In [1]:
import csv

import pandas as pd
import networkx as nx
import nltk

import api

In [2]:
# Load the graph that api.all_ingredients / api.all_techniques are later run against.
g_nlg = nx.read_gexf('out/elbulli_nlg_02.gexf')

In [3]:
# Load the graph whose 'Acabacion' and 'Elaboracion' nodes carry the 'desc' text scanned further down.
g_dat = nx.read_gexf('out/elbulli_dat_03.gexf')

In [4]:
# Load the existing ingredients lexicon; it is extended in place below and written back out at the end.
all_ingredients_graph = nx.read_gexf('data/spanish_ingredients_lexicon_5.gexf')

In [5]:
# Number of nodes in the ingredients lexicon as loaded (baseline before any additions).
len(all_ingredients_graph)

26743

In [6]:
# Add every ingredient returned for g_nlg to the ingredients lexicon,
# lower-cased, unless a node with that name already exists.
nlg_ingredients = api.all_ingredients(g_nlg)
for ingr in nlg_ingredients:
    ingr = ingr.lower()
    if ingr not in all_ingredients_graph:
        # Attributes are passed as keywords: networkx 2.x+ rejects a positional
        # attribute dict, while keyword attributes work on both 1.x and 2.x+.
        all_ingredients_graph.add_node(ingr, count=0, label=ingr)

In [7]:
# Number of nodes in the ingredients lexicon after merging in the g_nlg ingredients.
len(all_ingredients_graph)

26871

In [8]:
# Load the existing techniques lexicon; it is extended in place below and written back out at the end.
all_techniques_graph = nx.read_gexf('data/spanish_techniques_lexicon_5.gexf')

In [9]:
# Number of nodes in the techniques lexicon as loaded (baseline before any additions).
len(all_techniques_graph)

4372

In [10]:
# Add every technique returned for g_nlg to the techniques lexicon,
# lower-cased, unless a node with that name already exists.
nlg_techniques = api.all_techniques(g_nlg)
for tech in nlg_techniques:
    tech = tech.lower()
    if tech not in all_techniques_graph:
        # Attributes are passed as keywords: networkx 2.x+ rejects a positional
        # attribute dict, while keyword attributes work on both 1.x and 2.x+.
        all_techniques_graph.add_node(tech, count=0, label=tech)

In [11]:
# Number of nodes in the techniques lexicon after merging in the g_nlg techniques.
len(all_techniques_graph)

4385

In [12]:
# Parse the lexicon file into a flat list of records.
# Each space-separated row is read as: <flexion> <lemma> <tag> [<lemma> <tag> ...],
# and one record is emitted per (lemma, tag) pair, all values lower-cased.
#
# newline='' is what the csv module documentation requires for files handed to
# csv.reader, so that line endings are interpreted by the reader itself.
# NOTE(review): no encoding is given, so the platform default is used — confirm
# the file's actual encoding and pass it explicitly if it may differ.
with open('data/es_lexicon.csv', newline='') as f:
    reader = csv.reader(f, delimiter=' ')
    lexicon = []
    for row in reader:
        # i indexes each lemma; its tag sits at i + 1. Stopping at len(row) - 1
        # guarantees row[i + 1] exists and skips an unpaired trailing field.
        for i in range(1, len(row) - 1, 2):
            entry = {
                'flexion': row[0].lower(),
                'lemma': row[i].lower(),
                'eagle': row[i + 1].lower(),
            }
            lexicon.append(entry)

In [13]:
# One row per lexicon record, with columns 'flexion', 'lemma' and 'eagle'.
lexicon_df = pd.DataFrame(lexicon)

In [14]:
# Preview the first rows of the lexicon table.
lexicon_df.head()

Unnamed: 0,eagle,flexion,lemma
0,ncfs000,a,a
1,sps00,a,a
2,vmip3s0,aba,abar
3,vmm02s0,aba,abar
4,ncfs000,ababa,ababa


In [15]:
# Total number of (flexion, lemma, tag) records parsed from the lexicon file.
len(lexicon_df)

668825

In [16]:
# Keep only the lexicon rows tagged 'vmn0000'; their flexions are used below as the infinitive list.
is_vmn0000 = lexicon_df['eagle'] == 'vmn0000'
verbs_df = lexicon_df[is_vmn0000]

In [17]:
# Preview the first rows tagged 'vmn0000'.
verbs_df.head()

Unnamed: 0,eagle,flexion,lemma
70,vmn0000,abajar,abajar
150,vmn0000,abalanzar,abalanzar
205,vmn0000,abalaustrar,abalaustrar
276,vmn0000,abalear,abalear
355,vmn0000,abalizar,abalizar


In [18]:
# Number of lexicon rows tagged 'vmn0000'.
len(verbs_df)

7651

In [19]:
# Distinct surface forms of the 'vmn0000' rows; matched against description tokens below.
infinitive_verbs = verbs_df['flexion'].unique()

In [20]:
# Collect the infinitives that occur in the 'desc' text of 'Acabacion' nodes
# of g_dat and are not yet nodes of the techniques lexicon. The result is a
# candidate pool for manual review.
inf_fin = set()
for n, data in api.nodes_by_type(g_dat, 'Acabacion', data=True):
    desc = data['desc'].lower()
    # The descriptions are Spanish, so use the Spanish sentence model instead
    # of word_tokenize's English default. Sentence boundaries decide where a
    # trailing period is split off a word, which affects the exact-match
    # lookup against infinitive_verbs.
    tokens = nltk.word_tokenize(desc, language='spanish')
    techs = set(tokens).intersection(infinitive_verbs)
    for tech in techs:
        if tech not in all_techniques_graph:
            inf_fin.add(tech)

In [21]:
# Hand-written list of infinitives that a later cell adds to the techniques lexicon.
# Laid out one initial letter per line; the element order is unchanged.
selected_finish = [
    'aderezar', 'agitar', 'alisar', 'aliñar', 'amoldar', 'apelmazar', 'aplanar',
    'aplastar', 'aposentar', 'apretar', 'atemperar', 'atravesar', 'azucarar',
    'batir', 'bañar',
    'clavar', 'colar', 'congelar', 'cristalizar', 'cuajar',
    'decorar', 'derretir', 'descabezar', 'descongelar', 'desgrasar', 'deshuesar', 'desmoldar',
    'embadurnar', 'embeber', 'enharinar', 'enrasar', 'enrollar', 'ensartar', 'entibiar', 'escamar',
    'escarchar', 'escudillar', 'escurrir', 'espolvorear', 'estabilizar', 'estirar', 'estrujar',
    'filetear', 'fundir',
    'hidratar', 'hinchar',
    'impregnar', 'incrustar', 'insertar', 'intercalar', 'interrumpir', 'inyectar',
    'ligar',
    'mezclar', 'moler', 'montar',
    'pasar', 'pelar', 'pellizcar', 'perforar', 'picar', 'pinchar', 'potenciar', 'pulir',
    'pulverizar', 'puntear',
    'rallar', 'rascar', 'rasgar', 'recortar', 'rectificar', 'reposar', 'reseguir', 'reservar', 'rociar',
    'salpimentar', 'sazonar', 'sumergir',
    'templar', 'tostar', 'triangular', 'triturar', 'trocear', 'turbinar',
    'untar',
]

In [22]:
# Collect the infinitives that occur in the 'desc' text of 'Elaboracion' nodes
# of g_dat, are not yet nodes of the techniques lexicon, and were not already
# collected in inf_fin. The result is a candidate pool for manual review.
inf_prep = set()
for n, data in api.nodes_by_type(g_dat, 'Elaboracion', data=True):
    desc = data['desc'].lower()
    # The descriptions are Spanish, so use the Spanish sentence model instead
    # of word_tokenize's English default. Sentence boundaries decide where a
    # trailing period is split off a word, which affects the exact-match
    # lookup against infinitive_verbs.
    tokens = nltk.word_tokenize(desc, language='spanish')
    techs = set(tokens).intersection(infinitive_verbs)
    for tech in techs:
        if tech not in all_techniques_graph and tech not in inf_fin:
            inf_prep.add(tech)

In [23]:
# Hand-written list of infinitives that a later cell adds to the techniques lexicon.
# Laid out one initial letter per line; the element order is unchanged.
selected_preparation = [
    'absorber', 'adherir', 'aguar', 'agujerear', 'ahogar', 'amargar', 'amasar',
    'bolear',
    'cascar', 'cepillar', 'chamuscar', 'chascar', 'concentrar', 'cuadrar',
    'desalar', 'desangrar', 'descarnar', 'descascarar', 'descascarillar', 'descorazonar',
    'desgranar', 'deshacer', 'deshilachar', 'deshojar', 'deshumedecer', 'desmenuzar',
    'desmigar', 'despepitar', 'destripar', 'desvenar', 'diluir', 'disolver',
    'empapar', 'empastar', 'endulzar', 'equilibrar', 'escabechar', 'evaporar', 'exprimir',
    'filtrar', 'friccionar',
    'granizar',
    'homogeneizar', 'humedecer',
    'igualar', 'inflar',
    'laminar',
    'machacar', 'moldear',
    'peinar', 'precipitar', 'prensar',
    'raspar', 'rebanar', 'refrescar', 'reintegrar', 'repelar',
    'satinar', 'solidificar', 'sudar',
    'tamizar', 'temperar',
]

In [24]:
# Register each hand-picked verb from selected_finish as a techniques-lexicon node.
for tech in selected_finish:
    # Attributes are passed as keywords: networkx 2.x+ rejects a positional
    # attribute dict, while keyword attributes work on both 1.x and 2.x+.
    all_techniques_graph.add_node(tech, count=0, label=tech)

In [25]:
# Register each hand-picked verb from selected_preparation as a techniques-lexicon node.
for tech in selected_preparation:
    # Attributes are passed as keywords: networkx 2.x+ rejects a positional
    # attribute dict, while keyword attributes work on both 1.x and 2.x+.
    all_techniques_graph.add_node(tech, count=0, label=tech)

In [26]:
# Number of nodes in the techniques lexicon after adding both hand-picked verb lists.
len(all_techniques_graph)

4532

In [27]:
# Persist the extended ingredients lexicon as GEXF.
# NOTE(review): the output name ends in _04 while the lexicon was loaded from a file ending in _5 — confirm the intended version suffix.
nx.write_gexf(all_ingredients_graph, 'out/spanish_ingredients_lexicon_04.gexf')

In [28]:
# Persist the extended techniques lexicon as GEXF.
# NOTE(review): the output name ends in _04 while the lexicon was loaded from a file ending in _5 — confirm the intended version suffix.
nx.write_gexf(all_techniques_graph, 'out/spanish_techniques_lexicon_04.gexf')