In [1]:
import csv
import itertools
import json
import os
import pickle
import time
from collections import defaultdict
from itertools import product
from itertools import permutations

import networkx as nx
import nltk
import requests
from nltk.corpus import cess_esp
from nltk.stem.snowball import SnowballStemmer
from nltk.util import ngrams
from pymongo import MongoClient

# Lexicon

In [2]:
client = MongoClient()
# client.drop_database('lexicon')
db = client.lexicon

In [3]:
# with open('data/es_lexicon.csv') as f:
#     reader = csv.reader(
#         f,
#         delimiter=' ',
#     )
#     docs = []
#     count = 0
#     for row in reader:
#         for i in range(1, len(row[1:]), 2):
#             entry = {}
#             entry['flexion'] = row[0].lower()
#             entry['lemma'] = row[i].lower()
#             entry['eagle'] = row[i+1].lower()
#             docs.append(entry)
#             count += 1
#         if count % 1000 == 0:
#             db.es_lexicon.insert_many(docs)
#             docs = []
#     db.es_lexicon.insert_many(docs)
#     docs = []

In [4]:
db.es_lexicon.count()

668825

# POS tagger

In [5]:
# tagged_sp_sents = cess_esp.tagged_sents()

In [6]:
# size = int(len(tagged_sp_sents) * 0.1)
# train_sp_sents = tagged_sp_sents[size:]
# test_sp_sents = tagged_sp_sents[:size]

In [7]:
# tagged_sp_words = cess_esp.tagged_words()

In [8]:
# tags = [tag for (word, tag) in tagged_sp_words]
# most_freq_tags = nltk.FreqDist(tags)
# most_freq_tags.most_common()[:10]

# [('sps00', 25272),
#  ('ncms000', 11428),
#  ('Fc', 11420),
#  ('ncfs000', 11008),
#  ('da0fs0', 6838),
#  ('da0ms0', 6012),
#  ('rg', 5937),
#  ('Fp', 5866),
#  ('cc', 5854),
#  ('ncmp000', 5711)]

In [9]:
# default_tag = 'ncms000'

In [10]:
# t0 = nltk.DefaultTagger(None)
# t1 = nltk.UnigramTagger(train_sp_sents, backoff=t0)
# t2 = nltk.BigramTagger(train_sp_sents, backoff=t1)
# sp_tagger = nltk.TrigramTagger(train_sp_sents, backoff=t2)

In [11]:
# sp_tagger.evaluate(test_sp_sents)

# 0.8815674255691769

In [12]:
# with open('data/sp_tagger.pickle', 'wb') as f:
#     pickle.dump(sp_tagger, f)

In [13]:
# Load the tagger from data/sp_tagger.pickle rather than retraining it
# (the training cells above are commented out).
# NOTE(review): pickle.load can execute arbitrary code; only load a pickle
# that was produced by this notebook.
with open('data/sp_tagger.pickle', 'rb') as f:
    sp_tagger = pickle.load(f)

# Techniques

In [14]:
def is_spanish_techniques_file(filename):
    """Return True when ``filename`` starts with ``es_`` and ends with ``_techniques.txt``."""
    has_prefix = filename.startswith('es_')
    return has_prefix and filename.endswith('_techniques.txt')

In [15]:
def add_node(g, n):
    """Insert node ``n`` into ``g`` with ``count=0``; do nothing if it is already there."""
    if n in g:
        return
    g.add_node(n, count=0)

In [16]:
def add_edge(g, n1, n2):
    """Link ``n1`` and ``n2`` in ``g`` unless they are equal or already connected.

    Because an edge is only added when no path exists yet, components built
    through this helper never gain a redundant edge.
    """
    if n1 == n2:
        return
    if nx.has_path(g, n1, n2):
        return
    g.add_edge(n1, n2)

In [17]:
# Build the initial synonym graph from the Spanish technique files: one node
# per technique, and the techniques listed on the same line linked together.
graph_syn = nx.Graph()
techniques_root = 'data/techniques/'
for e in os.listdir(techniques_root):
    file_path = techniques_root + e
    # Only regular files accepted by is_spanish_techniques_file are read.
    if not (os.path.isfile(file_path) and is_spanish_techniques_file(e)):
        continue
    with open(file_path) as f:
        for line in f:
            techs1 = line.strip()
            # Synonyms on a line are separated by ' / ', ' o ' or '/'.
            syn_set = set()
            for techs2 in techs1.split(' / '):
                for techs3 in techs2.split(' o '):
                    for tech in techs3.split('/'):
                        syn_set.add(tech)
                        add_node(graph_syn, tech)
            # Connect one member of the line to all the others.
            syn_set = list(syn_set)
            t1 = syn_set[0]
            for t2 in syn_set[1:]:
                add_edge(graph_syn, t1, t2)

In [18]:
len(graph_syn)

339

In [19]:
graph_syn.number_of_edges()

128

In [20]:
nx.number_connected_components(graph_syn)

211

In [21]:
nx.write_gexf(graph_syn, 'data/spanish_techniques_lexicon_1.gexf')

In [22]:
graph_syn = nx.read_gexf('data/spanish_techniques_lexicon_1.gexf')

# POS tagging

In [23]:
# First letter of a (lower-cased) eagle tag -> coarse category used in this notebook.
tag_mapping = {
    'a': 'adj',        # adjectives
    'r': 'adv',        # adverbs
    'd': 'det',        # determiners
    'n': 'noun',       # nouns
    'v': 'verb',       # verbs
    'p': 'pron',       # pronouns
    'c': 'conj',       # conjunctions
    'i': 'interj',     # interjections
    's': 'prep',       # prepositions
    'f': 'punt',       # punctuation
    'z': 'num',        # numerals
    'w': 'date-time',  # dates and times
}

def map_tag(eagle):
    """Return the coarse category for ``eagle``, decided by its first character.

    Raises ``KeyError`` when that character is not a key of ``tag_mapping``.
    """
    initial = eagle[0]
    return tag_mapping[initial]

def get_category(entry):
    """Return the coarse category of a lexicon ``entry``, read from its ``'eagle'`` field."""
    eagle = entry['eagle']
    return map_tag(eagle)

def has_category(category, entries):
    """Return True if at least one of ``entries`` maps to ``category``.

    Entries are examined lazily, stopping at the first match.
    """
    return any(category == get_category(entry) for entry in entries)

def is_number(x):
    """Return True when ``x`` is one of the spelled-out numerals 'dos' .. 'nueve'."""
    spelled_out = ('dos', 'tres', 'cuatro', 'cinco', 'seis', 'siete', 'ocho', 'nueve')
    return x in spelled_out

def technique_tagger(x):
    """Tag every token of the technique ``x`` with a coarse category.

    Each token is tagged, in this order of precedence:

    1. ``'num'`` when ``is_number`` recognises it;
    2. the category mapped from the tag assigned by ``sp_tagger``;
    3. the preferred category among the token's ``es_lexicon`` entries;
    4. ``'noun'`` when the lexicon has no entry for it.

    A technique made of a single token is always returned as ``[(x, tag)]``
    with ``tag`` forced to ``'noun'`` or ``'verb'``.

    Lexicon lookups and the numeral check use the lower-cased token because
    the lexicon stores its ``flexion`` field lower-cased; the token returned
    in the result keeps its original casing.

    Returns a list of ``(token, tag)`` tuples.
    """
    # Preferred lexicon categories for a token the tagger left untagged.
    fallback_priority = ['adj', 'noun', 'verb', 'det', 'pron', 'prep', 'num']

    result = []
    tokens = nltk.word_tokenize(x)
    for token, tag in sp_tagger.tag(tokens):
        lowered = token.lower()
        if is_number(lowered):
            tag = 'num'
        elif tag:
            tag = map_tag(tag.lower())
        else:
            res = list(db.es_lexicon.find({'flexion': lowered}))
            if res:
                for category in fallback_priority:
                    if has_category(category, res):
                        tag = category
                        break
                else:
                    # None of the preferred categories: use the first entry's.
                    tag = get_category(res[0])
            else:
                tag = 'noun'
        result.append((token, tag))

    if len(result) == 1:
        tag = result[0][1]
        if tag not in ['noun', 'verb']:
            # A one-word technique must be a noun or a verb; noun wins unless
            # the lexicon only knows the word as a verb.
            tag = 'noun'
            res = list(db.es_lexicon.find({'flexion': x.lower()}))
            if not has_category('noun', res) and has_category('verb', res):
                tag = 'verb'
        result = [(x, tag)]
    return result

In [24]:
# %%time

# with open('data/spanish_techniques_postags.csv', 'w') as f:
#     writer = csv.writer(
#         f,
#         delimiter=',',
#         quotechar='"',
#         quoting=csv.QUOTE_MINIMAL
#     )
#     for tech in graph_syn.nodes_iter():
#         pos_tag = ' '.join(tag for token, tag in technique_tagger(tech))
#         row = [tech, pos_tag]
#         writer.writerow(row)
        
# CPU times: user 220 ms, sys: 4 ms, total: 224 ms
# Wall time: 1min 22s

In [25]:
# Load the cached tagging results: first column is the technique, second
# column its space-separated tags.
postags = {}
with open('data/spanish_techniques_postags.csv') as f:
    for row in csv.reader(f, delimiter=','):
        technique, tag_string = row[0], row[1]
        postags[technique] = tag_string

In [26]:
def get_postags(x):
    """Return ``[(token, tag), ...]`` for the technique ``x``.

    Tags come from the ``postags`` cache. On a cache miss the technique is
    tagged with ``technique_tagger`` and the space-joined tags are stored in
    ``postags`` for later calls.
    """
    # Explicit membership test instead of a bare ``except``: errors raised by
    # anything other than a missing key are no longer mistaken for a miss.
    if x not in postags:
        postags[x] = ' '.join(tag for token, tag in technique_tagger(x))
    tags = postags[x]
    return list(zip(nltk.word_tokenize(x), nltk.word_tokenize(tags)))

# Example
get_postags('cocción al vacío')

[('cocción', 'noun'), ('al', 'prep'), ('vacío', 'noun')]

# apicultur synonyms

In [27]:
# Collect every token tagged as noun or verb in any technique of the graph.
# nodes() is used instead of nodes_iter(), which no longer exists in
# networkx >= 2.0; nodes() is available in both 1.x and 2.x.
nouns_and_verbs = set()
for tech in graph_syn.nodes():
    for token, tag in get_postags(tech):
        if tag in ['noun', 'verb']:
            nouns_and_verbs.add(token)

In [28]:
len(nouns_and_verbs)

321

In [29]:
# with open('data/apicultur_techniques_synonyms.csv', 'w') as f:
#     writer = csv.writer(
#         f,
#         delimiter=',',
#         quotechar='"',
#         quoting=csv.QUOTE_MINIMAL
#     )
#     base_url = 'https://store.apicultur.com/api/sinonimosporpalabra/1.0.0/'
#     headers = {'Authorization': 'Bearer ' + os.environ['APICULTUR_TOKEN']}  # keep the token out of the notebook
#     count = 0
#     for x in nouns_and_verbs:
#         if x in graph_syn:
#             url = base_url + x
#             response = requests.get(url, headers=headers)
#             if response.text:
#                 js = response.json()
#                 row = [x]
#                 for d in js:
#                     row.append(d['valor'])
#                 writer.writerow(row)
#             count += 1
#             if count % 20 == 0:
#                 time.sleep(65)

In [30]:
# Load the cached synonym rows: first column is the word, the remaining
# columns are its synonyms.
apicultur_syns = {}
with open('data/apicultur_techniques_synonyms.csv') as f:
    for row in csv.reader(f, delimiter=','):
        word, synonyms = row[0], row[1:]
        apicultur_syns[word] = synonyms

In [31]:
len(apicultur_syns)

69

In [32]:
# Three options:

In [33]:
# 1) Some techniques form K-n complete graphs: add the techniques and the relationships

In [34]:
# Graph with an edge between every word and each of its listed synonyms.
apicultur_graph = nx.Graph()
for k, syns in apicultur_syns.items():
    apicultur_graph.add_edges_from((k, syn) for syn in syns)

In [35]:
nx.number_connected_components(apicultur_graph)

35

In [36]:
def is_kn_complete(g):
    """Return True when every pair of distinct nodes of ``g`` is joined by an edge.

    ``g`` only needs to be iterable over its nodes and to provide
    ``has_edge(n1, n2)``. Evaluation stops at the first missing edge.
    """
    return all(
        g.has_edge(n1, n2)
        for n1 in g
        for n2 in g
        if n1 != n2
    )

In [37]:
# Keep the connected components of apicultur_graph that are complete graphs.
# nx.connected_component_subgraphs was removed in networkx 2.4; building the
# subgraph of each component explicitly works on 1.x and 2.x alike. The copy
# keeps each component an independent graph, as the removed helper did.
kn_complete_graphs = []
for component in nx.connected_components(apicultur_graph):
    subg = apicultur_graph.subgraph(component).copy()
    if is_kn_complete(subg):
        kn_complete_graphs.append(subg)

In [38]:
len(kn_complete_graphs)

4

In [39]:
# One (n, m) tuple per unordered pair of nodes of each complete component;
# n is the node that comes first in the component's iteration order.
kn_syns = []
for g in kn_complete_graphs:
    kn_syns.extend(itertools.combinations(g, 2))

In [40]:
len(kn_syns)

4

In [41]:
# Option 1: add both words of every pair to graph_syn (add_node is a no-op
# for existing nodes) and link them.
for n, m in kn_syns:
    add_node(graph_syn, n)
    add_node(graph_syn, m)
    add_edge(graph_syn, n, m)

In [42]:
# 2) Some techniques always co-occur: add the relationships only

In [43]:
# Co-occurrence graph of the synonym rows:
#   node attribute 'count' = number of rows the word appears in
#   edge attribute 'count' = number of rows the two words share
#
# The counts are accumulated in plain dictionaries and attached with keyword
# attributes, which works on networkx 1.x and 2.x (positional attribute
# dictionaries and Graph.node are gone in 2.x).
node_counts = defaultdict(int)
edge_counts = defaultdict(int)
for k in apicultur_syns:
    # De-duplicate the row and drop the headword from its own synonyms;
    # otherwise it would be counted twice for the row and gain a self-loop.
    syns = [s for s in set(apicultur_syns[k]) if s != k]
    syn_set = [k] + syns
    for s in syn_set:
        node_counts[s] += 1
    for s1, s2 in itertools.combinations(syn_set, 2):
        # frozenset: the pair is unordered, (a, b) and (b, a) share a counter.
        edge_counts[frozenset((s1, s2))] += 1

apicultur_cooccurences = nx.Graph()
for s, n_rows in node_counts.items():
    apicultur_cooccurences.add_node(s, count=n_rows)
for pair, n_rows in edge_counts.items():
    s1, s2 = pair
    apicultur_cooccurences.add_edge(s1, s2, count=n_rows)

In [44]:
# Two words are taken as synonyms when they always co-occur: both appear in
# the same number of rows, and share exactly that many rows.
#
# get_node_attributes replaces Graph.node (removed in networkx 2.4) and works
# on 1.x too; the set makes the "reverse pair already recorded" check O(1)
# instead of scanning a list that grows to thousands of pairs.
node_counts = nx.get_node_attributes(apicultur_cooccurences, 'count')
cooccurence_syns = []
seen_pairs = set()
for n in apicultur_cooccurences:
    cn = node_counts[n]
    for m in apicultur_cooccurences[n]:
        cm = node_counts[m]
        ce = apicultur_cooccurences[n][m]['count']
        if cn == cm == ce and (m, n) not in seen_pairs:
            seen_pairs.add((n, m))
            cooccurence_syns.append((n, m))

In [45]:
len(cooccurence_syns)

7917

In [46]:
# Option 2: link only pairs whose two words are both already in graph_syn;
# no new nodes are created here.
for n, m in cooccurence_syns:
    if n in graph_syn and m in graph_syn:
        add_edge(graph_syn, n, m)

In [47]:
# 3) All the techniques in a row are in the graph: then add the relationships only (for rows with fewer than 4 techniques)

In [48]:
# Pair each word with its synonyms when the whole row (word + distinct
# synonyms) has fewer than 4 members and all of them are nodes of graph_syn.
row_syns = []
for k in apicultur_syns:
    syns = list(set(apicultur_syns[k]))
    syn_set = [k] + syns
    # I assume too much noise for 4 and greater
    if len(syn_set) >= 4:
        continue
    if not all(word in graph_syn for word in syn_set):
        continue
    row_syns.extend((k, syn) for syn in syns)

In [49]:
len(row_syns)

8

In [50]:
# Option 3: row_syns only holds words that were found in graph_syn when the
# list was built, so only edges are added.
for n, m in row_syns:
    add_edge(graph_syn, n, m)

In [51]:
len(graph_syn)

343

In [52]:
graph_syn.number_of_edges()

141

In [53]:
nx.number_connected_components(graph_syn)

202

In [54]:
nx.write_gexf(graph_syn, 'data/spanish_techniques_lexicon_2.gexf')

In [55]:
graph_syn = nx.read_gexf('data/spanish_techniques_lexicon_2.gexf')

# Infinitive, gerund, and participle

In [56]:
stemmer = SnowballStemmer('spanish')

In [57]:
def is_infinitivable_word(word_tag):
    """Return True when the tag of the ``(word, tag)`` pair is 'noun' or 'verb'."""
    return word_tag[1] in ('noun', 'verb')

def is_infinitivable_technique(technique):
    """Return True when at least one token of ``technique`` is a noun or a verb."""
    return any(is_infinitivable_word(pair) for pair in get_postags(technique))

def infinitive_noun(word):
    """Return an infinitive derived from the noun ``word``, or ``word`` itself.

    Looks for a lexicon entry tagged 'vmn0000' whose lemma starts with the
    Snowball stem of ``word`` and returns its flexion.
    """
    stem = stemmer.stem(word)
    entry = db.es_lexicon.find_one({'lemma': {'$regex': '^' + stem}, 'eagle': 'vmn0000'})
    return entry['flexion'] if entry else word

def infinitive_verb(word):
    """Return the infinitive of the verb form ``word``, or ``word`` itself.

    Words ending in -ar/-er/-ir are returned unchanged; otherwise the lemma of
    a matching verb entry in the lexicon is used when one exists.
    """
    if word.endswith(('ar', 'er', 'ir')):
        return word

    entry = db.es_lexicon.find_one({'flexion': word, 'eagle': {'$regex': '^v......$'}})
    if not entry:
        return word
    return entry['lemma']

def infinitive_word(word_tag):
    """Return the infinitive for a ``(word, tag)`` pair.

    Only purely alphabetic nouns and verbs are converted; any other word is
    returned unchanged.
    """
    word, tag = word_tag[0], word_tag[1]
    if not (word.isalpha() and is_infinitivable_word(word_tag)):
        return word
    if tag == 'noun':
        return infinitive_noun(word)
    if tag == 'verb':
        return infinitive_verb(word)
    return word

def infinitive_technique(technique):
    """Return ``technique`` with each token replaced by ``infinitive_word`` of it."""
    return ' '.join(infinitive_word(pair) for pair in get_postags(technique))

# Example
infinitive_technique('guiso en olla')

'guisar en olla'

In [58]:
def is_gerundable_word(word_tag):
    """Return True when the tag of the ``(word, tag)`` pair is 'noun' or 'verb'."""
    return word_tag[1] in ('noun', 'verb')

def is_gerundable_technique(technique):
    """Return True when at least one token of ``technique`` is a noun or a verb."""
    return any(is_gerundable_word(pair) for pair in get_postags(technique))

def gerund_noun(word):
    """Return a gerund derived from the noun ``word``, or ``word`` itself.

    Looks for a lexicon entry tagged 'vmg0000' whose lemma starts with the
    Snowball stem of ``word`` and returns its flexion.
    """
    stem = stemmer.stem(word)
    entry = db.es_lexicon.find_one({'lemma': {'$regex': '^' + stem}, 'eagle': 'vmg0000'})
    return entry['flexion'] if entry else word

def gerund_verb(word):
    """Return the gerund of the verb form ``word``, or ``word`` itself.

    Words ending in -iendo/-yendo are returned unchanged. Otherwise the verb
    entry of ``word`` gives the lemma, and the gerund entry of that lemma
    (same two leading tag characters, then 'g0000') gives the result.
    """
    # -ando is ambiguous
    if word.endswith(('iendo', 'yendo')):
        return word

    verb_entry = db.es_lexicon.find_one({'flexion': word, 'eagle': {'$regex': '^v......$'}})
    if not verb_entry:
        return word
    lemma = verb_entry['lemma']
    gerund_eagle = verb_entry['eagle'][:2] + 'g0000'
    gerund_entry = db.es_lexicon.find_one({'lemma': lemma, 'eagle': gerund_eagle})
    if not gerund_entry:
        return word
    return gerund_entry['flexion']

def gerund_word(word_tag):
    """Return the gerund for a ``(word, tag)`` pair.

    Only purely alphabetic nouns and verbs are converted; any other word is
    returned unchanged.
    """
    word, tag = word_tag[0], word_tag[1]
    if not (word.isalpha() and is_gerundable_word(word_tag)):
        return word
    if tag == 'noun':
        return gerund_noun(word)
    if tag == 'verb':
        return gerund_verb(word)
    return word

def gerund_technique(technique):
    """Return ``technique`` with each token replaced by ``gerund_word`` of it."""
    return ' '.join(gerund_word(pair) for pair in get_postags(technique))

# Example
gerund_technique('guisar en olla')

'guisando en olla'

In [59]:
def is_participlable_word(word_tag):
    """Return True when the tag of the ``(word, tag)`` pair is 'noun' or 'verb'."""
    return word_tag[1] in ('noun', 'verb')

def is_participlable_technique(technique):
    """Return True when at least one token of ``technique`` is a noun or a verb."""
    return any(is_participlable_word(pair) for pair in get_postags(technique))

def participle_noun(word):
    """Return a participle derived from the noun ``word``, or ``word`` itself.

    Looks for a lexicon entry tagged 'vmp00sm' whose lemma starts with the
    Snowball stem of ``word`` and returns its flexion.
    """
    stem = stemmer.stem(word)
    entry = db.es_lexicon.find_one({'lemma': {'$regex': '^' + stem}, 'eagle': 'vmp00sm'})
    return entry['flexion'] if entry else word

def participle_verb(word):
    """Return the participle of the verb form ``word``, or ``word`` itself.

    The verb entry of ``word`` gives the lemma, and the 'vmp00sm' entry of
    that lemma gives the result. There is no suffix shortcut because -ado and
    -ido are ambiguous.
    """
    verb_entry = db.es_lexicon.find_one({'flexion': word, 'eagle': {'$regex': '^v......$'}})
    if not verb_entry:
        return word
    participle_entry = db.es_lexicon.find_one({'lemma': verb_entry['lemma'], 'eagle': 'vmp00sm'})
    if not participle_entry:
        return word
    return participle_entry['flexion']

def participle_word(word_tag):
    """Return the participle for a ``(word, tag)`` pair.

    Only purely alphabetic nouns and verbs are converted; any other word is
    returned unchanged.
    """
    word, tag = word_tag[0], word_tag[1]
    if not (word.isalpha() and is_participlable_word(word_tag)):
        return word
    if tag == 'noun':
        return participle_noun(word)
    if tag == 'verb':
        return participle_verb(word)
    return word

def participle_technique(technique):
    """Return ``technique`` with each token replaced by ``participle_word`` of it."""
    return ' '.join(participle_word(pair) for pair in get_postags(technique))

# Example
participle_technique('guisar en olla')

'guisado en olla'

In [60]:
# %%time

# for tech in graph_syn.nodes():
#     if is_infinitivable_technique(tech):
#         inf = infinitive_technique(tech)
#         add_node(graph_syn, inf)
#         add_edge(graph_syn, tech, inf)
#     if is_gerundable_technique(tech):
#         ger = gerund_technique(tech)
#         add_node(graph_syn, ger)
#         add_edge(graph_syn, tech, ger)
#     if is_participlable_technique(tech):
#         par = participle_technique(tech)
#         add_node(graph_syn, par)
#         add_edge(graph_syn, tech, par)
            

# CPU times: user 2.61 s, sys: 112 ms, total: 2.72 s
# Wall time: 3min 59s

CPU times: user 2.51 s, sys: 76 ms, total: 2.58 s
Wall time: 4min 2s


In [61]:
# len(graph_syn)

# 701

701

In [62]:
# graph_syn.number_of_edges()

# 512

512

In [63]:
# nx.number_connected_components(graph_syn)

# 189

189

In [64]:
# nx.write_gexf(graph_syn, 'data/spanish_techniques_lexicon_3.gexf')

In [65]:
graph_syn = nx.read_gexf('data/spanish_techniques_lexicon_3.gexf')

# Normalization

In [66]:
# Numbers
def numbers(x):
    """Spell out every standalone digit 1-9 of ``x`` in Spanish.

    A digit is standalone when it is delimited by single spaces or by the
    start/end of the string, so '2 huevos' becomes 'dos huevos' and repeated
    digits such as 'a 1 1 b' are all converted. Digits inside longer tokens
    ('12', '2kg') and the digit 0 are left untouched, and the spacing of
    ``x`` is preserved exactly.
    """
    spelled = {
        '1': 'uno',
        '2': 'dos',
        '3': 'tres',
        '4': 'cuatro',
        '5': 'cinco',
        '6': 'seis',
        '7': 'siete',
        '8': 'ocho',
        '9': 'nueve',
    }
    # Splitting on a single space (not on any whitespace) and joining with the
    # same separator reproduces the original spacing character for character.
    return ' '.join(spelled.get(token, token) for token in x.split(' '))

# Accent marks on vowels - {'á', 'ã', 'ç', 'è', 'é', 'ê', 'í', 'ñ', 'ò', 'ó', 'ú', 'ü', 'ō'}
def accent_marks(x):
    """Return ``x`` with the accented vowels á ã è é ê í ò ó ō ú ü replaced by their plain vowel."""
    plain_vowels = str.maketrans('áãèéêíòóōúü', 'aaeeeiooouu')
    return x.translate(plain_vowels)

# Non-ascii consonants - {'á', 'ã', 'ç', 'è', 'é', 'ê', 'í', 'ñ', 'ò', 'ó', 'ú', 'ü', 'ō'}
def nonascii_consonants(x):
    """Return ``x`` with 'ç' replaced by 'c' and 'ñ' replaced by 'n'."""
    ascii_consonants = str.maketrans('çñ', 'cn')
    return x.translate(ascii_consonants)
    
# Dashes (-)
def dashes1(x):
    """Return ``x`` with every '-' turned into a space."""
    return x.translate({ord('-'): ' '})

def dashes2(x):
    """Return ``x`` with every '-' removed."""
    return x.translate({ord('-'): None})

# POS tags
# ADJETIVOS .... A ADJ ...... X
# ADVERBIOS .... R ADV
# DETERMINANTES  D DET
# NOMBRES ...... N NOUN ..... X
# VERBOS ....... V VERB ..... X
# PRONOMBRES ... P PRON
# CONJUNCIONES . C CONJ
# INTERJECCIONES I INTERJ
# PREPOSICIONES  S PREP
# PUNTUACIÓN ... F PUNCTUATION
# NUMERALES .... Z NUM ...... X
# FECHAS Y HORAS W DATE-TIME
def pos_tags(x):
    """Return ``x`` reduced to its adjective, noun, verb and numeral tokens, space-joined."""
    kept_tags = ['adj', 'noun', 'verb', 'num']
    kept_tokens = []
    for token, tag in get_postags(x):
        if tag in kept_tags:
            kept_tokens.append(token)
    return ' '.join(kept_tokens)

def itself(x):
    """Identity step: return ``x`` unchanged."""
    return x

funcs = [itself, pos_tags, numbers, accent_marks, nonascii_consonants, dashes1, dashes2]
# Every non-empty subset of funcs, as tuples that keep the order of funcs,
# smallest subsets first.
combinations = [
    c
    for i in range(1, len(funcs) + 1)
    for c in itertools.combinations(funcs, i)
]

# def normalize(technique): # original time consuming version
#     result = set()
#     for c in combinations:
#         x = technique
#         for f in c:
#             x = f(x)
#         result.add(x)
#     return result

def normalize(technique, cache=None):  # memoized version
    """Return the set of normalized variants of ``technique``.

    Every tuple of functions in ``combinations`` is applied to ``technique``
    in order and the final string of each chain is collected.

    Intermediate results are memoized per function name and input string in
    ``cache``. When ``cache`` is omitted the module-level ``d`` is used, and
    created if it does not exist yet, so the function no longer raises
    ``NameError`` on a fresh kernel. Missing per-function buckets are created
    on demand, so a plain ``dict`` (such as one loaded from the pickle) works
    as well as a ``defaultdict``.
    """
    if cache is None:
        cache = globals().setdefault('d', defaultdict(dict))
    result = set()
    for c in combinations:
        x = technique
        for f in c:
            memo = cache.setdefault(f.__name__, {})
            if x not in memo:
                memo[x] = f(x)
            x = memo[x]
        result.add(x)
    return result

In [67]:
len([list(map(lambda x: x.__name__, c)) for c in combinations])

127

In [68]:
# d = defaultdict(dict)

# or

# with open('data/spanish_techniques_normalization.pickle', 'rb') as f:
#     d = pickle.load(f)

In [69]:
# %%time

# for tech in graph_syn.nodes():
#     if len(nltk.word_tokenize(tech)) < 4:
#         norms = normalize(tech)
#         for norm in norms:
#             add_node(graph_syn, norm)
#             add_edge(graph_syn, tech, norm)

# d = dict(d)

# CPU times: user 920 ms, sys: 28 ms, total: 948 ms
# Wall time: 1min 25s

CPU times: user 184 ms, sys: 0 ns, total: 184 ms
Wall time: 186 ms


In [70]:
# with open('data/spanish_techniques_normalization.pickle', 'wb') as f:
#     pickle.dump(d, f)

In [71]:
# len(graph_syn)

# 964

964

In [72]:
# graph_syn.number_of_edges()

# 783

783

In [73]:
# nx.number_connected_components(graph_syn)

# 181

181

In [74]:
# nx.write_gexf(graph_syn, 'data/spanish_techniques_lexicon_4.gexf')

In [75]:
graph_syn = nx.read_gexf('data/spanish_techniques_lexicon_4.gexf')

# Combinations

In [76]:
def my_ngrams(technique):
    """Return every n-gram of ``technique`` as a space-joined string.

    N-grams are listed by increasing size (1 up to the number of tokens) and,
    within a size, in the order they occur.
    """
    tokens = nltk.word_tokenize(technique)
    return [
        ' '.join(gram)
        for size in range(1, len(tokens) + 1)
        for gram in ngrams(tokens, size)
    ]

In [77]:
# Histogram: number of tokens -> how many techniques have that many tokens.
# nodes() is used instead of nodes_iter(), which no longer exists in
# networkx >= 2.0; nodes() is available in both 1.x and 2.x.
lengths = defaultdict(int)
for tech in graph_syn.nodes():
    lengths[len(nltk.word_tokenize(tech))] += 1
lengths = dict(lengths)

In [78]:
lengths

{1: 502, 2: 241, 3: 179, 4: 34, 5: 8}

In [79]:
def ngram_combinations(technique):
    """Return every way of splitting ``technique`` into consecutive n-grams.

    Each result is a list of space-joined n-grams whose concatenation (joined
    with single spaces) equals ``technique``. Results are ordered by number of
    parts, then by the position of the cuts, e.g. for 'olla a presión':

        [['olla a presión'],
         ['olla', 'a presión'],
         ['olla a', 'presión'],
         ['olla', 'a', 'presión']]

    The segmentations are generated directly by choosing cut positions
    between tokens (2**(n-1) results for n tokens) instead of filtering the
    permutations of all n-grams, whose number grows factorially. When the
    tokens joined by single spaces do not reproduce ``technique`` no split
    can match and an empty list is returned.
    """
    tokens = nltk.word_tokenize(technique)
    if ' '.join(tokens) != technique:
        return []

    n_tokens = len(tokens)
    combs = []
    for n_parts in range(1, n_tokens + 1):
        # A split into n_parts pieces is defined by n_parts - 1 cut positions.
        for cuts in itertools.combinations(range(1, n_tokens), n_parts - 1):
            bounds = (0,) + cuts + (n_tokens,)
            combs.append([
                ' '.join(tokens[start:end])
                for start, end in zip(bounds, bounds[1:])
            ])
    return combs

# Example
ngram_combinations('olla a presión')

[['olla a presión'],
 ['olla', 'a presión'],
 ['olla a', 'presión'],
 ['olla', 'a', 'presión']]

In [80]:
def comb_syns(expr, syn_dict):
    """Return the distinct expressions obtained by swapping n-grams of ``expr`` for synonyms.

    For every split of ``expr`` given by ``ngram_combinations``, each n-gram
    is replaced by each of its alternatives in ``syn_dict`` and the pieces
    are joined with single spaces. ``syn_dict`` must have an entry for every
    n-gram that appears in a split.
    """
    res = set()
    for ngrms in ngram_combinations(expr):
        alternatives = [syn_dict[ngrm] for ngrm in ngrms]
        res.update(' '.join(choice) for choice in product(*alternatives))
    return list(res)

# Example
expr= 'olla a presión'
syn_dict = {
    'olla': ['olla', 'cacerola'],
    'a': ['a'],
    'presión': ['presión'],
    'olla a': ['olla a'],
    'a presión': ['presionada'],
    'olla a presión': ['olla a presión'],
}
comb_syns(expr, syn_dict)

['olla presionada',
 'cacerola presionada',
 'olla a presión',
 'cacerola a presión']

In [81]:
def create_syn_dict(ngrms):
    """Return a dict mapping each n-gram to a new set containing only itself."""
    return {ngrm: {ngrm} for ngrm in ngrms}

create_syn_dict(my_ngrams('olla a presión'))

{'a': {'a'},
 'a presión': {'a presión'},
 'olla': {'olla'},
 'olla a': {'olla a'},
 'olla a presión': {'olla a presión'},
 'presión': {'presión'}}

In [82]:
def max_size_synset(synset):
    """Return the token count of the longest technique in ``synset``.

    Raises ``IndexError`` when ``synset`` is empty.
    """
    token_counts = sorted(
        (len(nltk.word_tokenize(technique)) for technique in synset),
        reverse=True,
    )
    return token_counts[0]

In [83]:
%%time

# Expand each technique by swapping its n-grams for synonyms already known
# to the graph. The components are copied into a list first because the loop
# body adds nodes and edges to graph_syn.
for syns1 in list(nx.connected_components(graph_syn)):
    max_size = max_size_synset(syns1)
    # Skip synonym sets that contain a technique of 4 or more tokens.
    if max_size < 4:
        for tech in syns1:
            ngrms = my_ngrams(tech)
            # Every n-gram starts with itself as its only alternative.
            syn_dict = create_syn_dict(ngrms)
            for ngrm in ngrms:
                # An n-gram that is a node outside the current synonym set
                # contributes its whole connected component as alternatives.
                # The component is read from the graph as modified so far.
                if ngrm in graph_syn and ngrm not in syns1:
                    syns2 = nx.node_connected_component(graph_syn, ngrm)
                    syn_dict[ngrm] = syn_dict[ngrm].union(syns2)
            syn_combs = comb_syns(tech, syn_dict)
            # Each generated variant becomes a node linked to the technique.
            for syn_tech in syn_combs:
                add_node(graph_syn, syn_tech)
                add_edge(graph_syn, tech, syn_tech)

# CPU times: user 460 ms, sys: 0 ns, total: 460 ms
# Wall time: 456 ms

CPU times: user 460 ms, sys: 0 ns, total: 460 ms
Wall time: 456 ms


In [84]:
len(graph_syn)

1351

In [85]:
graph_syn.number_of_edges()

1170

In [86]:
nx.number_connected_components(graph_syn)

181

In [87]:
nx.write_gexf(graph_syn, 'data/spanish_techniques_lexicon_5.gexf')