# Diccionario escolar

A full entry consists of the following fields in the following order:

1. Citation Form
2. Gram. info (Abbrev. spn)
3. Glosses (Gae)
4. Variant Form (Iqu) [if present]

A minimal entry is a reduced entry that refers back to full entry, and consists of the following fields in the following order:

1. Citation Form
2. Variant Type (Reverse name, spn)
3. Variant of


In [2]:
import xml.etree.ElementTree as ET

In [160]:
infile = 'iqu_flex_export/FLEx_LIFT_20181017.lift.xml'
#reversaldict = 'iqu_ped_dict_rev_17apr2019.xhtml'
ns = {'default': 'http://www.w3.org/1999/xhtml'}

In [164]:
tree = ET.parse(infile)
root = tree.getroot()

entries = root.findall('entry')

In [210]:
def nodetext(node):
    '''Return all text found in node as a string.'''
    return ''.join(list(node.itertext()))

def get_headword(e):
    '''Return an entry's headword. Throw an error if entry's headword fields are missing
    or empty.'''
    try:
        hdwd = nodetext(e.find('citation/form[@lang="iqu"]/text')).strip()
        assert(hdwd is not None)
    except (AttributeError, AssertionError):
        hdwd = nodetext(e.find('lexical-unit/form[@lang="iqu"]/text')).strip()
        assert(hdwd is not None)
    return hdwd

def glosses2tex(e, lang):
    '''Return senses in latex format.'''
    tex = ''
    senses = e.findall('sense')
    for idx, s in enumerate(senses):
        tex += '  \\glosses{'
        if len(senses) > 1:
            tex += '{:d}. '.format(idx + 1)
        tex += '\n'
        try:
            glosses = s.findall('gloss[@lang="' + lang + '"]/text')
            for gloss in glosses:
                tex += '    \\gloss{' + ''.join(gloss.itertext()) + '}\n'
        except (AttributeError, TypeError):
            pass
#        try:
#            ginfo = s.find('grammatical-info').attrib['value']
#            tex += '    \\grammaticalinfo{' + ginfo + '}\n'
#        except AttributeError:
#            pass
#        tex += simplefield2tex(s, 'grammaticalnote', 'note[@type="grammar"]/form[@lang="eng"]/text', level=2)
#        tex += examples2tex(s)
        tex += '  }\n'
    return tex

def get_glosses(e):
    nodes = e.findall('.//default:span[@lang="ga"]', ns)
    if len(nodes) == 0:
        gl = ''
    elif len(nodes) == 1:
        gl = r'\gloss{ ' + nodetext(nodes[0]) + ' } '
    elif len(nodes) > 1:
        gl = [r'\gloss{ ' + str(idx+1) + '. ' + nodetext(n) + ' } ' for idx, n in enumerate(nodes)] 
        gl = ' '.join(gl)
    return gl

def get_variants(e, root):
    eid = '#' + e.attrib['id']
    variants = root.findall('.//default:div[@class="minorentryvariant"]//default:span[@class="referencedentry"]//default:a[@href="{:}"]'.format(eid), ns)
    # TODO: from each variant, go up to <div and then down to span[@class="headword"] to find content for variant
    if len(variants) == 0:
        vstr = ''
    elif len(variants) == 1:
        vstr = r'\variants{ Variante: ' + nodetext(variants[0]) + '} '
    else:
        vstr = r'\variants{ Variantes: ' + ', '.join([nodetext(n) for n in variants]) + '} '
    return vstr

def entry2latex(e):
    tex = '\entry{\n'
    tex += '\headword{' + get_headword(e) + '}\n'
    tex += glosses2tex(e, lang='ga')
    return tex
#    gram = e.find('.//default:span[@class="morphosyntaxanalysis"]//default:span[@lang="es"]', ns)
#    glosses = get_glosses(e)
#variants = get_variants(e, root)
#    ltx = \
#        r'\entry{ ' \
#        + r'\hdwd{ ' + nodetext(hdwd) + '} ' \
#        + r'\gram{ ' + nodetext(gram) + '} ' \
#        + glosses \
#        + variants \
#        + '} '
#    return ltx
        
def minorentry2latex(e):
    hdwd = e.find('.//default:span[@class="headword"]', ns)
    variant = e.find('.//default:span[@class="referencedentry"]//default:span[@lang="iqu"]', ns)
    ltx = \
        r'\entry{ ' \
        + r'\hdwd{ ' + nodetext(hdwd) + '} ' \
        + r'\variant{ Ver: ' + nodetext(variant) + '} ' \
        + '} '
    return ltx

In [211]:
for e in entries:
    ltx = entry2latex(e)
    print(ltx)

\entry{
\headword{=karíjata}
  \glosses{
    \gloss{vista}
  }

\entry{
\headword{kuupɨkiiraata}
  \glosses{
    \gloss{entre dos no más}
  }

\entry{
\headword{parikɨɨtáani}
  \glosses{
    \gloss{estar.cerca.al.suelo}
  }

\entry{
\headword{anijákwaa}
  \glosses{
    \gloss{grande, para algo que normalmente sería más pequeño, por ejemplo una gallina}
  }

\entry{
\headword{tiitiwa}
  \glosses{
    \gloss{pie.pl}
  }

\entry{
\headword{maájarina}
  \glosses{
  }

\entry{
\headword{kwaata}
  \glosses{1. 
    \gloss{claramente}
  }
  \glosses{2. 
    \gloss{claro}
  }

\entry{
\headword{sakana}
  \glosses{
    \gloss{carahuasca, clase de árboles en general. PL: sakanaa}
  }

\entry{
\headword{yaa}
  \glosses{
    \gloss{NOUN}
  }

\entry{
\headword{jaátaraatina}
  \glosses{
    \gloss{que.clase}
  }

\entry{
\headword{kaaya nikiiti}
  \glosses{
  }

\entry{
\headword{purisíiya}
  \glosses{
    \gloss{policia}
  }

\entry{
\headword{faltaasíini}
  \glosses{
    \gloss{faltar.respeto}
  }

In [194]:
e = root.find('entry[@guid="531fc799-4cf2-4bd6-b331-ec17612d4cc4"]')
nodetext(e.find('citation/form[@lang="iqu"]/text')) #/form[@lang="iqu"]/text')
get_headword(e)

'ajanɨ́ɨni'

## Reversal dictionary

The entries in the  'Reversals' section is relatively simple, and consists of the following fields:

1. Reversal Entries (Spn)
2. Gram. info
3. Citation Form

In [83]:
tree = ET.parse(reversaldict)
root = tree.getroot()

entries = root.findall('.//default:div[@class="reversalindexentry"]', ns)

In [85]:
for e in entries:
    hdwd = e.find('default:span[@lang="es"]', ns)
    gram = e.find('.//default:span[@class="ownertype_abbreviation"]', ns)
    gloss = e.find('.//default:span[@lang="ga"]', ns)
    # Variants???
    print(
        '\entry{',
        '\hdwd{', nodetext(hdwd), '}',
        '\gram{', nodetext(gram), '}',
        '\gloss{', nodetext(gloss), '}',
        '}'
    )

\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  } \gram{  } \gloss{  } }
\entry{ \hdwd{  