In [7]:
from sagas.nlu.ruleset_procs import get_verb_domain, parse_sents

# Alternative English input for the same sentence; uncomment these two lines
# (and comment out the Portuguese pair below) to parse it instead.
# sents="A spider has four right eyes."
# lang='en'

# Portuguese sample sentence and its language code.
sents="Uma aranha tem quatro olhos direitos."
lang='pt'

# Request payload handed to parse_sents; presumably 'engine' picks the parsing
# backend -- confirm against sagas.nlu.ruleset_procs.
data = {'lang': lang, "sents": sents, 'engine': 'corenlp'}
# parse_sents returns a pair; only the first element is used in this cell.
doc_jsonify, resp = parse_sents(data)
# v_domains is read again by the next cell (v_domains[0]['obj'][0]).
v_domains=get_verb_domain(doc_jsonify)
# Bare last expression: shown as the cell's output.
v_domains

[{'index': '3',
  'text': 'tem',
  'lemma': 'ter',
  'upos': 'VERB',
  'xpos': '_',
  'feats': 'Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin',
  'governor': 0,
  'dependency_relation': 'root',
  'punct': [{'index': '7',
    'text': '.',
    'lemma': '.',
    'upos': 'PUNCT',
    'xpos': '_',
    'feats': '_',
    'governor': 3,
    'dependency_relation': 'punct'}],
  'obj': [{'index': '5',
    'text': 'olhos',
    'lemma': 'olho',
    'upos': 'NOUN',
    'xpos': '_',
    'feats': 'Gender=Masc|Number=Plur',
    'governor': 3,
    'dependency_relation': 'obj',
    'nummod': [{'index': '4',
      'text': 'quatro',
      'lemma': 'quatro',
      'upos': 'NUM',
      'xpos': '_',
      'feats': 'NumType=Card',
      'governor': 5,
      'dependency_relation': 'nummod'}],
    'amod': [{'index': '6',
      'text': 'direitos',
      'lemma': 'direito',
      'upos': 'ADJ',
      'xpos': '_',
      'feats': 'Gender=Masc|Number=Plur',
      'governor': 5,
      'dependency_relation': 'a

In [15]:
# Take the first 'obj' entry of the first verb domain produced by the cell above.
obj=v_domains[0]['obj'][0]
# Collect (key, ["lemma_upos", ...]) pairs for every list-valued entry of that
# token; the non-list entries (text, lemma, upos, ...) are skipped.
comps=[]
for k,c in obj.items():
    if isinstance(c, list):
        comps.append((k, [f"{citem['lemma']}_{citem['upos'].lower()}" for citem in c]))
# Render each pair as "key:tag1,tag2".
comps_str=[f"{c[0]}:{','.join(c[1])}" for c in comps]
# Cell output: the token's own tag followed by the list of rendered pairs
# (the list is interpolated as-is, so its Python repr appears in the string).
f"{obj['lemma']}_{obj['upos'].lower()} {comps_str}"

"olho_noun ['amod:direito_adj', 'nummod:quatro_num']"

In [2]:
def print_domains(sents, lang):
    """Parse ``sents`` and pretty-print its verb domains.

    A request carrying the sentence, the language code ``lang`` and the
    'corenlp' engine is passed to ``parse_sents``; the first element of the
    returned pair goes to ``get_verb_domain`` and the result is pretty-printed
    to standard output. Nothing is returned.
    """
    import pprint as pretty

    request = dict(lang=lang, sents=sents, engine='corenlp')
    document, _ = parse_sents(request)
    pretty.pprint(get_verb_domain(document))

# Japanese sample sentence, passed with language code 'ja'.
print_domains("クモは4つの右の目をしています。", 'ja')

.. request is {'lang': 'ja', 'sents': 'クモは4つの右の目をしています。', 'engine': 'corenlp', 'pipelines': ['predicts']}
[{'aux': [{'dependency_relation': 'aux',
           'feats': '_',
           'governor': 10,
           'index': '12',
           'lemma': 'いる',
           'text': 'い',
           'upos': 'AUX',
           'xpos': '_'},
          {'dependency_relation': 'aux',
           'feats': '_',
           'governor': 10,
           'index': '13',
           'lemma': 'ます',
           'text': 'ます',
           'upos': 'AUX',
           'xpos': '_'}],
  'dependency_relation': 'root',
  'feats': '_',
  'governor': 0,
  'index': '10',
  'lemma': 'する',
  'mark': [{'dependency_relation': 'mark',
            'feats': '_',
            'governor': 10,
            'index': '11',
            'lemma': 'て',
            'text': 'て',
            'upos': 'SCONJ',
            'xpos': '_'}],
  'nsubj': [{'case': [{'dependency_relation': 'case',
                       'feats': '_',
                       'govern

In [6]:
def digest(sents, lang, comp='obj'):
    """Summarise one dependent of the first verb domain as a short string.

    The sentence is parsed through ``parse_sents`` (engine 'corenlp') and
    ``get_verb_domain``. Only the first verb domain is inspected, and only the
    first token listed under ``comp`` in it.

    Args:
        sents: Sentence text to parse.
        lang: Language code sent with the request (e.g. 'pt', 'en', 'ja').
        comp: Key to look up in the first verb domain; defaults to 'obj'.

    Returns:
        ``"<lemma>_<upos> [<key>:<lemma>_<upos>,...]"`` where the bracketed
        part is the Python repr of a list with one entry per list-valued key
        of the selected token. Returns ``""`` when there is no verb domain,
        when ``comp`` is absent, or when the value under ``comp`` is not a
        non-empty list.
    """
    data = {'lang': lang, "sents": sents, 'engine': 'corenlp'}
    doc_jsonify, _ = parse_sents(data)
    v_domains = get_verb_domain(doc_jsonify)
    if not v_domains:
        return ""

    candidates = v_domains[0].get(comp)
    # Scalar attributes ('lemma', 'upos', ...) share the dict with the
    # list-valued entries; indexing one of those (or an empty list) used to
    # raise instead of yielding the empty digest.
    if not isinstance(candidates, list) or not candidates:
        return ""

    def tag(token):
        # "<lemma>_<lowercased upos>" label for a single token dict.
        return f"{token['lemma']}_{token['upos'].lower()}"

    head = candidates[0]
    comps_str = [
        f"{rel}:{','.join(tag(child) for child in kids)}"
        for rel, kids in head.items()
        if isinstance(kids, list)
    ]
    # comps_str is interpolated as a list on purpose: callers see its repr.
    return f"{tag(head)} {comps_str}"

# The same sentence in Portuguese, English and Japanese, followed by a second
# Portuguese sentence; each call uses the default comp='obj'.
print(digest("Uma aranha tem quatro olhos direitos.", 'pt'))
print(digest("A spider has four right eyes.", 'en'))
print(digest("クモは4つの右の目をしています。", 'ja'))
print(digest("Ela lava os braços do macaco.", "pt"))

olho_noun ['nummod:quatro_num', 'amod:direito_adj']
eye_noun ['nummod:four_num', 'amod:right_adj']
目_noun ['case:を_adp', 'nmod:右_noun']
.. request is {'lang': 'pt', 'sents': 'Ela lava os braços do macaco.', 'engine': 'corenlp', 'pipelines': ['predicts']}
braço_noun ['det:o_det', 'nmod:macaco_noun']


In [22]:
from sagas.nlu.uni_remote import dep_parse
from sagas.nlu.corenlp_parser import get_chunks

# dep_parse returns a pair; the second element is discarded here.
doc,_=dep_parse("クモは4つの右の目をしています。", 'ja', 'corenlp')
# Chunked view of the same Japanese sentence, for comparison with the
# get_verb_domain output printed earlier.
rs = get_chunks(doc)
# Bare last expression: shown as the cell's output.
rs

.. request is {'lang': 'ja', 'sents': 'クモは4つの右の目をしています。', 'engine': 'corenlp', 'pipelines': []}


[{'type': 'verb_domains',
  'word': 'し',
  'lemma': 'する',
  'index': '10',
  'upos': 'verb',
  'xpos': '_',
  'rel': 'root',
  'governor': 0,
  'domains': [('nsubj', '1', 'クモ', 'クモ', ['クモ', 'は'], ['c_noun']),
   ('obj', '8', '目', '目', ['4', 'つ', 'の', '右', 'の', '目', 'を'], ['c_noun']),
   ('mark', '11', 'て', 'て', ['て'], ['c_sconj']),
   ('aux', '12', 'い', 'いる', ['い'], ['c_aux']),
   ('aux', '13', 'ます', 'ます', ['ます'], ['c_aux']),
   ('punct', '14', '。', '。', ['。'], ['c_punct'])],
  'stems': [('nsubj', ['クモ', 'は']),
   ('obj', ['4', 'つ', 'の', '右', 'の', '目', 'を']),
   ('mark', ['て']),
   ('aux', ['いる']),
   ('aux', ['ます']),
   ('punct', ['。'])]}]

In [21]:
import logging
from sagas.nlu.ruleset_procs import group_by, children

# getLogger() without a name returns the root logger; lowering its level to
# DEBUG lets the logging.debug(...) calls in get_root_domain show up.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

def get_root_domain(sent_p):
    """Build a nested description of the sentence root and its dependents.

    The root is the first word in ``sent_p.words`` whose
    ``dependency_relation`` is 'root' or 'hed'. Every word governed directly
    by the root is expanded with its own children (via ``children`` and
    ``group_by``), and those expanded words are then grouped onto the root.

    Args:
        sent_p: Parsed sentence exposing ``words``; each word is read for
            ``dependency_relation``, ``index``, ``text``, ``upos``,
            ``governor`` and ``ctx`` (a dict of the word's attributes).

    Returns:
        A one-element list holding the root's ``ctx`` merged with the grouped
        dependents.

    Raises:
        ValueError: If no word carries a 'root' or 'hed' relation. A bare
            ``next()`` would leak StopIteration here, which is silently
            swallowed by enclosing for-loops and turned into RuntimeError
            inside generators.
    """
    root = next(
        (w for w in sent_p.words if w.dependency_relation in ('root', 'hed')),
        None,
    )
    if root is None:
        raise ValueError("sentence has no word with a 'root' or 'hed' dependency relation")
    logging.debug("root: %s, %s(%s)", root.index, root.text, root.upos)
    # word.index is a string while word.governor is compared as an int.
    root_idx = int(root.index)

    domains = []
    for word in sent_p.words:
        if word.governor != root_idx:
            continue
        logging.debug("%s: %s", word.dependency_relation, word.text)
        child_ctxs = [w.ctx for w in children(word, sent_p)]
        domains.append({**word.ctx, **group_by(child_ctxs)})

    # Kept as a list so callers can keep indexing the result with [0].
    return [{**root.ctx, **group_by(domains)}]

def print_root_domains(sents, lang, comps):
    """Pretty-print the root domain of a sentence, then a short digest of it.

    The sentence is parsed through ``parse_sents`` (engine 'corenlp') and
    ``get_root_domain``; the full structure is pretty-printed first. A
    ``* <lemma>_<upos>`` line for the root follows, and then one
    ``** <comp>:<lemma>_<upos> [...]`` line for each requested key that holds
    a non-empty list on the root. Only the first token under each key is
    described.

    Args:
        sents: Sentence text to parse.
        lang: Language code sent with the request.
        comps: Iterable of keys to report from the root domain, e.g.
            ``['compound', 'amod']``. Keys that are missing, or whose value is
            not a non-empty list (such as scalar attributes like 'lemma'), are
            skipped.
    """
    from pprint import pprint

    data = {'lang': lang, "sents": sents, 'engine': 'corenlp'}
    doc_jsonify, _ = parse_sents(data)
    domains = get_root_domain(doc_jsonify)
    pprint(domains)

    def tag(token):
        # "<lemma>_<lowercased upos>" label for a single token dict.
        return f"{token['lemma']}_{token['upos'].lower()}"

    root = domains[0]
    print(f"* {tag(root)}")
    for comp in comps:
        dependents = root.get(comp)
        if not isinstance(dependents, list) or not dependents:
            continue
        head = dependents[0]
        # Separate name on purpose: the original rebound `comps` here, while
        # still iterating over the `comps` argument.
        details = [
            f"{rel}:{','.join(tag(child) for child in kids)}"
            for rel, kids in head.items()
            if isinstance(kids, list)
        ]
        print(f"** {comp}:{tag(head)} {details}")

# Indonesian sample sentence; English gloss: "Our blackboard is still new."
# Reports the 'compound' and 'amod' entries found on the root.
print_root_domains('Papan tulis kami masih baru.', 'id', ['compound', 'amod'])

DEBUG:root:root: 1, Papan(NOUN)
DEBUG:root:compound: tulis
DEBUG:root:amod: baru
DEBUG:root:punct: .


[{'amod': [{'advmod': [{'dependency_relation': 'advmod',
                        'feats': '_',
                        'governor': 5,
                        'index': '4',
                        'lemma': 'masih',
                        'text': 'masih',
                        'upos': 'ADV',
                        'xpos': 'D--'}],
            'dependency_relation': 'amod',
            'feats': 'Degree=Pos|Number=Sing',
            'governor': 1,
            'index': '5',
            'lemma': 'baru',
            'text': 'baru',
            'upos': 'ADJ',
            'xpos': 'ASP'}],
  'compound': [{'dependency_relation': 'compound',
                'det': [{'dependency_relation': 'det',
                         'feats': 'Clusivity=Ex|Number=Plur|Person=1|PronType=Prs',
                         'governor': 2,
                         'index': '3',
                         'lemma': 'kami',
                         'text': 'kami',
                         'upos': 'PRON',
                