In [1]:
from sagas.nlu.nlu_tools import vis_tree
from sagas.nlu.ruleset_procs import cached_chunks
# Parse one Spanish sentence through cached_chunks (stanza engine), then
# draw the first root domain with translations switched on.
sentence = 'Nosotros estamos en la escuela.'
chunks = cached_chunks(sentence, source='es', engine='stanza')
ds = chunks['root_domains'][0]
vis_tree(ds, 'es', trans=True)

root: estamos(estar; we're, verb, 2)
├── punct: .(_, punct, 6)
├── nsubj: Nosotros(yo; We, pron, 1)
└── obl: escuela(escuela; school, noun, 5)
    ├── det: la(el; the, det, 4)
    └── case: en(en; in, adp, 3)


In [2]:
# Dump the parsed document as a list of per-word dicts
# (index, text, lemma, upos, xpos, feats, governor, dependency_relation, ...).
chunks['doc'].as_json

[{'index': 1,
  'text': 'Nosotros',
  'lemma': 'yo',
  'upos': 'PRON',
  'xpos': 'PRON',
  'feats': 'Case=Acc,Nom|Gender=Masc|Number=Plur|Person=1|PronType=Prs',
  'governor': 2,
  'dependency_relation': 'nsubj',
  'entity': ['O'],
  'segments': []},
 {'index': 2,
  'text': 'estamos',
  'lemma': 'estar',
  'upos': 'VERB',
  'xpos': 'VERB',
  'feats': 'Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin',
  'governor': 0,
  'dependency_relation': 'root',
  'entity': ['O'],
  'segments': []},
 {'index': 3,
  'text': 'en',
  'lemma': 'en',
  'upos': 'ADP',
  'xpos': 'ADP',
  'feats': 'AdpType=Prep',
  'governor': 5,
  'dependency_relation': 'case',
  'entity': ['O'],
  'segments': []},
 {'index': 4,
  'text': 'la',
  'lemma': 'el',
  'upos': 'DET',
  'xpos': 'DET',
  'feats': 'Definite=Def|Gender=Fem|Number=Sing|PronType=Art',
  'governor': 5,
  'dependency_relation': 'det',
  'entity': ['O'],
  'segments': []},
 {'index': 5,
  'text': 'escuela',
  'lemma': 'escuela',
  'upos': 'NOUN',


In [3]:
from anytree.node.nodemixin import NodeMixin
from anytree.node.util import _repr
from sagas.nlu.uni_intf import SentenceIntf, WordIntf, RootWordImpl
from sagas.nlu.features import feats_map
class Token(object):
    """Pairs a parsed word with the display name used for its tree node."""

    def __init__(self, tok:WordIntf):
        # Keep the raw word object around for later inspection.
        self.tok = tok
        # A missing token (the placeholder node) is named '_'; a real token
        # is named after its dependency relation.
        if tok is None:
            self.name = '_'
        else:
            self.name = tok.dependency_relation
class AnalNode(NodeMixin, Token):
    """Tree node whose attributes mirror the fields of one parsed word.

    Extra keyword arguments become plain attributes. When a token is given,
    the entries of ``tok.ctx`` are copied onto the node as well (overriding
    same-named keyword arguments) and ``feats`` is replaced by the mapping
    produced by ``feats_map``.
    """

    def __init__(self, tok, parent=None, children=None, **kwargs):
        super().__init__(tok)
        attrs = vars(self)
        attrs.update(kwargs)
        if tok:
            # Token fields are applied after kwargs, so they win on conflicts.
            attrs.update(tok.ctx)
            self.feats = feats_map(tok.feats)
        # Assigning parent/children hooks the node into the tree.
        self.parent = parent
        if children:
            self.children = children

    def __repr__(self):
        return _repr(self)

# Wrap the first parsed word in an AnalNode to inspect which attributes the
# node picks up from the token.
words=chunks['doc'].words
root = AnalNode(words[0])
# NOTE: despite the variable name, words[0] is the `nsubj` token
# ('Nosotros'), not the sentence root -- see the repr printed below.
root

AnalNode(dependency_relation='nsubj', entity=['O'], feats={'Case': 'Acc,Nom', 'Gender': 'Masc', 'Number': 'Plur', 'Person': '1', 'PronType': 'Prs'}, governor=2, index=1, lemma='yo', name='nsubj', segments=[], text='Nosotros', tok=<JsonifyWordImpl index=1;text=Nosotros;lemma=yo;upos=PRON;xpos=PRON;feats=Case=Acc,Nom|Gender=Masc|Number=Plur|Person=1|PronType=Prs;governor=2;dependency_relation=nsubj>, upos='PRON', xpos='PRON')

In [4]:
from anytree import Node, RenderTree, AsciiStyle, Walker, Resolver

# Index every word by its position; key 0 holds a token-less placeholder so
# that the sentence root (governor == 0) has a parent to attach to.
node_map={word.index:AnalNode(word) for word in words}
node_map[0]=AnalNode(None)
# The placeholder never receives a `governor` attribute (its tok is None), so
# read the attribute defensively instead of relying on the real root being
# visited before the placeholder.
tree_root=next(w for w in node_map.values() if getattr(w, 'governor', None)==0)
def set_parent(w):
    """Attach `w` beneath the node registered for its token's governor.

    Nodes without a token (the placeholder) are left untouched.
    """
    if w.tok:
        w.parent=node_map[w.tok.governor]
# Plain loop instead of list(map(...)): the calls matter only for their side
# effect, so there is no reason to build a throwaway list of None values.
for w in node_map.values():
    set_parent(w)
# by_attr('name') would print only the relation labels; the lambda adds the
# surface text of each word as well.
print(RenderTree(tree_root, style=AsciiStyle()).by_attr(lambda n: f"{n.dependency_relation}: {n.text}"))

root: estamos
|-- nsubj: Nosotros
|-- obl: escuela
|   |-- case: en
|   +-- det: la
+-- punct: .


In [5]:
from anytree.search import findall, findall_by_attr
# Collect every node in the tree (tree_root included) whose `upos` attribute
# equals 'VERB'; the result is a tuple of AnalNode objects.
# NOTE(review): this rebinds `words`, which earlier held the raw token list
# used to build node_map -- re-running the tree-building cell after this one
# would no longer see tokens. Consider a distinct name.
words=findall_by_attr(tree_root, name='upos', value='VERB')
words

(AnalNode(dependency_relation='root', entity=['O'], feats={'Mood': 'Ind', 'Number': 'Plur', 'Person': '1', 'Tense': 'Pres', 'VerbForm': 'Fin'}, governor=0, index=2, lemma='estar', name='root', segments=[], text='estamos', tok=<JsonifyWordImpl index=2;text=estamos;lemma=estar;upos=VERB;xpos=VERB;feats=Mood=Ind|Number=Plur|Person=1|Tense=Pres|VerbForm=Fin;governor=0;dependency_relation=root>, upos='VERB', xpos='VERB'),)

In [6]:
word=words[0]
# `Person` can be present without `Tense` (the pronoun 'Nosotros' in the
# parse above has Person and Number but no Tense), so absent slots fall back
# to '_' instead of raising KeyError. The guard guarantees `Person` itself.
if 'Person' in word.feats:
    slots=[word.feats.get(key, '_') for key in ('Tense', 'Person', 'Number')]
    personal='_'.join(slots)
    print(personal)

Pres_1_Plur


In [7]:
from sagas.nlu.anal import build_anal_tree
from anytree.search import findall, findall_by_attr

# Build and draw the analysis tree for an English sentence.
sents='she sleeps on the green bed'
lang='en'
engine='stanza'
f=build_anal_tree(sents, lang, engine)
f.draw()

# Surface form of the first verb, followed by two category checks against
# 'physical_condition' (with the '~' and 'n' arguments respectively).
first_verb=f.verbs[0]
(first_verb.text,
 first_verb.is_cat('physical_condition', '~'),
 first_verb.is_cat('physical_condition', 'n'))

root: sleeps (sleep, verb)
|-- nsubj: she (she, pron)
|-- obl: bed (bed, noun)
|   |-- case: on (on, adp)
|   |-- det: the (the, det)
|   +-- amod: green (green, adj)
+-- punct: . (., punct)


('sleeps', False, True)

In [8]:
# `axis` of the first verb ('sleeps') comes back as 'sleep' for this sentence.
f.verbs[0].axis

'sleep'

In [9]:
from anytree import Node, RenderTree, AsciiStyle, Walker, Resolver
# Resolve tree paths by matching each path segment against a node's
# `dependency_relation` attribute.
r = Resolver('dependency_relation')
# Starting at `f`, descend to its `obl` child and then to that node's `amod`
# child (the adjective 'green' in this sentence).
r.get(f, "./obl/amod")

AnalNode(dependency_relation='amod', entity=['O'], feats={'Degree': 'Pos'}, governor=6, index=5, lang='en', lemma='green', name='amod', segments=[], text='green', tok=<JsonifyWordImpl index=5;text=green;lemma=green;upos=ADJ;xpos=JJ;feats=Degree=Pos;governor=6;dependency_relation=amod>, upos='ADJ', xpos='JJ')

In [10]:
from sagas.nlu.anal import build_anal_tree
from anytree.search import findall, findall_by_attr
# Rebuild `f` for a Spanish sentence and draw it.
f=build_anal_tree('Nosotros estudiamos francés.', 'es', 'stanza')
f.draw()

# Surface form, 'learn' category check and axis of the first verb.
first_verb=f.verbs[0]
(first_verb.text, first_verb.is_cat('learn'), first_verb.axis)

root: estudiamos (estudiar, verb)
|-- nsubj: Nosotros (yo, pron)
|-- obj: francés (francés, noun)
+-- punct: . (., punct)


('estudiamos', True, 'study')

In [11]:
# For this sentence spec() yields 'study', the same string the first verb
# reports as its `axis`.
f.spec()

'study'

In [12]:
# f.nouns[0].with_trans().sense
# NOTE(review): with_trans() is called only for its effect on the node;
# presumably it prepares the translation data that inherts() consults --
# TODO confirm against sagas.nlu.anal.
f.nouns[0].with_trans()
# Check the first noun ('francés') against the 'language|语言' spec;
# this evaluates to True for the sentence above.
f.nouns[0].inherts('language|语言')

True

In [13]:
# Walk from the root to the first noun and print each part of the result:
# tuple-valued parts are shown as a list of texts, single nodes as one text.
rs=f.walk_to(f.nouns[0])
for i,node in enumerate(rs):
    if isinstance(node, tuple):
        shown=[n.text for n in node]
    else:
        shown=node.text
    print(i, shown)

0 []
1 estudiamos
2 ['francés']


In [14]:
def node_repr(n):
    """Format one node as ``text(relation)``, e.g. ``francés(obj)``."""
    return f"{n.text}({n.dependency_relation})"

def val_repr(node):
    """Return the formatted labels for one part of a walk.

    A tuple of nodes yields one label per node (an empty tuple yields an
    empty list); a single node yields a one-element list.
    """
    nodes = node if isinstance(node, tuple) else (node,)
    return [node_repr(n) for n in nodes]
# Join the parts returned by walk_to with ' ._ ' (nodes inside one part are
# comma-separated). The first part is empty for this walk, hence the leading
# separator in the resulting string.
' ._ '.join([','.join(val_repr(r)) for r in f.walk_to(f.nouns[0])])

' ._ estudiamos(root) ._ francés(obj)'

In [15]:
# Wildcard lookup over relation names: '*subj' picks up the `nsubj` node of
# this sentence and returns the matches as a list.
f.resolve_rels('*subj')

[AnalNode(dependency_relation='nsubj', entity=['O'], feats={'Case': 'Acc,Nom', 'Gender': 'Masc', 'Number': 'Plur', 'Person': '1', 'PronType': 'Prs'}, governor=2, index=1, lang='es', lemma='yo', name='nsubj', segments=[], text='Nosotros', tok=<JsonifyWordImpl index=1;text=Nosotros;lemma=yo;upos=PRON;xpos=PRON;feats=Case=Acc,Nom|Gender=Masc|Number=Plur|Person=1|PronType=Prs;governor=2;dependency_relation=nsubj>, upos='PRON', xpos='PRON')]

In [16]:
from itertools import chain
def generic_paths(f):
    """Walk from the sentence subject to every noun and adjective of `f`.

    The first node matching '*subj' is the starting point; when there is no
    such node the walk starts at `f` itself. Each walk is issued with
    ``verbose=True`` and its result is discarded.
    """
    subjects = f.resolve_rels('*subj')
    if subjects:
        origin = subjects[0]
    else:
        origin = f
    targets = chain(f.nouns, f.adjectives)
    for target in targets:
        origin.walk_to(target, verbose=True)

# With `f` bound to the 'Nosotros estudiamos francés.' tree this prints a
# single path, from the subject to the only noun.
generic_paths(f)

Nosotros(nsubj) ._ estudiamos(root) ._ francés(obj)


In [17]:
# Lemma, axis and personal-pronoun repr of the first `nsubj` node. The repr
# for 'Nosotros' is '__1_Plur': its leading slot holds no tense value.
subj=f.rels('nsubj')[0]
(subj.lemma, subj.axis, subj.personal_pronoun_repr)

('yo', 'I', '__1_Plur')

In [18]:
# `axis` of the first noun ('francés') comes back as 'French'.
f.nouns[0].axis

'French'

In [19]:
# `f` is the root verb node itself, so this matches f.verbs[0].axis ('study').
f.axis

'study'

In [20]:
# The root's parent is a token-less holder node (name '_', tok=None) that
# carries the original sentence, the language and the engine.
f.parent

AnalNode(engine='stanza', lang='es', name='_', sents='Nosotros estudiamos francés.', tok=None)

In [5]:
from sagas.nlu.anal import build_anal_tree, Doc
from anytree.search import findall, findall_by_attr
# Rebuild `f` for a Japanese sentence and draw it.
f=build_anal_tree('2008年12月に上海に行きたいです。', 'ja', 'stanza')
f.draw()

# First verb, plus the text chunk and date interpretation of the first `iobj`.
# NOTE(review): the recorded as_date() result is datetime(11, 4, 6, ...),
# which does not correspond to '2008年12月' -- the date parsing looks wrong
# for this input and should be checked.
first_iobj=f.rels('iobj')[0]
(f.verbs[0].text, first_iobj.chunk, first_iobj.as_date())

root: 行き (行く, verb)
|-- iobj: 12月 (12月, noun)
|   |-- nmod: 年 (年, noun)
|   |   +-- nummod: 2008 (2008, num)
|   +-- case: に (に, adp)
|-- iobj: 上海 (上海, propn)
|   +-- case: に (に, adp)
|-- aux: たいです (たい, aux)
+-- punct: 。 (。, punct)


('行き',
 '2008年12月に',
 [('2008年12月', datetime.datetime(11, 4, 6, 22, 11, 11, 879709))])

In [18]:
import rx
from rx import operators as ops
def as_source(self):
    """Expose the descendants of the node `self` as an Rx observable.

    Each descendant is passed to ``rx.of`` as a separate item, so subscribers
    receive the nodes one at a time.
    """
    nodes=self.descendants
    return rx.of(*nodes)

# Stream every descendant of `f`: print its lemma as it passes through, then
# collect the node into `results`; errors are printed rather than raised.
results=[]
lemma_logger=ops.do_action(lambda n: print(n.lemma))
as_source(f).pipe(lemma_logger).subscribe(on_next=results.append, on_error=print)

12月
年
2008
に
上海
に
たい
。


<rx.disposable.disposable.Disposable at 0x12ede1668>

In [4]:
from sagas.nlu.inspector_extractor import ex_translit
# Run ex_translit for 'たいです' against the document and, when it reports
# success, print what has been collected on f.doc.resultset.
# NOTE(review): the output below lists the same 'taidesu' entry three times --
# presumably resultset accumulates across repeated runs of this cell;
# TODO confirm, and rebuild `f` before re-running.
if ex_translit('', 'たいです', '', f.doc):
    print(f.doc.resultset)

[{'inspector': 'extract_comps', 'provider': '', 'part': 'anal:', 'value': 'taidesu', 'delivery': 'slot', 'pattern': '_'}, {'inspector': 'extract_comps', 'provider': '', 'part': 'anal:', 'value': 'taidesu', 'delivery': 'slot', 'pattern': '_'}, {'inspector': 'extract_comps', 'provider': '', 'part': 'anal:', 'value': 'taidesu', 'delivery': 'slot', 'pattern': '_'}]


In [4]:
# Climb from the first `iobj` node towards the top of the tree and print the
# original sentence stored on any Doc node met on the way.
n=f.rels('iobj')[0]
for node in n.iter_path_reverse():
    if not isinstance(node, Doc):
        continue
    print(node.sents)

2008年12月に上海に行きたいです。


In [2]:
from itertools import chain
from sagas.nlu.constants import delim
# Rebuild the surface text of the first `iobj` subtree by hand: gather the
# node and its descendants, order them by word index and join the texts with
# the language's delimiter. The result equals the node's `chunk` shown earlier.
iobj=f.rels('iobj')[0]
print(iobj.text)
subtree=[iobj, *iobj.descendants]
for c in subtree:
    print(c.index, c.text)
rs=sorted(((c.index, c.text) for c in subtree), key=lambda pair: pair[0])
lang='ja'
delim(lang).join(text for (idx, text) in rs)

12月
3 12月
2 年
1 2008
4 に


'2008年12月に'