In [34]:
import abc
class WordIntf(abc.ABC):
    """Abstract, read-only view over the features of a single parsed word.

    Subclasses implement :meth:`setup`, which converts backend-specific
    ``data`` into a mapping stored on ``self.ctx``. Each property below looks
    its feature up in that mapping and yields ``None`` when the feature is
    absent, so ``__repr__`` can simply skip unset features.
    """

    def __init__(self, data):
        """Store the feature mapping produced by ``setup(data)`` on ``self.ctx``."""
        self.ctx = self.setup(data)

    @abc.abstractmethod
    def setup(self, data):
        """Return a mapping of feature name -> value built from ``data``."""
        pass

    @property
    def dependency_relation(self):
        """ Access dependency relation of this word, or None if unset. Example: 'nsubj'"""
        return self.ctx.get('dependency_relation')

    @property
    def lemma(self):
        """ Access lemma of this word, or None if unset. """
        return self.ctx.get('lemma')

    @property
    def governor(self):
        """ Access governor of this word, or None if unset. """
        return self.ctx.get('governor')

    @property
    def pos(self):
        """ Access (treebank-specific) part-of-speech of this word. Example: 'NNP'

        No implementation is required to provide a dedicated 'pos' entry, so
        when it is missing (or None) the 'xpos' value is returned instead.
        """
        value = self.ctx.get('pos')
        return value if value is not None else self.ctx.get('xpos')

    @property
    def text(self):
        """ Access text of this word, or None if unset. Example: 'The'"""
        return self.ctx.get('text')

    @property
    def xpos(self):
        """ Access treebank-specific part-of-speech of this word, or None if unset. Example: 'NNP'"""
        return self.ctx.get('xpos')

    @property
    def upos(self):
        """ Access universal part-of-speech of this word, or None if unset. Example: 'DET'"""
        return self.ctx.get('upos')

    @property
    def feats(self):
        """ Access morphological features of this word, or None if unset. Example: 'Gender=Fem'"""
        return self.ctx.get('feats')

    @property
    def index(self):
        """ Access index of this word, or None if unset. """
        return self.ctx.get('index')

    def __repr__(self):
        """Render ``<ClassName k=v;...>`` listing only the features that are set."""
        features = ['index', 'text', 'lemma', 'upos', 'xpos', 'feats', 'governor', 'dependency_relation']
        parts = []
        for name in features:
            # Read each property once; None means "not set" and is omitted.
            value = getattr(self, name)
            if value is not None:
                parts.append("{}={}".format(name, value))
        feature_str = ";".join(parts)

        return f"<{self.__class__.__name__} {feature_str}>"
    
class SentenceIntf(abc.ABC):
    """Abstract sentence wrapper exposing its words and dependency triples.

    Subclasses implement :meth:`setup`, which must return a
    ``(words, dependencies)`` pair for the given backend sentence.
    """

    def __init__(self, sent):
        """Run ``setup(sent)`` and keep the resulting words and dependencies."""
        parsed = self.setup(sent)
        # setup() is expected to hand back exactly two items.
        self._words, self._dependencies = parsed

    @abc.abstractmethod
    def setup(self, sent):
        """Return a ``(words, dependencies)`` pair built from ``sent``."""

    @property
    def words(self):
        """ Access list of words for this sentence. """
        return self._words

    @property
    def dependencies(self):
        """ Access list of dependencies for this sentence. """
        return self._dependencies

In [35]:
class WordImpl(WordIntf):
    """Stub word used for experimenting; always reports the same fixed features."""

    def setup(self, data):
        """Return a hard-coded feature mapping; ``data`` is not consulted."""
        return {
            'index': 1,
            'text': 'ok',
            'lemma': 'ok',
            'upos': 'VERB',
            'xpos': 'v',
            'feats': ['ff'],
            'governor': 0,
            'dependency_relation': 'nsubj',
        }

class SentImpl(SentenceIntf):
    """Stub sentence that fabricates one fixed dependency for its first word."""

    def setup(self, words):
        """Return ``(words, dependencies)`` for an already-built list of words.

        The dependency list holds a single hard-coded triple
        ``(2, 'rel', words[0])``. An empty ``words`` list yields no
        dependencies instead of failing on ``words[0]``.
        """
        if not words:
            # Nothing to attach a dependency to.
            return words, []
        governor, dependency_relation, word = (2, 'rel', words[0])
        dependencies = [(governor, dependency_relation, word)]
        return words, dependencies
    
# Exercise the stub implementations: one fixed word wrapped in a one-word sentence.
data = {}
wi = WordImpl(data)
print(wi.index, wi.text, isinstance(wi, WordIntf))

si = SentImpl([wi])
print(si.words)
print(si.dependencies)

1 ok True
[<WordImpl index=1;text=ok;lemma=ok;upos=VERB;xpos=v;feats=['ff'];governor=0;dependency_relation=nsubj>]
[(2, 'rel', <WordImpl index=1;text=ok;lemma=ok;upos=VERB;xpos=v;feats=['ff'];governor=0;dependency_relation=nsubj>)]


In [30]:
class ParserImpl:
    """Stub parser: calling it wraps the given words in a ``SentImpl``."""

    def __call__(self, sents):
        """Return a ``SentImpl`` built from ``sents``."""
        sentence = SentImpl(sents)
        return sentence

# Run the stub parser over the stub word and show what the sentence exposes.
par = ParserImpl()
s = par([wi])
print(s.words, '**', s.dependencies)

[<WordImpl index=1;text=ok;lemma=ok;upos=VERB;xpos=v;feats=['ff'];governor=0;dependency_relation=nsubj>] ** [(2, 'rel', <WordImpl index=1;text=ok;lemma=ok;upos=VERB;xpos=v;feats=['ff'];governor=0;dependency_relation=nsubj>)]


In [36]:
from sagas.nlu.corenlp_helper import get_nlp
def test_parser(lang, sents):
    """Parse ``sents`` with the pipeline for ``lang`` and print first-sentence stats.

    Prints the number of words in the first sentence, then the list of
    their ``index`` values.
    """
    pipeline = get_nlp(lang)
    parsed = pipeline(sents)
    first_sentence = parsed.sentences[0]
    print(len(first_sentence.words))
    indices = [word.index for word in first_sentence.words]
    print(indices)
# Sanity-check the raw pipeline on a short English sentence.
test_parser(lang='en', sents='it is a cat')

4
['1', '2', '3', '4']


In [37]:
class CoreNlpWordImpl(WordIntf):
    """Adapter exposing a parser-produced word object through ``WordIntf``."""

    def setup(self, data):
        """Copy the standard word features off ``data`` into a dict.

        Every feature name is always present in the returned mapping; a
        feature that is missing on ``data`` or set to ``None`` is stored as
        ``None``. Keeping the key means each feature accessor has something to
        look up, and ``__repr__`` can filter out the ``None`` entries itself.
        """
        features = ['index', 'text', 'lemma', 'upos', 'xpos', 'feats', 'governor', 'dependency_relation']
        # One attribute read per feature; absent attributes fall back to None.
        return {name: getattr(data, name, None) for name in features}
    
class CoreNlpSentImpl(SentenceIntf):
    """Adapter exposing a parser-produced sentence through ``SentenceIntf``."""

    def setup(self, sent):
        """Wrap ``sent.words`` and ``sent.dependencies`` in ``CoreNlpWordImpl``.

        Each dependency is read positionally as (governor, relation, word);
        the governor and the word are wrapped, the relation is passed through.
        """
        wrapped_words = [CoreNlpWordImpl(item) for item in sent.words]
        wrapped_deps = [
            (CoreNlpWordImpl(triple[0]), triple[1], CoreNlpWordImpl(triple[2]))
            for triple in sent.dependencies
        ]
        return wrapped_words, wrapped_deps
class CoreNlpParserImpl:
    """Callable parser bound to one language; returns the first parsed sentence."""

    def __init__(self, lang):
        """Remember the language code passed to ``get_nlp`` on each call."""
        self.lang = lang

    def __call__(self, sents):
        """Parse ``sents`` and wrap the first resulting sentence in ``CoreNlpSentImpl``."""
        pipeline = get_nlp(self.lang)
        parsed = pipeline(sents)
        # Only the first sentence of the parsed document is exposed.
        first_sentence = parsed.sentences[0]
        return CoreNlpSentImpl(first_sentence)
    
# Parse a short English sentence through the wrapper and inspect its words.
parser = CoreNlpParserImpl('en')
doc = parser('it is a cat')
print(len(doc.words))
print([word.index for word in doc.words])

4
['1', '2', '3', '4']


In [41]:
from sagas.nlu.corenlp_parser import get_chunks
# Run get_chunks on the wrapped sentence (`doc` is the CoreNlpSentImpl built
# above); as the cell's last expression, the result is shown as the cell output.
get_chunks(doc)
# Disabled alternative that would print the same result as JSON
# (would additionally need `import json`):
# r= get_chunks(doc)
# data_y = json.dumps(r, indent=2, ensure_ascii=False)
# print(data_y)

[{'aux': 'is',
  'delegator': False,
  'domains': [('nsubj', '1', 'it', 'it', ['it'], ['c_pron', 'x_prp']),
   ('cop', '2', 'is', 'be', ['is'], ['c_aux', 'x_vbz']),
   ('det', '3', 'a', 'a', ['a'], ['c_det', 'x_dt'])],
  'governor': 4,
  'head': 'cat',
  'head_pos': 'noun',
  'index': '2',
  'lemma': 'be',
  'rel': 'cop',
  'stems': [('nsubj', ['it']), ('cop', ['be']), ('det', ['a'])],
  'type': 'aux_domains'}]

In [46]:
import sagas
from sagas.tool.misc import print_stem_chunks
# For every chunk: print a "type(lemma)" heading, show its domains as a table,
# then hand the chunk to print_stem_chunks.
rs = get_chunks(doc)
for r in rs:
    df = sagas.to_df(
        r['domains'],
        ['rel', 'index', 'text', 'lemma', 'children', 'features'],
    )
    print('%s(%s)' % (r['type'], r['lemma']))
    display(df)
    print_stem_chunks(r)

aux_domains(be)


Unnamed: 0,rel,index,text,lemma,children,features
0,nsubj,1,it,it,[it],"[c_pron, x_prp]"
1,cop,2,is,be,[is],"[c_aux, x_vbz]"
2,det,3,a,a,[a],"[c_det, x_dt]"
