In [44]:
import sagas.ja.knp_helper as kh
import sagas
from sagas.nlu.uni_impl_knp import tag_pos, pos_list

def trim_chunk(chunk):
    return [c for c in chunk if c!='']
def get_feats(tag):
    return ['c_{}'.format(tag_pos(tag)).lower(), 
            'x_{}'.format(pos_list(tag)[0]).lower()]
def get_governor(tag):
    if tag.parent_id == -1:
        governor = 0
    else:
        governor = tag.parent_id + 1
    return governor
def collect_bnst(result):
    rs=[]
    for bnst in result.bnst_list():
        rs.append(([tag.tag_id for tag in bnst.tag_list()],
                   [mrph.repname.split('/')[0] for mrph in bnst.mrph_list()]
                  ))
    return rs

def extract_predicates(result, verbose=True):    
    deps={}
    predict_keys=[]
    predicts=[]
    predict_tuples=[]
    bnst_set=collect_bnst(result)
    words=result.tag_list()
    for tag in words:
        if tag.pas is not None:  # find predicate
            predict_cnt=''.join(mrph.midasi for mrph in tag.mrph_list())
            # word_main=tag.mrph_list()[0]            
            if verbose:
                print(tag.tag_id, '. 述語: %s' % predict_cnt)
            # print(tag.features)
            # print(tag.normalized_repname)
            repname=tag.normalized_repname.split('/')
            predict_lemma=repname[0]
            predict_phonetic=repname[1]
            
            predict_keys.append(kh.merge_tag(tag))
            p_args=[]
            domains=[]
            for case, args in tag.pas.arguments.items():  # case: str, args: list of Argument class                
                for arg in args:  # arg: Argument class
                    arg_tag=words[arg.tid]
                    if verbose:
                        print('\t格: %s,  項: %s  (項の基本句ID: %d)' % (case, arg.midasi, arg.tid))
                    kh.put_items(deps, {tag.tag_id, arg.tid}, case)
                    cnt=''.join(mrph.midasi for mrph in arg_tag.mrph_list())
                    chunk=[b[1] for b in bnst_set if arg_tag.tag_id in b[0]][0]
                    p_args.append({'name':case, 'value':arg.midasi, 'text':cnt,
                                   'chunk':chunk,
                                   'upos':tag_pos(arg_tag),
                                   'start':arg.tid, 'end':arg.tid})
                    # ['rel', 'index', 'text', 'lemma', 'children', 'features']
                    feats=get_feats(arg_tag)
                    domains.append((case, arg.tid, cnt, arg.midasi, trim_chunk(chunk), feats))
            predicts.append({'index':tag.tag_id, 'predicate':predict_lemma, 
                             'cnt':predict_cnt, 'phonetic':predict_phonetic,
                             'args':p_args})
            predict_tuples.append({'type':'predicate', 'lemma':predict_lemma, 'index': tag.tag_id,
                              'rel': tag.dpndtype, 'governor': get_governor(tag),
                              'domains': domains, 'stems':[]})
    if verbose:
        print(deps, predict_keys)
        # print(predicts)
    return deps, predict_keys, predicts, predict_tuples

sents="望遠鏡で泳いでいる少女を見た。"

def parse_and_repr(sents):
    result = kh.knp.parse(sents)
    dep_sets, predict_keys, predicts, predict_tuples = extract_predicates(result, verbose=False)
    for pr in predicts:
        print(f"{pr['index']}. {pr['predicate']} ({pr['phonetic']}, {pr['cnt']})")
        cols=pr['args'][0].keys()
        rows=[row.values() for row in pr['args']]
        display(sagas.to_df(rows, cols))

    for t in predict_tuples:
        print(t)
parse_and_repr(sents)              

2. 泳ぐ (およぐ, 泳いでいる)


Unnamed: 0,name,value,text,chunk,upos,start,end
0,ガ,少女,少女を,"[少女, ]",NOUN,3,3


4. 見る (みる, 見た。)


Unnamed: 0,name,value,text,chunk,upos,start,end
0,ヲ,少女,少女を,"[少女, ]",NOUN,3,3
1,デ,鏡,鏡で,"[望遠, 鏡, ]",NOUN,1,1


{'type': 'predicate', 'lemma': '泳ぐ', 'index': 2, 'rel': 'D', 'governor': 4, 'domains': [('ガ', 3, '少女を', '少女', ['少女'], ['c_noun', 'x_n'])], 'stems': []}
{'type': 'predicate', 'lemma': '見る', 'index': 4, 'rel': 'D', 'governor': 0, 'domains': [('ヲ', 3, '少女を', '少女', ['少女'], ['c_noun', 'x_n']), ('デ', 1, '鏡で', '鏡', ['望遠', '鏡'], ['c_noun', 'x_n'])], 'stems': []}


In [45]:
parse_and_repr("私にとって、あなたは常に勝者になります。")

5. 鳴る (なる?成る, なります。)


Unnamed: 0,name,value,text,chunk,upos,start,end
0,ガ,あなた,あなたは,"[あなた, ]",NOUN,2,2
1,ニ,勝者,勝者に,"[勝者, ]",NOUN,4,4
2,修飾,常に,常に,[常に],ADV,3,3
3,ニトル,私,私に,"[私, ]",NOUN,0,0


{'type': 'predicate', 'lemma': '鳴る', 'index': 5, 'rel': 'D', 'governor': 0, 'domains': [('ガ', 2, 'あなたは', 'あなた', ['あなた'], ['c_noun', 'x_n']), ('ニ', 4, '勝者に', '勝者', ['勝者'], ['c_noun', 'x_n']), ('修飾', 3, '常に', '常に', ['常に'], ['c_adv', 'x_a']), ('ニトル', 0, '私に', '私', ['私'], ['c_noun', 'x_n'])], 'stems': []}


In [38]:
result = kh.knp.parse(sents)

for bnst in result.bnst_list():
    parent = bnst.parent
    if parent is not None:
        child_rep = " ".join(mrph.repname for mrph in bnst.mrph_list())
        parent_rep = " ".join(mrph.repname for mrph in parent.mrph_list())
        print(child_rep, "->", parent_rep)
        print('\t', [tag.tag_id for tag in bnst.tag_list()])

rs=[]
for bnst in result.bnst_list():
    rs.append(([tag.tag_id for tag in bnst.tag_list()],
               [mrph.repname for mrph in bnst.mrph_list()]
              ))
print(rs)

望遠/ぼうえん 鏡/きょう  -> 見る/みる 
	 [0, 1]
泳ぐ/およぐ いる/いる -> 少女/しょうじょ 
	 [2]
少女/しょうじょ  -> 見る/みる 
	 [3]
[([0, 1], ['望遠/ぼうえん', '鏡/きょう', '']), ([2], ['泳ぐ/およぐ', 'いる/いる']), ([3], ['少女/しょうじょ', '']), ([4], ['見る/みる', ''])]
