In [1]:
import json

from jsonpath_ng import jsonpath, parse

# Decode a small JSON document into Python objects.
json_string = '{"id":1, "name":"Pankaj"}'
json_data = json.loads(json_string)

# Compile the JSONPath once, then evaluate it against the decoded document.
jsonpath_expression = parse('$.id')
match = jsonpath_expression.find(json_data)

# `find` hands back a list of match objects; the raw value lives on `.value`.
print(match)
print(f"id value is {match[0].value}")

[DatumInContext(value=1, path=Fields('id'), context=DatumInContext(value={'id': 1, 'name': 'Pankaj'}, path=Root(), context=None))]
id value is 1


In [3]:
# Sample payload: two employee records whose keys appear in different orders.
raw = '''{
  "employees": [
    {
      "id": 1,
      "name": "Pankaj",
      "salary": "10000"
    },
    {
      "name": "David",
      "salary": "5000",
      "id": 2
    }
  ]
}'''

import json
from jsonpath_ng import jsonpath, parse

json_data = json.loads(raw)
# `[*]` fans out over every element of the `employees` array.
jsonpath_expression = parse('employees[*].id')

for match in jsonpath_expression.find(json_data):
    print('Employee id:', match.value)

Employee id: 1
Employee id: 2


In [4]:
# Gather the value of every match into a plain list.
emp_ids_list = [
    found.value
    for found in jsonpath_expression.find(json_data)
]
print(emp_ids_list)  # [1, 2]

[1, 2]


In [1]:
from jsonpath_ng import jsonpath, parse
# Compile once; `[*]` visits each element of the `foo` list.
jsonpath_expr = parse('foo[*].baz')
[
    found.value
    for found in jsonpath_expr.find({'foo': [{'baz': 1}, {'baz': 2}]})
]

[1, 2]

In [7]:
from jsonpath_ng import jsonpath, parse
# A one-element list holding a single record.
vals = [
    {
        'body': '8',
        'dim': 'number',
        'end': 1,
        'latent': False,
        'start': 0,
        'value': {'type': 'value', 'value': 8},
    }
]
# `$[0]` indexes into the top-level list; `.body` then selects that key.
jsonpath_expr = parse('$[0].body')
# Only the first match is wanted, so pull a single item from the generator.
next(found.value for found in jsonpath_expr.find(vals))

'8'

In [7]:
# Matches remember where they came from.
# The expression is parsed here instead of being read from `jsonpath_expr`:
# the cell directly above rebinds that name to '$[0].body', so relying on it
# would not reproduce the output below on a top-to-bottom run.
[
    str(match.full_path)
    for match in parse('foo[*].baz').find({'foo': [{'baz': 1}, {'baz': 2}]})
]

['foo.[0].baz', 'foo.[1].baz']

In [8]:
# And this can be useful for automatically providing ids for bits of data that do not have them (currently a global switch)
# Because the switch is module-level state, remember the previous setting and
# put it back afterwards so it does not leak into later cells.
previous_auto_id_field = jsonpath.auto_id_field
jsonpath.auto_id_field = 'id'
try:
    # Values are materialised inside the `try`, while the switch is still on.
    auto_ids = [
        match.value
        for match in parse('foo[*].id').find({'foo': [{'id': 'bizzle'}, {'baz': 3}]})
    ]
finally:
    jsonpath.auto_id_field = previous_auto_id_field
auto_ids


['foo.bizzle', 'foo.[1]']

In [9]:
# A handy extension: named operators like `parent`
# Here `parent` steps from each matched `b` back up to the object holding it,
# and `.c` is then read from that object.
[
    match.value
    for match in parse('a.*.b.`parent`.c').find(
        {'a': {'x': {'b': 1, 'c': 'number one'}, 'y': {'b': 2, 'c': 'number two'}}}
    )
]

['number one', 'number two']

In [10]:
# Rebinds `parse` to the one exported by `jsonpath_ng.ext`.
from jsonpath_ng.ext import parse
# A robust parser, not just a regex. (Makes powerful extensions possible; see below)
jsonpath_expr = parse('foo[*].baz')

In [12]:
# Load the saved domain document. The encoding is pinned to UTF-8 so decoding
# does not depend on the platform's default locale encoding.
with open("./out/v_domain.json", 'r', encoding='utf-8') as json_file:
    json_data = json.load(json_file)

json_data

{'index': '2',
 'text': 'want',
 'lemma': 'want',
 'upos': 'VERB',
 'xpos': 'VBP',
 'feats': 'Mood=Ind|Tense=Pres|VerbForm=Fin',
 'governor': 0,
 'dependency_relation': 'root',
 'xcomp': [{'index': '4',
   'text': 'play',
   'lemma': 'play',
   'upos': 'VERB',
   'xpos': 'VB',
   'feats': 'VerbForm=Inf',
   'governor': 2,
   'dependency_relation': 'xcomp',
   'obj': [{'index': '5',
     'text': 'music',
     'lemma': 'music',
     'upos': 'NOUN',
     'xpos': 'NN',
     'feats': 'Number=Sing',
     'governor': 4,
     'dependency_relation': 'obj'}],
   'mark': [{'index': '3',
     'text': 'to',
     'lemma': 'to',
     'upos': 'PART',
     'xpos': 'TO',
     'feats': '_',
     'governor': 4,
     'dependency_relation': 'mark'}]}],
 'nsubj': [{'index': '1',
   'text': 'I',
   'lemma': 'I',
   'upos': 'PRON',
   'xpos': 'PRP',
   'feats': 'Case=Nom|Number=Sing|Person=1|PronType=Prs',
   'governor': 2,
   'dependency_relation': 'nsubj'}],
 'punct': [{'index': '6',
   'text': '.',
   'lemm

In [14]:
# Read the `text` field at the document root.
[found.value for found in parse('$.text').find(json_data)]

['want']

In [18]:
# JSONPath expressions to evaluate against the loaded document.
exprs = [
    '$.nsubj[*].text',
    '$.xcomp[*].obj[*].text',
]
for expr in exprs:
    # Pair each matched value with the full path it was found at.
    print([
        (found.value, str(found.full_path))
        for found in parse(expr).find(json_data)
    ])

[('I', 'nsubj.[0].text')]
[('music', 'xcomp.[0].obj.[0].text')]


In [2]:
from sagas.nlu.ruleset_procs import cached_chunks
# Input sentence and its language code.
sents = 'I want to play music.'
lang = 'en'
# Ask `cached_chunks` for the analysis using the 'corenlp' engine, then show
# the `verb_domains` entry of the result.
chunks = cached_chunks(sents, lang, 'corenlp')
chunks['verb_domains']

.. request is {'lang': 'en', 'sents': 'I want to play music.', 'engine': 'corenlp', 'pipelines': ['predicts']}


[{'index': '2',
  'text': 'want',
  'lemma': 'want',
  'upos': 'VERB',
  'xpos': 'VBP',
  'feats': 'Mood=Ind|Tense=Pres|VerbForm=Fin',
  'governor': 0,
  'dependency_relation': 'root',
  'nsubj': [{'index': '1',
    'text': 'I',
    'lemma': 'I',
    'upos': 'PRON',
    'xpos': 'PRP',
    'feats': 'Case=Nom|Number=Sing|Person=1|PronType=Prs',
    'governor': 2,
    'dependency_relation': 'nsubj'}],
  'punct': [{'index': '6',
    'text': '.',
    'lemma': '.',
    'upos': 'PUNCT',
    'xpos': '.',
    'feats': '_',
    'governor': 2,
    'dependency_relation': 'punct'}],
  'xcomp': [{'index': '4',
    'text': 'play',
    'lemma': 'play',
    'upos': 'VERB',
    'xpos': 'VB',
    'feats': 'VerbForm=Inf',
    'governor': 2,
    'dependency_relation': 'xcomp',
    'mark': [{'index': '3',
      'text': 'to',
      'lemma': 'to',
      'upos': 'PART',
      'xpos': 'TO',
      'feats': '_',
      'governor': 4,
      'dependency_relation': 'mark'}],
    'obj': [{'index': '5',
      'text': '

In [3]:
chunks['root_domains']

[{'index': '2',
  'text': 'want',
  'lemma': 'want',
  'upos': 'VERB',
  'xpos': 'VBP',
  'feats': 'Mood=Ind|Tense=Pres|VerbForm=Fin',
  'governor': 0,
  'dependency_relation': 'root',
  'nsubj': [{'index': '1',
    'text': 'I',
    'lemma': 'I',
    'upos': 'PRON',
    'xpos': 'PRP',
    'feats': 'Case=Nom|Number=Sing|Person=1|PronType=Prs',
    'governor': 2,
    'dependency_relation': 'nsubj'}],
  'punct': [{'index': '6',
    'text': '.',
    'lemma': '.',
    'upos': 'PUNCT',
    'xpos': '.',
    'feats': '_',
    'governor': 2,
    'dependency_relation': 'punct'}],
  'xcomp': [{'index': '4',
    'text': 'play',
    'lemma': 'play',
    'upos': 'VERB',
    'xpos': 'VB',
    'feats': 'VerbForm=Inf',
    'governor': 2,
    'dependency_relation': 'xcomp',
    'mark': [{'index': '3',
      'text': 'to',
      'lemma': 'to',
      'upos': 'PART',
      'xpos': 'TO',
      'feats': '_',
      'governor': 4,
      'dependency_relation': 'mark'}],
    'obj': [{'index': '5',
      'text': '

In [23]:
def normal_path(path, fields=('text', 'lemma')):
    """Turn a slash-separated relation path into a JSONPath expression.

    Each path segment becomes a wildcard array step (``segment[*]``) and the
    requested fields are appended as a comma-separated selection, e.g.
    ``'xcomp/obj'`` -> ``'$.xcomp[*].obj[*].text,lemma'``.

    Args:
        path: Relation names joined by ``/``. Empty segments (leading,
            trailing or doubled slashes) are ignored.
        fields: Field name, or iterable of field names, to select on the
            final nodes. Defaults to ``('text', 'lemma')``.

    Returns:
        The JSONPath expression as a string, rooted at ``$``.
    """
    # A bare string would otherwise be joined character by character.
    if isinstance(fields, str):
        fields = [fields]
    steps = [f"{segment}[*]" for segment in path.split('/') if segment]
    # Joining the steps and the field selection with a single '.' means an
    # empty path yields '$.text,lemma' instead of a dangling '[*]' step.
    steps.append(','.join(fields))
    return '$.' + '.'.join(steps)


print(normal_path('nsubj'))
print(normal_path('xcomp/obj'))

$.nsubj[*].text,lemma
$.xcomp[*].obj[*].text,lemma


In [20]:
from jsonpath_ng import jsonpath, parse
from sagas.nlu.inspector_wordnet import predicate

# Each expression selects both `text` and `lemma` on the matched nodes.
exprs = [
    '$.nsubj[*].text,lemma',
    '$.xcomp[*].obj[*].text,lemma',
]
kind = 'sound/perception'
# Compile every expression once, outside the per-chunk loop.
parsers = [parse(expr) for expr in exprs]
for chunk in chunks['verb_domains']:
    # Rebinds the notebook-level `json_data` to the current chunk.
    json_data = chunk
    for parser in parsers:
        # Evaluate the expression once and reuse the matches for both prints.
        matches = parser.find(json_data)
        print([(match.value, str(match.full_path)) for match in matches])
        word = '/'.join([match.value for match in matches])
        # NOTE(review): `predicate` presumably tests `word` against the `kind`
        # category for `lang` — confirm in sagas.nlu.inspector_wordnet.
        print(word, '->', predicate(kind, word, lang, '*'))

[('I', 'nsubj.[0].text'), ('I', 'nsubj.[0].lemma')]
I/I -> False
[('music', 'xcomp.[0].obj.[0].text'), ('music', 'xcomp.[0].obj.[0].lemma')]
music/music -> True


In [18]:
# Run the precompiled `parsers` over every root-domain chunk.
for chunk in chunks['root_domains']:
    # Rebinds the notebook-level `json_data` to the current chunk.
    json_data = chunk
    for parser in parsers:
        # Evaluate the expression once and reuse the matches for both prints.
        matches = parser.find(json_data)
        print([(match.value, str(match.full_path)) for match in matches])
        print('/'.join([match.value for match in matches]))

[('I', 'nsubj.[0].text'), ('I', 'nsubj.[0].lemma')]
I/I
[('music', 'xcomp.[0].obj.[0].text'), ('music', 'xcomp.[0].obj.[0].lemma')]
music/music


In [1]:
from sagas.conf.conf import cf
# Show the settings currently held in `cf.conf`.
cf.conf

{'collect_verbs': True,
 'trans_cache': True,
 'translator': 'google',
 'retries': 3,
 'enable_chunks_parse': True,
 'enable_ascii_viz': True,
 'append_ascii_viz': False,
 'print_not_matched': False,
 'print_inspector_result': True,
 'default_word_sets_langs': ['en', 'zh', 'ja', 'id', 'nl'],
 'bert_servant': 'localhost',
 'ofbiz_servant': 'localhost',
 'odoo_servant': 'localhost',
 'common_s': 'http://localhost:14000',
 'ner': 'http://localhost:8092/entities',
 'ner_ru': 'http://localhost:8095/entities',
 'duckling': 'http://0.0.0.0:8000/parse',
 'nlu_multilang_servant': 'http://localhost:18099',
 'words_servant': 'http://localhost:8093',
 'redis': 'localhost',
 'servants': {'corenlp': 'http://localhost:14000',
  'stanford': 'http://localhost:14001',
  'ltp': 'http://localhost:14005',
  'hanlp': 'http://localhost:14000',
  'knp': 'http://localhost:14000',
  'spacy': 'http://localhost:14001',
  'spacy_2.2': 'http://localhost:14002'},
 'dialectors': {'*': 'corenlp',
  'auto': 'corenlp',
