# Querying Files and Webpages using JSON-Path

In [1]:
import json
import requests
from jsonpath_ng import jsonpath
from jsonpath_ng.ext import parse

In [2]:
# read json file as dict
with open('quiz.json') as json_file:
    data = json.load(json_file)

type(data)

dict

In [3]:
data

{'quiz': {'sport': {'q1': {'question': 'Which one is correct team name in NBA?',
    'options': ['New York Bulls',
     'Los Angeles Kings',
     'Golden State Warriros',
     'Huston Rocket'],
    'answer': 'Huston Rocket'}},
  'maths': {'q1': {'question': '5 + 7 = ?',
    'options': ['10', '11', '12', '13'],
    'answer': '12'},
   'q2': {'question': '12 - 8 = ?',
    'options': ['1', '2', '3', '4'],
    'answer': '4'}}}}

In [4]:
jsonpath_expr = parse('quiz.*.*')
result = [match.value for match in jsonpath_expr.find(data)]
result

[{'question': 'Which one is correct team name in NBA?',
  'options': ['New York Bulls',
   'Los Angeles Kings',
   'Golden State Warriros',
   'Huston Rocket'],
  'answer': 'Huston Rocket'},
 {'question': '5 + 7 = ?',
  'options': ['10', '11', '12', '13'],
  'answer': '12'},
 {'question': '12 - 8 = ?', 'options': ['1', '2', '3', '4'], 'answer': '4'}]

In [5]:
jsonpath_expr = parse('$.quiz.maths.q1.options[*]')
result = [match.value for match in jsonpath_expr.find(data)]
result

['10', '11', '12', '13']

In [6]:
jsonpath_expr = parse('$.quiz.maths.q1.options[?(@ >= 12)]')
result = [match.value for match in jsonpath_expr.find(data)]
result

['12', '13']

In [7]:
jsonpath_expr = parse('$.quiz.*[?(@.question == "5 + 7 = ?")]')
result = [match.value for match in jsonpath_expr.find(data)]
result

[{'question': '5 + 7 = ?',
  'options': ['10', '11', '12', '13'],
  'answer': '12'}]

In [8]:
jsonpath_expr = parse('$.quiz.maths.*.options[\@]') # orders elements in lists descending / for ascending
result = [match.value for match in jsonpath_expr.find(data)]
result

[['13', '12', '11', '10'], ['4', '3', '2', '1']]

In [9]:
jsonpath_expr = parse('$.quiz.*[?(@.answer != "12")]')
result = [match.value for match in jsonpath_expr.find(data)]
result

[{'question': 'Which one is correct team name in NBA?',
  'options': ['New York Bulls',
   'Los Angeles Kings',
   'Golden State Warriros',
   'Huston Rocket'],
  'answer': 'Huston Rocket'},
 {'question': '12 - 8 = ?', 'options': ['1', '2', '3', '4'], 'answer': '4'}]

In [10]:
jsonpath_expr = parse('$..options[\@]') # orders elements in lists descending / for ascending
result = [match.value for match in jsonpath_expr.find(data)]
result

[['New York Bulls',
  'Los Angeles Kings',
  'Huston Rocket',
  'Golden State Warriros'],
 ['13', '12', '11', '10'],
 ['4', '3', '2', '1']]

### nbagames.json

In [11]:
# read json file as dict
with open('nba.json') as json_file: #, encoding="utf-8"
    data = json.load(json_file)

type(data)

dict

In [12]:
data['games'][0].keys()

dict_keys(['_id', 'teams', 'date'])

In [13]:
data['games'][0]['teams'][0].keys()

dict_keys(['name', 'abbreviation', 'score', 'home', 'won', 'results', 'players', 'city'])

In [14]:
data['games'][0]['date']['$date']

'1985-10-25T04:00:00.000+0000'

In [15]:
jsonpath_expr = parse('$..teams[?(@..city == "Washington")].name')
result = [match.value for match in jsonpath_expr.find(data)]
result

['Washington Bullets']

In [16]:
jsonpath_expr = parse('$.games[?(@..name == "Golden State Warriors")]..won')
result = [match.value for match in jsonpath_expr.find(data)]
print(result)
print(f'Winning percentage: {round(sum(result) / len(result) * 100)}%')

[1, 0]
Winning percentage: 50%


### Parsing Website

In [17]:
ggplot_req = requests.get("https://api.github.com/repos/hadley/ggplot2/issues")

In [18]:
# create string of a list
ggplot_dics=ggplot_req.content.decode('utf-8')

In [19]:
# create dictionary
data = {"content":json.loads(ggplot_dics)}

In [20]:
# query using jsonpath expression
jsonpath_expression = parse('*..user.id')

for match in jsonpath_expression.find(data):
    print(f'User id: {match.value}')

User id: 30030254
User id: 49372158
User id: 3817450
User id: 7499666
User id: 31279658
User id: 5557093
User id: 9857320
User id: 1978793
User id: 4773225
User id: 64136872
User id: 4773225
User id: 13736321
User id: 43342160
User id: 4773225
User id: 21158052
User id: 1978793
User id: 11757474
User id: 15105152
User id: 4990832
User id: 60670870
User id: 6590141
User id: 1105215
User id: 38475991
User id: 14003618
User id: 15717151
User id: 1978793
User id: 2175487
User id: 2146002
User id: 4095204
User id: 4877315


In [21]:
jsonpath_expression = parse('$..author_association')

for match in jsonpath_expression.find(data):
    print(f'author_association: {match.value}')

author_association: NONE
author_association: CONTRIBUTOR
author_association: NONE
author_association: NONE
author_association: NONE
author_association: NONE
author_association: NONE
author_association: MEMBER
author_association: CONTRIBUTOR
author_association: NONE
author_association: CONTRIBUTOR
author_association: NONE
author_association: NONE
author_association: CONTRIBUTOR
author_association: NONE
author_association: MEMBER
author_association: NONE
author_association: NONE
author_association: CONTRIBUTOR
author_association: NONE
author_association: NONE
author_association: NONE
author_association: NONE
author_association: CONTRIBUTOR
author_association: NONE
author_association: MEMBER
author_association: CONTRIBUTOR
author_association: NONE
author_association: NONE
author_association: CONTRIBUTOR


In [22]:
data="""[{
  "employees": [
    {
      "id": 1,
      "name": "Pankaj",
      "salary": "10000"
    },
    {
      "name": "David",
      "salary": "5000",
      "id": 2
    }
  ]
}]"""
data = json.loads(data)

In [23]:
jsonpath_expression = parse('employees[*].[id,name]')

for match in jsonpath_expression.find(data[0]):
    if type(match.value) == int:
        print(f'Employee id: {match.value}')
    else:
        print(f'Employee name: {match.value}')

Employee id: 1
Employee name: Pankaj
Employee id: 2
Employee name: David


In [24]:
emp_ids = parse('employees[*].[id]')

for emp_id in emp_ids.find(data[0]):
    emp_names = parse(f'$..employees[?(@.id == {emp_id.value})].name')
    for emp_name in emp_names.find(data[0]):
        print(f'Employee {emp_id.value}: {emp_name.value}')

Employee 1: Pankaj
Employee 2: David


### Use this for help

http://jsonpath.com/