In [1]:
from typing import Any, List, Union
import logging
import re
from pprint import pprint

# Module-level logger with its own threshold set to INFO.
# NOTE(review): no cell visible in this notebook calls `logger` - confirm it is still needed.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

class PathValue:
    """
    Pairs a value found inside a JSON-like object with the path leading to it.

    Attributes:
        value: The object that was located.
        path: The list of path segments locating ``value``; ``None`` when
            no path is supplied to the constructor.
    """
    def __init__(self, value: Any, path: List[str] = None):
        self.value, self.path = value, path

    def __repr__(self):
        # Use the runtime class name so subclasses print under their own name.
        cls_name = type(self).__name__
        return f"{cls_name}(value={self.value}, path={self.path})"


In [2]:
def path_values(path: Union[str, List[str]], obj: Any) -> List[PathValue]:
    """
    Return every value in ``obj`` that ``path`` matches.

    Args:
        path: Either a dotted string (e.g. ``"users.*.name"``,
            ``"**.emails"``, ``"profile.emails.[0]"``) or an already split
            sequence of tokens. A string is stripped of surrounding
            whitespace and split into ``**``, ``*``, bracketed ``[...]``
            and plain dot-separated segments. A bracketed index has to be
            its own segment (``emails.[0]``); ``emails[0]`` is tokenised as
            one key name. A leading ``'$'`` token is dropped.
        obj: The object to search.

    Returns:
        The list produced by ``path_values_ast`` for the resulting tokens,
        with ``['$']`` as the starting path. An empty path (or a bare
        ``"$"``) leaves no tokens, so the search runs against ``obj`` itself.

    Raises:
        TypeError: If ``path`` is not a string, list or tuple.
    """
    if isinstance(path, str):
        tokens = re.findall(r'\*\*|\*|\[[^\]]+\]|[^.]+', path.strip())
    elif isinstance(path, (list, tuple)):
        # Copy so the caller's sequence is never aliased.
        tokens = list(path)
    else:
        # Previously fell through with `tokens` unbound (UnboundLocalError).
        raise TypeError(
            f"path must be a str or a list of tokens, got {type(path).__name__}"
        )

    # Guard the emptiness check: "" or [] used to raise IndexError here.
    if tokens and tokens[0] == '$':
        tokens = tokens[1:]

    return path_values_ast(obj, tokens, ['$'])


def path_values_ast(obj: Any,
                    tokens: List[str],
                    path: List[str]) -> List[PathValue]:
    """
    Recursively match ``tokens`` against ``obj`` and collect every hit.

    Supported tokens:
        ``'**'``  matches zero or more nesting levels (dict values or list
                  items).
        ``'*'``   matches every direct child of a dict or a list.
        ``'[N]'`` selects item ``N`` of a list, ``N`` being a non-negative
                  integer; a non-list or an out-of-range index yields nothing.
        ``[N]``   a one-element list holding an int - the form this function
                  writes into result paths - is handled like ``'[N]'`` so a
                  returned path can be passed back in as a query.
        other     any other string is looked up as a dict key.

    Args:
        obj: The node currently being examined.
        tokens: Remaining tokens; an empty list means ``obj`` is a match.
        path: Segments consumed so far. Dict keys are appended as-is, list
            positions as ``[i]``.

    Returns:
        One ``PathValue`` per match, in traversal order; an empty list when
        nothing matches.

    Raises:
        TypeError: If a token is neither a string nor a ``[int]`` index.
    """
    if not tokens:
        return [PathValue(obj, path)]

    token, rest = tokens[0], tokens[1:]

    # Work out whether the token denotes a list index.
    index = None
    if isinstance(token, str):
        # fullmatch: a malformed token such as '[1]x' must not be half-parsed
        # as an index (it used to raise ValueError in int()).
        matched = re.fullmatch(r'\[(\d+)\]', token)
        if matched:
            index = int(matched.group(1))
    elif (isinstance(token, list) and len(token) == 1
          and isinstance(token[0], int) and not isinstance(token[0], bool)):
        index = token[0]
    else:
        raise TypeError(f"unsupported path token: {token!r}")

    results = []

    if token == '**' or token == '*':
        # Pair every direct child with the segment that addresses it.
        if isinstance(obj, dict):
            children = list(obj.items())
        elif isinstance(obj, list):
            children = [([i], item) for i, item in enumerate(obj)]
        else:
            children = []

        if token == '**':
            # Zero levels: try the remaining tokens on this node itself ...
            results.extend(path_values_ast(obj, rest, path))
            # ... then keep '**' active while descending into each child.
            child_tokens = tokens
        else:
            child_tokens = rest

        for segment, child in children:
            results.extend(path_values_ast(child, child_tokens, path + [segment]))
    elif index is not None:
        if isinstance(obj, list) and 0 <= index < len(obj):
            results.extend(path_values_ast(obj[index], rest, path + [[index]]))
    elif isinstance(obj, dict) and token in obj:
        results.extend(path_values_ast(obj[token], rest, path + [token]))

    return results


In [3]:
# Fixture 1: three users under "users"; Charlie's profile has no "emails".
data1 = {
    "users": [
        {"name": "Alice", "roles": ["admin", "user"],
         "profile": {"age": 30,
                     "emails": ["alice@example.com", "alice.work@example.com"]}},
        {"name": "Bob", "roles": ["user"],
         "profile": {"age": 25, "emails": ["bob@example.com"]}},
        {"name": "Charlie", "roles": ["guest"],
         "profile": {"age": 35}},
    ],
    "settings": {"theme": "dark", "notifications": True},
}

# Fixture 2: the user list lives under "users2" and has four entries;
# Charlie2 and David have no "emails".
data2 = {
    "users2": [
        {"name": "Alice2", "roles": ["user"],
         "profile": {"age": 30,
                     "emails": ["alice2@example.com", "alice.work2@example.com"]}},
        {"name": "Bob2", "roles": ["admin", "user"],
         "profile": {"age": 35, "emails": ["bob2@example.com"]}},
        {"name": "Charlie2", "roles": ["guest"],
         "profile": {"age": 45}},
        {"name": "David", "roles": ["user"],
         "profile": {"age": 40}},
    ],
    "settings": {"theme": "dark", "notifications": True},
}

# Both fixtures wrapped in a list, so the root of a query is itself a list.
data = [data1, data2]

In [4]:
# Echo the combined fixture list so its structure is visible before querying it.
data

[{'users': [{'name': 'Alice',
    'roles': ['admin', 'user'],
    'profile': {'age': 30,
     'emails': ['alice@example.com', 'alice.work@example.com']}},
   {'name': 'Bob',
    'roles': ['user'],
    'profile': {'age': 25, 'emails': ['bob@example.com']}},
   {'name': 'Charlie', 'roles': ['guest'], 'profile': {'age': 35}}],
  'settings': {'theme': 'dark', 'notifications': True}},
 {'users2': [{'name': 'Alice2',
    'roles': ['user'],
    'profile': {'age': 30,
     'emails': ['alice2@example.com', 'alice.work2@example.com']}},
   {'name': 'Bob2',
    'roles': ['admin', 'user'],
    'profile': {'age': 35, 'emails': ['bob2@example.com']}},
   {'name': 'Charlie2', 'roles': ['guest'], 'profile': {'age': 45}},
   {'name': 'David', 'roles': ['user'], 'profile': {'age': 40}}],
  'settings': {'theme': 'dark', 'notifications': True}}]

In [5]:
# The leading `*` steps into each element of the top-level list. Only data1
# has a "users" key (data2 uses "users2"), so every hit comes from index 0.
names = path_values("*.users.*.name", data)
pprint(names)

[PathValue(value=Alice, path=['$', [0], 'users', [0], 'name']),
 PathValue(value=Bob, path=['$', [0], 'users', [1], 'name']),
 PathValue(value=Charlie, path=['$', [0], 'users', [2], 'name'])]


In [6]:
# `**` descends to any depth, so this finds "emails" in both fixtures. Each hit
# is the whole list; users without an "emails" key contribute nothing.
emails = path_values("**.emails", data)
pprint(emails)

[PathValue(value=['alice@example.com', 'alice.work@example.com'], path=['$', [0], 'users', [0], 'profile', 'emails']),
 PathValue(value=['bob@example.com'], path=['$', [0], 'users', [1], 'profile', 'emails']),
 PathValue(value=['alice2@example.com', 'alice.work2@example.com'], path=['$', [1], 'users2', [0], 'profile', 'emails']),
 PathValue(value=['bob2@example.com'], path=['$', [1], 'users2', [1], 'profile', 'emails'])]


In [7]:
# Single-document fixture with an "emails" list at two depths; the deep one
# also holds a nested list.
data3 = {
    'name': 'Alice',
    'roles': ['admin', 'user'],
    'test': {'test2': {'test3': {
        'emails': ['alicedeep@example.com',
                   ['bobdeep@test.com', 'joedeep@test.com']],
    }}},
    'profile': {
        'age': 30,
        'emails': ['alice@example.com', 'alice.work@example.com'],
    },
}

In [8]:
# Echo the fixture so the two "emails" locations are visible before querying.
data3

{'name': 'Alice',
 'roles': ['admin', 'user'],
 'test': {'test2': {'test3': {'emails': ['alicedeep@example.com',
     ['bobdeep@test.com', 'joedeep@test.com']]}}},
 'profile': {'age': 30,
  'emails': ['alice@example.com', 'alice.work@example.com']}}

In [9]:
# Plain key path from the root: only the top-level profile's list is matched,
# not the one nested under test.test2.test3.
emails2 = path_values("profile.emails", data3)
pprint(emails2)

[PathValue(value=['alice@example.com', 'alice.work@example.com'], path=['$', 'profile', 'emails'])]


In [10]:
# `**` finds "emails" at any depth, so both lists are returned. The deep list
# comes back as stored, with its nested list intact (no flattening).
emails3 = path_values("**.emails", data3)
pprint(emails3)

[PathValue(value=['alicedeep@example.com', ['bobdeep@test.com', 'joedeep@test.com']], path=['$', 'test', 'test2', 'test3', 'emails']),
 PathValue(value=['alice@example.com', 'alice.work@example.com'], path=['$', 'profile', 'emails'])]


In [11]:
# The index is written as its own dot-separated segment (".[0]"). With the
# tokenizer regex in path_values, "emails[0]" would be read as one key name.
emails4 = path_values("profile.emails.[0]", data3)
pprint(emails4)

[PathValue(value=alice@example.com, path=['$', 'profile', 'emails', [0]])]
