Conversation

@nineteendo (Contributor) commented Apr 1, 2025

Benchmark:

script
# mysys.py
import sys

def _getsizeof(obj, seen):
    # Deduplicate by id() so an object shared between containers is
    # counted only once.
    if (obj_id := id(obj)) in seen:
        return 0
    seen.add(obj_id)
    size = sys.getsizeof(obj)
    if isinstance(obj, dict):
        size += sum(_getsizeof(k, seen) + _getsizeof(v, seen)
                    for k, v in obj.items())
    elif isinstance(obj, list):
        size += sum(_getsizeof(v, seen) for v in obj)
    return size

def getsizeof(obj):
    # Deep size of obj, formatted with a binary unit prefix.
    size = _getsizeof(obj, set())
    for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
        if size < 1024:
            break
        size /= 1024
    return f"{size:.2f} {unit}"
:: json_cache_keys.bat
@echo off
echo 100 repeated keys
call main\python            -m timeit -s "import json; s = json.dumps([{'abc': 0}] * 100)" "json.loads(s)"
call json_cache_keys\python -m timeit -s "import json; s = json.dumps([{'abc': 0}] * 100)" "json.loads(s, cache_keys=False)"
echo 10,000 repeated keys
call main\python            -m timeit -s "import json; s = json.dumps([{'abc': 0}] * 10_000)" "json.loads(s)"
call json_cache_keys\python -m timeit -s "import json; s = json.dumps([{'abc': 0}] * 10_000)" "json.loads(s, cache_keys=False)"
echo 1,000,000 repeated keys
call main\python            -m timeit -s "import json; s = json.dumps([{'abc': 0}] * 1_000_000)" "json.loads(s)"
call json_cache_keys\python -m timeit -s "import json; s = json.dumps([{'abc': 0}] * 1_000_000)" "json.loads(s, cache_keys=False)"

echo 100 different keys
call main\python            -m timeit -s "import json; s = json.dumps([{str(i): 0} for i in range(100)])" "json.loads(s)"
call json_cache_keys\python -m timeit -s "import json; s = json.dumps([{str(i): 0} for i in range(100)])" "json.loads(s, cache_keys=False)"
echo 10,000 different keys
call main\python            -m timeit -s "import json; s = json.dumps([{str(i): 0} for i in range(10_000)])" "json.loads(s)"
call json_cache_keys\python -m timeit -s "import json; s = json.dumps([{str(i): 0} for i in range(10_000)])" "json.loads(s, cache_keys=False)"
echo 1,000,000 different keys
call main\python            -m timeit -s "import json; s = json.dumps([{str(i): 0} for i in range(1_000_000)])" "json.loads(s)"
call json_cache_keys\python -m timeit -s "import json; s = json.dumps([{str(i): 0} for i in range(1_000_000)])" "json.loads(s, cache_keys=False)"

echo ### memory ###

echo 100 repeated keys
call main\python            -c "import json, mysys; print(mysys.getsizeof(json.loads(json.dumps([{'abc': 0}] * 100))))"
call json_cache_keys\python -c "import json, mysys; print(mysys.getsizeof(json.loads(json.dumps([{'abc': 0}] * 100), cache_keys=False)))"
echo 10,000 repeated keys
call main\python            -c "import json, mysys; print(mysys.getsizeof(json.loads(json.dumps([{'abc': 0}] * 10_000))))"
call json_cache_keys\python -c "import json, mysys; print(mysys.getsizeof(json.loads(json.dumps([{'abc': 0}] * 10_000), cache_keys=False)))"
echo 1,000,000 repeated keys
call main\python            -c "import json, mysys; print(mysys.getsizeof(json.loads(json.dumps([{'abc': 0}] * 1_000_000))))"
call json_cache_keys\python -c "import json, mysys; print(mysys.getsizeof(json.loads(json.dumps([{'abc': 0}] * 1_000_000), cache_keys=False)))"
output
100 repeated keys
10000 loops, best of 5: 25.5 usec per loop # before
10000 loops, best of 5: 26.3 usec per loop # after
# -> 1.03x slower (within noise)
10,000 repeated keys
100 loops, best of 5: 2.56 msec per loop # before
100 loops, best of 5: 2.3 msec per loop # after
# -> 1.11x faster
1,000,000 repeated keys
1 loop, best of 5: 359 msec per loop # before
1 loop, best of 5: 385 msec per loop # after
# -> 1.07x SLOWER

100 different keys
10000 loops, best of 5: 27.8 usec per loop # before
10000 loops, best of 5: 26.4 usec per loop # after
# -> 1.05x faster
10,000 different keys
100 loops, best of 5: 2.93 msec per loop # before
100 loops, best of 5: 2.33 msec per loop # after
# -> 1.26x faster
1,000,000 different keys
1 loop, best of 5: 676 msec per loop # before
1 loop, best of 5: 392 msec per loop # after
# -> 1.72x faster

### memory ###

100 repeated keys
18.94 KB # before
23.19 KB # after
# -> 1.22x more memory
10,000 repeated keys
1.84 MB # before
2.26 MB # after
# -> 1.23x more memory
1,000,000 repeated keys
183.53 MB # before
225.50 MB # after
# -> 1.23x more memory
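
For context, a minimal sketch of the key caching being toggled here, assuming the C scanner's cache behaves like the memo dict in the pure-Python decoder (Lib/json/decoder.py): each distinct key string is stored once per decode and reused across objects, which is why disabling it costs roughly 1.2x memory on repeated keys.

# Sketch of per-decode key caching; assumes it mirrors the memo dict
# used by the pure-Python JSONObject parser in Lib/json/decoder.py.
memo = {}

def cache_key(key, memo_get=memo.setdefault):
    # Return the first str object seen for this key, so many dicts with
    # the key 'abc' share one string instead of holding separate copies.
    return memo_get(key, key)

a = cache_key(''.join(['ab', 'c']))  # a fresh, non-interned 'abc'
b = cache_key('-abc-'.strip('-'))    # another distinct 'abc' object
assert a is b                        # both resolve to the cached object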

@ghost commented Apr 1, 2025

All commit authors signed the Contributor License Agreement.
CLA signed

Comment on lines +365 to +366
if not cache_keys:
    kw['cache_keys'] = cache_keys
@nineteendo (Contributor Author)

It seems there are no tests checking that new parameters aren't forwarded to the decoder when left at their default.
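
A sketch of what such a test could look like (hypothetical, not from this PR; it relies on the private json._default_decoder that CPython's json/__init__.py uses as the all-defaults fast path):

import json
import unittest
from unittest import mock

class TestDefaultForwarding(unittest.TestCase):
    def test_defaults_take_cached_decoder_path(self):
        # With all-default arguments, loads() should not construct a new
        # JSONDecoder; it should go through the cached _default_decoder.
        with mock.patch.object(json._default_decoder, 'decode',
                               wraps=json._default_decoder.decode) as m:
            json.loads('{"abc": 0}')
            m.assert_called_once()

if __name__ == '__main__':
    unittest.main()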

@nineteendo nineteendo closed this Apr 1, 2025
@nineteendo nineteendo deleted the json_cache_keys branch April 1, 2025 18:10