In [5]:
!uv pip install joblib numpy diskcache

[2mUsing Python 3.11.11 environment at: /Users/lukastk/dev/20260113_w3pmcj__netrun2/netrun/.venv[0m
[2K[2mResolved [1m3 packages[0m [2min 50ms[0m[0m                                          [0m
[2K[2mInstalled [1m1 package[0m [2min 4ms[0m[0m                                  [0m
 [32m+[39m [1mdiskcache[0m[2m==5.6.3[0m


In [4]:
# Disabled joblib.Memory experiment, left commented out. The recorded output of this
# cell shows a "[Memory] Calling numpy.square..." log, so it was presumably captured
# while these lines were still active.
# from joblib import Memory
# location = 'your_cache_dir_goes_here'
# mem = Memory(location, verbose=1)
# import numpy as np
# a = np.vander(np.arange(3)).astype(float)
# square = mem.cache(np.square)
# b = square(a)

________________________________________________________________________________
[Memory] Calling numpy.square...
square(array([[0., 0., 1.],
       [1., 1., 1.],
       [4., 2., 1.]]))
___________________________________________________________square - 0.0s, 0.0min


> The default diskcache.Disk serialization uses pickling for both keys and values. **Unfortunately, pickling produces inconsistencies sometimes when applied to container data types like tuples. Two equal tuples may serialize to different bytes objects using pickle.** The likelihood of differences is reduced by using pickletools.optimize but still inconsistencies occur (#54). The inconsistent serialized pickle values is particularly problematic when applied to the key in the cache. **Consider using an alternative Disk type, like JSONDisk, for consistent serialization of keys.**

[Source](https://grantjenks.com/docs/diskcache/tutorial.html#caveats)

In [34]:
import diskcache
import tempfile

# Scratch directory backing the cache; a new one is created each time this runs.
temp_dir = tempfile.mkdtemp()

# Cache that uses JSONDisk instead of the default Disk, with compression level 6
# requested through the `disk_compress_level` setting.
cache = diskcache.Cache(
    directory=temp_dir,
    disk=diskcache.JSONDisk,
    disk_compress_level=6,
)

In [44]:
from pydantic import BaseModel

class MyModel(BaseModel):
    """Minimal pydantic model used as a test value for the cache."""

    foo: str
    bar: dict


# `bar` holds a bare `object()` alongside a plain string value.
my_data = MyModel(foo='bar', bar={'baz': 'qux', 'quux': object()})

# Storing the model in the JSONDisk-backed cache raises a TypeError ("Object of type
# MyModel is not JSON serializable"). That failure is what this cell demonstrates, so
# report it instead of letting it abort a Restart-and-Run-All execution.
try:
    cache['my_data'] = my_data
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: Object of type MyModel is not JSON serializable

In [47]:
import tempfile

import diskcache
from pydantic import BaseModel

# New scratch directory and a cache built with diskcache's default Disk.
temp_dir = tempfile.mkdtemp()
cache = diskcache.Cache(directory=temp_dir)


class MyModel(BaseModel):
    """Two-field pydantic model used as a test value for the cache."""

    foo: str
    bar: dict


my_data = MyModel(foo='bar', bar=dict(baz='qux', quux=object()))

# With the default Disk, the recorded run of this assignment shows no error.
cache['my_data'] = my_data

In [49]:
# Use a (model instance, model class) tuple as the cache key: write, then read back.
cache[my_data, MyModel] = 123

cache[my_data, MyModel]

123

In [50]:
# Measure how long a membership test on the composite (instance, class) key takes.
%timeit (my_data, MyModel) in cache

43.6 μs ± 468 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [51]:
# List every attribute name on diskcache.Cache (dunder and private names included).
dir(diskcache.Cache)

['__class__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__setattr__',
 '__setitem__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_con',
 '_cull',
 '_iter',
 '_row_insert',
 '_row_update',
 '_select_delete',
 '_sql',
 '_sql_retry',
 '_transact',
 'add',
 'check',
 'clear',
 'close',
 'create_tag_index',
 'cull',
 'decr',
 'delete',
 'directory',
 'disk',
 'drop_tag_index',
 'evict',
 'expire',
 'get',
 'incr',
 'iterkeys',
 'memoize',
 'peek',
 'peekitem',
 'pop',
 'pull',
 'push',
 'read',
 'reset',
 'set',
 'stats',
 'timeout',
 'touch',
 'transact',
 'volume']

In [61]:
from diskcache import Index

# Repeat the composite-key round trip with an Index created with default arguments.
ind = Index()

ind[my_data, MyModel] = 123

ind[my_data, MyModel]

123

In [68]:
# Ask the cache's disk object for its hash of the composite key.
cache.disk.hash((my_data, MyModel))

3745006623

In [98]:
# Display the bound method itself to see which class provides `hash`.
cache.disk.hash

<bound method Disk.hash of <diskcache.core.Disk object at 0x113055c10>>

In [55]:
# Display the Cache class to see the module it is defined in.
diskcache.Cache

diskcache.core.Cache

In [91]:
# Keep only the first element of the pair returned by `put` (named `disk_key` here).
# Indexing with [0] instead of unpacking into `_` avoids overwriting IPython's `_`
# last-output variable.
disk_key = cache.disk.put("asd")[0]
disk_key

'asd'

In [94]:
# Build a standalone JSONDisk on `temp_dir` and display the first element of what
# its `put` returns for a plain string key.
json_disk = diskcache.JSONDisk(temp_dir)

json_disk.put("asd")[0]

<memory at 0x1135b8580>

In [95]:
# First element of `put`'s result for a plain string key, using the cache's own disk.
cache.disk.put("asd")[0]

'asd'

In [111]:
import zlib

# Take the first element of `put`'s result for the composite key. Indexing with [0]
# instead of unpacking into `_` leaves IPython's `_` last-output variable alone.
disk_key = cache.disk.put((my_data, MyModel))[0]

# Adler-32 checksum of that value, masked to 32 bits.
mask = 0xFFFFFFFF
zlib.adler32(disk_key) & mask

3745006623

In [118]:
import struct

# Adler-32 checksum, masked with `mask`, of 0.1 packed as a network-order ('!')
# double ('d').
zlib.adler32(struct.pack('!d', 0.1)) & mask

329057424

In [104]:
import sqlite3
import struct
import zlib

# The annotation on `disk` is quoted so that defining this function does not require
# `diskcache` to be imported already.
def hash(self, key, disk: "diskcache.Disk"):
    """Compute portable hash for `key`.

    The key is first passed through ``disk.put``; the first element of the
    returned pair is then reduced to an integer according to its exact type:

    * ``sqlite3.Binary``: Adler-32 of the buffer, masked to 32 bits
    * ``str``: Adler-32 of the UTF-8 encoding, masked to 32 bits
    * ``int``: the value modulo ``0xFFFFFFFF``
    * ``float``: Adler-32 of the network-order double encoding, masked to 32 bits

    :param self: unused
    :param key: key to hash
    :param disk: object whose ``put(key)`` returns a pair; only the first
        element is used
    :return: hash value
    :raises AssertionError: if the first element returned by ``disk.put`` is
        not one of the four types listed above

    """
    mask = 0xFFFFFFFF
    disk_key, _ = disk.put(key)
    type_disk_key = type(disk_key)

    # Exact type checks (not isinstance), so subclasses such as bool fall
    # through to the final assertion.
    if type_disk_key is sqlite3.Binary:
        return zlib.adler32(disk_key) & mask
    elif type_disk_key is str:
        return zlib.adler32(disk_key.encode('utf-8')) & mask  # noqa
    elif type_disk_key is int:
        # Note: modulo, not a bitwise AND, so the result is in [0, mask - 1].
        return disk_key % mask
    else:
        assert type_disk_key is float, (
            f"unsupported disk key type: {type_disk_key.__name__}"
        )
        return zlib.adler32(struct.pack('!d', disk_key)) & mask