Skip to content

Commit

Permalink
add: readonly optional argument to workaround lock issues in tests + …
Browse files Browse the repository at this point in the history
…fix tests + bump v0.9.1

Signed-off-by: Stephen L. <LRQ3000@gmail.com>
  • Loading branch information
lrq3000 committed Aug 15, 2023
1 parent 85c9dce commit e9dc323
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 21 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"

[project] # beware if using setuptools: setup.py still gets executed, and even if pyproject.toml fields take precedence, if there is any code error in setup.py, building will fail!
name = "fdict"
version = "0.9.0" # see PEP 440 https://peps.python.org/pep-0440/#pre-releases and https://packaging.python.org/en/latest/guides/single-sourcing-package-version/
version = "0.9.1" # see PEP 440 https://peps.python.org/pep-0440/#pre-releases and https://packaging.python.org/en/latest/guides/single-sourcing-package-version/
description = "Just like dict() but for out-of-core big data computing of recursive data structures in Python!"
authors = [
{name = "Stephen Karl Larroque", email = "lrq3000@gmail.com"},
Expand Down
37 changes: 26 additions & 11 deletions src/fdict/fdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -873,7 +873,8 @@ def __init__(self, *args, **kwargs):
writeback : bool, optional
Activates shelve writeback option. If False, only assignments
will allow committing changes of leaf collections. See shelve
documentation.
documentation. Will be disabled if readonly is True (otherwise an
exception is raised when calling close()).
[default : True]
forcedumbdbm : bool, optional
Force the use of the Dumb DBM implementation to manage
Expand All @@ -882,6 +883,11 @@ def __init__(self, *args, **kwargs):
can be found on your system). Dumb DBM should work on
any platform, it is native to Python.
[default : False]
readonly : bool, optional
Open the database as read-only. This is necessary for some tests
to check the internal state of a stored database beyond the autosync
process.
[default : False]
Returns
-------
out : dict-like object.
Expand Down Expand Up @@ -916,46 +922,54 @@ def __init__(self, *args, **kwargs):
else:
self.forcedumbdbm = False

# Do we open the database in read-only mode, or do we allow write permission (and create it if necessary = c mode)?
self.readonly = ('readonly' in kwargs)
if self.readonly:
dbflag = 'r'
else:
dbflag = 'c'

# Initialize parent class
super(sfdict, self).__init__(*args, **kwargs)

# Initialize the out-of-core shelve database file
# Initialize/create/reopen the out-of-core shelve database file
if not self.rootpath: # If rootpath, this is an internal call, we just reuse the input dict
# Else it is an external call, we reuse the provided dict but we make a copy and store in another file, or there is no provided dict and we create a new one
try:
if self.forcedumbdbm:
# Force the use of dumb dbm even if slower
raise ImportError('pass')
d = shelve.open(filename=self.filename, flag='c', protocol=PICKLE_HIGHEST_PROTOCOL, writeback=self.writeback)
d = shelve.open(filename=self.filename, flag=dbflag, protocol=PICKLE_HIGHEST_PROTOCOL, writeback=self.writeback and (not self.readonly))
self.usedumbdbm = False
except (ImportError, IOError) as exc:
if 'pass' in str(exc).lower() or '_bsddb' in str(exc).lower() or 'permission denied' in str(exc).lower():
# Pypy error, we workaround by using a fallback to anydbm: dumbdbm
if PY3: # pragma: no cover
from dbm import dumb
db = dumb.open(self.filename, 'c')
db = dumb.open(self.filename, dbflag)
else:
import dumbdbm
db = dumbdbm.open(self.filename, 'c')
db = dumbdbm.open(self.filename, dbflag)
# Open the dumb db as a shelf
d = shelve.Shelf(db, protocol=PICKLE_HIGHEST_PROTOCOL, writeback=self.writeback)
d = shelve.Shelf(db, protocol=PICKLE_HIGHEST_PROTOCOL, writeback=self.writeback and (not self.readonly))
self.usedumbdbm = True
else: # pragma: no cover
raise

# Initialize the shelve with the internal dict preprocessed by the parent class fdict
# Initialize the shelve with the internal (in-memory) dict preprocessed by the parent class fdict
d.update(self.d)
# Then update self.d to use the shelve instead
del self.d
self.d = d
self.d.sync()
if not self.readonly:
self.d.sync()

# Call compatibility layer
self._viewkeys, self._viewvalues, self._viewitems = self._getitermethods(self.d)

def __setitem__(self, key, value):
super(sfdict, self).__setitem__(key, value)
if self.autosync:
if self.autosync and not self.readonly:
# Commit pending changes every time we set an item
self.sync()

Expand All @@ -964,12 +978,13 @@ def get_filename(self):

def sync(self):
'''Commit pending changes to file'''
self.d.sync()
if not self.readonly:
self.d.sync()

def close(self, delete=False):
'''Commit pending changes to file and close it'''
self.d.close()
if delete:
if delete and not self.readonly:
try:
filename = self.get_filename()
if not self.usedumbdbm:
Expand Down
20 changes: 11 additions & 9 deletions tests/test_fdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,30 +893,30 @@ def test_sfdict_forcedbm_filename():

def test_sfdict_autosync():
'''Test sfdict autosync'''
# With autosync, updating a nested object is saved to disk
## TEST1: With autosync, updating a nested object is saved to disk
g = sfdict(d={'a': {'b': set([1, 2])}}, autosync=True)
assert 'shelve' in str(type(g.d)) or 'instance' in str(type(g.d)) # check the internal dict is a db shelve
g['a']['b'].add(3)
assert g['a/b'] == set([1, 2, 3])
g['d'] = 4 # trigger the autosync on setitem
assert g == {'a/b': set([1, 2, 3]), 'd': 4}
filename = g.get_filename()
# Reload the same shelve before closing/syncing it
g2 = g.to_dict() # copy before close, to test later
g.close() # close database (without deleting), otherwise we cannot reopen it, there will be a lock

# Reload the same shelve after closing it (close() commits pending changes to file)
h = sfdict(filename=filename)
assert h == g2
assert h['a/b'] == set([1, 2, 3])
assert (h['a/b/c'] == 3) == False
h.close(delete=True)
# Without autosync, the change is lost
## TEST2: Without autosync, the change is lost
g = sfdict(d={'a': {'b': set([1, 2])}}, autosync=False)
g['a']['b'].add(3)
assert g['a/b'] == set([1, 2, 3])
g['d'] = 4
filename = g.get_filename()
h = sfdict(filename=filename)
# The tricky part: reload same database to test it. This is to use an external observer that will bypass autosync and in-memory internal states of the fdict, to really check what is stored on-disk in the shelf.
h = sfdict(filename=filename, readonly=True) # To avoid file lock on Linux and MacOS, we open in readonly mode.
if not '__pypy__' in sys.builtin_module_names:
# pypy seems to always commit the changes, even without sync!
# also happens on Travis, I don't know why, maybe on some linuxes the commits are instantaneous?
Expand All @@ -925,11 +925,13 @@ def test_sfdict_autosync():
except AssertionError:
pass
h.close()
# Now we sync the writeable database, to manually commit the changes
g.sync()
h = sfdict(filename=filename) # reopen after syncing g
assert h == {'a/b': set([1, 2, 3]), 'd': 4}
g.close()
h.close(delete=True)
# And we reopen, after syncing g, in readonly mode with another handle to check the content with an external observer
h = sfdict(filename=filename, readonly=True)
assert h == {'a/b': set([1, 2, 3]), 'd': 4} # then we find the data is there!
h.close()
g.close(delete=True)

def test_sfdict_writeback():
'''Test sfdict writeback'''
Expand Down

0 comments on commit e9dc323

Please sign in to comment.