add: readonly optional argument to work around lock issues in tests + …

…fix tests + bump v0.9.1 Signed-off-by: Stephen L. <LRQ3000@gmail.com>
lrq3000 · Aug 15, 2023 · e9dc323 · e9dc323
1 parent 85c9dce
commit e9dc323
Show file tree

Hide file tree

Showing 3 changed files with 38 additions and 21 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 
 [project]  # beware if using setuptools: setup.py still gets executed, and even if pyproject.toml fields take precedence, if there is any code error in setup.py, building will fail!
 name = "fdict"
-version = "0.9.0"  # see PEP 440 https://peps.python.org/pep-0440/#pre-releases and https://packaging.python.org/en/latest/guides/single-sourcing-package-version/
+version = "0.9.1"  # see PEP 440 https://peps.python.org/pep-0440/#pre-releases and https://packaging.python.org/en/latest/guides/single-sourcing-package-version/
 description = "Just like dict() but for out-of-core big data computing of recursive data structures in Python!"
 authors = [
     {name = "Stephen Karl Larroque", email = "lrq3000@gmail.com"},

diff --git a/src/fdict/fdict.py b/src/fdict/fdict.py
@@ -873,7 +873,8 @@ def __init__(self, *args, **kwargs):
         writeback : bool, optional
             Activates shelve writeback option. If False, only assignments
             will allow committing changes of leaf collections. See shelve
-            documentation.
+            documentation. Will be disabled if readonly is True (otherwise
+            an exception is raised when calling close()).
             [default : True]
         forcedumbdbm : bool, optional
             Force the use of the Dumb DBM implementation to manage
@@ -882,6 +883,11 @@ def __init__(self, *args, **kwargs):
             can be found on your system). Dumb DBM should work on
             any platform, it is native to Python.
             [default : False]
+        readonly : bool, optional
+            Open the database as read-only. This is necessary for some tests
+            to check the internal state of a stored database beyond the autosync
+            process.
+            [default : False]
         Returns
         -------
         out  : dict-like object.
@@ -916,46 +922,54 @@ def __init__(self, *args, **kwargs):
         else:
             self.forcedumbdbm = False
 
+        # Open the database in read-only mode ('r' flag), or with write permission, creating it if necessary ('c' flag)
+        self.readonly = ('readonly' in kwargs)
+        if self.readonly:
+            dbflag = 'r'
+        else:
+            dbflag = 'c'
+
         # Initialize parent class
         super(sfdict, self).__init__(*args, **kwargs)
 
-        # Initialize the out-of-core shelve database file
+        # Initialize/create/reopen the out-of-core shelve database file
         if not self.rootpath: # If rootpath, this is an internal call, we just reuse the input dict
             # Else it is an external call, we reuse the provided dict but we make a copy and store in another file, or there is no provided dict and we create a new one
             try:
                 if self.forcedumbdbm:
                     # Force the use of dumb dbm even if slower
                     raise ImportError('pass')
-                d = shelve.open(filename=self.filename, flag='c', protocol=PICKLE_HIGHEST_PROTOCOL, writeback=self.writeback)
+                d = shelve.open(filename=self.filename, flag=dbflag, protocol=PICKLE_HIGHEST_PROTOCOL, writeback=self.writeback and (not self.readonly))
                 self.usedumbdbm = False
             except (ImportError, IOError) as exc:
                 if 'pass' in str(exc).lower() or '_bsddb' in str(exc).lower() or 'permission denied' in str(exc).lower():
                     # Pypy error, we workaround by using a fallback to anydbm: dumbdbm
                     if PY3:  # pragma: no cover
                         from dbm import dumb
-                        db = dumb.open(self.filename, 'c')
+                        db = dumb.open(self.filename, dbflag)
                     else:
                         import dumbdbm
-                        db = dumbdbm.open(self.filename, 'c')
+                        db = dumbdbm.open(self.filename, dbflag)
                     # Open the dumb db as a shelf
-                    d = shelve.Shelf(db, protocol=PICKLE_HIGHEST_PROTOCOL, writeback=self.writeback)
+                    d = shelve.Shelf(db, protocol=PICKLE_HIGHEST_PROTOCOL, writeback=self.writeback and (not self.readonly))
                     self.usedumbdbm = True
                 else:  # pragma: no cover
                     raise
 
-            # Initialize the shelve with the internal dict preprocessed by the parent class fdict
+            # Initialize the shelve with the internal (in-memory) dict preprocessed by the parent class fdict
             d.update(self.d)
             # Then update self.d to use the shelve instead
             del self.d
             self.d = d
-            self.d.sync()
+            if not self.readonly:
+                self.d.sync()
 
         # Call compatibility layer
         self._viewkeys, self._viewvalues, self._viewitems = self._getitermethods(self.d)
 
     def __setitem__(self, key, value):
         super(sfdict, self).__setitem__(key, value)
-        if self.autosync:
+        if self.autosync and not self.readonly:
             # Commit pending changes every time we set an item
             self.sync()
 
@@ -964,12 +978,13 @@ def get_filename(self):
 
     def sync(self):
         '''Commit pending changes to file'''
-        self.d.sync()
+        if not self.readonly:
+            self.d.sync()
 
     def close(self, delete=False):
         '''Commit pending changes to file and close it'''
         self.d.close()
-        if delete:
+        if delete and not self.readonly:
             try:
                 filename = self.get_filename()
                 if not self.usedumbdbm:

diff --git a/tests/test_fdict.py b/tests/test_fdict.py
@@ -893,30 +893,30 @@ def test_sfdict_forcedbm_filename():
 
 def test_sfdict_autosync():
     '''Test sfdict autosync'''
-    # With autosync, updating a nested object is saved to disk
+    ## TEST1: With autosync, updating a nested object is saved to disk
     g = sfdict(d={'a': {'b': set([1, 2])}}, autosync=True)
     assert 'shelve' in str(type(g.d)) or 'instance' in str(type(g.d))  # check the internal dict is a db shelve
     g['a']['b'].add(3)
     assert g['a/b'] == set([1, 2, 3])
     g['d'] = 4  # trigger the autosync on setitem
     assert g == {'a/b': set([1, 2, 3]), 'd': 4}
     filename = g.get_filename()
-    # Reload the same shelve before closing/syncing it
     g2 = g.to_dict()  # copy before close, to test later
     g.close()  # close database (without deleting), otherwise we cannot reopen it, there will be a lock
-
+    # Reload the same shelve from disk, now that the first handle has been closed
     h = sfdict(filename=filename)
     assert h == g2
     assert h['a/b'] == set([1, 2, 3])
     assert (h['a/b/c'] == 3) == False
     h.close(delete=True)
-    # Without autosync, the change is lost
+    ## TEST2: Without autosync, the change is lost
     g = sfdict(d={'a': {'b': set([1, 2])}}, autosync=False)
     g['a']['b'].add(3)
     assert g['a/b'] == set([1, 2, 3])
     g['d'] = 4
     filename = g.get_filename()
-    h = sfdict(filename=filename)
+    # The tricky part: reopen the same database through a second handle that acts as an external observer, bypassing autosync and the in-memory state of the fdict, to check what is really stored on disk in the shelf.
+    h = sfdict(filename=filename, readonly=True)  # To avoid file lock on Linux and MacOS, we open in readonly mode.
     if not '__pypy__' in sys.builtin_module_names:
         # pypy seems to always commit the changes, even without sync!
         # also happens on Travis, I don't know why, maybe on some linuxes the commits are instantaneous?
@@ -925,11 +925,13 @@ def test_sfdict_autosync():
         except AssertionError:
             pass
     h.close()
+    # Now we sync the writeable database, to commit manually the changes
     g.sync()
-    h = sfdict(filename=filename)  # reopen after syncing g
-    assert h == {'a/b': set([1, 2, 3]), 'd': 4}
-    g.close()
-    h.close(delete=True)
+    # And we reopen, after syncing g, in readonly mode with another handle to check the content with an external observer
+    h = sfdict(filename=filename, readonly=True)
+    assert h == {'a/b': set([1, 2, 3]), 'd': 4}  # then we find the data is there!
+    h.close()
+    g.close(delete=True)
 
 def test_sfdict_writeback():
     '''Test sfdict writeback'''