From 39388d612da5dd7a7408ad34634c9b25fbe17b9c Mon Sep 17 00:00:00 2001 From: "David L. Qiu" Date: Mon, 24 Oct 2022 20:21:10 +0000 Subject: [PATCH 1/2] implement autosync_interval trait --- jupyter_server_fileid/manager.py | 44 ++++++++++++++++++++++++-------- tests/test_manager.py | 38 +++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 11 deletions(-) diff --git a/jupyter_server_fileid/manager.py b/jupyter_server_fileid/manager.py index a1c5129..dba52c6 100644 --- a/jupyter_server_fileid/manager.py +++ b/jupyter_server_fileid/manager.py @@ -1,10 +1,11 @@ import os import sqlite3 import stat +import time from typing import Optional from jupyter_core.paths import jupyter_data_dir -from traitlets import TraitError, Unicode, validate +from traitlets import Int, TraitError, Unicode, validate from traitlets.config.configurable import LoggingConfigurable @@ -65,11 +66,23 @@ class FileIdManager(LoggingConfigurable): config=True, ) + autosync_interval = Int( + default_value=5, + help=( + "How often to automatically invoke `sync_all()` when calling `get_path()`, in seconds. " + "Set to 0 to force `sync_all()` to always be called when calling `get_path()`. " + "Set to a negative value to disable autosync for `get_path()`." + "Defaults to 5." + ), + config=True, + ) + def __init__(self, *args, **kwargs): # pass args and kwargs to parent Configurable super().__init__(*args, **kwargs) # initialize instance attrs self._update_cursor = False + self._last_sync = 0.0 # initialize connection with db self.log.info(f"FileIdManager : Configured root dir: {self.root_dir}") self.log.info(f"FileIdManager : Configured database path: {self.db_path}") @@ -140,6 +153,7 @@ def _sync_all(self): _sync_file(). Hence the cursor needs to be redefined if self._update_cursor is set to True by _sync_file(). """ + now = time.time() cursor = self.con.execute("SELECT path, mtime FROM Files WHERE is_dir = 1") self._update_cursor = False dir = cursor.fetchone() @@ -170,6 +184,8 @@ def _sync_all(self): dir = cursor.fetchone() + self._last_sync = now + def _sync_dir(self, dir_path): """ Syncs the contents of a directory. If a child directory is dirty because @@ -361,8 +377,7 @@ def _update(self, id, stat_info=None, path=None): def sync_all(self): """ Syncs Files table with the filesystem and ensures that the correct path - is associated with each file ID. Required to be called manually - beforehand if `sync=False` is passed to `get_path()`. + is associated with each file ID. Notes ----- @@ -408,7 +423,7 @@ def get_id(self, path): self.con.commit() return id - def get_path(self, id, sync=True): + def get_path(self, id, autosync=True): """Retrieves the file path associated with a file ID. The file path is relative to `self.root_dir`. Returns None if the ID does not exist in the Files table, if the path no longer has a @@ -416,17 +431,24 @@ def get_path(self, id, sync=True): Parameters ---------- - sync : bool - Whether to invoke `sync_all()` automatically prior to ID lookup. + autosync : bool + Whether to invoke `sync_all()` automatically based on + `autosync_interval` prior to ID lookup. Defaults to `True`. Notes ----- - - For performance reasons, if this method must be called multiple times - in serial, then it is best to first invoke `sync_all()` and then call - this method with the argument `sync=False`. + - To force syncing when calling `get_path()`, call `sync_all()` manually + prior to calling `get_path()`. + + - To force not syncing when calling `get_path()`, call `get_path()` with + `autosync=False`. """ - if sync: - self.sync_all() + if ( + autosync + and self.autosync_interval >= 0 + and (time.time() - self._last_sync) >= self.autosync_interval + ): + self._sync_all() row = self.con.execute("SELECT path, ino FROM Files WHERE id = ?", (id,)).fetchone() diff --git a/tests/test_manager.py b/tests/test_manager.py index 9de3d0d..0ed57f1 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -1,4 +1,5 @@ import os +from unittest.mock import patch import pytest from traitlets import TraitError @@ -275,6 +276,7 @@ def test_get_path_oob_move_back_to_original_path(fid_manager, old_path, new_path assert fid_manager.get_path(id) == new_path fs_helpers.move(new_path, old_path) + fid_manager.sync_all() assert fid_manager.get_path(id) == old_path @@ -430,3 +432,39 @@ def test_save(fid_manager, test_path, fs_helpers): fid_manager.save(test_path) assert fid_manager.get_id(test_path) == id + + +def test_autosync_gt_0(fid_manager, old_path, new_path, fs_helpers): + fid_manager.autosync_interval = 10 + id = fid_manager.index(old_path) + fid_manager.sync_all() + fs_helpers.move(old_path, new_path) + + assert fid_manager.get_path(id) != new_path + with patch("time.time") as mock_time: + mock_time.return_value = fid_manager._last_sync + 999 + assert fid_manager.get_path(id) == new_path + + +def test_autosync_eq_0(fid_manager, old_path, new_path, fs_helpers): + fid_manager.autosync_interval = 0 + id = fid_manager.index(old_path) + fid_manager.sync_all() + fs_helpers.move(old_path, new_path) + + assert fid_manager.get_path(id) == new_path + + +def test_autosync_lt_0(fid_manager, old_path, new_path, fs_helpers): + fid_manager.autosync_interval = -10 + id = fid_manager.index(old_path) + fid_manager.sync_all() + fs_helpers.move(old_path, new_path) + + assert fid_manager.get_path(id) != new_path + with patch("time.time") as mock_time: + mock_time.return_value = fid_manager._last_sync + 999 + assert fid_manager.get_path(id) != new_path + + fid_manager.sync_all() + assert fid_manager.get_path(id) == new_path From ed22f890b84fa5106f9d28e55307b9454526c3d7 Mon Sep 17 00:00:00 2001 From: "David L. Qiu" Date: Tue, 25 Oct 2022 00:17:01 +0000 Subject: [PATCH 2/2] remove autosync arg --- jupyter_server_fileid/manager.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/jupyter_server_fileid/manager.py b/jupyter_server_fileid/manager.py index dba52c6..172ebec 100644 --- a/jupyter_server_fileid/manager.py +++ b/jupyter_server_fileid/manager.py @@ -423,29 +423,19 @@ def get_id(self, path): self.con.commit() return id - def get_path(self, id, autosync=True): + def get_path(self, id): """Retrieves the file path associated with a file ID. The file path is relative to `self.root_dir`. Returns None if the ID does not exist in the Files table, if the path no longer has a file, or if the path is not a child of `self.root_dir`. - Parameters - ---------- - autosync : bool - Whether to invoke `sync_all()` automatically based on - `autosync_interval` prior to ID lookup. Defaults to `True`. - Notes ----- - To force syncing when calling `get_path()`, call `sync_all()` manually prior to calling `get_path()`. - - - To force not syncing when calling `get_path()`, call `get_path()` with - `autosync=False`. """ if ( - autosync - and self.autosync_interval >= 0 + self.autosync_interval >= 0 and (time.time() - self._last_sync) >= self.autosync_interval ): self._sync_all()