Skip to content

Commit

Permalink
Lazy-load the watchdog event watcher module (#8111)
Browse files Browse the repository at this point in the history
## Describe your changes

If a user configures `server.fileWatcherType` to none or poll, there
isn't any reason we need to import the watchdog package, even if it is
installed on the system. This PR applies a couple of small refactorings
to make the `event_based_path_watcher` lazy-loaded (only importer if
actually needed).

## GitHub Issue Link (if applicable)

Related to #6066

## Testing Plan

- Added e2e test to make sure that `watchdog` and
`streamlit.watcher.event_based_path_watcher` are lazy loaded.

---

**Contribution License Agreement**

By submitting this pull request you agree that all contributions to this
project are made under the Apache 2.0 license.
  • Loading branch information
LukasMasuch committed Feb 8, 2024
1 parent c4f0967 commit 9375977
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 75 deletions.
2 changes: 2 additions & 0 deletions e2e_playwright/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@ def app_server(
str(app_port),
"--browser.gatherUsageStats",
"false",
"--server.fileWatcherType",
"none",
],
cwd=".",
)
Expand Down
3 changes: 2 additions & 1 deletion e2e_playwright/lazy_loaded_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,13 @@
"pydeck",
"altair",
"graphviz",
"watchdog",
"streamlit.emojis",
"streamlit.external",
"streamlit.watcher.event_based_path_watcher",
# TODO(lukasmasuch): Lazy load more packages:
# "streamlit.hello",
# "streamlit.vendor.pympler",
# "streamlit.watcher.event_based_path_watcher",
# "pandas",
# "pyarrow",
# "numpy",
Expand Down
2 changes: 1 addition & 1 deletion e2e_playwright/lazy_loaded_modules_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,6 @@
def test_lazy_loaded_modules_are_not_imported(app: Page):
"""Test that lazy loaded modules are not imported when the page is loaded."""
markdown_elements = app.get_by_test_id("stMarkdown")
expect(markdown_elements).to_have_count(8)
expect(markdown_elements).to_have_count(10)
for element in markdown_elements.all():
expect(element).to_have_text(re.compile(r".*not loaded.*"))
47 changes: 26 additions & 21 deletions lib/streamlit/watcher/event_based_path_watcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,14 @@
listens to folder events, sees if registered paths changed, and fires
callbacks if so.
This module is lazy-loaded and used only if watchdog is installed.
"""

from __future__ import annotations

import os
import threading
from typing import Callable, Dict, Optional, cast
from typing import Callable, Dict, Final, cast

from blinker import ANY, Signal
from watchdog import events
Expand All @@ -47,7 +50,7 @@
from streamlit.util import repr_
from streamlit.watcher import util

LOGGER = get_logger(__name__)
_LOGGER: Final = get_logger(__name__)


class EventBasedPathWatcher:
Expand All @@ -58,14 +61,14 @@ def close_all() -> None:
"""Close the _MultiPathWatcher singleton."""
path_watcher = _MultiPathWatcher.get_singleton()
path_watcher.close()
LOGGER.debug("Watcher closed")
_LOGGER.debug("Watcher closed")

def __init__(
self,
path: str,
on_changed: Callable[[str], None],
*, # keyword-only arguments:
glob_pattern: Optional[str] = None,
glob_pattern: str | None = None,
allow_nonexistent: bool = False,
) -> None:
"""Constructor for EventBasedPathWatchers.
Expand All @@ -76,7 +79,7 @@ def __init__(
The path to watch.
on_changed : Callable[[str], None]
Callback to call when the path changes.
glob_pattern : Optional[str]
glob_pattern : str or None
A glob pattern to filter the files in a directory that should be
watched. Only relevant when creating an EventBasedPathWatcher on a
directory.
Expand All @@ -95,7 +98,7 @@ def __init__(
glob_pattern=glob_pattern,
allow_nonexistent=allow_nonexistent,
)
LOGGER.debug("Watcher created for %s", self._path)
_LOGGER.debug("Watcher created for %s", self._path)

def __repr__(self) -> str:
return repr_(self)
Expand All @@ -109,7 +112,7 @@ def close(self) -> None:
class _MultiPathWatcher(object):
"""Watches multiple paths."""

_singleton: Optional["_MultiPathWatcher"] = None
_singleton: "_MultiPathWatcher" | None = None

@classmethod
def get_singleton(cls) -> "_MultiPathWatcher":
Expand All @@ -118,7 +121,7 @@ def get_singleton(cls) -> "_MultiPathWatcher":
Instantiates one if necessary.
"""
if cls._singleton is None:
LOGGER.debug("No singleton. Registering one.")
_LOGGER.debug("No singleton. Registering one.")
_MultiPathWatcher()

return cast("_MultiPathWatcher", _MultiPathWatcher._singleton)
Expand Down Expand Up @@ -154,7 +157,7 @@ def watch_path(
path: str,
callback: Callable[[str], None],
*, # keyword-only arguments:
glob_pattern: Optional[str] = None,
glob_pattern: str | None = None,
allow_nonexistent: bool = False,
) -> None:
"""Start watching a path."""
Expand Down Expand Up @@ -186,7 +189,7 @@ def stop_watching_path(self, path: str, callback: Callable[[str], None]) -> None
folder_handler = self._folder_handlers.get(folder_path)

if folder_handler is None:
LOGGER.debug(
_LOGGER.debug(
"Cannot stop watching path, because it is already not being "
"watched. %s",
folder_path,
Expand All @@ -204,12 +207,12 @@ def close(self) -> None:
"""Close this _MultiPathWatcher object forever."""
if len(self._folder_handlers) != 0:
self._folder_handlers = {}
LOGGER.debug(
_LOGGER.debug(
"Stopping observer thread even though there is a non-zero "
"number of event observers!"
)
else:
LOGGER.debug("Stopping observer thread")
_LOGGER.debug("Stopping observer thread")

self._observer.stop()
self._observer.join(timeout=5)
Expand All @@ -223,7 +226,7 @@ def __init__(
md5: str,
modification_time: float,
*, # keyword-only arguments:
glob_pattern: Optional[str] = None,
glob_pattern: str | None = None,
allow_nonexistent: bool = False,
):
self.md5 = md5
Expand Down Expand Up @@ -254,7 +257,7 @@ def __init__(self) -> None:
super(_FolderEventHandler, self).__init__()
self._watched_paths: Dict[str, WatchedPath] = {}
self._lock = threading.Lock() # for watched_paths mutations
self.watch: Optional[ObservedWatch] = None
self.watch: ObservedWatch | None = None

def __repr__(self) -> str:
return repr_(self)
Expand All @@ -264,7 +267,7 @@ def add_path_change_listener(
path: str,
callback: Callable[[str], None],
*, # keyword-only arguments:
glob_pattern: Optional[str] = None,
glob_pattern: str | None = None,
allow_nonexistent: bool = False,
) -> None:
"""Add a path to this object's event filter."""
Expand Down Expand Up @@ -320,22 +323,24 @@ def handle_path_change_event(self, event: events.FileSystemEvent) -> None:
# the desired subtype from the event_type check
event = cast(events.FileSystemMovedEvent, event)

LOGGER.debug("Move event: src %s; dest %s", event.src_path, event.dest_path)
_LOGGER.debug(
"Move event: src %s; dest %s", event.src_path, event.dest_path
)
changed_path = event.dest_path
# On OSX with VI, on save, the file is deleted, the swap file is
# modified and then the original file is created hence why we
# capture EVENT_TYPE_CREATED
elif event.event_type == events.EVENT_TYPE_CREATED:
changed_path = event.src_path
else:
LOGGER.debug("Don't care about event type %s", event.event_type)
_LOGGER.debug("Don't care about event type %s", event.event_type)
return

changed_path = os.path.abspath(changed_path)

changed_path_info = self._watched_paths.get(changed_path, None)
if changed_path_info is None:
LOGGER.debug(
_LOGGER.debug(
"Ignoring changed path %s.\nWatched_paths: %s",
changed_path,
self._watched_paths,
Expand All @@ -352,7 +357,7 @@ def handle_path_change_event(self, event: events.FileSystemEvent) -> None:
modification_time != 0.0
and modification_time == changed_path_info.modification_time
):
LOGGER.debug("File/dir timestamp did not change: %s", changed_path)
_LOGGER.debug("File/dir timestamp did not change: %s", changed_path)
return

changed_path_info.modification_time = modification_time
Expand All @@ -363,10 +368,10 @@ def handle_path_change_event(self, event: events.FileSystemEvent) -> None:
allow_nonexistent=changed_path_info.allow_nonexistent,
)
if new_md5 == changed_path_info.md5:
LOGGER.debug("File/dir MD5 did not change: %s", changed_path)
_LOGGER.debug("File/dir MD5 did not change: %s", changed_path)
return

LOGGER.debug("File/dir MD5 changed: %s", changed_path)
_LOGGER.debug("File/dir MD5 changed: %s", changed_path)
changed_path_info.md5 = new_md5
changed_path_info.on_changed.send(changed_path)

Expand Down
84 changes: 43 additions & 41 deletions lib/streamlit/watcher/path_watcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Callable, Optional, Type, Union
from __future__ import annotations

import click
from typing import Callable, Type, Union

import streamlit.watcher
from streamlit import config, env_util
Expand All @@ -23,18 +23,6 @@

LOGGER = get_logger(__name__)

try:
# Check if the watchdog module is installed.
from streamlit.watcher.event_based_path_watcher import EventBasedPathWatcher

watchdog_available = True
except ImportError:
watchdog_available = False
# Stub the EventBasedPathWatcher so it can be mocked by tests

class EventBasedPathWatcher: # type: ignore
pass


# local_sources_watcher.py caches the return value of
# get_default_path_watcher_class(), so it needs to differentiate between the
Expand All @@ -50,7 +38,7 @@ def __init__(
_path_str: str,
_on_changed: Callable[[str], None],
*, # keyword-only arguments:
glob_pattern: Optional[str] = None,
glob_pattern: str | None = None,
allow_nonexistent: bool = False,
):
pass
Expand All @@ -66,32 +54,46 @@ def __init__(
]


def _is_watchdog_available() -> bool:
"""Check if the watchdog module is installed."""
try:
import watchdog # noqa: F401

return True
except ImportError:
return False


def report_watchdog_availability():
if not watchdog_available:
if not config.get_option("global.disableWatchdogWarning") and config.get_option(
"server.fileWatcherType"
) not in ["poll", "none"]:
msg = "\n $ xcode-select --install" if env_util.IS_DARWIN else ""

click.secho(
" %s" % "For better performance, install the Watchdog module:",
fg="blue",
bold=True,
)
click.secho(
"""%s
if (
not config.get_option("global.disableWatchdogWarning")
and config.get_option("server.fileWatcherType") not in ["poll", "none"]
and not _is_watchdog_available()
):
import click

msg = "\n $ xcode-select --install" if env_util.IS_DARWIN else ""

print("print message")
click.secho(
" %s" % "For better performance, install the Watchdog module:",
fg="blue",
bold=True,
)
click.secho(
"""%s
$ pip install watchdog
"""
% msg
)
% msg
)


def _watch_path(
path: str,
on_path_changed: Callable[[str], None],
watcher_type: Optional[str] = None,
watcher_type: str | None = None,
*, # keyword-only arguments:
glob_pattern: Optional[str] = None,
glob_pattern: str | None = None,
allow_nonexistent: bool = False,
) -> bool:
"""Create a PathWatcher for the given path if we have a viable
Expand Down Expand Up @@ -139,17 +141,17 @@ def _watch_path(
def watch_file(
path: str,
on_file_changed: Callable[[str], None],
watcher_type: Optional[str] = None,
watcher_type: str | None = None,
) -> bool:
return _watch_path(path, on_file_changed, watcher_type)


def watch_dir(
path: str,
on_dir_changed: Callable[[str], None],
watcher_type: Optional[str] = None,
watcher_type: str | None = None,
*, # keyword-only arguments:
glob_pattern: Optional[str] = None,
glob_pattern: str | None = None,
allow_nonexistent: bool = False,
) -> bool:
return _watch_path(
Expand All @@ -172,13 +174,13 @@ def get_path_watcher_class(watcher_type: str) -> PathWatcherType:
"""Return the PathWatcher class that corresponds to the given watcher_type
string. Acceptable values are 'auto', 'watchdog', 'poll' and 'none'.
"""
if watcher_type == "auto":
if watchdog_available:
return EventBasedPathWatcher
else:
return PollingPathWatcher
elif watcher_type == "watchdog" and watchdog_available:
if watcher_type in {"watchdog", "auto"} and _is_watchdog_available():
# Lazy-import this module to prevent unnecessary imports of the watchdog package.
from streamlit.watcher.event_based_path_watcher import EventBasedPathWatcher

return EventBasedPathWatcher
elif watcher_type == "auto":
return PollingPathWatcher
elif watcher_type == "poll":
return PollingPathWatcher
else:
Expand Down
4 changes: 2 additions & 2 deletions lib/tests/streamlit/watcher/local_sources_watcher_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import tests.streamlit.watcher.test_data.nested_module_parent as NESTED_MODULE_PARENT
from streamlit import config
from streamlit.watcher import local_sources_watcher
from streamlit.watcher.path_watcher import NoOpPathWatcher, watchdog_available
from streamlit.watcher.path_watcher import NoOpPathWatcher, _is_watchdog_available

SCRIPT_PATH = os.path.join(os.path.dirname(__file__), "test_data/not_a_real_script.py")

Expand Down Expand Up @@ -242,7 +242,7 @@ def test_config_watcherType(self):
config.set_option("server.fileWatcherType", "watchdog")
self.assertEqual(
local_sources_watcher.get_default_path_watcher_class().__name__,
"EventBasedPathWatcher" if watchdog_available else "NoOpPathWatcher",
"EventBasedPathWatcher" if _is_watchdog_available() else "NoOpPathWatcher",
)

config.set_option("server.fileWatcherType", "auto")
Expand Down

0 comments on commit 9375977

Please sign in to comment.