diff --git a/jupyter_ydoc/yunicode.py b/jupyter_ydoc/yunicode.py index 6a9dc6e..d1adbdb 100644 --- a/jupyter_ydoc/yunicode.py +++ b/jupyter_ydoc/yunicode.py @@ -2,6 +2,7 @@ # Distributed under the terms of the Modified BSD License. from collections.abc import Callable +from difflib import SequenceMatcher from functools import partial from typing import Any @@ -9,6 +10,9 @@ from .ybasedoc import YBaseDoc +# Heuristic threshold as recommended in difflib documentation +SIMILARITY_THREESHOLD = 0.6 + class YUnicode(YBaseDoc): """ @@ -35,7 +39,7 @@ def __init__(self, ydoc: Doc | None = None, awareness: Awareness | None = None): :type awareness: :class:`pycrdt.Awareness`, optional. """ super().__init__(ydoc, awareness) - self._ysource = self._ydoc.get("source", type=Text) + self._ysource: Text = self._ydoc.get("source", type=Text) self.undo_manager.expand_scope(self._ysource) @property @@ -64,17 +68,45 @@ def set(self, value: str) -> None: :param value: The content of the document. :type value: str """ - if self.get() == value: + old_value = self.get() + if old_value == value: # no-op if the values are already the same, # to avoid side-effects such as cursor jumping to the top return with self._ydoc.transaction(): - # clear document - self._ysource.clear() - # initialize document - if value: - self._ysource += value + matcher = SequenceMatcher(a=old_value, b=value) + + if ( + matcher.real_quick_ratio() >= SIMILARITY_THREESHOLD + and matcher.ratio() >= SIMILARITY_THREESHOLD + ): + operations = matcher.get_opcodes() + offset = 0 + for tag, i1, i2, j1, j2 in operations: + match tag: + case "replace": + self._ysource[i1 + offset : i2 + offset] = value[j1:j2] + offset += (j2 - j1) - (i2 - i1) + case "delete": + del self._ysource[i1 + offset : i2 + offset] + offset -= i2 - i1 + case "insert": + self._ysource.insert(i1 + offset, value[j1:j2]) + offset += j2 - j1 + case "equal": + pass + case _: + raise ValueError(f"Unknown tag '{tag}' in sequence matcher") + else: + # for very different strings, just replace the whole content; + # this avoids generating a huge number of operations + + # clear document + self._ysource.clear() + # initialize document + if value: + self._ysource += value def observe(self, callback: Callable[[str, Any], None]) -> None: """ diff --git a/tests/test_ynotebook.py b/tests/test_ynotebook.py index c196880..545a974 100644 --- a/tests/test_ynotebook.py +++ b/tests/test_ynotebook.py @@ -1,10 +1,9 @@ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. -from dataclasses import dataclass - from pycrdt import ArrayEvent, Map, MapEvent, TextEvent from pytest import mark +from utils import ExpectedEvent from jupyter_ydoc import YNotebook @@ -119,24 +118,6 @@ def record_changes(topic, event): ] -@dataclass -class ExpectedEvent: - kind: type - path: str | None = None - - def __eq__(self, other): - if not isinstance(other, self.kind): - return False - if self.path is not None and self.path != other.path: - return False - return True - - def __repr__(self): - if self.path is not None: - return f"ExpectedEvent({self.kind.__name__}, path={self.path!r})" - return f"ExpectedEvent({self.kind.__name__})" - - @mark.parametrize( "modifications, expected_events", [ diff --git a/tests/test_yunicode.py b/tests/test_yunicode.py index ef19131..4329ad6 100644 --- a/tests/test_yunicode.py +++ b/tests/test_yunicode.py @@ -1,6 +1,9 @@ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. +from pycrdt import TextEvent +from utils import ExpectedEvent + from jupyter_ydoc import YUnicode @@ -25,3 +28,163 @@ def record_changes(topic, event): # No changes should be observed at all assert changes == [] + + +def test_set_granular_changes(): + text = YUnicode() + + text.set( + "\n".join( + [ + "Mary had a little lamb,", + "Its fleece was white as snow.", + "And everywhere that Mary went,", + "The lamb was sure to go.", + ] + ) + ) + + changes = [] + + def record_changes(topic, event): + changes.append((topic, event)) # pragma: nocover + + text.observe(record_changes) + + # Call set with the bunny version + text.set( + "\n".join( + [ + "Mary had a little bunny,", + "Its fur was white as snow.", + "And everywhere that Mary went,", + "The bunny was sure to hop.", + ] + ) + ) + + assert len(changes) == 1 + source_events = [e for t, e in changes if t == "source"] + assert source_events == [ + ExpectedEvent( + TextEvent, + delta=[ + # "Mary had a little b" + {"retain": 18}, + {"delete": 3}, + {"retain": 1}, + # "Mary had a little b" + {"insert": "unny"}, + # ",↵ Its f" + {"retain": 7}, + {"delete": 5}, + # ",↵ Its f" + {"insert": "ur"}, + # " was white as snow.↵" + # "And everywhere that Mary went,↵" + # "The b" + {"retain": 55}, + {"delete": 3}, + {"retain": 1}, + # "The b was sure to" + {"insert": "unny"}, + {"retain": 13}, + # "o" + {"delete": 1}, + {"insert": "h"}, + {"retain": 1}, + {"insert": "p"}, + ], + ) + ] + + +def test_set_granular_append(): + text = YUnicode() + + text.set( + "\n".join( + [ + "Mary had a little lamb,", + "Its fleece was white as snow.", + ] + ) + ) + + changes = [] + + def record_changes(topic, event): + changes.append((topic, event)) # pragma: nocover + + text.observe(record_changes) + + # append a line + text.set( + "\n".join( + [ + "Mary had a little lamb,", + "Its fleece was white as snow.", + "And everywhere that Mary went,", + ] + ) + ) + + # append one more line + text.set( + "\n".join( + [ + "Mary had a little lamb,", + "Its fleece was white as snow.", + "And everywhere that Mary went,", + "The lamb was sure to go.", + ] + ) + ) + + assert len(changes) == 2 + source_events = [e for t, e in changes if t == "source"] + assert source_events == [ + ExpectedEvent( + TextEvent, delta=[{"retain": 53}, {"insert": "\nAnd everywhere that Mary went,"}] + ), + ExpectedEvent(TextEvent, delta=[{"retain": 84}, {"insert": "\nThe lamb was sure to go."}]), + ] + + +def test_set_hard_reload_if_very_different(): + text = YUnicode() + + text.set( + "\n".join( + [ + "Mary had a little lamb,", + "Its fleece was white as snow.", + "And everywhere that Mary went,", + "The lamb was sure to go.", + ] + ) + ) + + changes = [] + + def record_changes(topic, event): + changes.append((topic, event)) # pragma: nocover + + text.observe(record_changes) + + # Call set with a very different nursery rhyme + twinkle_lyrics = "\n".join( + [ + "Twinkle, twinkle, little star,", + "How I wonder what you are!", + "Up above the world so high,", + "Like a diamond in the sky.", + ] + ) + text.set(twinkle_lyrics) + + assert len(changes) == 1 + source_events = [e for t, e in changes if t == "source"] + assert source_events == [ + ExpectedEvent(TextEvent, delta=[{"delete": 109}, {"insert": twinkle_lyrics}]) + ] diff --git a/tests/utils.py b/tests/utils.py index 5797997..679833e 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,6 +1,8 @@ # Copyright (c) Jupyter Development Team. # Distributed under the terms of the Modified BSD License. +from dataclasses import dataclass + from anyio import Lock, connect_tcp @@ -41,3 +43,27 @@ async def ensure_server_running(host: str, port: int) -> None: pass else: break + + +@dataclass +class ExpectedEvent: + kind: type + path: str | None = None + delta: list[dict] | None = None + + def __eq__(self, other): + if not isinstance(other, self.kind): + return False + if self.path is not None and self.path != other.path: + return False + if self.delta is not None and self.delta != other.delta: + return False + return True + + def __repr__(self): + fragments = [self.kind.__name__] + if self.path is not None: + fragments.append(f"path={self.path!r}") + if self.delta is not None: + fragments.append(f"delta={self.delta!r}") + return f"ExpectedEvent({', '.join(fragments)})"