From a4b662d88ff7c9af63d7dbea7effb9315b63d910 Mon Sep 17 00:00:00 2001
From: krassowski <5832902+krassowski@users.noreply.github.com>
Date: Sat, 29 Nov 2025 09:19:28 +0000
Subject: [PATCH 1/5] Add test with expectation of granular reload and append

---
 tests/test_ynotebook.py |  21 +-----
 tests/test_yunicode.py  | 163 ++++++++++++++++++++++++++++++++++++++++
 tests/utils.py          |  26 +++++++
 3 files changed, 190 insertions(+), 20 deletions(-)

diff --git a/tests/test_ynotebook.py b/tests/test_ynotebook.py
index c196880..545a974 100644
--- a/tests/test_ynotebook.py
+++ b/tests/test_ynotebook.py
@@ -1,10 +1,9 @@
 # Copyright (c) Jupyter Development Team.
 # Distributed under the terms of the Modified BSD License.
 
-from dataclasses import dataclass
-
 from pycrdt import ArrayEvent, Map, MapEvent, TextEvent
 from pytest import mark
+from utils import ExpectedEvent
 
 from jupyter_ydoc import YNotebook
 
@@ -119,24 +118,6 @@ def record_changes(topic, event):
     ]
 
 
-@dataclass
-class ExpectedEvent:
-    kind: type
-    path: str | None = None
-
-    def __eq__(self, other):
-        if not isinstance(other, self.kind):
-            return False
-        if self.path is not None and self.path != other.path:
-            return False
-        return True
-
-    def __repr__(self):
-        if self.path is not None:
-            return f"ExpectedEvent({self.kind.__name__}, path={self.path!r})"
-        return f"ExpectedEvent({self.kind.__name__})"
-
-
 @mark.parametrize(
     "modifications, expected_events",
     [
diff --git a/tests/test_yunicode.py b/tests/test_yunicode.py
index ef19131..4329ad6 100644
--- a/tests/test_yunicode.py
+++ b/tests/test_yunicode.py
@@ -1,6 +1,9 @@
 # Copyright (c) Jupyter Development Team.
 # Distributed under the terms of the Modified BSD License.
 
+from pycrdt import TextEvent
+from utils import ExpectedEvent
+
 from jupyter_ydoc import YUnicode
 
 
@@ -25,3 +28,163 @@ def record_changes(topic, event):
 
     # No changes should be observed at all
     assert changes == []
+
+
+def test_set_granular_changes():  # set() to a similar text must emit one granular delta
+    text = YUnicode()
+
+    text.set(  # initial content, written before the observer is attached
+        "\n".join(
+            [
+                "Mary had a little lamb,",
+                "Its fleece was white as snow.",
+                "And everywhere that Mary went,",
+                "The lamb was sure to go.",
+            ]
+        )
+    )
+
+    changes = []
+
+    def record_changes(topic, event):
+        changes.append((topic, event))  # pragma: nocover
+
+    text.observe(record_changes)  # only the second set() below is recorded
+
+    # Call set with the "bunny" version, which differs from the original in a few words only
+    text.set(
+        "\n".join(
+            [
+                "Mary had a little bunny,",
+                "Its fur was white as snow.",
+                "And everywhere that Mary went,",
+                "The bunny was sure to hop.",
+            ]
+        )
+    )
+
+    assert len(changes) == 1  # exactly one event recorded for the second set()
+    source_events = [e for t, e in changes if t == "source"]  # keep "source"-topic events only
+    assert source_events == [
+        ExpectedEvent(
+            TextEvent,
+            delta=[
+                # keep "Mary had a little " (18 chars), then "<delete:lam>b"
+                {"retain": 18},
+                {"delete": 3},
+                {"retain": 1},
+                # "Mary had a little b<insert:unny>"
+                {"insert": "unny"},
+                # keep ",↵Its f" (7 chars), then "<delete:leece>"
+                {"retain": 7},
+                {"delete": 5},
+                # ",↵Its f<insert:ur>"
+                {"insert": "ur"},
+                # " was white as snow.↵" (20 chars)
+                # "And everywhere that Mary went,↵" (31 chars)
+                # "The <delete:lam>b" (4 chars kept before the deletion; 55 in total)
+                {"retain": 55},
+                {"delete": 3},
+                {"retain": 1},
+                # "The b<insert:unny>", then keep " was sure to " (13 chars)
+                {"insert": "unny"},
+                {"retain": 13},
+                # "<delete:g><insert:h>o<insert:p>"
+                {"delete": 1},
+                {"insert": "h"},
+                {"retain": 1},
+                {"insert": "p"},  # the final "." is unchanged and not listed in the delta
+            ],
+        )
+    ]
+
+
+def test_set_granular_append():  # set() with extra trailing lines must emit insert-only deltas
+    text = YUnicode()
+
+    text.set(  # initial content, written before the observer is attached
+        "\n".join(
+            [
+                "Mary had a little lamb,",
+                "Its fleece was white as snow.",
+            ]
+        )
+    )
+
+    changes = []
+
+    def record_changes(topic, event):
+        changes.append((topic, event))  # pragma: nocover
+
+    text.observe(record_changes)  # only the two appending set() calls below are recorded
+
+    # append a line (the existing content is 53 characters long)
+    text.set(
+        "\n".join(
+            [
+                "Mary had a little lamb,",
+                "Its fleece was white as snow.",
+                "And everywhere that Mary went,",
+            ]
+        )
+    )
+
+    # append one more line (the existing content is now 84 characters long)
+    text.set(
+        "\n".join(
+            [
+                "Mary had a little lamb,",
+                "Its fleece was white as snow.",
+                "And everywhere that Mary went,",
+                "The lamb was sure to go.",
+            ]
+        )
+    )
+
+    assert len(changes) == 2  # one event per appending set() call
+    source_events = [e for t, e in changes if t == "source"]  # keep "source"-topic events only
+    assert source_events == [
+        ExpectedEvent(  # existing text is retained; only the new line is inserted
+            TextEvent, delta=[{"retain": 53}, {"insert": "\nAnd everywhere that Mary went,"}]
+        ),
+        ExpectedEvent(TextEvent, delta=[{"retain": 84}, {"insert": "\nThe lamb was sure to go."}]),
+    ]
+
+
+def test_set_hard_reload_if_very_different():  # dissimilar text must be replaced wholesale
+    text = YUnicode()
+
+    text.set(  # initial content (109 characters), written before the observer is attached
+        "\n".join(
+            [
+                "Mary had a little lamb,",
+                "Its fleece was white as snow.",
+                "And everywhere that Mary went,",
+                "The lamb was sure to go.",
+            ]
+        )
+    )
+
+    changes = []
+
+    def record_changes(topic, event):
+        changes.append((topic, event))  # pragma: nocover
+
+    text.observe(record_changes)  # only the second set() below is recorded
+
+    # Call set with a very different nursery rhyme
+    twinkle_lyrics = "\n".join(
+        [
+            "Twinkle, twinkle, little star,",
+            "How I wonder what you are!",
+            "Up above the world so high,",
+            "Like a diamond in the sky.",
+        ]
+    )
+    text.set(twinkle_lyrics)
+
+    assert len(changes) == 1  # exactly one event recorded for the second set()
+    source_events = [e for t, e in changes if t == "source"]  # keep "source"-topic events only
+    assert source_events == [  # the whole old text is deleted, then the new text is inserted
+        ExpectedEvent(TextEvent, delta=[{"delete": 109}, {"insert": twinkle_lyrics}])
+    ]
diff --git a/tests/utils.py b/tests/utils.py
index 5797997..679833e 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,6 +1,8 @@
 # Copyright (c) Jupyter Development Team.
 # Distributed under the terms of the Modified BSD License.
 
+from dataclasses import dataclass
+
 from anyio import Lock, connect_tcp
 
 
@@ -41,3 +43,27 @@ async def ensure_server_running(host: str, port: int) -> None:
             pass
         else:
             break
+
+
+@dataclass
+class ExpectedEvent:  # test helper: compares equal to any event meeting the criteria below
+    kind: type  # class the compared object must be an instance of
+    path: str | None = None  # if given, must equal the compared object's `path`
+    delta: list[dict] | None = None  # if given, must equal the compared object's `delta`
+
+    def __eq__(self, other):
+        if not isinstance(other, self.kind):
+            return False
+        if self.path is not None and self.path != other.path:
+            return False
+        if self.delta is not None and self.delta != other.delta:
+            return False
+        return True  # criteria left as None are not checked
+
+    def __repr__(self):
+        fragments = [self.kind.__name__]  # the kind is always shown
+        if self.path is not None:
+            fragments.append(f"path={self.path!r}")
+        if self.delta is not None:
+            fragments.append(f"delta={self.delta!r}")
+        return f"ExpectedEvent({', '.join(fragments)})"  # unset criteria are omitted

From ba2102606df7e5870eb36f8e49b94885cb79339a Mon Sep 17 00:00:00 2001
From: krassowski <5832902+krassowski@users.noreply.github.com>
Date: Sat, 29 Nov 2025 09:29:55 +0000
Subject: [PATCH 2/5] Use stdlib sequence matcher to perform granular text
 updates

---
 jupyter_ydoc/yunicode.py | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/jupyter_ydoc/yunicode.py b/jupyter_ydoc/yunicode.py
index 6a9dc6e..941c53e 100644
--- a/jupyter_ydoc/yunicode.py
+++ b/jupyter_ydoc/yunicode.py
@@ -2,6 +2,7 @@
 # Distributed under the terms of the Modified BSD License.
 
 from collections.abc import Callable
+from difflib import SequenceMatcher
 from functools import partial
 from typing import Any
 
@@ -64,17 +65,40 @@ def set(self, value: str) -> None:
         :param value: The content of the document.
         :type value: str
         """
-        if self.get() == value:
+        old_value = self.get()
+        if old_value == value:
             # no-op if the values are already the same,
             # to avoid side-effects such as cursor jumping to the top
             return
 
         with self._ydoc.transaction():
-            # clear document
-            self._ysource.clear()
-            # initialize document
-            if value:
-                self._ysource += value
+            matcher = SequenceMatcher(a=old_value, b=value)
+
+            # for very different strings, just replace the whole content;
+            # this avoids generating a huge number of operations
+            if matcher.ratio() < 0.6:
+                # clear document
+                self._ysource.clear()
+                # initialize document
+                if value:
+                    self._ysource += value
+            else:
+                operations = matcher.get_opcodes()
+                offset = 0
+                for tag, i1, i2, j1, j2 in operations:
+                    if tag == "replace":
+                        self._ysource[i1 + offset : i2 + offset] = value[j1:j2]
+                        offset += (j2 - j1) - (i2 - i1)
+                    elif tag == "delete":
+                        del self._ysource[i1 + offset : i2 + offset]
+                        offset -= i2 - i1
+                    elif tag == "insert":
+                        self._ysource[i1 + offset : i2 + offset] = value[j1:j2]
+                        offset += j2 - j1
+                    elif tag == "equal":
+                        pass
+                    else:
+                        raise ValueError(f"Unknown tag '{tag}' in sequence matcher")
 
     def observe(self, callback: Callable[[str, Any], None]) -> None:
         """

From 7e0627e5437da9f4c011c6c092a89c318edabb7a Mon Sep 17 00:00:00 2001
From: krassowski <5832902+krassowski@users.noreply.github.com>
Date: Sun, 30 Nov 2025 10:49:49 +0000
Subject: [PATCH 3/5] Use `real_quick_ratio` to fast-reject very dissimilar
 updates

---
 jupyter_ydoc/yunicode.py | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/jupyter_ydoc/yunicode.py b/jupyter_ydoc/yunicode.py
index 941c53e..911c85c 100644
--- a/jupyter_ydoc/yunicode.py
+++ b/jupyter_ydoc/yunicode.py
@@ -10,6 +10,9 @@
 
 from .ybasedoc import YBaseDoc
 
+# Minimum similarity ratio for granular updates; difflib docs call ratio() > 0.6 a close match
+SIMILARITY_THREESHOLD = 0.6
+
 
 class YUnicode(YBaseDoc):
     """
@@ -74,15 +77,10 @@ def set(self, value: str) -> None:
         with self._ydoc.transaction():
             matcher = SequenceMatcher(a=old_value, b=value)
 
-            # for very different strings, just replace the whole content;
-            # this avoids generating a huge number of operations
-            if matcher.ratio() < 0.6:
-                # clear document
-                self._ysource.clear()
-                # initialize document
-                if value:
-                    self._ysource += value
-            else:
+            if (
+                matcher.real_quick_ratio() >= SIMILARITY_THREESHOLD
+                and matcher.ratio() >= SIMILARITY_THREESHOLD
+            ):
                 operations = matcher.get_opcodes()
                 offset = 0
                 for tag, i1, i2, j1, j2 in operations:
@@ -99,6 +97,15 @@ def set(self, value: str) -> None:
                         pass
                     else:
                         raise ValueError(f"Unknown tag '{tag}' in sequence matcher")
+            else:
+                # for very different strings, just replace the whole content;
+                # this avoids generating a huge number of operations
+
+                # clear document
+                self._ysource.clear()
+                # initialize document
+                if value:
+                    self._ysource += value
 
     def observe(self, callback: Callable[[str, Any], None]) -> None:
         """

From ca32120cdf4d8d529d1b8e40240e690b3a9ece90 Mon Sep 17 00:00:00 2001
From: krassowski <5832902+krassowski@users.noreply.github.com>
Date: Mon, 1 Dec 2025 09:33:39 +0000
Subject: [PATCH 4/5] Use `insert()` which skips some of the checks

`__setitem__` first checks whether the index is a number or a slice
and then validates the slice range; both checks can be skipped here
because `i1 == i2` always holds for an `insert` opcode.
---
 jupyter_ydoc/yunicode.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/jupyter_ydoc/yunicode.py b/jupyter_ydoc/yunicode.py
index 911c85c..7690db9 100644
--- a/jupyter_ydoc/yunicode.py
+++ b/jupyter_ydoc/yunicode.py
@@ -39,7 +39,7 @@ def __init__(self, ydoc: Doc | None = None, awareness: Awareness | None = None):
         :type awareness: :class:`pycrdt.Awareness`, optional.
         """
         super().__init__(ydoc, awareness)
-        self._ysource = self._ydoc.get("source", type=Text)
+        self._ysource: Text = self._ydoc.get("source", type=Text)
         self.undo_manager.expand_scope(self._ysource)
 
     @property
@@ -91,7 +91,7 @@ def set(self, value: str) -> None:
                         del self._ysource[i1 + offset : i2 + offset]
                         offset -= i2 - i1
                     elif tag == "insert":
-                        self._ysource[i1 + offset : i2 + offset] = value[j1:j2]
+                        self._ysource.insert(i1 + offset, value[j1:j2])
                         offset += j2 - j1
                     elif tag == "equal":
                         pass

From b61975ef55b4e8011016e7fbdd83f601e5ae84df Mon Sep 17 00:00:00 2001
From: krassowski <5832902+krassowski@users.noreply.github.com>
Date: Mon, 1 Dec 2025 10:01:56 +0000
Subject: [PATCH 5/5] Use match-case instead of elif

---
 jupyter_ydoc/yunicode.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/jupyter_ydoc/yunicode.py b/jupyter_ydoc/yunicode.py
index 7690db9..d1adbdb 100644
--- a/jupyter_ydoc/yunicode.py
+++ b/jupyter_ydoc/yunicode.py
@@ -84,19 +84,20 @@ def set(self, value: str) -> None:
                 operations = matcher.get_opcodes()
                 offset = 0
                 for tag, i1, i2, j1, j2 in operations:
-                    if tag == "replace":
-                        self._ysource[i1 + offset : i2 + offset] = value[j1:j2]
-                        offset += (j2 - j1) - (i2 - i1)
-                    elif tag == "delete":
-                        del self._ysource[i1 + offset : i2 + offset]
-                        offset -= i2 - i1
-                    elif tag == "insert":
-                        self._ysource.insert(i1 + offset, value[j1:j2])
-                        offset += j2 - j1
-                    elif tag == "equal":
-                        pass
-                    else:
-                        raise ValueError(f"Unknown tag '{tag}' in sequence matcher")
+                    match tag:
+                        case "replace":
+                            self._ysource[i1 + offset : i2 + offset] = value[j1:j2]
+                            offset += (j2 - j1) - (i2 - i1)
+                        case "delete":
+                            del self._ysource[i1 + offset : i2 + offset]
+                            offset -= i2 - i1
+                        case "insert":
+                            self._ysource.insert(i1 + offset, value[j1:j2])
+                            offset += j2 - j1
+                        case "equal":
+                            pass
+                        case _:
+                            raise ValueError(f"Unknown tag '{tag}' in sequence matcher")
             else:
                 # for very different strings, just replace the whole content;
                 # this avoids generating a huge number of operations