Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 39 additions & 7 deletions jupyter_ydoc/yunicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@
# Distributed under the terms of the Modified BSD License.

from collections.abc import Callable
from difflib import SequenceMatcher
from functools import partial
from typing import Any

from pycrdt import Awareness, Doc, Text

from .ybasedoc import YBaseDoc

# Heuristic threshold as recommended in difflib documentation
SIMILARITY_THREESHOLD = 0.6


class YUnicode(YBaseDoc):
"""
Expand All @@ -35,7 +39,7 @@ def __init__(self, ydoc: Doc | None = None, awareness: Awareness | None = None):
:type awareness: :class:`pycrdt.Awareness`, optional.
"""
super().__init__(ydoc, awareness)
self._ysource = self._ydoc.get("source", type=Text)
self._ysource: Text = self._ydoc.get("source", type=Text)
self.undo_manager.expand_scope(self._ysource)

@property
Expand Down Expand Up @@ -64,17 +68,45 @@ def set(self, value: str) -> None:
:param value: The content of the document.
:type value: str
"""
if self.get() == value:
old_value = self.get()
if old_value == value:
# no-op if the values are already the same,
# to avoid side-effects such as cursor jumping to the top
return

with self._ydoc.transaction():
# clear document
self._ysource.clear()
# initialize document
if value:
self._ysource += value
matcher = SequenceMatcher(a=old_value, b=value)

if (
matcher.real_quick_ratio() >= SIMILARITY_THREESHOLD
and matcher.ratio() >= SIMILARITY_THREESHOLD
):
operations = matcher.get_opcodes()
offset = 0
for tag, i1, i2, j1, j2 in operations:
match tag:
case "replace":
self._ysource[i1 + offset : i2 + offset] = value[j1:j2]
offset += (j2 - j1) - (i2 - i1)
case "delete":
del self._ysource[i1 + offset : i2 + offset]
offset -= i2 - i1
case "insert":
self._ysource.insert(i1 + offset, value[j1:j2])
offset += j2 - j1
case "equal":
pass
case _:
raise ValueError(f"Unknown tag '{tag}' in sequence matcher")
else:
# for very different strings, just replace the whole content;
# this avoids generating a huge number of operations

# clear document
self._ysource.clear()
# initialize document
if value:
self._ysource += value

def observe(self, callback: Callable[[str, Any], None]) -> None:
"""
Expand Down
21 changes: 1 addition & 20 deletions tests/test_ynotebook.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

from dataclasses import dataclass

from pycrdt import ArrayEvent, Map, MapEvent, TextEvent
from pytest import mark
from utils import ExpectedEvent

from jupyter_ydoc import YNotebook

Expand Down Expand Up @@ -119,24 +118,6 @@ def record_changes(topic, event):
]


@dataclass
class ExpectedEvent:
kind: type
path: str | None = None

def __eq__(self, other):
if not isinstance(other, self.kind):
return False
if self.path is not None and self.path != other.path:
return False
return True

def __repr__(self):
if self.path is not None:
return f"ExpectedEvent({self.kind.__name__}, path={self.path!r})"
return f"ExpectedEvent({self.kind.__name__})"


@mark.parametrize(
"modifications, expected_events",
[
Expand Down
163 changes: 163 additions & 0 deletions tests/test_yunicode.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

from pycrdt import TextEvent
from utils import ExpectedEvent

from jupyter_ydoc import YUnicode


Expand All @@ -25,3 +28,163 @@ def record_changes(topic, event):

# No changes should be observed at all
assert changes == []


def test_set_granular_changes():
text = YUnicode()

text.set(
"\n".join(
[
"Mary had a little lamb,",
"Its fleece was white as snow.",
"And everywhere that Mary went,",
"The lamb was sure to go.",
]
)
)

changes = []

def record_changes(topic, event):
changes.append((topic, event)) # pragma: nocover

text.observe(record_changes)

# Call set with the bunny version
text.set(
"\n".join(
[
"Mary had a little bunny,",
"Its fur was white as snow.",
"And everywhere that Mary went,",
"The bunny was sure to hop.",
]
)
)

assert len(changes) == 1
source_events = [e for t, e in changes if t == "source"]
assert source_events == [
ExpectedEvent(
TextEvent,
delta=[
# "Mary had a little <delete:lam>b"
{"retain": 18},
{"delete": 3},
{"retain": 1},
# "Mary had a little b<insert:unny>"
{"insert": "unny"},
# ",↵ Its f<delete:leece>"
{"retain": 7},
{"delete": 5},
# ",↵ Its f<insert:ur>"
{"insert": "ur"},
# " was white as snow.↵"
# "And everywhere that Mary went,↵"
# "The <delete:lam>b"
{"retain": 55},
{"delete": 3},
{"retain": 1},
# "The b<insert:unny> was sure to"
{"insert": "unny"},
{"retain": 13},
# "<delete:g><insert:h>o<insert:p>"
{"delete": 1},
{"insert": "h"},
{"retain": 1},
{"insert": "p"},
],
)
]


def test_set_granular_append():
text = YUnicode()

text.set(
"\n".join(
[
"Mary had a little lamb,",
"Its fleece was white as snow.",
]
)
)

changes = []

def record_changes(topic, event):
changes.append((topic, event)) # pragma: nocover

text.observe(record_changes)

# append a line
text.set(
"\n".join(
[
"Mary had a little lamb,",
"Its fleece was white as snow.",
"And everywhere that Mary went,",
]
)
)

# append one more line
text.set(
"\n".join(
[
"Mary had a little lamb,",
"Its fleece was white as snow.",
"And everywhere that Mary went,",
"The lamb was sure to go.",
]
)
)

assert len(changes) == 2
source_events = [e for t, e in changes if t == "source"]
assert source_events == [
ExpectedEvent(
TextEvent, delta=[{"retain": 53}, {"insert": "\nAnd everywhere that Mary went,"}]
),
ExpectedEvent(TextEvent, delta=[{"retain": 84}, {"insert": "\nThe lamb was sure to go."}]),
]


def test_set_hard_reload_if_very_different():
text = YUnicode()

text.set(
"\n".join(
[
"Mary had a little lamb,",
"Its fleece was white as snow.",
"And everywhere that Mary went,",
"The lamb was sure to go.",
]
)
)

changes = []

def record_changes(topic, event):
changes.append((topic, event)) # pragma: nocover

text.observe(record_changes)

# Call set with a very different nursery rhyme
twinkle_lyrics = "\n".join(
[
"Twinkle, twinkle, little star,",
"How I wonder what you are!",
"Up above the world so high,",
"Like a diamond in the sky.",
]
)
text.set(twinkle_lyrics)

assert len(changes) == 1
source_events = [e for t, e in changes if t == "source"]
assert source_events == [
ExpectedEvent(TextEvent, delta=[{"delete": 109}, {"insert": twinkle_lyrics}])
]
26 changes: 26 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

from dataclasses import dataclass

from anyio import Lock, connect_tcp


Expand Down Expand Up @@ -41,3 +43,27 @@ async def ensure_server_running(host: str, port: int) -> None:
pass
else:
break


@dataclass
class ExpectedEvent:
kind: type
path: str | None = None
delta: list[dict] | None = None

def __eq__(self, other):
if not isinstance(other, self.kind):
return False
if self.path is not None and self.path != other.path:
return False
if self.delta is not None and self.delta != other.delta:
return False
return True

def __repr__(self):
fragments = [self.kind.__name__]
if self.path is not None:
fragments.append(f"path={self.path!r}")
if self.delta is not None:
fragments.append(f"delta={self.delta!r}")
return f"ExpectedEvent({', '.join(fragments)})"
Loading