Skip to content

Commit

Permalink
feat: jupytext markdown cell content with comments
Browse files Browse the repository at this point in the history
  • Loading branch information
kiyoon committed Feb 25, 2023
1 parent ea8c68c commit 8ee9bf7
Show file tree
Hide file tree
Showing 2 changed files with 194 additions and 18 deletions.
98 changes: 80 additions & 18 deletions src/jupynium/buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,21 @@ class JupyniumBuffer:
This does have a functionality to sync with the Notebook.
"""

def __init__(self, buf: list[str] = [""]):
def __init__(
self,
buf: list[str] = [""],
header_cell_type="header",
):
"""
self.buf is a list of lines of the nvim buffer,
with the exception that the commented magic commands are normal magic commands.
e.g. '# %time' -> '%time'
and jupytext markdown cell content also strips the leading comment.
e.g. '# # Markdown header' -> '# Markdown header'
Args:
header_cell_type (str, optional): Use only when partial update.
header_cell_separator (str, optional): Use only when partial update.
"""
self.buf = buf
if self.buf == [""]:
Expand All @@ -33,23 +43,41 @@ def __init__(self, buf: list[str] = [""]):
] # each cell's row length. 0-th cell is not a cell, but it's the header. You can put anything above and it won't be synced to Jupyter Notebook.
self.cell_types = ["header"] # 0-th cell is not a cell.
else:
self.full_analyse_buf()
self.full_analyse_buf(header_cell_type)

def full_analyse_buf(self, header_cell_type="header"):
"""
Main parser for the jupynium format (*.ju.*).
This function needs to support partial update.
def full_analyse_buf(self):
E.g. by looking at 1 line of change, it should be able to understand if:
- the change is within a cell
- cell creation/deletion
- cell type change
During the partial update, the header cell will be continuation from the existing cell.
We don't know if it will be header/cell/markdown.
So we need to pass the header_cell_type.
Args:
header_cell_type (str, optional): Use only when partial update.
"""
num_rows_this_cell = 0
num_rows_per_cell = []
cell_types = ["header"]
cell_types = [header_cell_type]
for row, line in enumerate(self.buf):
if (
line.startswith("# %%%")
or line.startswith("# %% [md]")
or line.startswith("# %% [markdown]")
or line.startswith('"""%%')
or line.startswith("'''%%")
):
num_rows_per_cell.append(num_rows_this_cell)
num_rows_this_cell = 1
cell_types.append("markdown")
elif line.startswith("# %% [md]") or line.startswith("# %% [markdown]"):
num_rows_per_cell.append(num_rows_this_cell)
num_rows_this_cell = 1
cell_types.append("markdown (jupytext)")
elif (
line.startswith("# %%")
or line.startswith('%%"""')
Expand All @@ -62,7 +90,15 @@ def full_analyse_buf(self):
# Use '# %' for magic commands
# e.g. '# %matplotlib inline'
# Remove the comment
self.buf[row] = self.buf[row][2:]
if cell_types[-1] == "code":
self.buf[row] = self.buf[row][2:]
num_rows_this_cell += 1
elif line.startswith("# "):
# Remove the comment for markdown cells
# Only activated if the cell separator is like Jupytext's
# Useful for non-python languages like R
if cell_types[-1] == "markdown (jupytext)":
self.buf[row] = self.buf[row][2:]
num_rows_this_cell += 1
else:
num_rows_this_cell += 1
Expand Down Expand Up @@ -103,9 +139,7 @@ def _on_lines_update_buf(self, lines, start_row, old_end_row, new_end_row):
notebook_cell_operations = []

try:
cell_idx, cell_start_row, row_within_cell = self.get_cell_index_from_row(
start_row
)
cell_idx, _, row_within_cell = self.get_cell_index_from_row(start_row)

if row_within_cell == 0 and cell_idx > 0:
# If the row is the first row of a cell, and it's not the first cell, then it's a cell separator.
Expand Down Expand Up @@ -135,7 +169,12 @@ def _on_lines_update_buf(self, lines, start_row, old_end_row, new_end_row):
lines_to_remove -= 1

# Analyse how many cells are added
new_lines_buf = JupyniumBuffer(lines)
new_lines_buf = JupyniumBuffer(
lines,
header_cell_type=self.cell_types[
cell_idx
], # This is required as we're analysing partially.
)
if new_lines_buf.num_cells - 1 == 0:
self.num_rows_per_cell[cell_idx] += new_lines_buf.num_rows_per_cell[0]
notebook_cell_operations = notebook_cell_delete_operations
Expand Down Expand Up @@ -184,7 +223,7 @@ def _on_lines_update_buf(self, lines, start_row, old_end_row, new_end_row):

# Now actually replace the lines
# Optimisation: if the number of lines is not changed, which is most of the cases,
# then we can just replace the lines.
# then we can just replace the the strings in the list instead of modifying list itself.
if old_end_row == new_end_row:
for i, line in enumerate(lines):
self.buf[start_row + i] = line
Expand Down Expand Up @@ -213,7 +252,8 @@ def _apply_cell_operations(self, driver, notebook_cell_operations):
logger.info(
f"Cell {nb_cell_idx + i} type change to {cell_type} from Notebook"
)
if cell_type == "markdown":
# "markdown" or "markdown (jupytext)"
if cell_type.startswith("markdown"):
driver.execute_script(
"Jupyter.notebook.cells_to_markdown([arguments[0]]);",
nb_cell_idx + i,
Expand All @@ -229,28 +269,49 @@ def _apply_cell_operations(self, driver, notebook_cell_operations):
def get_cell_start_row(self, cell_idx):
return sum(self.num_rows_per_cell[:cell_idx])

def get_cell_index_from_row(self, row):
def get_cell_index_from_row(
self,
row: int,
num_rows_per_cell: list[int] | None = None,
raise_out_of_bound: bool = True,
) -> tuple[int, int, int]:
"""
Returns the cell index for the given row.
Args:
row (int): row index
num_rows_per_cell (list): number of rows per cell. If None, use self.num_rows_per_cell
raise_out_of_bound (bool): whether to raise an IndexError if the row is out of bound
Returns:
int: cell index
int: cell start row
int: row index within the cell
"""
if num_rows_per_cell is None:
num_rows_per_cell = self.num_rows_per_cell

cell_start_row = 0
for i, num_rows in enumerate(self.num_rows_per_cell):
i = 0
for i, num_rows in enumerate(num_rows_per_cell):
if cell_start_row + num_rows > row:
return i, cell_start_row, row - cell_start_row
cell_start_row += num_rows

raise IndexError("Could not find cell for row {}".format(row))
# Out of bound. Could be adding a new line.
if raise_out_of_bound:
raise IndexError(f"Could not find cell for row {row}")
else:
return i, cell_start_row, row - cell_start_row

def _check_validity(self):
assert len(self.buf) == sum(self.num_rows_per_cell)
assert len(self.cell_types) == len(self.num_rows_per_cell)
assert self.cell_types[0] == "header"
assert all(x in ("code", "markdown") for x in self.cell_types[1:])
assert all(
x in ("code", "markdown", "markdown (jupytext)")
for x in self.cell_types[1:]
)

def _partial_sync_to_notebook(
self, driver, start_cell_idx, end_cell_idx, strip=True
Expand Down Expand Up @@ -316,7 +377,8 @@ def _partial_sync_to_notebook(
for i, cell_type in enumerate(
self.cell_types[start_cell_idx : end_cell_idx + 1]
)
if cell_type == "markdown"
if cell_type.startswith("markdown")
# "markdown" or "markdown (jupytext)"
]

if len(code_cell_indices) > 0:
Expand Down
114 changes: 114 additions & 0 deletions tests/test_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,120 @@ def test_buffer_markdown_2(jupbuf1):
assert jupbuf1.cell_types == ["header", "markdown", "code"]


def test_buffer_markdown_jupytext():
buffer = JupyniumBuffer(["a", "b", "c", "# %% [md]", "d", "# %%", "f"])
assert buffer.num_rows_per_cell == [3, 2, 2]
assert buffer.cell_types == ["header", "markdown (jupytext)", "code"]
assert buffer.buf[4] == "d"


def test_buffer_markdown_jupytext_2():
buffer = JupyniumBuffer(
[
"a",
"# b",
"# # c",
"# %% [markdown]",
"# # header",
"# content",
"noescape",
"# %%",
"f",
]
)
assert buffer.num_rows_per_cell == [3, 4, 2]
assert buffer.cell_types == ["header", "markdown (jupytext)", "code"]

assert buffer.buf[0] == "a"
assert buffer.buf[1] == "# b"
assert buffer.buf[2] == "# # c"

assert buffer.buf[4] == "# header"
assert buffer.buf[5] == "content"
assert buffer.buf[6] == "noescape"


def test_buffer_markdown_jupytext_inject():
buffer = JupyniumBuffer(
[
"a",
"# b",
"# # c",
"# %% [markdown]",
"# # header",
"# content",
"noescape",
"# %%",
"f",
],
"markdown (jupytext)",
)
assert buffer.num_rows_per_cell == [3, 4, 2]
assert buffer.cell_types == ["markdown (jupytext)", "markdown (jupytext)", "code"]

assert buffer.buf[0] == "a"
assert buffer.buf[1] == "b"
assert buffer.buf[2] == "# c"

assert buffer.buf[4] == "# header"
assert buffer.buf[5] == "content"
assert buffer.buf[6] == "noescape"


def test_buffer_markdown_jupytext_inject_2():
buffer = JupyniumBuffer(
[
"a",
"# b",
"# # c",
"# %% [markdown]",
"# # header",
"# content",
"noescape",
"# %%",
"f",
],
"markdown",
)
assert buffer.num_rows_per_cell == [3, 4, 2]
assert buffer.cell_types == ["markdown", "markdown (jupytext)", "code"]

assert buffer.buf[0] == "a"
assert buffer.buf[1] == "# b"
assert buffer.buf[2] == "# # c"

assert buffer.buf[4] == "# header"
assert buffer.buf[5] == "content"
assert buffer.buf[6] == "noescape"


def test_buffer_markdown_jupytext_inject_3():
buffer = JupyniumBuffer(
[
"a",
"# b",
"# # c",
"# %% [markdown]",
"# # header",
"# content",
"noescape",
"# %%",
"f",
],
"code",
)
assert buffer.num_rows_per_cell == [3, 4, 2]
assert buffer.cell_types == ["code", "markdown (jupytext)", "code"]

assert buffer.buf[0] == "a"
assert buffer.buf[1] == "# b"
assert buffer.buf[2] == "# # c"

assert buffer.buf[4] == "# header"
assert buffer.buf[5] == "content"
assert buffer.buf[6] == "noescape"


def test_get_cell_start_row(jupbuf1):
assert jupbuf1.get_cell_start_row(0) == 0
assert jupbuf1.get_cell_start_row(1) == 3
Expand Down

0 comments on commit 8ee9bf7

Please sign in to comment.