Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify dev_tools.notebooks.utils.rewrite_notebook #6030

Merged
merged 8 commits into from
Mar 14, 2023
7 changes: 6 additions & 1 deletion dev_tools/notebooks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from dev_tools.notebooks.utils import filter_notebooks, list_all_notebooks, rewrite_notebook
from dev_tools.notebooks.utils import (
filter_notebooks,
list_all_notebooks,
remove_if_temporary_notebook,
rewrite_notebook,
)
13 changes: 8 additions & 5 deletions dev_tools/notebooks/isolated_notebook_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,12 @@
import pytest

from dev_tools import shell_tools
from dev_tools.notebooks import list_all_notebooks, filter_notebooks, rewrite_notebook
from dev_tools.notebooks import (
list_all_notebooks,
filter_notebooks,
rewrite_notebook,
remove_if_temporary_notebook,
)

# these notebooks rely on features that are not released yet
# after every release we should raise a PR and empty out this list
Expand Down Expand Up @@ -177,7 +182,7 @@ def test_notebooks_against_released_cirq(partition, notebook_path, cloned_env):

notebook_file = os.path.basename(notebook_path)

rewritten_notebook_descriptor, rewritten_notebook_path = rewrite_notebook(notebook_path)
rewritten_notebook_path = rewrite_notebook(notebook_path)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems we are (and have always been) leaking temporary files. Could you fix while you're at it?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@viathor - thank you for the review. The temporary files cleanup was a bit more involved - can you please take a quick look again?


cmd = f"""
mkdir -p out/{notebook_rel_dir}
Expand Down Expand Up @@ -208,9 +213,7 @@ def test_notebooks_against_released_cirq(partition, notebook_path, cloned_env):
f"instead of `pip install cirq` to this notebook, and exclude it from "
f"dev_tools/notebooks/isolated_notebook_test.py."
)

if rewritten_notebook_descriptor:
os.close(rewritten_notebook_descriptor)
remove_if_temporary_notebook(rewritten_notebook_path)


@pytest.mark.parametrize("notebook_path", NOTEBOOKS_DEPENDING_ON_UNRELEASED_FEATURES)
Expand Down
13 changes: 8 additions & 5 deletions dev_tools/notebooks/notebook_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@
import pytest

from dev_tools import shell_tools
from dev_tools.notebooks import filter_notebooks, list_all_notebooks, rewrite_notebook
from dev_tools.notebooks import (
filter_notebooks,
list_all_notebooks,
rewrite_notebook,
remove_if_temporary_notebook,
)

SKIP_NOTEBOOKS = [
# skipping vendor notebooks as we don't have auth sorted out
Expand Down Expand Up @@ -67,7 +72,7 @@ def test_notebooks_against_released_cirq(notebook_path):
notebook_file = os.path.basename(notebook_path)
notebook_rel_dir = os.path.dirname(os.path.relpath(notebook_path, "."))
out_path = f"out/{notebook_rel_dir}/{notebook_file[:-6]}.out.ipynb"
rewritten_notebook_descriptor, rewritten_notebook_path = rewrite_notebook(notebook_path)
rewritten_notebook_path = rewrite_notebook(notebook_path)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

papermill_flags = "--no-request-save-on-cell-execute --autosave-cell-every 0"
cmd = f"""mkdir -p out/{notebook_rel_dir}
papermill {rewritten_notebook_path} {out_path} {papermill_flags}"""
Expand All @@ -83,6 +88,4 @@ def test_notebooks_against_released_cirq(notebook_path):
f"notebook (in Github Actions, you can download it from the workflow artifact"
f" 'notebook-outputs')"
)

if rewritten_notebook_descriptor:
os.close(rewritten_notebook_descriptor)
remove_if_temporary_notebook(rewritten_notebook_path)
48 changes: 33 additions & 15 deletions dev_tools/notebooks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def rewrite_notebook(notebook_path):

* Lines in this file without `->` are ignored.

* Lines in this file with `->` are split into two (if there are mulitple `->` it is an
* Lines in this file with `->` are split into two (if there are multiple `->` it is an
error). The first of these is compiled into a pattern match, via `re.compile`, and
the second is the replacement for that match.

Expand All @@ -82,16 +82,17 @@ def rewrite_notebook(notebook_path):
It is the responsibility of the caller of this method to delete the new file.

Returns:
Tuple of a file descriptor and the file path for the rewritten file. If no `.tst` file
was found, then the file descriptor is None and the path is `notebook_path`.
The file path for the rewritten file. If no `.tst` file was found,
return the input `notebook_path` as the notebook was not changed.
Otherwise return a new path created in the temporary directory.

Raises:
AssertionError: If there are multiple `->` per line, or not all of the replacements
are used.
"""
notebook_test_path = os.path.splitext(notebook_path)[0] + '.tst'
if not os.path.exists(notebook_test_path):
return None, notebook_path
return notebook_path

# Get the rewrite rules.
patterns = []
Expand All @@ -104,20 +105,37 @@ def rewrite_notebook(notebook_path):

used_patterns = set()
with open(notebook_path, 'r') as original_file:
new_file_descriptor, new_file_path = tempfile.mkstemp(suffix='.ipynb')
with open(new_file_path, 'w') as new_file:
for line in original_file:
new_line = line
for pattern, replacement in patterns:
new_line = pattern.sub(replacement, new_line)
if new_line != line:
used_patterns.add(pattern)
break
new_file.write(new_line)
lines = original_file.readlines()
for i, line in enumerate(lines):
for pattern, replacement in patterns:
new_line = pattern.sub(replacement, line)
if new_line != line:
lines[i] = new_line
used_patterns.add(pattern)
break

assert len(patterns) == len(used_patterns), (
'Not all patterns where used. Patterns not used: '
f'{set(x for x, _ in patterns) - used_patterns}'
)

return new_file_descriptor, new_file_path
with tempfile.NamedTemporaryFile(mode='w', suffix='-rewrite.ipynb', delete=False) as new_file:
new_file.writelines(lines)

return new_file.name


def remove_if_temporary_notebook(notebook_path: str):
"""Delete temporary notebook if written by `rewrite_notebook`.

Do nothing if the specified notebook is not in the temporary directory
or if its filename does not end in '-rewrite.ipynb'

Use this to safely clean up notebooks created by `rewrite_notebook`.
"""
if (
notebook_path.endswith('-rewrite.ipynb')
and os.path.isfile(notebook_path)
and os.path.dirname(notebook_path) == tempfile.gettempdir()
):
Copy link
Collaborator

@viathor viathor Mar 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This implements the pattern wherein we make a decision (here: whether or not we need to rewrite a notebook into a temporary file) and then later try to work out what decision was taken to correlate earlier and later actions. This coding pattern is brittle since it's easy for the code to evolve in a way that breaks perfect correlation between the two decision points.

One way to solve this is to return a bool from rewrite_notebook indicating whether the file is temporary or not. This way we can ensure that the file is deleted if and only if it is temporary while also ensuring that the decision is made once.

If you really dislike two return values you could try to hide the bool in the type by e.g. returning an instance of tempfile.NamedTemporaryFile or a file object (they both have .name attribute I think...). This is fine too as long as it can be made to work (haven't tried).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the feedback - per our offline talk, it is even simpler to have rewrite_notebook always create a temporary file which can be removed without any extra logic.

Done in 92477a7 and 0d5895f.

os.remove(notebook_path)
21 changes: 13 additions & 8 deletions dev_tools/notebooks/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,40 +37,43 @@ def write_test_data(ipynb_txt, tst_txt):
def test_rewrite_notebook():
directory, ipynb_path = write_test_data('d = 5\nd = 4', 'd = 5->d = 3')

descriptor, path = dt.rewrite_notebook(ipynb_path)
path = dt.rewrite_notebook(ipynb_path)

assert path != ipynb_path
with open(path, 'r') as f:
rewritten = f.read()
assert rewritten == 'd = 3\nd = 4'

os.close(descriptor)
dt.remove_if_temporary_notebook(path)
assert not os.path.exists(path)
shutil.rmtree(directory)


def test_rewrite_notebook_multiple():
directory, ipynb_path = write_test_data('d = 5\nd = 4', 'd = 5->d = 3\nd = 4->d = 1')

descriptor, path = dt.rewrite_notebook(ipynb_path)
path = dt.rewrite_notebook(ipynb_path)

with open(path, 'r') as f:
rewritten = f.read()
assert rewritten == 'd = 3\nd = 1'

os.close(descriptor)
dt.remove_if_temporary_notebook(path)
assert not os.path.exists(path)
shutil.rmtree(directory)


def test_rewrite_notebook_ignore_non_seperator_lines():
directory, ipynb_path = write_test_data('d = 5\nd = 4', 'd = 5->d = 3\n# comment')

descriptor, path = dt.rewrite_notebook(ipynb_path)
path = dt.rewrite_notebook(ipynb_path)

with open(path, 'r') as f:
rewritten = f.read()
assert rewritten == 'd = 3\nd = 4'

os.close(descriptor)
dt.remove_if_temporary_notebook(path)
assert not os.path.exists(path)
shutil.rmtree(directory)


Expand All @@ -80,10 +83,12 @@ def test_rewrite_notebook_no_tst_file():
with open(ipynb_path, 'w') as f:
f.write('d = 5\nd = 4')

descriptor, path = dt.rewrite_notebook(ipynb_path)
path = dt.rewrite_notebook(ipynb_path)

assert descriptor is None
# verify clean up is skipped when files are not rewritten
assert path == ipynb_path
dt.remove_if_temporary_notebook(path)
assert os.path.exists(path)

shutil.rmtree(directory)

Expand Down