New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Disable IPython History in executing preprocessor #1017
Changes from 6 commits
cb2ef94
e348ee1
e296eb5
53653cb
fb3780b
7b4a677
0c7d7ba
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -220,6 +220,20 @@ class ExecutePreprocessor(Preprocessor): | |
) | ||
).tag(config=True) | ||
|
||
ipython_hist_file = Unicode( | ||
default_value=':memory:', | ||
help="""Path to file to use for SQLite history database for an IPython kernel. | ||
|
||
The specific value `:memory:` (including the colon | ||
at both end but not the back ticks), avoids creating a history file. Otherwise, IPython | ||
will create a history file for each kernel. | ||
|
||
When running kernels simultaneously (e.g. via multiprocessing) saving history a single | ||
SQLite file can result in database errors, so using `:memory:` is recommended in non-interactive | ||
contexts. | ||
|
||
""").tag(config=True) | ||
|
||
kernel_manager_class = Type( | ||
config=True, | ||
help='The kernel manager class to use.' | ||
|
@@ -268,6 +282,8 @@ def start_new_kernel(self, **kwargs): | |
'kernelspec', {}).get('name', 'python') | ||
km = self.kernel_manager_class(kernel_name=self.kernel_name, | ||
config=self.config) | ||
if km.ipykernel and self.ipython_hist_file: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is If not, I'd prefer to know where this is being assigned. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is a property of the Kernel Manager but it isn't documented. I used it to encapsulate the logic about using an |
||
self.extra_arguments += ['--HistoryManager.hist_file={}'.format(self.ipython_hist_file)] | ||
km.start_kernel(extra_arguments=self.extra_arguments, **kwargs) | ||
|
||
kc = km.client() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from IPython import get_ipython" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": { | ||
"scrolled": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"ip = get_ipython()\n", | ||
"assert ip.history_manager.hist_file == ':memory:'" | ||
] | ||
} | ||
], | ||
"metadata": {}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Ensure notebooks can execute in parallel\n", | ||
"\n", | ||
"This notebook uses a file system based \"lock\" to assert that two instances of the notebook kernel will run in parallel. Each instance writes to a file in a temporary directory, and then tries to read the other file from\n", | ||
"the temporary directory, so that running them in sequence will fail, but running them in parallel will succed.\n", | ||
"\n", | ||
"Two notebooks are launched, each with an injected cell which sets the `this_notebook` variable. One notebook is set to `this_notebook = 'A'` and the other `this_notebook = 'B'`." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"import os.path\n", | ||
"import tempfile\n", | ||
"import time" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# the variable this_notebook is injectected in a cell above by the test framework.\n", | ||
"other_notebook = {'A':'B', 'B':'A'}[this_notebook]\n", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be good to add a comment about how this works with the A waiting for B file and visa versa There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also you probably want a comment that this_notebook is undefined and injected by the test There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 I've added some comments to the notebook to explain a bit about how it works. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks |
||
"directory = os.environ['NBEXECUTE_TEST_PARALLEL_TMPDIR']\n", | ||
"with open(os.path.join(directory, 'test_file_{}.txt'.format(this_notebook)), 'w') as f:\n", | ||
" f.write('Hello from {}'.format(this_notebook))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"start = time.time()\n", | ||
"timeout = 5\n", | ||
"end = start + timeout\n", | ||
"target_file = os.path.join(directory, 'test_file_{}.txt'.format(other_notebook))\n", | ||
"while time.time() < end:\n", | ||
" time.sleep(0.1)\n", | ||
" if os.path.exists(target_file):\n", | ||
" with open(target_file, 'r') as f:\n", | ||
" text = f.read()\n", | ||
" if text == 'Hello from {}'.format(other_notebook):\n", | ||
" break\n", | ||
"else:\n", | ||
" assert False, \"Timed out – didn't get a message from {}\".format(other_notebook)" | ||
] | ||
} | ||
], | ||
"metadata": {}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -13,6 +13,8 @@ | |||||||||||||||||||||||||||||||||||||||||
import io | ||||||||||||||||||||||||||||||||||||||||||
import os | ||||||||||||||||||||||||||||||||||||||||||
import re | ||||||||||||||||||||||||||||||||||||||||||
import threading | ||||||||||||||||||||||||||||||||||||||||||
import multiprocessing as mp | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
import nbformat | ||||||||||||||||||||||||||||||||||||||||||
import sys | ||||||||||||||||||||||||||||||||||||||||||
|
@@ -69,14 +71,17 @@ def build_preprocessor(opts): | |||||||||||||||||||||||||||||||||||||||||
return preprocessor | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
def run_notebook(filename, opts, resources): | ||||||||||||||||||||||||||||||||||||||||||
def run_notebook(filename, opts, resources, preprocess_notebook=None): | ||||||||||||||||||||||||||||||||||||||||||
"""Loads and runs a notebook, returning both the version prior to | ||||||||||||||||||||||||||||||||||||||||||
running it and the version after running it. | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
""" | ||||||||||||||||||||||||||||||||||||||||||
with io.open(filename) as f: | ||||||||||||||||||||||||||||||||||||||||||
input_nb = nbformat.read(f, 4) | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
if preprocess_notebook: | ||||||||||||||||||||||||||||||||||||||||||
input_nb = preprocess_notebook(input_nb) | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
preprocessor = build_preprocessor(opts) | ||||||||||||||||||||||||||||||||||||||||||
cleaned_input_nb = copy.deepcopy(input_nb) | ||||||||||||||||||||||||||||||||||||||||||
for cell in cleaned_input_nb.cells: | ||||||||||||||||||||||||||||||||||||||||||
|
@@ -218,6 +223,13 @@ def assert_notebooks_equal(expected, actual): | |||||||||||||||||||||||||||||||||||||||||
actual_execution_count = actual_cell.get('execution_count', None) | ||||||||||||||||||||||||||||||||||||||||||
assert expected_execution_count == actual_execution_count | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
def notebook_resources(): | ||||||||||||||||||||||||||||||||||||||||||
"""Prepare a notebook resources dictionary for executing test notebooks in the `files` folder.""" | ||||||||||||||||||||||||||||||||||||||||||
res = ResourcesDict() | ||||||||||||||||||||||||||||||||||||||||||
res['metadata'] = ResourcesDict() | ||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this need to be a nested ResourcesDict()? I feel like we should have a convenience class for creating a ResourcesDict that is prepopulated a value for metadata rather than doing this. The pattern is fine for right now, but it would be useful to have a docstring describing the purpose of this new function. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🤔 I took this snippet from I'll add a quick docstring, to the effect of "prepares a notebook resources dictionary" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm fine with the test doing what it does and not blocking a merge on test helpers. It's not critical path code and it's readable for anyone who wants to tweak it in the future imo. |
||||||||||||||||||||||||||||||||||||||||||
res['metadata']['path'] = os.path.join(current_dir, 'files') | ||||||||||||||||||||||||||||||||||||||||||
return res | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
@pytest.mark.parametrize( | ||||||||||||||||||||||||||||||||||||||||||
["input_name", "opts"], | ||||||||||||||||||||||||||||||||||||||||||
|
@@ -238,18 +250,66 @@ def assert_notebooks_equal(expected, actual): | |||||||||||||||||||||||||||||||||||||||||
("Unicode.ipynb", dict(kernel_name="python")), | ||||||||||||||||||||||||||||||||||||||||||
("UnicodePy3.ipynb", dict(kernel_name="python")), | ||||||||||||||||||||||||||||||||||||||||||
("update-display-id.ipynb", dict(kernel_name="python")), | ||||||||||||||||||||||||||||||||||||||||||
("Check History in Memory.ipynb", dict(kernel_name="python")), | ||||||||||||||||||||||||||||||||||||||||||
] | ||||||||||||||||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||||||||||||||||
def test_run_all_notebooks(input_name, opts): | ||||||||||||||||||||||||||||||||||||||||||
"""Runs a series of test notebooks and compares them to their actual output""" | ||||||||||||||||||||||||||||||||||||||||||
input_file = os.path.join(current_dir, 'files', input_name) | ||||||||||||||||||||||||||||||||||||||||||
res = ResourcesDict() | ||||||||||||||||||||||||||||||||||||||||||
res['metadata'] = ResourcesDict() | ||||||||||||||||||||||||||||||||||||||||||
res['metadata']['path'] = os.path.join(current_dir, 'files') | ||||||||||||||||||||||||||||||||||||||||||
input_nb, output_nb = run_notebook(input_file, opts, res) | ||||||||||||||||||||||||||||||||||||||||||
input_nb, output_nb = run_notebook(input_file, opts, notebook_resources()) | ||||||||||||||||||||||||||||||||||||||||||
assert_notebooks_equal(input_nb, output_nb) | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
def label_parallel_notebook(nb, label): | ||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this would more idiomatically be treated as a custom preprocessor. You would pass in via resources the label value and then overwrite the nbconvert/nbconvert/preprocessors/base.py Lines 51 to 70 in 8af6aac
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That makes sense – do you think that is worthwhile for this set of tests? I was hesitant to re-implement papermill's templating here, but happy to do so if that makes it more maintainable. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I share that hesitation just for testing purposes. |
||||||||||||||||||||||||||||||||||||||||||
"""Insert a cell in a notebook which sets the variable `this_notebook` to the string `label`. | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
Used for parallel testing to label two notebooks which are run simultaneously. | ||||||||||||||||||||||||||||||||||||||||||
""" | ||||||||||||||||||||||||||||||||||||||||||
label_cell = nbformat.NotebookNode( | ||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this would be cleaner if it used |
||||||||||||||||||||||||||||||||||||||||||
{ | ||||||||||||||||||||||||||||||||||||||||||
"cell_type": "code", | ||||||||||||||||||||||||||||||||||||||||||
"execution_count": None, | ||||||||||||||||||||||||||||||||||||||||||
"metadata": {}, | ||||||||||||||||||||||||||||||||||||||||||
"outputs": [], | ||||||||||||||||||||||||||||||||||||||||||
"source": "this_notebook = '{}'".format(label), | ||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
nb.cells.insert(1, label_cell) | ||||||||||||||||||||||||||||||||||||||||||
return nb | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
def test_parallel_notebooks(capfd, tmpdir): | ||||||||||||||||||||||||||||||||||||||||||
"""Two notebooks should be able to be run simultaneously without problems. | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
The two notebooks spawned here use the filesystem to check that the other notebook | ||||||||||||||||||||||||||||||||||||||||||
wrote to the filesystem.""" | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
opts = dict(kernel_name="python") | ||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did you end up needing to overwrite the kernel? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure what part of this you are thinking about here? Kernels (and zmq.Context, the underlying culprit) are thread-safe, but not process safe – I am working on some other tests which demonstrate and then fix that for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I meant that this link is replacing the kernel from the notebook, and was curious if the line was actually needed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ahh, yes, I remember discussing. It is probably unnecessary, I’ll double check. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Indeed, it was redundant. To better match the other tests here, I removed the kernel spec from the notebook file. Is that okay with you? It might make it run on python2, but I think this should work on both python2 and python3. Though my understanding kernel names is admittedly weak! |
||||||||||||||||||||||||||||||||||||||||||
input_name = "Parallel Execute.ipynb" | ||||||||||||||||||||||||||||||||||||||||||
input_file = os.path.join(current_dir, "files", input_name) | ||||||||||||||||||||||||||||||||||||||||||
res = notebook_resources() | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
with modified_env({"NBEXECUTE_TEST_PARALLEL_TMPDIR": str(tmpdir)}): | ||||||||||||||||||||||||||||||||||||||||||
threads = [ | ||||||||||||||||||||||||||||||||||||||||||
threading.Thread( | ||||||||||||||||||||||||||||||||||||||||||
target=run_notebook, | ||||||||||||||||||||||||||||||||||||||||||
args=( | ||||||||||||||||||||||||||||||||||||||||||
input_file, | ||||||||||||||||||||||||||||||||||||||||||
opts, | ||||||||||||||||||||||||||||||||||||||||||
res, | ||||||||||||||||||||||||||||||||||||||||||
functools.partial(label_parallel_notebook, label=label), | ||||||||||||||||||||||||||||||||||||||||||
), | ||||||||||||||||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||||||||||||||||
for label in ("A", "B") | ||||||||||||||||||||||||||||||||||||||||||
] | ||||||||||||||||||||||||||||||||||||||||||
[t.start() for t in threads] | ||||||||||||||||||||||||||||||||||||||||||
[t.join(timeout=2) for t in threads] | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
captured = capfd.readouterr() | ||||||||||||||||||||||||||||||||||||||||||
assert captured.err == "" | ||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||
class TestExecute(PreprocessorTestsBase): | ||||||||||||||||||||||||||||||||||||||||||
"""Contains test functions for execute.py""" | ||||||||||||||||||||||||||||||||||||||||||
maxDiff = None | ||||||||||||||||||||||||||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍 We should have probably had this default a long time ago regardless of parallelism concerns.