Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into windows-compatabili…
Browse files Browse the repository at this point in the history
…ty-test_utils
  • Loading branch information
rohitsanj committed Aug 12, 2022
2 parents 0137c57 + a3f530e commit daa9419
Show file tree
Hide file tree
Showing 8 changed files with 277 additions and 60 deletions.
22 changes: 20 additions & 2 deletions papermill/engines.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .exceptions import PapermillException
from .clientwrap import PapermillNotebookClient
from .iorw import write_ipynb
from .utils import merge_kwargs, remove_args
from .utils import merge_kwargs, remove_args, nb_kernel_name, nb_language


class PapermillEngines(object):
Expand Down Expand Up @@ -48,6 +48,14 @@ def execute_notebook_with_engine(self, engine_name, nb, kernel_name, **kwargs):
"""Fetch a named engine and execute the nb object against it."""
return self.get_engine(engine_name).execute_notebook(nb, kernel_name, **kwargs)

def nb_kernel_name(self, engine_name, nb, name=None):
    """Resolve the kernel name for ``nb`` using the named engine.

    The engine is looked up with ``self.get_engine(engine_name)`` and the
    question is delegated to that engine's own ``nb_kernel_name(nb, name)``,
    whose result is returned unchanged.
    """
    engine = self.get_engine(engine_name)
    return engine.nb_kernel_name(nb, name)

def nb_language(self, engine_name, nb, language=None):
    """Resolve the programming language for ``nb`` using the named engine.

    The engine is looked up with ``self.get_engine(engine_name)`` and the
    question is delegated to that engine's own ``nb_language(nb, language)``,
    whose result is returned unchanged.
    """
    engine = self.get_engine(engine_name)
    return engine.nb_language(nb, language)


def catch_nb_assignment(func):
"""
Expand Down Expand Up @@ -368,6 +376,16 @@ def execute_managed_notebook(cls, nb_man, kernel_name, **kwargs):
"""An abstract method where implementation will be defined in a subclass."""
raise NotImplementedError("'execute_managed_notebook' is not implemented for this engine")

@classmethod
def nb_kernel_name(cls, nb, name=None):
    """Return the kernel name for ``nb`` using the default implementation.

    Delegates to the module-level ``nb_kernel_name`` helper imported from
    ``.utils``, passing ``nb`` and ``name`` through unchanged.
    """
    resolved_name = nb_kernel_name(nb, name)
    return resolved_name

@classmethod
def nb_language(cls, nb, language=None):
    """Return the programming language for ``nb`` using the default implementation.

    Delegates to the module-level ``nb_language`` helper imported from
    ``.utils``, passing ``nb`` and ``language`` through unchanged.
    """
    resolved_language = nb_language(nb, language)
    return resolved_language


class NBClientEngine(Engine):
"""
Expand All @@ -393,7 +411,7 @@ def execute_managed_notebook(
Performs the actual execution of the parameterized notebook locally.
Args:
nb (NotebookNode): Executable notebook object.
nb_man (NotebookExecutionManager): Wrapper for execution state of a notebook.
kernel_name (str): Name of kernel to execute the notebook against.
log_output (bool): Flag for whether or not to write notebook output to the
configured logger.
Expand Down
19 changes: 13 additions & 6 deletions papermill/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .exceptions import PapermillExecutionError
from .iorw import get_pretty_path, local_file_io_cwd, load_notebook_node, write_ipynb
from .engines import papermill_engines
from .utils import chdir, nb_kernel_name
from .utils import chdir
from .parameterize import add_builtin_parameters, parameterize_notebook, parameterize_path


Expand All @@ -34,8 +34,8 @@ def execute_notebook(
Parameters
----------
input_path : str or Path
Path to input notebook
input_path : str or Path or nbformat.NotebookNode
Path to input notebook or NotebookNode object of notebook
output_path : str or Path or None
Path to save executed notebook. If None, no file will be saved
parameters : dict, optional
Expand Down Expand Up @@ -92,16 +92,23 @@ def execute_notebook(
# Parameterize the Notebook.
if parameters:
nb = parameterize_notebook(
nb, parameters, report_mode, kernel_name=kernel_name, language=language
nb,
parameters,
report_mode,
kernel_name=kernel_name,
language=language,
engine_name=engine_name,
)

nb = prepare_notebook_metadata(nb, input_path, output_path, report_mode)
# clear out any existing error markers from previous papermill runs
nb = remove_error_markers(nb)

if not prepare_only:
# Fetch out the name from the notebook document
kernel_name = nb_kernel_name(nb, kernel_name)
# Drop down to the engine to fetch the kernel name from the notebook document
kernel_name = papermill_engines.nb_kernel_name(
engine_name=engine_name, nb=nb, name=kernel_name
)
# Execute the Notebook in `cwd` if it is set
with chdir(cwd):
nb = papermill_engines.execute_notebook_with_engine(
Expand Down
112 changes: 73 additions & 39 deletions papermill/iorw.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,50 +96,14 @@ def __init__(self):
self.reset()

def read(self, path, extensions=['.ipynb', '.json']):
if path == '-':
return sys.stdin.read()

if not fnmatch.fnmatch(os.path.basename(path).split('?')[0], '*.*'):
warnings.warn(
"the file is not specified with any extension : " + os.path.basename(path)
)
elif not any(
fnmatch.fnmatch(os.path.basename(path).split('?')[0], '*' + ext) for ext in extensions
):
warnings.warn(
"The specified input file ({}) does not end in one of {}".format(path, extensions)
)
# Handle https://github.com/nteract/papermill/issues/317
notebook_metadata = self.get_handler(path).read(path)
notebook_metadata = self.get_handler(path, extensions).read(path)
if isinstance(notebook_metadata, (bytes, bytearray)):
return notebook_metadata.decode('utf-8')
return notebook_metadata

def write(self, buf, path, extensions=['.ipynb', '.json']):
if path is None:
return
if path == '-':
try:
return sys.stdout.buffer.write(buf.encode('utf-8'))
except AttributeError:
# Originally required by https://github.com/nteract/papermill/issues/420
# Support Buffer.io objects
return sys.stdout.write(buf.encode('utf-8'))

return sys.stdout.buffer.write(buf.encode('utf-8'))

# Usually no return object here
if not fnmatch.fnmatch(os.path.basename(path).split('?')[0], '*.*'):
warnings.warn(
"the file is not specified with any extension : " + os.path.basename(path)
)
elif not any(
fnmatch.fnmatch(os.path.basename(path).split('?')[0], '*' + ext) for ext in extensions
):
warnings.warn(
"The specified output file ({}) does not end in one of {}".format(path, extensions)
)
return self.get_handler(path).write(buf, path)
return self.get_handler(path, extensions).write(buf, path)

def listdir(self, path):
    """List the entries under ``path`` using the handler selected for it.

    The handler is chosen by ``self.get_handler(path)`` and its ``listdir``
    result is returned unchanged.
    """
    handler = self.get_handler(path)
    return handler.listdir(path)
Expand All @@ -159,10 +123,44 @@ def register_entry_points(self):
for entrypoint in entrypoints.get_group_all("papermill.io"):
self.register(entrypoint.name, entrypoint.load())

def get_handler(self, path):
def get_handler(self, path, extensions=None):
'''Get I/O Handler based on a notebook path
Parameters
----------
path : str or nbformat.NotebookNode or None
extensions : list of str, optional
Required file extension options for the path (if path is a string), which
will log a warning if there is no match. Defaults to None, which does not
check for any extensions
Raises
------
PapermillException: If a valid I/O handler could not be found for the input path
Returns
-------
I/O Handler
'''
if path is None:
return NoIOHandler()

if isinstance(path, nbformat.NotebookNode):
return NotebookNodeHandler()

if extensions:
if not fnmatch.fnmatch(os.path.basename(path).split('?')[0], '*.*'):
warnings.warn(
"the file is not specified with any extension : " + os.path.basename(path)
)
elif not any(
fnmatch.fnmatch(os.path.basename(path).split('?')[0], '*' + ext)
for ext in extensions
):
warnings.warn(
"The specified file ({}) does not end in one of {}".format(path, extensions)
)

local_handler = None
for scheme, handler in self._handlers:
if scheme == 'local':
Expand Down Expand Up @@ -411,6 +409,41 @@ def pretty_path(self, path):
return path


class StreamHandler(object):
    '''Handler for Stdin/Stdout streams'''

    def read(self, path):
        """Return everything read from ``sys.stdin``; ``path`` is not used."""
        return sys.stdin.read()

    def listdir(self, path):
        """Always raise ``PapermillException``: listing is not supported for streams."""
        raise PapermillException('listdir is not supported by Stream Handler')

    def write(self, buf, path):
        """Write ``buf`` to ``sys.stdout``; ``path`` is not used.

        The text is UTF-8 encoded and written to ``sys.stdout.buffer`` when the
        stream has one. If it does not, the encoded bytes are written to
        ``sys.stdout`` itself, and if that stream rejects bytes with
        ``TypeError`` the unencoded text is written instead.

        Returns whatever the underlying ``write`` call returns.
        """
        payload = buf.encode('utf-8')
        try:
            return sys.stdout.buffer.write(payload)
        except AttributeError:
            # Originally required by https://github.com/nteract/papermill/issues/420
            # Support Buffer.io objects
            pass

        try:
            return sys.stdout.write(payload)
        except TypeError:
            # A text-only replacement stream (for example io.StringIO) has no
            # ``buffer`` and refuses bytes, so hand it the original text.
            return sys.stdout.write(buf)

    def pretty_path(self, path):
        """Return ``path`` unchanged."""
        return path


class NotebookNodeHandler(object):
    '''Handler for input_path of nbformat.NotebookNode object'''

    def read(self, path):
        """Return ``nbformat.writes(path)``; here ``path`` is the notebook object itself."""
        serialized = nbformat.writes(path)
        return serialized

    def listdir(self, path):
        """Always raise ``PapermillException``: listing is not supported for notebook objects."""
        raise PapermillException('listdir is not supported by NotebookNode Handler')

    def write(self, buf, path):
        """Always raise ``PapermillException``: writing is not supported for notebook objects."""
        raise PapermillException('write is not supported by NotebookNode Handler')

    def pretty_path(self, path):
        """Return a fixed label, since there is no filesystem path to display."""
        label = 'NotebookNode object'
        return label


class NoIOHandler(object):
'''Handler for output_path of None - intended to not write anything'''

Expand Down Expand Up @@ -448,6 +481,7 @@ class NoDatesSafeLoader(yaml.SafeLoader):
papermill_io.register("hdfs://", HDFSHandler())
papermill_io.register("http://github.com/", GithubHandler())
papermill_io.register("https://github.com/", GithubHandler())
papermill_io.register("-", StreamHandler())
papermill_io.register_entry_points()


Expand Down
26 changes: 17 additions & 9 deletions papermill/parameterize.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import copy
import nbformat

from .engines import papermill_engines
from .log import logger
from .exceptions import PapermillMissingParameterException
from .iorw import read_yaml_file
from .translators import translate_parameters
from .utils import find_first_tagged_cell_index, nb_kernel_name, nb_language
from .utils import find_first_tagged_cell_index

from uuid import uuid4
from datetime import datetime
Expand Down Expand Up @@ -38,13 +39,14 @@ def parameterize_path(path, parameters):
Parameters
----------
path : string or None
Path with optional parameters, as a python format string
path : string or nbformat.NotebookNode or None
Path with optional parameters, as a python format string. If path is a NotebookNode
or None, the path is returned without modification
parameters : dict or None
Arbitrary keyword arguments to fill in the path
"""
if path is None:
return
if path is None or isinstance(path, nbformat.NotebookNode):
return path

if parameters is None:
parameters = {}
Expand All @@ -56,7 +58,13 @@ def parameterize_path(path, parameters):


def parameterize_notebook(
nb, parameters, report_mode=False, comment='Parameters', kernel_name=None, language=None
nb,
parameters,
report_mode=False,
comment='Parameters',
kernel_name=None,
language=None,
engine_name=None,
):
"""Assigned parameters into the appropriate place in the input notebook
Expand All @@ -78,9 +86,9 @@ def parameterize_notebook(
# Copy the nb object to avoid polluting the input
nb = copy.deepcopy(nb)

# Fetch out the name and language from the notebook document
kernel_name = nb_kernel_name(nb, kernel_name)
language = nb_language(nb, language)
# Fetch the kernel name and language from the notebook document by dropping down into the engine's implementation
kernel_name = papermill_engines.nb_kernel_name(engine_name, nb, kernel_name)
language = papermill_engines.nb_language(engine_name, nb, language)

# Generate parameter content based on the kernel_name
param_content = translate_parameters(kernel_name, language, parameters, comment)
Expand Down
20 changes: 20 additions & 0 deletions papermill/tests/notebooks/test_notebooknode_io.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": ["print('Hello World')"]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit daa9419

Please sign in to comment.