Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for kernel launch parameters #22

Merged
merged 1 commit into from
Aug 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 15 additions & 14 deletions jupyter_kernel_mgmt/discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def find_kernels(self):
pass

@abstractmethod
def launch(self, name, cwd=None):
def launch(self, name, cwd=None, launch_params=None):
"""Launch a kernel, return (connection_info, kernel_manager).

name will be one of the kernel names produced by find_kernels()
Expand All @@ -34,7 +34,7 @@ def launch(self, name, cwd=None):
"""
pass

def launch_async(self, name, cwd=None):
def launch_async(self, name, cwd=None, launch_params=None):
"""Launch a kernel asynchronously using asyncio.

name will be one of the kernel names produced by find_kernels()
Expand Down Expand Up @@ -80,16 +80,17 @@ def find_kernels(self):
'metadata': spec.metadata,
}

def launch(self, name, cwd=None, launch_params=None):
    """Launch the kernelspec-defined kernel *name* in a subprocess.

    Returns the (connection_info, kernel_manager) pair produced by the
    subprocess launcher.  *launch_params* (name/value pairs) is forwarded
    to the launcher for argv substitution.
    """
    kernel_spec = self.ksm.get_kernel_spec(name)
    subprocess_launcher = SubprocessKernelLauncher(
        kernel_cmd=kernel_spec.argv,
        extra_env=kernel_spec.env,
        cwd=cwd,
        launch_params=launch_params,
    )
    return subprocess_launcher.launch()

def launch_async(self, name, cwd=None, launch_params=None):
    """Launch kernelspec kernel *name* via the asyncio-based launcher."""
    # Imported lazily so providers that never launch asynchronously do not
    # pull in the asyncio subprocess machinery.
    from .subproc.async_manager import AsyncSubprocessKernelLauncher
    kernel_spec = self.ksm.get_kernel_spec(name)
    async_launcher = AsyncSubprocessKernelLauncher(
        kernel_cmd=kernel_spec.argv,
        extra_env=kernel_spec.env,
        cwd=cwd,
        launch_params=launch_params,
    )
    return async_launcher.launch()


class IPykernelProvider(KernelProviderBase):
Expand Down Expand Up @@ -123,22 +124,22 @@ def find_kernels(self):
'resource_dir': info['resource_dir'],
}

def launch(self, name, cwd=None, launch_params=None):
    """Launch the ipykernel available in this Python environment.

    Returns (connection_info, kernel_manager) from the subprocess launcher.
    Raises ImportError when ipykernel is not importable.
    """
    info = self._check_for_kernel()
    if info is None:
        # ImportError is more precise than a bare Exception for a missing
        # module, and stays backward-compatible with any caller that
        # catches Exception.
        raise ImportError("ipykernel is not importable")

    launcher = SubprocessKernelLauncher(kernel_cmd=info['spec']['argv'],
                                        extra_env={}, cwd=cwd, launch_params=launch_params)
    return launcher.launch()

def launch_async(self, name, cwd=None, launch_params=None):
    """Launch the ipykernel asynchronously using asyncio.

    Raises ImportError when ipykernel is not importable.
    """
    # Imported lazily so synchronous users never load asyncio machinery.
    from .subproc.async_manager import AsyncSubprocessKernelLauncher
    info = self._check_for_kernel()
    if info is None:
        # ImportError is more precise than a bare Exception for a missing
        # module, and stays backward-compatible with callers catching
        # Exception.
        raise ImportError("ipykernel is not importable")
    return AsyncSubprocessKernelLauncher(
        kernel_cmd=info['spec']['argv'], extra_env={}, cwd=cwd, launch_params=launch_params).launch()


class KernelFinder(object):
Expand Down Expand Up @@ -188,22 +189,22 @@ def find_kernels(self):
kernel_type = provider.id + '/' + kernel_name
yield kernel_type, attributes

def launch(self, name, cwd=None):
def launch(self, name, cwd=None, launch_params=None):
"""Launch a kernel of a given kernel type.
"""
provider_id, kernel_id = name.split('/', 1)
for provider in self.providers:
if provider_id == provider.id:
return provider.launch(kernel_id, cwd)
return provider.launch(kernel_id, cwd=cwd, launch_params=launch_params)
raise KeyError(provider_id)

def launch_async(self, name, cwd=None):
def launch_async(self, name, cwd=None, launch_params=None):
"""Launch a kernel of a given kernel type, using asyncio.
"""
provider_id, kernel_id = name.split('/', 1)
for provider in self.providers:
if provider_id == provider.id:
return provider.launch_async(kernel_id, cwd)
return provider.launch_async(kernel_id, cwd=cwd, launch_params=launch_params)
raise KeyError(provider_id)


Expand Down
27 changes: 20 additions & 7 deletions jupyter_kernel_mgmt/subproc/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
port_names = ['shell_port', 'iopub_port', 'stdin_port', 'control_port',
'hb_port']


class SubprocessKernelLauncher:
"""Launch kernels in a subprocess.

Expand All @@ -44,13 +45,14 @@ class SubprocessKernelLauncher:
"""
transport = 'tcp'

def __init__(self, kernel_cmd, cwd, extra_env=None, ip=None):
def __init__(self, kernel_cmd, cwd, extra_env=None, ip=None, launch_params=None):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd probably say this level of the API should have specific kwargs which the provider is responsible for extracting from the generic launch_params. E.g. this could be called argv_format_args or something.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I first read this, I figured you were stating that these three keyword parameters could be collapsed into **kwargs. But now that I look closer, it seems like your comment really applies to the code that constructs the SubprocessKernelLauncher instance, since that would be the location in which argv-only parameters could be extracted into argv_format_args. Or are you saying that this method would have additional code that pulls the argv-only parameters into argv_format_args, for example?

I don't mind replacing this single parameter with **kwargs, but I'd still need to add code that pulls launch_params out of kwargs making those values available to the format code until the metadata definition is formalized. Or update the callers to use a different keyword.

Could you please clarify what you mean? Sorry.

self.kernel_cmd = kernel_cmd
self.cwd = cwd
self.extra_env = extra_env
if ip is None:
ip = localhost()
self.ip = ip
self.launch_params = launch_params
self.log = get_app_logger()

if self.transport == 'tcp' and not is_local_ip(ip):
Expand Down Expand Up @@ -138,18 +140,29 @@ def format_kernel_cmd(self, connection_file, kernel_resource_dir=None):
# but it should be.
cmd[0] = sys.executable

ns = dict(connection_file=connection_file,
# Preserve system-owned substitutions by starting with launch params
ns = dict()
if isinstance(self.launch_params, dict):
ns.update(self.launch_params)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to drop all launch_params into this namespace, or have a sub-dict for that and leave room in case there's a need for other kinds of parameters in the future?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I see your point relative to another area I was thinking about - environment variables - where the metadata also includes a list of supported environment variables. We could then maintain separate dicts. That said, having everything in the same namespace is only an issue if there are conflicts between env names and launch args and I'd be okay with stating that those spaces are shared.

I think we could keep a single set of parameters by adding meta-property indicating their expected usages. For example, a context meta-property could be included that has one of a set of values like {'env', 'argv', 'config'}. The presentation layer could choose to use the context to split how it prompts for the values or not - its up to them. The provider could decompose list into multiple sets as it chooses.

I think this comes down to extensibility and my feeling is it would be better to extend the schema with an additional context type vs. supporting a varying number of sub-dicts - where each sub-dict infers its context.

That said, my preference toward a metadata approach is just slightly stronger.


# Add system-owned substitutions
ns.update(dict(connection_file=connection_file,
prefix=sys.prefix,
)
))

if kernel_resource_dir:
ns["resource_dir"] = kernel_resource_dir

pat = re.compile(r'{([A-Za-z0-9_]+)}')

def from_ns(match):
"""Get the key out of ns if it's there, otherwise no change."""
return ns.get(match.group(1), match.group())
"""Get the key out of ns if it's there, otherwise no change.
Return as string since that's what is required by pattern
matching. We know this should be safe currently, because
only 'connection_file', 'sys.prefix' and 'resource_dir' are
candidates - all of which are strings.
"""
return str(ns.get(match.group(1), match.group()))

return [pat.sub(from_ns, arg) for arg in cmd]

Expand Down Expand Up @@ -310,12 +323,12 @@ def prepare_interrupt_event(env, interrupt_event=None):
env["IPY_INTERRUPT_EVENT"] = env["JPY_INTERRUPT_EVENT"]
return interrupt_event

def start_new_kernel(kernel_cmd, startup_timeout=60, cwd=None):
def start_new_kernel(kernel_cmd, startup_timeout=60, cwd=None, launch_params=None):
"""Start a new kernel, and return its Manager and a blocking client"""
from ..client import BlockingKernelClient
cwd = cwd or os.getcwd()

launcher = SubprocessKernelLauncher(kernel_cmd, cwd=cwd)
launcher = SubprocessKernelLauncher(kernel_cmd, cwd=cwd, launch_params=launch_params)
connection_info, km = launcher.launch()
kc = BlockingKernelClient(connection_info, manager=km)
try:
Expand Down
108 changes: 95 additions & 13 deletions jupyter_kernel_mgmt/tests/test_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ class DummyKernelProvider(discovery.KernelProviderBase):
def find_kernels(self):
yield 'sample', {'argv': ['dummy_kernel']}

def launch(self, name, cwd=None, launch_params=None):
    """Pretend to launch: hand back empty connection info and a dummy manager."""
    dummy_manager = DummyKernelManager()
    return {}, dummy_manager

def launch_async(self, name, cwd=None, launch_params=None):
    """Async launch is not implemented for the dummy provider; returns None."""
    return None


Expand All @@ -33,10 +33,18 @@ class DummyKernelSpecProvider(discovery.KernelSpecProvider):

# find_kernels() is inherited from KernelSpecProvider

def launch(self, name, cwd=None, launch_params=None):
    """Skip the real subprocess launch; return a dummy manager instead."""
    connection_info = {}
    return connection_info, DummyKernelManager()


class LaunchParamsKernelProvider(discovery.KernelSpecProvider):
    """A dummy kernelspec provider subclass for testing KernelFinder and KernelSpecProvider subclasses"""
    # Provider id: forms the prefix of kernel type names, e.g. 'params_kspec/params_kspec'.
    id = 'params_kspec'
    # Only kernelspecs stored under this file name are discovered by this provider.
    kernel_file = 'params_kspec.json'

    # find_kernels() and launch() are inherited from KernelSpecProvider


class DummyKernelManager(KernelManagerABC):
_alive = True

Expand All @@ -60,10 +68,6 @@ def interrupt(self):
def kill(self):
self._alive = False

def get_connection_info(self):
"""Return a dictionary of connection information"""
return {}


class ProviderApplication(Application):
name = 'ProviderApplication'
Expand Down Expand Up @@ -99,12 +103,38 @@ class KernelDiscoveryTests(unittest.TestCase):
def setUp(self):
    """Patch the Jupyter environment and install the kernelspecs the tests use."""
    self.env_patch = test_env()
    self.env_patch.start()

    kernels_dir = pjoin(paths.jupyter_data_dir(), 'kernels')
    install_sample_kernel(kernels_dir)
    install_sample_kernel(kernels_dir, 'dummy_kspec1', 'dummy_kspec.json')
    install_sample_kernel(kernels_dir, 'dummy_kspec2', 'dummy_kspec.json')

    # This provides an example of what a kernel provider might do to describe the
    # launch parameters it supports.  Expressing the metadata as JSON schema lets
    # applications easily build forms that gather the values.
    # Note that not all parameters are fed to `argv`.  Some may be used by the
    # provider to configure an environment (e.g., a kubernetes pod) in which the
    # kernel will run.  The idea is that the front-end fetches the parameter
    # metadata, prompts for values, and returns the launch_parameters
    # (name, value pairs) in the kernel startup POST json body, which then gets
    # passed into the kernel provider's launch method.
    #
    # See test_kernel_launch_params() for usage.
    params_json = {'argv': ['tail', '{follow}', '-n {line_count}', '{connection_file}'],
                   'display_name': 'Test kernel',
                   'metadata': {
                       'launch_parameter_schema': {
                           "title": "Params_kspec Kernel Provider Launch Parameter Schema",
                           "properties": {
                               "line_count": {"type": "integer", "minimum": 1, "default": 20, "description": "The number of lines to tail"},
                               "follow": {"type": "string", "enum": ["-f", "-F"], "default": "-f", "description": "The follow option to tail"},
                               "cpus": {"type": "number", "minimum": 0.5, "maximum": 8.0, "default": 4.0, "description": "The number of CPUs to use for this kernel"},
                               "memory": {"type": "integer", "minimum": 2, "maximum": 1024, "default": 8, "description": "The number of GB to reserve for memory for this kernel"}
                           },
                           "required": ["line_count", "follow"]
                       }
                   }
                   }
    install_sample_kernel(kernels_dir, 'params_kspec', 'params_kspec.json',
                          kernel_json=params_json)

def tearDown(self):
self.env_patch.stop()
Expand Down Expand Up @@ -165,6 +195,58 @@ def test_kernel_spec_provider_subclass():

conn_info, manager = kf.launch('dummy_kspec/dummy_kspec1')
assert isinstance(manager, DummyKernelManager)
manager.kill() # no process was started, so this is only for completeness

@staticmethod
def test_kernel_launch_params():
    """End-to-end check of launch-parameter metadata discovery and argv substitution."""
    kf = discovery.KernelFinder(providers=[LaunchParamsKernelProvider()])

    kspecs = list(kf.find_kernels())

    # Exactly one kernel of the parameterized type should be discovered.
    count = 0
    param_spec = None
    for name, spec in kspecs:
        if name == 'params_kspec/params_kspec':
            param_spec = spec
            count += 1

    assert count == 1
    assert param_spec['argv'] == ['tail', '{follow}', '-n {line_count}', '{connection_file}']

    # The application gathers launch parameters here...  Since this is the full
    # schema, an application will likely just access:
    # param_spec['metadata']['launch_parameter_schema']
    #
    line_count_schema = param_spec['metadata']['launch_parameter_schema']['properties']['line_count']
    follow_schema = param_spec['metadata']['launch_parameter_schema']['properties']['follow']
    cpus_schema = param_spec['metadata']['launch_parameter_schema']['properties']['cpus']
    memory_schema = param_spec['metadata']['launch_parameter_schema']['properties']['memory']

    # validate we have our metadata
    assert line_count_schema['minimum'] == 1
    assert follow_schema['default'] == '-f'
    assert cpus_schema['maximum'] == 8.0
    assert memory_schema['description'] == "The number of GB to reserve for memory for this kernel"

    # The kernel provider would be responsible for validating values against the
    # schema upon return from the client.  This includes setting any default
    # values for parameters that were not included, etc.  The following
    # simulates the parameter gathering...
    launch_params = dict()
    launch_params['follow'] = follow_schema['enum'][0]
    launch_params['line_count'] = 8
    launch_params['cpus'] = cpus_schema['default']
    # add a "system-owned" parameter - connection_file - ensure this value is NOT substituted.
    launch_params['connection_file'] = 'bad_param'

    conn_info, manager = kf.launch('params_kspec/params_kspec', launch_params=launch_params)
    assert isinstance(manager, KernelManager)

    # confirm argv substitutions
    assert manager.kernel.args[1] == '-f'
    assert manager.kernel.args[2] == '-n 8'
    assert manager.kernel.args[3] != 'bad_param'

    # this actually starts a tail -f command, so let's make sure it's terminated
    manager.kill()

def test_load_config(self):
# create fake application
Expand Down
6 changes: 4 additions & 2 deletions jupyter_kernel_mgmt/tests/test_kernelspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Distributed under the terms of the Modified BSD License.

import io
import copy
import json
from logging import StreamHandler
import os
Expand All @@ -27,16 +28,17 @@

# Default kernelspec content written by install_sample_kernel().
sample_kernel_json = {
    'argv': ['cat', '{connection_file}'],
    'display_name': 'Test kernel',
    'metadata': {},
}


def install_sample_kernel(kernels_dir, kernel_name='sample', kernel_file='kernel.json',
                          kernel_json=sample_kernel_json):
    """install a sample kernel in a kernels directory

    Writes *kernel_json* to ``<kernels_dir>/<kernel_name>/<kernel_file>``
    and returns the created kernel directory.
    """
    target_dir = pjoin(kernels_dir, kernel_name)
    os.makedirs(target_dir)
    with open(pjoin(target_dir, kernel_file), 'w') as f:
        json.dump(kernel_json, f)
    return target_dir


Expand Down