General cleanup take #2 #30

Merged: 5 commits merged on May 11, 2017
Changes from 3 commits
42 changes: 24 additions & 18 deletions spylon_kernel/scala_interpreter.py
@@ -24,8 +24,7 @@
DEFAULT_APPLICATION_NAME = "spylon-kernel"


def init_spark(conf: spylon.spark.SparkConfiguration=None,
application_name: str=None):
def init_spark(conf=None, application_name=None):
"""Initializes a SparkSession

Parameters
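
For reference, both arguments are now optional, so a minimal call can rely entirely on the defaults (a sketch; the application name is illustrative):

    spark_session, spark_jvm_helpers = init_spark(application_name="my-app")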
@@ -130,17 +129,18 @@ def initialize_scala_interpreter():

Returns
-------
SparkInterpreter
ScalaInterpreter
"""
# Initialize Spark first if it isn't already
spark_session, spark_jvm_helpers = init_spark()

#
# Get handy JVM references
jvm = spark_session._jvm
jconf = spark_session._jsc.getConf()
io = jvm.java.io

#
# Build a print writer that'll be used to get results from the
# Scala REPL
bytes_out = jvm.org.apache.commons.io.output.ByteArrayOutputStream()
jprint_writer = io.PrintWriter(bytes_out, True)

@@ -196,7 +196,7 @@ def initialize_scala_interpreter():
# Clear the print writer stream
bytes_out.reset()

return SparkInterpreter(jvm, intp, bytes_out)
return ScalaInterpreter(jvm, intp, bytes_out)


def _scala_seq_to_py(jseq):
@@ -223,7 +223,7 @@ def __init__(self, scala_message, *args, **kwargs):
self.scala_message = scala_message


class SparkInterpreter(object):
class ScalaInterpreter(object):
"""Wrapper for a Scala interpreter.

Notes
@@ -282,7 +282,7 @@ def __init__(self, jvm, jimain, jbyteout, loop: Union[None, asyncio.AbstractEven
self._stderr_handlers = []
self._initialize_stdout_err(tempdir)

def register_stdout_handler(self, handler: Callable[[Any], None]):
def register_stdout_handler(self, handler):
"""Registers a handler for the Scala stdout stream.

Parameters
@@ -292,7 +292,7 @@ def register_stdout_handler(self, handler: Callable[[Any], None]):
"""
self._stdout_handlers.append(handler)
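
For illustration, wiring handlers into the registration API above might look like this (a minimal sketch; the lambdas are illustrative and each handler receives one line of Scala output):

    interp = get_scala_interpreter()
    interp.register_stdout_handler(lambda line: print(line, end=""))
    interp.register_stderr_handler(lambda line: print("[stderr]", line, end=""))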

def register_stderr_handler(self, handler: Callable[[Any], None]):
def register_stderr_handler(self, handler):
"""Registers a handler for the Scala stderr stream.

Parameters
@@ -328,7 +328,7 @@ def _initialize_stdout_err(self, tempdir):
self.loop.create_task(self._poll_file(stdout_file, self.handle_stdout))
self.loop.create_task(self._poll_file(stderr_file, self.handle_stderr))

def handle_stdout(self, line) -> None:
def handle_stdout(self, line):
"""Passes a line of Scala stdout to registered handlers.

Parameters
@@ -339,7 +339,7 @@ def handle_stdout(self, line) -> None:
for handler in self._stdout_handlers:
handler(line)

def handle_stderr(self, line) -> None:
def handle_stderr(self, line):
"""Passes a line of Scala stderr to registered handlers.

Parameters
@@ -350,7 +350,7 @@ def handle_stderr(self, line) -> None:
for handler in self._stderr_handlers:
handler(line)

async def _poll_file(self, filename: str, fn: Callable[[Any], None]):
async def _poll_file(self, filename, fn):
"""Busy-polls a file for lines of text and passes them to
the provided callback function when available.

@@ -370,7 +370,7 @@ async def _poll_file(self, filename: str, fn: Callable[[Any], None]):
else:
await asyncio.sleep(0.01, loop=self.loop)

def _interpret_sync(self, code: str, synthetic=False):
def _interpret_sync(self, code, synthetic=False):
"""Synchronously interprets a Block of scala code and returns the
string output from the Scala REPL.

@@ -412,7 +412,7 @@ def _interpret_sync(self, code: str, synthetic=False):
finally:
self.jbyteout.reset()

async def _interpret_async(self, code: str, future: Future):
async def _interpret_async(self, code, future):
"""Asynchronously interprets a block of Scala code and sets the
output or exception as the result of the future.

@@ -429,7 +429,7 @@ async def _interpret_async(self, code: str, future: Future):
except Exception as e:
future.set_exception(e)

def interpret(self, code: str):
def interpret(self, code):
"""Interprets a block of Scala code.

Follow this with a call to `last_result` to retrieve the result as a
@@ -470,7 +470,7 @@ def last_result(self):
res = lr.lineRep().call("$result", spark_state.spark_jvm_helpers.to_scala_list([]))
return res
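
A usage sketch of the interpret/last_result pair above (the Scala snippet is illustrative; the value comes back through py4j):

    interp = get_scala_interpreter()
    interp.interpret("val answer = 21 * 2")
    print(interp.last_result())  # Python view of the Scala `answer` value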

def bind(self, name: str, value: Any, jtyp: str="Any"):
def bind(self, name, value, jtyp="Any"):
"""Set a variable in the Scala REPL to a Python valued type.

Parameters
@@ -499,12 +499,18 @@ def bind(self, name: str, value: Any, jtyp: str="Any"):

@property
def jcompleter(self):
"""Scala code completer.

Returns
-------
scala.tools.nsc.interpreter.PresentationCompilerCompleter

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this looks almost fully qualified. Should this be: py4j.java_gateway.JVMView. scala.tools.nsc.interpreter.PresentationCompilerCompleter ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

scala.tools.nsc.interpreter.PresentationCompilerCompleter is the Scala type. py4j.java_gateway.JVMView is a made up namespace which allows Python code to reference them I believe. There is no real Python module+type py4j.java_gateway.JVMView. scala.tools.nsc.interpreter.PresentationCompilerCompleter.

What to do, what to do ...

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah alright well fair enough then. Made up object paths, yey

"""
if self._jcompleter is None:
jClass = self.jvm.scala.tools.nsc.interpreter.PresentationCompilerCompleter
self._jcompleter = jClass(self.jimain)
return self._jcompleter

def complete(self, code: str, pos: int) -> List[str]:
def complete(self, code, pos):
"""Performs code completion for a block of Scala code.

Parameters
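
For illustration, the completer above can be driven directly with a cursor position (a sketch; the code fragment and any returned candidates are illustrative):

    interp = get_scala_interpreter()
    fragment = "spark.re"
    suggestions = interp.complete(fragment, len(fragment))  # candidate member names in scope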
@@ -582,7 +588,7 @@ def get_scala_interpreter():

Returns
-------
scala_intp : SparkInterpreter
scala_intp : ScalaInterpreter
"""
global scala_intp
if scala_intp is None:
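Putting the renamed ScalaInterpreter API from this file together, a hedged end-to-end sketch (values are illustrative):

    from spylon_kernel.scala_interpreter import get_scala_interpreter

    interp = get_scala_interpreter()
    interp.bind("n", 10, "Int")  # push a Python int into the REPL as a Scala Int
    interp.interpret("val squares = (1 to n).map(i => i * i)")
    print(interp.last_result())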
136 changes: 117 additions & 19 deletions spylon_kernel/scala_kernel.py
@@ -3,14 +3,17 @@

from metakernel import MetaKernel
from .init_spark_magic import InitSparkMagic
from .scala_interpreter import ScalaException, SparkInterpreter
from .scala_interpreter import ScalaException, ScalaInterpreter
from .scala_magic import ScalaMagic
from ._version import get_versions


class SpylonKernel(MetaKernel):
"""Jupyter kernel that supports code evaluation using the Scala REPL
via py4j.

Currently uses a ScalaMagic instance as a bridge to a `ScalaInterpreter`
to let that functionality remain separate for reuse outside the kernel.
"""
implementation = 'spylon-kernel'
implementation_version = get_versions()['version']
@@ -41,42 +44,96 @@ class SpylonKernel(MetaKernel):
def __init__(self, *args, **kwargs):
self._scalamagic = None
super(SpylonKernel, self).__init__(*args, **kwargs)
# Register the %%scala and %%init_spark magics
# The %%scala one is here only because this class uses it
# to interact with the ScalaInterpreter instance
self.register_magics(ScalaMagic)
self.register_magics(InitSparkMagic)
self._scalamagic = self.line_magics['scala']

@property
def scala_interpreter(self):
"""Gets the `ScalaInterpreter` instance associated with the ScalaMagic
for direct use.

Returns
-------
ScalaInterpreter
"""
# noinspection PyProtectedMember
intp = self._scalamagic._get_scala_interpreter()
assert isinstance(intp, SparkInterpreter)
assert isinstance(intp, ScalaInterpreter)
return intp

def get_usage(self):
"""Gets usage information about the kernel itself.

Part of the MetaKernel API.
"""
return "This is spylon-kernel. It implements a Scala interpreter."

def set_variable(self, name, value):
"""Sets a variable in the kernel language.

Part of the MetaKernel API.

Parameters
----------
name : str
Variable name
value : any
Variable value to set

Notes
-----
Since metakernel calls this to bind the kernel into the remote space, we
don't actually want that to happen. Simplest is just to have this
flag as None initially. Furthermore, the metakernel will attempt to
set things like _i1, _i, _ii, etc. These we don't want in the kernel
for now.
"""
Set a variable in the kernel language.
"""
# Since metakernel calls this to bind kernel into the remote space we
# don't actually want that to happen. Simplest is just to have this
# flag as None initially. Furthermore the metakernel will attempt to
# set things like _i1, _i, _ii etc. These we dont want in the kernel
# for now.
if self._scalamagic and (not name.startswith("_i")):

Review thread on silently skipping _i* variables:
Reviewer: What are your feelings on logging attempts to set _i... variables? I'm generally 👎 on silent behavior changes but also don't have a clue how metakernel stuff works, so 🤷‍♂️
Contributor Author: This will happen after almost every cell execution as MetaKernel will update the FIFO queue of commands and outputs, so the logs will get pretty chatty. We could log at the debug level.

self.scala_interpreter.bind(name, value)
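
If the debug-level logging floated in the thread above were adopted, it could look roughly like this (a sketch, not part of this PR; assumes the kernel's standard self.log logger):

    if self._scalamagic and (not name.startswith("_i")):
        self.scala_interpreter.bind(name, value)
    else:
        self.log.debug("Not binding %r in the Scala REPL", name)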

def get_variable(self, name):
"""
Get a variable from the kernel as a Python-typed value.
"""Get a variable from the kernel as a Python-typed value.

Part of the MetaKernel API.

Parameters
----------
name : str
Scala variable name

Returns
-------
value : any
Scala variable value, transformed to a Python type
"""
if self._scalamagic:
intp = self.scala_interpreter
intp.interpret(name)
return intp.last_result()
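
A round-trip through set_variable/get_variable as implemented above (a sketch; assumes py4j converts the Scala value back to a Python int):

    kernel.set_variable("answer", 42)           # binds `answer` in the Scala REPL
    assert kernel.get_variable("answer") == 42  # reads it back via interpret + last_result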

def do_execute_direct(self, code, silent=False):

Review comment: what is this extra kwarg?

"""Executes code in the kernel language.

Part of the MetaKernel API.

Parameters
----------
code : str
Scala code to execute
silent : bool
Ignored

Returns
-------
any
Result of the execution to be formatted for inclusion in
an `execute_result` or `error` message from the kernel to
frontends
"""
try:
res = self._scalamagic.eval(code.strip(), raw=False)
if res:
@@ -85,14 +142,47 @@ def do_execute_direct(self, code, silent=False):
return self.Error(e.scala_message)

def get_completions(self, info):
"""Gets completions from the kernel based on the provided info.

Part of the MetaKernel API.

Parameters
----------
info : dict
Information returned by `metakernel.parser.Parser.parse_code`
including `code`, `help_pos`, `start`, etc.

Returns
-------
list of str
Possible completions for the code
"""
return self._scalamagic.get_completions(info)

def get_kernel_help_on(self, info, level=0, none_on_fail=False):

Review thread:
Reviewer: another spurious kwarg?
Contributor Author: As discussed in person, will doc how these are all implementing the expected MetaKernel interface.

"""Gets help text for the `info['help_obj']` identifier.

Part of the MetaKernel API.

Parameters
----------
info : dict
Information returned by `metakernel.parser.Parser.parse_code`
including `help_obj`, etc.
level : int
Level of help to request, 0 for basic, 1 for more, etc.
none_on_fail : bool
Ignored

Review thread on the none_on_fail parameter:
Reviewer: doesn't look ignored. It's passed to self._scalamagic.get_help_on().
Contributor Author: Ignored down in _scalamagic.get_help_on. I'll try to figure out from the metakernel source exactly how it's used.


Returns
-------
str
Help text
"""
return self._scalamagic.get_help_on(info, level, none_on_fail)

def do_is_complete(self, code):
"""
Given code as string, returns dictionary with 'status' representing
"""Given code as string, returns a dictionary with 'status' representing
whether code is ready to evaluate. Possible values for status are:

'complete' - ready to evaluate
@@ -103,20 +193,28 @@ def do_is_complete(self, code):
Optionally, if 'status' is 'incomplete', you may indicate
an indentation string.

Example:
Parameters
----------
code : str
Scala code to check for completion

Returns
-------

Review comment: delete blank returns?



return {'status' : 'incomplete',
'indent': ' ' * 4}
Example
-------
return {'status' : 'incomplete', 'indent': ' ' * 4}
"""
# Handle magics and the case where the interpreter is not yet
# instantiated. We don't want to create it just to do completion
# since it will take a while to initialize and appear hung to the user.
if code.startswith(self.magic_prefixes['magic']) or not self._scalamagic._is_complete_ready:
# force requirement to end with an empty line
if code.endswith("\n"):
return {'status': 'complete', 'indent': ''}
else:
return {'status': 'incomplete', 'indent': ''}
# The Scala interpreter can take a while to come alive; only use the
# fancy method when we don't need to lazily instantiate the
# interpreter.
status = self.scala_interpreter.is_complete(code)
# TODO: We can probably do a better job of detecting a good indent
# level here by making use of a code parser such as pygments
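For reference, a frontend-style sketch of do_is_complete in use (the input and follow-up are illustrative):

    reply = kernel.do_is_complete("val xs = Seq(1, 2,")
    if reply["status"] == "incomplete":
        # keep prompting; reply["indent"] suggests the continuation indentation
        print("...", reply["indent"])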