Skip to content

Commit

Permalink
Merge pull request from GHSA-4qhp-652w-c22x
Browse files Browse the repository at this point in the history
* Add auth decorators, add traversal guard

* Fix mocks resolving most test failures;

`test_listeners` still fails not sure how to fix it

* Address review comments

* add tests for (un)authn'd REST and WebSocket handlers

* Restore old import for 1.x compat, remove a log

* handle advertised jupyter-server 1.x version

* Lint (isort any mypy)

* More tests for paths

---------

Co-authored-by: Nicholas Bollweg <nick.bollweg@gmail.com>
  • Loading branch information
krassowski and bollwyvl committed Jan 17, 2024
1 parent 1f1ddca commit 4ad12f2
Show file tree
Hide file tree
Showing 10 changed files with 260 additions and 14 deletions.
61 changes: 56 additions & 5 deletions python_packages/jupyter_lsp/jupyter_lsp/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,32 @@
"""
from typing import Optional, Text

from jupyter_core.utils import ensure_async
from jupyter_server.base.handlers import APIHandler
from jupyter_server.base.zmqhandlers import WebSocketHandler, WebSocketMixin
from jupyter_server.utils import url_path_join as ujoin
from tornado import web
from tornado.websocket import WebSocketHandler

try:
from jupyter_server.auth.decorator import authorized
except ImportError:

def authorized(method): # type: ignore
"""A no-op fallback for `jupyter_server 1.x`"""
return method


try:
from jupyter_server.base.websocket import WebSocketMixin
except ImportError:
from jupyter_server.base.zmqhandlers import WebSocketMixin

from .manager import LanguageServerManager
from .schema import SERVERS_RESPONSE
from .specs.utils import censored_spec

AUTH_RESOURCE = "lsp"


class BaseHandler(APIHandler):
manager = None # type: LanguageServerManager
Expand All @@ -21,10 +39,43 @@ def initialize(self, manager: LanguageServerManager):
class LanguageServerWebSocketHandler( # type: ignore
WebSocketMixin, WebSocketHandler, BaseHandler
):
"""Setup tornado websocket to route to language server sessions"""
"""Setup tornado websocket to route to language server sessions.
The logic of `get` and `pre_get` methods is derived from jupyter-server ws handlers,
and should be kept in sync to follow best practice established by upstream; see:
https://github.com/jupyter-server/jupyter_server/blob/v2.12.5/jupyter_server/services/kernels/websocket.py#L36
"""

auth_resource = AUTH_RESOURCE

language_server: Optional[Text] = None

async def pre_get(self):
"""Handle a pre_get."""
# authenticate first
# authenticate the request before opening the websocket
user = self.current_user
if user is None:
self.log.warning("Couldn't authenticate WebSocket connection")
raise web.HTTPError(403)

if not hasattr(self, "authorizer"):
return

# authorize the user.
is_authorized = await ensure_async(
self.authorizer.is_authorized(self, user, "execute", AUTH_RESOURCE)
)
if not is_authorized:
raise web.HTTPError(403)

async def get(self, *args, **kwargs):
"""Get an event socket."""
await self.pre_get()
res = super().get(*args, **kwargs)
if res is not None:
await res

async def open(self, language_server):
await self.manager.ready()
self.language_server = language_server
Expand All @@ -47,11 +98,11 @@ class LanguageServersHandler(BaseHandler):
Response should conform to schema in schema/servers.schema.json
"""

auth_resource = AUTH_RESOURCE
validator = SERVERS_RESPONSE

def initialize(self, *args, **kwargs):
super().initialize(*args, **kwargs)

@web.authenticated
@authorized
async def get(self):
"""finish with the JSON representations of the sessions"""
await self.manager.ready()
Expand Down
14 changes: 12 additions & 2 deletions python_packages/jupyter_lsp/jupyter_lsp/paths.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import pathlib
import re
from pathlib import Path
from typing import Union
from urllib.parse import unquote, urlparse

RE_PATH_ANCHOR = r"^file://([^/]+|/[A-Z]:)"
Expand All @@ -12,7 +13,7 @@ def normalized_uri(root_dir):
Special care must be taken around windows paths: the canonical form of
windows drives and UNC paths is lower case
"""
root_uri = pathlib.Path(root_dir).expanduser().resolve().as_uri()
root_uri = Path(root_dir).expanduser().resolve().as_uri()
root_uri = re.sub(
RE_PATH_ANCHOR, lambda m: "file://{}".format(m.group(1).lower()), root_uri
)
Expand All @@ -33,3 +34,12 @@ def file_uri_to_path(file_uri):
else:
result = file_uri_path_unquoted # pragma: no cover
return result


def is_relative(root: Union[str, Path], path: Union[str, Path]) -> bool:
"""Return if path is relative to root"""
try:
Path(path).resolve().relative_to(Path(root).resolve())
return True
except ValueError:
return False
10 changes: 9 additions & 1 deletion python_packages/jupyter_lsp/jupyter_lsp/serverextension.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path

import traitlets
from tornado import ioloop

from .handlers import add_handlers
from .manager import LanguageServerManager
Expand Down Expand Up @@ -73,4 +74,11 @@ def load_jupyter_server_extension(nbapp):
page_config.update(rootUri=root_uri, virtualDocumentsUri=virtual_documents_uri)

add_handlers(nbapp)
nbapp.io_loop.call_later(0, initialize, nbapp, virtual_documents_uri)

if hasattr(nbapp, "io_loop"):
io_loop = nbapp.io_loop
else:
# handle jupyter_server 1.x
io_loop = ioloop.IOLoop.current()

io_loop.call_later(0, initialize, nbapp, virtual_documents_uri)
5 changes: 4 additions & 1 deletion python_packages/jupyter_lsp/jupyter_lsp/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from jupyter_server.serverapp import ServerApp
from pytest import fixture
from tornado.httpserver import HTTPRequest
from tornado.httputil import HTTPServerRequest
from tornado.queues import Queue
from tornado.web import Application
Expand Down Expand Up @@ -141,9 +142,11 @@ def send_ping(self):

class MockHandler(LanguageServersHandler):
_payload = None
_jupyter_current_user = "foo" # type:ignore[assignment]

def __init__(self):
pass
self.request = HTTPRequest("GET")
self.application = Application()

def finish(self, payload):
self._payload = payload
Expand Down
117 changes: 117 additions & 0 deletions python_packages/jupyter_lsp/jupyter_lsp/tests/test_auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""Integration tests of authorization running under jupyter-server."""
import json
import os
import socket
import subprocess
import time
import uuid
from typing import Generator, Tuple
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

import pytest

from .conftest import KNOWN_SERVERS

LOCALHOST = "127.0.0.1"
REST_ROUTES = ["/lsp/status"]
WS_ROUTES = [f"/lsp/ws/{ls}" for ls in KNOWN_SERVERS]


@pytest.mark.parametrize("route", REST_ROUTES)
def test_auth_rest(route: str, a_server_url_and_token: Tuple[str, str]) -> None:
"""Verify a REST route only provides access to an authenticated user."""
base_url, token = a_server_url_and_token

verify_response(base_url, route)

url = f"{base_url}{route}"

with urlopen(f"{url}?token={token}") as response:
raw_body = response.read().decode("utf-8")

decode_error = None

try:
json.loads(raw_body)
except json.decoder.JSONDecodeError as err:
decode_error = err
assert not decode_error, f"the response for {url} was not JSON"


@pytest.mark.parametrize("route", WS_ROUTES)
def test_auth_websocket(route: str, a_server_url_and_token: Tuple[str, str]) -> None:
"""Verify a WebSocket does not provide access to an unauthenticated user."""
verify_response(a_server_url_and_token[0], route)


@pytest.fixture(scope="module")
def a_server_url_and_token(
tmp_path_factory: pytest.TempPathFactory,
) -> Generator[Tuple[str, str], None, None]:
"""Start a temporary, isolated jupyter server."""
token = str(uuid.uuid4())
port = get_unused_port()

root_dir = tmp_path_factory.mktemp("root_dir")
home = tmp_path_factory.mktemp("home")
server_conf = home / "etc/jupyter/jupyter_config.json"

server_conf.parent.mkdir(parents=True)
extensions = {"jupyter_lsp": True, "jupyterlab": False, "nbclassic": False}
app = {"jpserver_extensions": extensions, "token": token}
config_data = {"ServerApp": app, "IdentityProvider": {"token": token}}

server_conf.write_text(json.dumps(config_data), encoding="utf-8")
args = ["jupyter-server", f"--port={port}", "--no-browser"]
env = dict(os.environ)
env.update(
HOME=str(home),
USERPROFILE=str(home),
JUPYTER_CONFIG_DIR=str(server_conf.parent),
)
proc = subprocess.Popen(args, cwd=str(root_dir), env=env, stdin=subprocess.PIPE)
url = f"http://{LOCALHOST}:{port}"
retries = 20
while retries:
time.sleep(1)
try:
urlopen(f"{url}/favicon.ico")
break
except URLError:
print(f"[{retries} / 20] ...", flush=True)
retries -= 1
continue
yield url, token
proc.terminate()
proc.communicate(b"y\n")
proc.wait()
assert proc.returncode is not None, "jupyter-server probably still running"


def get_unused_port():
"""Get an unused port by trying to listen to any random port.
Probably could introduce race conditions if inside a tight loop.
"""
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.bind((LOCALHOST, 0))
sock.listen(1)
port = sock.getsockname()[1]
sock.close()
return port


def verify_response(base_url: str, route: str, expect: int = 403):
"""Verify that a response returns the expected error."""
error = None
body = None
url = f"{base_url}{route}"
try:
with urlopen(url) as res:
body = res.read()
except HTTPError as err:
error = err
assert error, f"no HTTP error for {url}: {body}"
http_code = error.getcode()
assert http_code == expect, f"{url} HTTP code was unexpected: {body}"
41 changes: 40 additions & 1 deletion python_packages/jupyter_lsp/jupyter_lsp/tests/test_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

from ..paths import file_uri_to_path, normalized_uri
from ..paths import file_uri_to_path, is_relative, normalized_uri

WIN = platform.system() == "Windows"
HOME = pathlib.Path("~").expanduser()
Expand All @@ -17,6 +17,45 @@ def test_normalize_posix_path_home(root_dir, expected_root_uri): # pragma: no c
assert normalized_uri(root_dir) == expected_root_uri


@pytest.mark.skipif(WIN, reason="can't test POSIX paths on Windows")
@pytest.mark.parametrize(
"root, path",
[["~", "~/a"], ["~", "~/a/../b/"], ["/", "/"], ["/a", "/a/b"], ["/a", "/a/b/../c"]],
)
def test_is_relative_ok(root, path):
assert is_relative(root, path)


@pytest.mark.skipif(WIN, reason="can't test POSIX paths on Windows")
@pytest.mark.parametrize(
"root, path",
[
["~", "~/.."],
["~", "/"],
["/a", "/"],
["/a/b", "/a"],
["/a/b", "/a/b/.."],
["/a", "/a/../b"],
["/a", "a//"],
],
)
def test_is_relative_not_ok(root, path):
assert not is_relative(root, path)


@pytest.mark.skipif(not WIN, reason="can't test Windows paths on POSIX")
@pytest.mark.parametrize(
"root, path",
[
["c:\\Users\\user1", "c:\\Users\\"],
["c:\\Users\\user1", "d:\\"],
["c:\\Users", "c:\\Users\\.."],
],
)
def test_is_relative_not_ok_win(root, path):
assert not is_relative(root, path)


@pytest.mark.skipif(PY35, reason="can't test non-existent paths on py35")
@pytest.mark.skipif(WIN, reason="can't test POSIX paths on Windows")
@pytest.mark.parametrize(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,21 @@ def run_shadow(message):
)


@pytest.mark.asyncio
async def test_shadow_traversal(shadow_path, manager):
file_beyond_shadow_root_uri = (Path(shadow_path) / ".." / "test.py").as_uri()

shadow = setup_shadow_filesystem(Path(shadow_path).as_uri())

def run_shadow(message):
return shadow("client", message, "python-lsp-server", manager)

with pytest.raises(
ShadowFilesystemError, match="is not relative to shadow filesystem root"
):
await run_shadow(did_open(file_beyond_shadow_root_uri, "content"))


@pytest.fixture
def forbidden_shadow_path(tmpdir):
path = Path(tmpdir) / "no_permission_dir"
Expand Down Expand Up @@ -238,7 +253,7 @@ def send_change():
# no message should be emitted during the first two attempts
assert caplog.text == ""

# a wargning should be emitted on third failure
# a warning should be emitted on third failure
with caplog.at_level(logging.WARNING):
assert await send_change() is None
assert "initialization of shadow filesystem failed three times" in caplog.text
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from tornado.gen import convert_yielded

from .manager import lsp_message_listener
from .paths import file_uri_to_path
from .paths import file_uri_to_path, is_relative
from .types import LanguageServerManagerAPI

# TODO: make configurable
Expand Down Expand Up @@ -171,6 +171,11 @@ async def shadow_virtual_documents(scope, message, language_server, manager):
initialized = True

path = file_uri_to_path(uri)
if not is_relative(shadow_filesystem, path):
raise ShadowFilesystemError(
f"Path {path} is not relative to shadow filesystem root"
)

editable_file = EditableFile(path)

await editable_file.read()
Expand Down
1 change: 0 additions & 1 deletion requirements/utest.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,3 @@ pytest-cov
pytest-flake8
pytest-runner
python-lsp-server
pluggy<1.0,>=0.12 # Python 3.5 CI Travis, may need update with new pytest releases, see issue 259

0 comments on commit 4ad12f2

Please sign in to comment.