Skip to content

Commit

Permalink
Merge pull request #943 from lsst/tickets/DM-42567
Browse files Browse the repository at this point in the history
DM-42567: Fix missing auth headers in RemoteButler
  • Loading branch information
dhirving committed Jan 19, 2024
2 parents acb4c37 + c62735a commit e027dc4
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 49 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,4 @@ USER appuser
EXPOSE 8080

# Run the application.
CMD ["uvicorn", "lsst.daf.butler.remote_butler.server:app", "--host", "0.0.0.0", "--port", "8080"]
CMD ["uvicorn", "lsst.daf.butler.remote_butler.server:create_app", "--host", "0.0.0.0", "--port", "8080"]
10 changes: 5 additions & 5 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def dimensions(self) -> DimensionUniverse:
if cache.dimensions is not None:
return cache.dimensions

response = self._client.get(self._get_url("universe"))
response = self._get("universe")
response.raise_for_status()

config = DimensionConfig.fromString(response.text, format="json")
Expand Down Expand Up @@ -334,7 +334,7 @@ def get_dataset_type(self, name: str) -> DatasetType:
# In future implementation this should directly access the cache
# and only go to the server if the dataset type is not known.
path = f"dataset_type/{name}"
response = self._client.get(self._get_url(path))
response = self._get(path)
if response.status_code != httpx.codes.OK:
content = response.json()
if content["exception"] == "MissingDatasetTypeError":
Expand All @@ -357,7 +357,7 @@ def get_dataset(
}
if datastore_records:
raise ValueError("Datastore records can not yet be returned in client/server butler.")
response = self._client.get(self._get_url(path), params=params)
response = self._get(path, params=params)
response.raise_for_status()
if response.json() is None:
return None
Expand Down Expand Up @@ -608,10 +608,10 @@ def _post(self, path: str, model: BaseModel) -> httpx.Response:
url, content=json, headers=self._headers | {"content-type": "application/json"}
)

def _get(self, path: str) -> httpx.Response:
def _get(self, path: str, params: Mapping[str, str | bool] | None = None) -> httpx.Response:
"""Send a GET request to the Butler server."""
url = self._get_url(path)
return self._client.get(url, headers=self._headers)
return self._client.get(url, headers=self._headers, params=params)

def _parse_model(self, response: httpx.Response, model: Type[_AnyPydanticModel]) -> _AnyPydanticModel:
"""Deserialize a Pydantic model from the body of an HTTP response."""
Expand Down
64 changes: 25 additions & 39 deletions python/lsst/daf/butler/remote_butler/server/_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,58 +27,44 @@

from __future__ import annotations

__all__ = ("app",)
__all__ = ("create_app",)

import logging

from fastapi import FastAPI, Request
from fastapi.middleware.gzip import GZipMiddleware
from fastapi.responses import JSONResponse
from lsst.daf.butler import MissingDatasetTypeError
from safir.metadata import Metadata, get_metadata

from .handlers._external import external_router

_DEFAULT_API_PATH = "/api/butler"
from .handlers._internal import internal_router

log = logging.getLogger(__name__)

app = FastAPI()
app.add_middleware(GZipMiddleware, minimum_size=1000)

# A single instance of the server can serve data from multiple Butler
# repositories. This 'repository' path placeholder is consumed by
# factory_dependency().
repository_placeholder = "{repository}"
app.include_router(external_router, prefix=f"{_DEFAULT_API_PATH}/repo/{repository_placeholder}")


@app.exception_handler(MissingDatasetTypeError)
def missing_dataset_type_exception_handler(request: Request, exc: MissingDatasetTypeError) -> JSONResponse:
# Remove the double quotes around the string form. These confuse
# the JSON serialization when single quotes are in the message.
message = str(exc).strip('"')
return JSONResponse(
status_code=404,
content={"detail": message, "exception": "MissingDatasetTypeError"},
)
def create_app() -> FastAPI:
"""Create a Butler server FastAPI application."""
app = FastAPI()
app.add_middleware(GZipMiddleware, minimum_size=1000)

# A single instance of the server can serve data from multiple Butler
# repositories. This 'repository' path placeholder is consumed by
# factory_dependency().
repository_placeholder = "{repository}"
default_api_path = "/api/butler"
app.include_router(external_router, prefix=f"{default_api_path}/repo/{repository_placeholder}")
app.include_router(internal_router)

@app.get(
"/",
description=(
"Return metadata about the running application. Can also be used as"
" a health check. This route is not exposed outside the cluster and"
" therefore cannot be used by external clients."
),
include_in_schema=False,
response_model=Metadata,
response_model_exclude_none=True,
summary="Application metadata",
)
async def get_index() -> Metadata:
"""GET ``/`` (the app's internal root).
@app.exception_handler(MissingDatasetTypeError)
def missing_dataset_type_exception_handler(
request: Request, exc: MissingDatasetTypeError
) -> JSONResponse:
# Remove the double quotes around the string form. These confuse
# the JSON serialization when single quotes are in the message.
message = str(exc).strip('"')
return JSONResponse(
status_code=404,
content={"detail": message, "exception": "MissingDatasetTypeError"},
)

By convention, this endpoint returns only the application's metadata.
"""
return get_metadata(package_name="lsst.daf.butler", application_name="butler")
return app
51 changes: 51 additions & 0 deletions python/lsst/daf/butler/remote_butler/server/handlers/_internal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from fastapi import APIRouter
from safir.metadata import Metadata, get_metadata

internal_router = APIRouter()


@internal_router.get(
"/",
description=(
"Return metadata about the running application. Can also be used as"
" a health check. This route is not exposed outside the cluster and"
" therefore cannot be used by external clients."
),
include_in_schema=False,
response_model=Metadata,
response_model_exclude_none=True,
summary="Application metadata",
)
async def get_index() -> Metadata:
"""GET ``/`` (the app's internal root).
By convention, this endpoint returns only the application's metadata.
"""
return get_metadata(package_name="lsst.daf.butler", application_name="butler")
74 changes: 74 additions & 0 deletions python/lsst/daf/butler/tests/server_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


import re
from typing import Any

from fastapi import FastAPI, HTTPException, Request, Response


def add_auth_header_check_middleware(app: FastAPI) -> None:
"""Add a middleware to a FastAPI app to check that Gafaelfawr
authentication headers are present.
This is only suitable for testing -- in a real deployment,
GafaelfawrIngress will handle these headers and convert them to a different
format.
Parameters
----------
app : `FastAPI`
The app the middleware will be added to.
"""

@app.middleware("http")
async def check_auth_headers(request: Request, call_next: Any) -> Response:
if _is_authenticated_endpoint(request.url.path):
header = request.headers.get("authorization")
if header is None:
raise HTTPException(status_code=401, detail="Authorization header is missing")
if not re.match(r"^Bearer \S+", header):
raise HTTPException(
status_code=401, detail=f"Authorization header not in expected format: {header}"
)

return await call_next(request)


def _is_authenticated_endpoint(path: str) -> bool:
"""Return True if the specified path requires authentication in the real
server deployment.
"""
if path == "/":
return False
if path.endswith("/butler.yaml"):
return False
if path.endswith("/butler.json"):
return False

return True
20 changes: 16 additions & 4 deletions tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,18 @@

try:
# Failing to import any of these should disable the tests.
from fastapi import HTTPException
from fastapi.testclient import TestClient
from lsst.daf.butler.remote_butler import RemoteButler, RemoteButlerFactory
from lsst.daf.butler.remote_butler._authentication import _EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY
from lsst.daf.butler.remote_butler.server import app
from lsst.daf.butler.remote_butler.server import create_app
from lsst.daf.butler.remote_butler.server._dependencies import butler_factory_dependency
from lsst.daf.butler.tests.server_utils import add_auth_header_check_middleware
from lsst.resources.s3utils import clean_test_environment_for_s3, getS3Client
from moto import mock_s3
except ImportError:
TestClient = None
app = None
create_app = None

from unittest.mock import patch

Expand Down Expand Up @@ -86,7 +88,7 @@ def _make_remote_butler(http_client, *, collections: str | None = None):
return factory.create_butler_for_access_token("fake-access-token", butler_options=options)


@unittest.skipIf(TestClient is None or app is None, "FastAPI not installed.")
@unittest.skipIf(TestClient is None or create_app is None, "FastAPI not installed.")
class ButlerClientServerTestCase(unittest.TestCase):
"""Test for Butler client/server."""

Expand Down Expand Up @@ -119,7 +121,9 @@ def setUpClass(cls):
# Override the server's Butler initialization to point at our test repo
server_butler_factory = LabeledButlerFactory({TEST_REPOSITORY_NAME: cls.root})

app = create_app()
app.dependency_overrides[butler_factory_dependency] = lambda: server_butler_factory
add_auth_header_check_middleware(app)

# Set up the RemoteButler that will connect to the server
cls.client = _make_test_client(app)
Expand All @@ -145,7 +149,6 @@ def setUpClass(cls):

@classmethod
def tearDownClass(cls):
del app.dependency_overrides[butler_factory_dependency]
removeTestTempDir(cls.root)

def test_health_check(self):
Expand Down Expand Up @@ -384,6 +387,15 @@ def check_uris(uris: DatasetRefURIs):
componentUris = self.butler.getURIs(componentRef)
check_uris(componentUris)

def test_auth_check(self):
# This is checking that the unit-test middleware for validating the
# authentication headers is working. It doesn't test actual server
# functionality -- in a real deployment, the authentication headers are
# handled by GafaelfawrIngress, not our app.
with self.assertRaises(HTTPException) as cm:
self.client.get("/v1/dataset_type/int")
self.assertEqual(cm.exception.status_code, 401)


def _create_corrupted_dataset(repo: MetricTestRepo) -> DatasetRef:
run = "corrupted-run"
Expand Down

0 comments on commit e027dc4

Please sign in to comment.