From 5181475850bc0d3e3cecdf8f8ea213edc35bd3dd Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Thu, 14 Sep 2023 11:26:46 -0700 Subject: [PATCH 1/4] Use built-in .pgpass file. The old code generated a .pgpass file on the fly, an approach that is both fiddly and brittle. It's more robust to let the Kubernetes framework provide the file, but the file-generation code will break if a file is already present. --- python/activator/activator.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/python/activator/activator.py b/python/activator/activator.py index d1ed8e67..bd768ad8 100644 --- a/python/activator/activator.py +++ b/python/activator/activator.py @@ -37,7 +37,6 @@ from .config import PipelinesConfig from .logger import setup_usdf_logger -from .make_pgpass import make_pgpass from .middleware_interface import get_central_butler, make_local_repo, MiddlewareInterface from .raw import ( get_prefix_from_snap, @@ -80,10 +79,6 @@ try: - # Write PostgreSQL credentials. - # This MUST be done before creating a Butler or accessing the APDB. - make_pgpass() - app = Flask(__name__) consumer = kafka.Consumer({ From b3c45016a6211b03b2d12a18c0a9b232236c7763 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Tue, 19 Sep 2023 14:31:25 -0700 Subject: [PATCH 2/4] Remove make_pgpass.py. The .pgpass file, and the credentials within it, are now managed directly by Kubernetes. This change also eliminates the need to read many of these credentials as individual environment variables. --- doc/playbook.rst | 5 --- python/activator/make_pgpass.py | 61 --------------------------------- 2 files changed, 66 deletions(-) delete mode 100755 python/activator/make_pgpass.py diff --git a/doc/playbook.rst b/doc/playbook.rst index 9cc99420..859ee4a9 100644 --- a/doc/playbook.rst +++ b/doc/playbook.rst @@ -234,11 +234,7 @@ It includes the following required environment variables: * CALIB_REPO: URI to repo containing calibrations (and templates) * LSST_DISABLE_BUCKET_VALIDATION: set this so to disable validation of S3 bucket names, allowing Ceph multi-tenant colon-separated names to be used. * IP_APDB: IP address or hostname and port of the APDB (see `Databases`_, below) -* IP_REGISTRY: IP address or hostname and port of the registry database (see `Databases`_) * DB_APDB: PostgreSQL database name for the APDB -* PSQL_APDB_PASS: secret containing the password for ``USER_APDB`` (see below) -* DB_REGISTRY: PostgreSQL database name for the registry database -* PSQL_REGISTRY_PASS: secret containing the password for ``USER_REGISTRY`` (see below) * KAFKA_CLUSTER: hostname and port of the Kafka provider The following environment variables are optional: @@ -246,7 +242,6 @@ The following environment variables are optional: * IMAGE_TIMEOUT: timeout in seconds to wait after expected script completion for raw image arrival, default 20 sec. * LOCAL_REPOS: absolute path (in the container) where local repos are created, default ``/tmp``. * USER_APDB: database user for the APDB, default "postgres" -* USER_REGISTRY: database user for the registry database, default "postgres" * NAMESPACE_APDB: the database namespace for the APDB, defaults to the DB's default namespace * SERVICE_LOG_LEVELS: requested logging levels in the format of `Middleware's --log-level argument `_. Default is to log prompt_prototype at DEBUG, other LSST code at INFO, and third-party code at WARNING. diff --git a/python/activator/make_pgpass.py b/python/activator/make_pgpass.py deleted file mode 100755 index 301c7edd..00000000 --- a/python/activator/make_pgpass.py +++ /dev/null @@ -1,61 +0,0 @@ -# This file is part of prompt_prototype. -# -# Developed for the LSST Data Management System. -# This product includes software developed by the LSST Project -# (https://www.lsst.org). -# See the COPYRIGHT file at the top-level directory of this distribution -# for details of code ownership. -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -__all__ = ["make_pgpass"] - - -import os -import stat - - -PSQL_USER = "postgres" - - -def make_pgpass(): - """Create a .pgpass file that contains the service's database credentials. - - This function is designed to work in the Prompt Processing Service's docker - container, and no other environment. - - Raises - ------ - RuntimeError - Raised if the database passwords cannot be found. - """ - try: - ip_apdb = os.environ["IP_APDB"] - db_apdb = os.environ["DB_APDB"] - user_apdb = os.environ.get("USER_APDB", PSQL_USER) - pass_apdb = os.environ["PSQL_APDB_PASS"] - ip_registry = os.environ["IP_REGISTRY"] - db_registry = os.environ["DB_REGISTRY"] - user_registry = os.environ.get("USER_REGISTRY", PSQL_USER) - pass_registry = os.environ["PSQL_REGISTRY_PASS"] - except KeyError as e: - raise RuntimeError("Addresses and passwords have not been configured") from e - - filename = os.path.join(os.environ["HOME"], ".pgpass") - with open(filename, mode="wt") as file: - file.write(f"{ip_apdb}:{db_apdb}:{user_apdb}:{pass_apdb}\n") - file.write(f"{ip_registry}:{db_registry}:{user_registry}:{pass_registry}\n") - # Only user may access the file - os.chmod(filename, stat.S_IRUSR) From c39ffca7252c0093c81f19a1483e2f80f4946a4e Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Tue, 19 Sep 2023 16:21:56 -0700 Subject: [PATCH 3/4] Merge APDB environment variables into a single URL_APDB. The URL_APDB variable directly identifies the SQL database used as the APDB. The only additional information that needs to be provided is the namespace/schema, which is handled separately by the dax.apdb.ApdbSql interface. --- doc/playbook.rst | 4 +--- python/activator/middleware_interface.py | 7 +------ tests/test_middleware_interface.py | 8 ++------ 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/doc/playbook.rst b/doc/playbook.rst index 859ee4a9..174d5099 100644 --- a/doc/playbook.rst +++ b/doc/playbook.rst @@ -233,15 +233,13 @@ It includes the following required environment variables: * IMAGE_BUCKET: bucket containing raw images * CALIB_REPO: URI to repo containing calibrations (and templates) * LSST_DISABLE_BUCKET_VALIDATION: set this so to disable validation of S3 bucket names, allowing Ceph multi-tenant colon-separated names to be used. -* IP_APDB: IP address or hostname and port of the APDB (see `Databases`_, below) -* DB_APDB: PostgreSQL database name for the APDB +* URL_APDB: the URL to the APDB, in any form recognized by SQLAlchemy * KAFKA_CLUSTER: hostname and port of the Kafka provider The following environment variables are optional: * IMAGE_TIMEOUT: timeout in seconds to wait after expected script completion for raw image arrival, default 20 sec. * LOCAL_REPOS: absolute path (in the container) where local repos are created, default ``/tmp``. -* USER_APDB: database user for the APDB, default "postgres" * NAMESPACE_APDB: the database namespace for the APDB, defaults to the DB's default namespace * SERVICE_LOG_LEVELS: requested logging levels in the format of `Middleware's --log-level argument `_. Default is to log prompt_prototype at DEBUG, other LSST code at INFO, and third-party code at WARNING. diff --git a/python/activator/middleware_interface.py b/python/activator/middleware_interface.py index a6a36a41..56316c41 100644 --- a/python/activator/middleware_interface.py +++ b/python/activator/middleware_interface.py @@ -179,7 +179,6 @@ class MiddlewareInterface: """ # Class invariants: - # self._apdb_uri is a valid URI that unambiguously identifies the APDB # self.image_host is a valid URI with non-empty path and no query or fragment. # self._download_store is None if and only if self.image_host is a local URI. # self.visit, self.instrument, self.camera, self.skymap, self._deployment @@ -262,11 +261,7 @@ def _get_deployment(self): def _make_apdb_uri(self): """Generate a URI for accessing the APDB. """ - # TODO: merge this code with make_pgpass.py - ip_apdb = os.environ["IP_APDB"] # Also includes port - db_apdb = os.environ["DB_APDB"] - user_apdb = os.environ.get("USER_APDB", "postgres") - return f"postgresql://{user_apdb}@{ip_apdb}/{db_apdb}" + return os.environ["URL_APDB"] def _init_local_butler(self, repo_uri: str, output_collections: list[str], output_run: str): """Prepare the local butler to ingest into and process from. diff --git a/tests/test_middleware_interface.py b/tests/test_middleware_interface.py index 8c1aea77..96112bdd 100644 --- a/tests/test_middleware_interface.py +++ b/tests/test_middleware_interface.py @@ -117,9 +117,7 @@ class MiddlewareInterfaceTest(unittest.TestCase): @classmethod def setUpClass(cls): cls.env_patcher = unittest.mock.patch.dict(os.environ, - {"IP_APDB": "localhost", - "DB_APDB": "postgres", - "USER_APDB": "postgres", + {"URL_APDB": "postgresql://localhost/postgres", "K_REVISION": "prompt-proto-service-042", }) cls.env_patcher.start() @@ -805,9 +803,7 @@ def setUpClass(cls): super().setUpClass() cls.env_patcher = unittest.mock.patch.dict(os.environ, - {"IP_APDB": "localhost", - "DB_APDB": "postgres", - "USER_APDB": "postgres", + {"URL_APDB": "postgresql://localhost/postgres", "K_REVISION": "prompt-proto-service-042", }) cls.env_patcher.start() From ae0e77231b60ca7eed44c9cea40f81fbb49593e0 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Wed, 20 Sep 2023 13:57:05 -0700 Subject: [PATCH 4/4] Rewrite Playbook to refer to Argo CD. The SLACLab repository no longer has deployment scripts for the prompt processing service. --- doc/playbook.rst | 55 +++++++++++++++++++++--------------------------- 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/doc/playbook.rst b/doc/playbook.rst index 174d5099..4245a3e5 100644 --- a/doc/playbook.rst +++ b/doc/playbook.rst @@ -214,37 +214,30 @@ The service can be controlled with ``kubectl`` from ``rubin-devl``. You must first `get credentials for the development cluster `_ on the web; ignore the installation instructions and copy the commands from the second box. Credentials must be renewed if you get a "cannot fetch token: 400 Bad Request" error when running ``kubectl``. -Each time the service container is updated, a new revision of the service should be edited and deployed. -(Continuous deployment has not yet been set up.) -To create the service, clone the `slaclab/rubin-usdf-prompt-processing`_ repo and navigate to the ``kubernetes/overlays/dev/prompt-proto-service`` directory. -Edit ``prompt-proto-service.yaml`` to point to the new service container (likely a ticket branch instead of ``latest``), then run ``make apply`` *from the same directory*. -See the readme in that directory for more details. - -.. _slaclab/rubin-usdf-prompt-processing: https://github.com/slaclab/rubin-usdf-prompt-processing/ - -All service configuration is in ``prompt-proto-service.yaml``. -It includes the following required environment variables: - -* RUBIN_INSTRUMENT: the "short" instrument name -* PIPELINES_CONFIG: a machine-readable string describing which pipeline(s) should be run for which visits. - Notation is complex and still in flux; see :file:`../python/activator/config.py` for current documentation and examples. -* PUBSUB_VERIFICATION_TOKEN: choose an arbitrary string matching the Pub/Sub endpoint URL below. - This variable is currently unused and may be removed in the future. -* IMAGE_BUCKET: bucket containing raw images -* CALIB_REPO: URI to repo containing calibrations (and templates) -* LSST_DISABLE_BUCKET_VALIDATION: set this so to disable validation of S3 bucket names, allowing Ceph multi-tenant colon-separated names to be used. -* URL_APDB: the URL to the APDB, in any form recognized by SQLAlchemy -* KAFKA_CLUSTER: hostname and port of the Kafka provider - -The following environment variables are optional: - -* IMAGE_TIMEOUT: timeout in seconds to wait after expected script completion for raw image arrival, default 20 sec. -* LOCAL_REPOS: absolute path (in the container) where local repos are created, default ``/tmp``. -* NAMESPACE_APDB: the database namespace for the APDB, defaults to the DB's default namespace -* SERVICE_LOG_LEVELS: requested logging levels in the format of `Middleware's --log-level argument `_. - Default is to log prompt_prototype at DEBUG, other LSST code at INFO, and third-party code at WARNING. - -Secrets are configured through the makefile and ``kustomization.yaml``. +The service container deployment is managed using `Argo CD and Phalanx `_. +See the `Phalanx`_ docs for information on working with Phalanx in general (including special developer environment setup). + +There are two different ways to deploy a development release of the service: + +* If you will not be making permanent changes to the Phalanx config, go to the Argo UI, select the specific ``prompt-proto-service-`` service, then select the first "svc" node. + Scroll down to the live manifest, click "edit", then update the ``template.spec.containers.image`` key to point to the new service container (likely a ticket branch instead of ``latest``). + The service will immediately redeploy with the new image. + To force an update of the container, edit ``template.metadata.annotations.revision``. + *Do not* click "SYNC" on the main screen, as that will undo all your edits. +* If you will be making permanent changes of any kind, the above procedure would force you to re-enter your changes with each update of the ``phalanx`` branch. + Instead, clone the `lsst-sqre/phalanx`_ repo and navigate to the ``applications/prompt-proto-service-`` directory. + Edit ``values-usdfdev-prompt-processing.yaml`` to point to the new service container (likely a ticket branch instead of ``latest``) and push the branch. + You do not need to create a PR. + Then, in the Argo UI, follow the instructions in `the Phalanx docs `_. + To force a container update without a corresponding ``phalanx`` update, you need to edit ``template.metadata.annotations.revision`` as described above -- `restarting a deployment `_ that's part of a service does not check for a newer container, even with Always pull policy. + +.. _Phalanx: https://phalanx.lsst.io/developers/ +.. _lsst-sqre/phalanx: https://github.com/lsst-sqre/phalanx/ + +The service configuration is in each instrument's ``values.yaml`` (for settings shared between development and production) and ``values-usdfdev-prompt-processing.yaml`` (for development-only settings). +``values.yaml`` and ``README.md`` provide documentation for all settings. +The actual Kubernetes config (and the implementation of new config settings or secrets) is in ``charts/prompt-proto-service/templates/prompt-proto-service.yaml``. +This file fully supports the Go template syntax. A few useful commands for managing the service: