diff --git a/.github/workflows/cluster_tests.yaml b/.github/workflows/cluster_tests.yaml index c2eb218ea..ce2958e07 100644 --- a/.github/workflows/cluster_tests.yaml +++ b/.github/workflows/cluster_tests.yaml @@ -35,6 +35,8 @@ jobs: - name: Run all cluster unit tests tests env: TEST_TOKEN: ${{ secrets.TEST_TOKEN }} + RH_TOKEN: ${{ secrets.TEST_TOKEN }} + RH_USERNAME: ${{ secrets.TEST_USERNAME }} run: pytest -v tests/test_resources/test_cluster.py --level unit - name: Teardown all test clusters diff --git a/.github/workflows/local_den_unit_tests.yaml b/.github/workflows/local_den_unit_tests.yaml index 1c6d77c21..42c8cec8b 100644 --- a/.github/workflows/local_den_unit_tests.yaml +++ b/.github/workflows/local_den_unit_tests.yaml @@ -45,17 +45,6 @@ jobs: - name: Setup Runhouse uses: ./.github/workflows/setup_runhouse - - name: Setup Runhouse Config - uses: ./.github/workflows/setup_rh_config - with: - username: ${{ secrets.TEST_USERNAME }} - token: ${{ secrets.TEST_TOKEN }} - - - name: Update Server URL in Runhouse Config - run: | - echo "api_server_url: http://localhost:8000" >> ~/.rh/config.yaml - - # TODO: pull the latest "prod" tag from ECR - name: Start Den container run: | @@ -65,4 +54,6 @@ jobs: env: TEST_TOKEN: ${{ secrets.TEST_TOKEN }} TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + RH_TOKEN: ${{ secrets.TEST_TOKEN }} + RH_USERNAME: ${{ secrets.TEST_USERNAME }} run: pytest -v --level unit diff --git a/.github/workflows/local_tests.yaml b/.github/workflows/local_tests.yaml index bf09457e0..1087922ee 100644 --- a/.github/workflows/local_tests.yaml +++ b/.github/workflows/local_tests.yaml @@ -36,17 +36,12 @@ jobs: - name: Setup Runhouse uses: ./.github/workflows/setup_runhouse - - name: Setup ~/.rh/config.yaml - uses: ./.github/workflows/setup_rh_config - with: - username: ${{ secrets.CI_ACCOUNT_USERNAME }} - token: ${{ secrets.CI_ACCOUNT_TOKEN }} - api_server_url: ${{ env.API_SERVER_URL }} - - name: pytest -v --level local tests/test_servers/ env: TEST_TOKEN: ${{ secrets.TEST_TOKEN }} TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + RH_TOKEN: ${{ secrets.TEST_TOKEN }} + RH_USERNAME: ${{ secrets.TEST_USERNAME }} run: pytest -v --level local tests/test_servers/ timeout-minutes: 60 @@ -74,17 +69,12 @@ jobs: - name: Setup Runhouse uses: ./.github/workflows/setup_runhouse - - name: Setup ~/.rh/config.yaml - uses: ./.github/workflows/setup_rh_config - with: - username: ${{ secrets.CI_ACCOUNT_USERNAME }} - token: ${{ secrets.CI_ACCOUNT_TOKEN }} - api_server_url: ${{ env.API_SERVER_URL }} - - name: pytest -v --level local -k "not servertest and not secrettest and not moduletest and not functiontest and not envtest" env: TEST_TOKEN: ${{ secrets.TEST_TOKEN }} TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + RH_TOKEN: ${{ secrets.TEST_TOKEN }} + RH_USERNAME: ${{ secrets.TEST_USERNAME }} run: pytest -v --level local -k "not servertest and not secrettest and not moduletest and not functiontest and not envtest" timeout-minutes: 60 @@ -112,17 +102,12 @@ jobs: - name: Setup Runhouse uses: ./.github/workflows/setup_runhouse - - name: Setup ~/.rh/config.yaml - uses: ./.github/workflows/setup_rh_config - with: - username: ${{ secrets.CI_ACCOUNT_USERNAME }} - token: ${{ secrets.CI_ACCOUNT_TOKEN }} - api_server_url: ${{ env.API_SERVER_URL }} - - name: pytest -v --level local -k "secrettest" env: TEST_TOKEN: ${{ secrets.TEST_TOKEN }} TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + RH_TOKEN: ${{ secrets.TEST_TOKEN }} + RH_USERNAME: ${{ secrets.TEST_USERNAME }} run: pytest -v --level local -k "secrettest" timeout-minutes: 60 @@ -135,17 +120,12 @@ jobs: - name: Setup Runhouse uses: ./.github/workflows/setup_runhouse - - name: Setup ~/.rh/config.yaml - uses: ./.github/workflows/setup_rh_config - with: - username: ${{ secrets.CI_ACCOUNT_USERNAME }} - token: ${{ secrets.CI_ACCOUNT_TOKEN }} - api_server_url: ${{ env.API_SERVER_URL }} - - name: pytest -v --level local -k "moduletest" env: TEST_TOKEN: ${{ secrets.TEST_TOKEN }} TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + RH_TOKEN: ${{ secrets.TEST_TOKEN }} + RH_USERNAME: ${{ secrets.TEST_USERNAME }} run: pytest -v --level local -k "moduletest" timeout-minutes: 60 @@ -158,17 +138,12 @@ jobs: - name: Setup Runhouse uses: ./.github/workflows/setup_runhouse - - name: Setup ~/.rh/config.yaml - uses: ./.github/workflows/setup_rh_config - with: - username: ${{ secrets.CI_ACCOUNT_USERNAME }} - token: ${{ secrets.CI_ACCOUNT_TOKEN }} - api_server_url: ${{ env.API_SERVER_URL }} - - name: pytest -v --level local -k "functiontest" env: TEST_TOKEN: ${{ secrets.TEST_TOKEN }} TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + RH_TOKEN: ${{ secrets.TEST_TOKEN }} + RH_USERNAME: ${{ secrets.TEST_USERNAME }} run: pytest -v --level local -k "functiontest" timeout-minutes: 60 @@ -181,16 +156,11 @@ jobs: - name: Setup Runhouse uses: ./.github/workflows/setup_runhouse - - name: Setup ~/.rh/config.yaml - uses: ./.github/workflows/setup_rh_config - with: - username: ${{ secrets.CI_ACCOUNT_USERNAME }} - token: ${{ secrets.CI_ACCOUNT_TOKEN }} - api_server_url: ${{ env.API_SERVER_URL }} - - name: pytest -v --level local -k "envtest" env: TEST_TOKEN: ${{ secrets.TEST_TOKEN }} TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + RH_TOKEN: ${{ secrets.TEST_TOKEN }} + RH_USERNAME: ${{ secrets.TEST_USERNAME }} run: pytest -v --level local -k "envtest" timeout-minutes: 60 diff --git a/.github/workflows/local_tests_den_dev.yaml b/.github/workflows/local_tests_den_dev.yaml index 9cf48eb9d..7a081b5bd 100644 --- a/.github/workflows/local_tests_den_dev.yaml +++ b/.github/workflows/local_tests_den_dev.yaml @@ -34,18 +34,12 @@ jobs: - name: Setup Runhouse uses: ./.github/workflows/setup_runhouse - - name: Setup ~/.rh/config.yaml - uses: ./.github/workflows/setup_rh_config - with: - username: ${{ secrets.CI_ACCOUNT_USERNAME }} - token: ${{ secrets.CI_ACCOUNT_TOKEN }} - api_server_url: ${{ env.API_SERVER_URL }} - - - name: pytest -v --level local tests/test_servers/ env: TEST_TOKEN: ${{ secrets.TEST_TOKEN }} TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + RH_TOKEN: ${{ secrets.TEST_TOKEN }} + RH_USERNAME: ${{ secrets.TEST_USERNAME }} run: pytest -v --level local tests/test_servers/ --api-server-url $API_SERVER_URL timeout-minutes: 60 @@ -73,17 +67,12 @@ jobs: - name: Setup Runhouse uses: ./.github/workflows/setup_runhouse - - name: Setup ~/.rh/config.yaml - uses: ./.github/workflows/setup_rh_config - with: - username: ${{ secrets.CI_ACCOUNT_USERNAME }} - token: ${{ secrets.CI_ACCOUNT_TOKEN }} - api_server_url: ${{ env.API_SERVER_URL }} - - name: pytest -v --level local -k "not servertest and not secrettest and not moduletest and not functiontest and not envtest" env: TEST_TOKEN: ${{ secrets.TEST_TOKEN }} TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + RH_TOKEN: ${{ secrets.TEST_TOKEN }} + RH_USERNAME: ${{ secrets.TEST_USERNAME }} run: pytest -v --level local -k "not servertest and not secrettest and not moduletest and not functiontest and not envtest" --api-server-url $API_SERVER_URL timeout-minutes: 60 @@ -111,17 +100,12 @@ jobs: - name: Setup Runhouse uses: ./.github/workflows/setup_runhouse - - name: Setup ~/.rh/config.yaml - uses: ./.github/workflows/setup_rh_config - with: - username: ${{ secrets.CI_ACCOUNT_USERNAME }} - token: ${{ secrets.CI_ACCOUNT_TOKEN }} - api_server_url: ${{ env.API_SERVER_URL }} - - name: pytest -v --level local -k "secrettest" env: TEST_TOKEN: ${{ secrets.TEST_TOKEN }} TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + RH_TOKEN: ${{ secrets.TEST_TOKEN }} + RH_USERNAME: ${{ secrets.TEST_USERNAME }} run: pytest -v --level local -k "secrettest" --api-server-url $API_SERVER_URL timeout-minutes: 60 @@ -134,17 +118,12 @@ jobs: - name: Setup Runhouse uses: ./.github/workflows/setup_runhouse - - name: Setup ~/.rh/config.yaml - uses: ./.github/workflows/setup_rh_config - with: - username: ${{ secrets.CI_ACCOUNT_USERNAME }} - token: ${{ secrets.CI_ACCOUNT_TOKEN }} - api_server_url: ${{ env.API_SERVER_URL }} - - name: pytest -v --level local -k "moduletest" env: TEST_TOKEN: ${{ secrets.TEST_TOKEN }} TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + RH_TOKEN: ${{ secrets.TEST_TOKEN }} + RH_USERNAME: ${{ secrets.TEST_USERNAME }} run: pytest -v --level local -k "moduletest" --api-server-url $API_SERVER_URL timeout-minutes: 60 @@ -157,17 +136,12 @@ jobs: - name: Setup Runhouse uses: ./.github/workflows/setup_runhouse - - name: Setup ~/.rh/config.yaml - uses: ./.github/workflows/setup_rh_config - with: - username: ${{ secrets.CI_ACCOUNT_USERNAME }} - token: ${{ secrets.CI_ACCOUNT_TOKEN }} - api_server_url: ${{ env.API_SERVER_URL }} - - name: pytest -v --level local -k "functiontest" env: TEST_TOKEN: ${{ secrets.TEST_TOKEN }} TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + RH_TOKEN: ${{ secrets.TEST_TOKEN }} + RH_USERNAME: ${{ secrets.TEST_USERNAME }} run: pytest -v --level local -k "functiontest" --api-server-url $API_SERVER_URL timeout-minutes: 60 @@ -180,16 +154,11 @@ jobs: - name: Setup Runhouse uses: ./.github/workflows/setup_runhouse - - name: Setup ~/.rh/config.yaml - uses: ./.github/workflows/setup_rh_config - with: - username: ${{ secrets.CI_ACCOUNT_USERNAME }} - token: ${{ secrets.CI_ACCOUNT_TOKEN }} - api_server_url: ${{ env.API_SERVER_URL }} - - name: pytest -v --level local -k "envtest" env: TEST_TOKEN: ${{ secrets.TEST_TOKEN }} TEST_USERNAME: ${{ secrets.TEST_USERNAME }} + RH_TOKEN: ${{ secrets.TEST_TOKEN }} + RH_USERNAME: ${{ secrets.TEST_USERNAME }} run: pytest -v --level local -k "envtest" --api-server-url $API_SERVER_URL timeout-minutes: 60 diff --git a/.github/workflows/setup_rh_config/action.yaml b/.github/workflows/setup_rh_config/action.yaml deleted file mode 100644 index a344a711b..000000000 --- a/.github/workflows/setup_rh_config/action.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: Setup an RH config - -description: Reusable short flow for setting up a fake ~/.rh/config.yaml - -inputs: - username: - description: 'The username to log in with' - required: true - - token: - description: 'The token of the logged in username' - required: true - - api_server_url: - description: 'The den api server to send the requests to' - required: true - -runs: - using: composite - steps: - - name: Setup ~/.rh/config.yaml - shell: bash - run: | - mkdir ~/.rh && touch ~/.rh/config.yaml - echo "default_folder: /${{ inputs.username }}" > ~/.rh/config.yaml - echo "disable_data_collection: true" >> ~/.rh/config.yaml - echo "token: ${{ inputs.token }}" >> ~/.rh/config.yaml - echo "username: ${{ inputs.username }}" >> ~/.rh/config.yaml - echo "api_server_url: ${{ inputs.api_server_url }}" >> ~/.rh/config.yaml diff --git a/.github/workflows/unit_tests.yaml b/.github/workflows/unit_tests.yaml index 576e85bb5..49c2cb7bb 100644 --- a/.github/workflows/unit_tests.yaml +++ b/.github/workflows/unit_tests.yaml @@ -28,12 +28,6 @@ jobs: # - name: Setup Runhouse # uses: ./.github/workflows/setup_runhouse # -# - name: Setup ~/.rh/config.yaml -# uses: ./.github/workflows/setup_rh_config -# with: -# username: ${{ secrets.CI_ACCOUNT_USERNAME }} -# token: ${{ secrets.CI_ACCOUNT_TOKEN }} -# # - name: pytest -v --level unit -k "not den_auth" # env: # TEST_TOKEN: ${{ secrets.TEST_TOKEN }} diff --git a/docs/api/python/cluster.rst b/docs/api/python/cluster.rst index a30ac833c..8031182d6 100644 --- a/docs/api/python/cluster.rst +++ b/docs/api/python/cluster.rst @@ -229,11 +229,11 @@ be started on the cluster on port :code:`32300`. Server Authentication --------------------- -If desired, Runhouse provides out-of-the-box authentication via users' Runhouse token (generated when -:ref:`logging in `) and set locally at: :code:`~/.rh/config.yaml`). This is crucial if the cluster -has ports open to the public internet, as would usually be the case when using the ``tls`` connection type. You may -also set up your own authentication manually inside of your own code, but you should likely still enable Runhouse -authentication to ensure that even your non-user-facing endpoints into the server are secured. +If desired, Runhouse provides out-of-the-box authentication via users' Runhouse cluster token (generated when +:ref:`logging in `). This is crucial if the cluster has ports open to the public internet, as would +usually be the case when using the ``tls`` connection type. You may also set up your own authentication manually +inside of your own code, but you should likely still enable Runhouse authentication to ensure that even your +non-user-facing endpoints into the server are secured. When :ref:`initializing a cluster `, you can set the :code:`den_auth` parameter to :code:`True` to enable token authentication. Calls to the cluster server can then be made using an auth header with the diff --git a/docs/tutorials/api-resources.rst b/docs/tutorials/api-resources.rst index 1641c2a3a..527642b35 100644 --- a/docs/tutorials/api-resources.rst +++ b/docs/tutorials/api-resources.rst @@ -26,9 +26,8 @@ Runhouse RNS Setting Config Options ---------------------- -Runhouse stores user configs both locally in ``~/.rh/config.yaml`` and -remotely in the Runhouse database, letting you preserve your same config -across environments. +Runhouse stores user configs locally in ``~/.rh/config.yaml``, letting you preserve convenient defaults to re-use +across clusters. Some configs to consider setting: diff --git a/docs/tutorials/api-secrets.rst b/docs/tutorials/api-secrets.rst index 25477e188..1d4fe4ec7 100644 --- a/docs/tutorials/api-secrets.rst +++ b/docs/tutorials/api-secrets.rst @@ -720,9 +720,7 @@ Login and Logout The login flow gives you the option to upload locally detected builtin provider secrets, or load down saved-down Vault secrets into your local -environment. If loading down new secrets, the location (file or env var) -of the new secrets will be logged in your runhouse config yaml at -``~/.rh/config.yaml`` as well. There are some useful APIs as well for +environment. There are some useful APIs as well for seeing which secrets you have locally configured or stored in Vault. .. code:: ipython3 diff --git a/runhouse/resources/hardware/cluster.py b/runhouse/resources/hardware/cluster.py index add1d5d91..f64e0a26d 100644 --- a/runhouse/resources/hardware/cluster.py +++ b/runhouse/resources/hardware/cluster.py @@ -1,7 +1,6 @@ import contextlib import copy import importlib -import json import logging import re import subprocess @@ -12,6 +11,8 @@ from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Union +import yaml + from runhouse.resources.envs.utils import run_with_logs from runhouse.rns.utils.api import ResourceAccess, ResourceVisibility @@ -25,7 +26,6 @@ from runhouse.constants import ( CLI_RESTART_CMD, CLI_STOP_CMD, - CLUSTER_CONFIG_PATH, DEFAULT_HTTP_PORT, DEFAULT_HTTPS_PORT, DEFAULT_RAY_PORT, @@ -33,7 +33,7 @@ LOCALHOST, RESERVED_SYSTEM_NAMES, ) -from runhouse.globals import obj_store, rns_client +from runhouse.globals import configs, obj_store, rns_client from runhouse.resources.envs.utils import _get_env_from from runhouse.resources.hardware.utils import _current_cluster, ServerConnectionType from runhouse.resources.resource import Resource @@ -132,25 +132,12 @@ def default_env(self, env): if self.is_up(): self.check_server() self._default_env.to(self) - self.save_config_to_cluster() logger.info( "The cluster default env has been updated. " "Run `cluster.restart_server()` to restart the Runhouse server on the new default env." ) - def save_config_to_cluster(self, node: str = None): - config = self.config(condensed=False) - config.pop("creds") - json_config = f"{json.dumps(config)}" - - self.run( - [ - f"mkdir -p ~/.rh; touch {CLUSTER_CONFIG_PATH}; echo '{json_config}' > {CLUSTER_CONFIG_PATH}" - ], - node=node or "all", - ) - def save(self, name: str = None, overwrite: bool = True, folder: str = None): """Overrides the default resource save() method in order to also update the cluster config on the cluster itself. @@ -354,6 +341,31 @@ def keep_warm(self): ) return self + def _add_cluster_owner(self): + """Write user token (including username and rns address) to the cluster in path: `~/.rh/cluster_owners.yaml.`""" + import shlex + + cluster_token = rns_client.base_cluster_token + username = rns_client.username + + path_to_file = str(Path(configs.CLUSTER_TOKEN_PATH)) + return_codes = self.run([f"grep -q {username} {path_to_file}"]) + if return_codes[0][0] == 0: + # username already added to the file + return + + user_data = { + username: {"rns_address": self.rns_address, "token": cluster_token} + } + + yaml_data = yaml.dump(user_data, default_flow_style=False, allow_unicode=True) + token_cmd = f"mkdir -p ~/.rh && echo {shlex.quote(yaml_data)} >> {path_to_file}" + self.run([token_cmd]) + + logger.debug( + f"Saved data to cluster owners file on server in path: {configs.CLUSTER_TOKEN_PATH}" + ) + def _sync_default_env_to_cluster(self): """Install and set up the default env requirements on the cluster. This does not put the env resource on the cluster or initialize the servlet. It also does not set any env vars.""" @@ -737,6 +749,7 @@ def _start_ray_workers(self, ray_port, env): def restart_server( self, _rh_install_url: str = None, + _set_owner: bool = True, resync_rh: bool = True, restart_ray: bool = True, restart_proxy: bool = False, @@ -799,9 +812,6 @@ def restart_server( ) cluster_cert_path = f"{base_caddy_dir}/{self.cert_config.CERT_NAME}" - # Update the cluster config on the cluster - self.save_config_to_cluster() - cmd = ( CLI_RESTART_CMD + (" --restart-ray" if restart_ray else "") @@ -851,6 +861,11 @@ def restart_server( if env_vars: self.call(default_env.name, "_set_env_vars", env_vars) + if _set_owner and rns_client.token and self.rns_address: + # If a Runhouse token is saved locally and rns address exists for the cluster, we can write down the + # user's hashed cluster token to the "cluster_owners" file on the cluster + self._add_cluster_owner() + return status_codes def stop_server(self, stop_ray: bool = True, env: Union[str, "Env"] = None): diff --git a/runhouse/resources/hardware/on_demand_cluster.py b/runhouse/resources/hardware/on_demand_cluster.py index 8c65d9986..83f44c460 100644 --- a/runhouse/resources/hardware/on_demand_cluster.py +++ b/runhouse/resources/hardware/on_demand_cluster.py @@ -437,7 +437,7 @@ def up(self): ) self._update_from_sky_status() - self.restart_server() + self.restart_server(_set_owner=True) return self diff --git a/runhouse/resources/hardware/sagemaker/sagemaker_cluster.py b/runhouse/resources/hardware/sagemaker/sagemaker_cluster.py index 526307bfb..ecd99e22a 100644 --- a/runhouse/resources/hardware/sagemaker/sagemaker_cluster.py +++ b/runhouse/resources/hardware/sagemaker/sagemaker_cluster.py @@ -401,6 +401,7 @@ def set_connection_defaults(self): def restart_server( self, _rh_install_url: str = None, + _set_owner: bool = False, resync_rh: bool = True, restart_ray: bool = True, env: Union[str, "Env"] = None, @@ -1303,15 +1304,6 @@ def _sync_runhouse_to_cluster(self, node: str = None, _install_url=None, env=Non if not self.client: self.connect_server_client() - # Sync the local ~/.rh directory to the cluster - self._rsync( - source=str(Path("~/.rh").expanduser()), - dest="~/.rh", - up=True, - contents=True, - ) - logger.info("Synced ~/.rh folder to the cluster") - local_rh_package_path = Path(importlib.util.find_spec("runhouse").origin).parent # local_rh_package_path = Path(pkgutil.get_loader("runhouse").path).parent diff --git a/runhouse/rns/defaults.py b/runhouse/rns/defaults.py index a87718065..d8b464acb 100644 --- a/runhouse/rns/defaults.py +++ b/runhouse/rns/defaults.py @@ -1,4 +1,5 @@ import copy +import hashlib import json import logging import os @@ -20,6 +21,8 @@ class Defaults: USER_ENDPOINT = "user/" GROUP_ENDPOINT = "group/" CONFIG_PATH = Path("~/.rh/config.yaml").expanduser() + CLUSTER_TOKEN_PATH = "~/.rh/cluster_owners.yaml" + # TODO [DG] default sub-dicts for various resources (e.g. defaults.get('cluster').get('resource_type')) BASE_DEFAULTS = { "default_folder": "~", @@ -62,6 +65,10 @@ def token(self): def token(self, value): self._token = value + @property + def cluster_token(self): + return self._get_or_create_cluster_token() + @property def username(self): if self._simulate_logged_out: @@ -84,6 +91,9 @@ def username(self, value): @property def default_folder(self): + if os.environ.get("RH_DEFAULT_FOLDER"): + self._default_folder = os.environ.get("RH_DEFAULT_FOLDER") + if self._simulate_logged_out: return self.BASE_DEFAULTS["default_folder"] @@ -130,6 +140,12 @@ def request_headers(self) -> dict: """Base request headers used to make requests to Runhouse Den.""" return {"Authorization": f"Bearer {self.token}"} if self.token else {} + @property + def cluster_request_headers(self) -> dict: + """Base request headers used to make requests to a cluster.""" + cluster_token = self.cluster_token + return {"Authorization": f"Bearer {cluster_token}"} if cluster_token else {} + def upload_defaults( self, defaults: Optional[Dict] = None, @@ -274,3 +290,36 @@ def data_collection_enabled(self) -> bool: return False return True + + def load_cluster_token_from_file(self, username: str) -> Optional[str]: + path_to_file = Path(self.CLUSTER_TOKEN_PATH).expanduser() + if not path_to_file.exists(): + # File should only exist on clusters, not locally + return + + with open(path_to_file, "r") as f: + data = yaml.safe_load(f) + saved_cluster_token = data.get(username, {}).get("token") + return saved_cluster_token + + def _get_or_create_cluster_token( + self, den_token: str = None, resource_address: str = None, username: str = None + ): + if den_token and resource_address and username: + # If specific values are passed in (as opposed to loading from local config), use those to build the token + return self._build_token_hash(den_token, resource_address, username) + + den_token = self.token + username = self.username + + if den_token is None or username is None: + return None + + # Return the user's self-owned cluster token + return self._build_token_hash(den_token, username, username) + + @staticmethod + def _build_token_hash(den_token: str, resource_address: str, username: str): + hash_input = (den_token + resource_address).encode("utf-8") + hash_hex = hashlib.sha256(hash_input).hexdigest() + return f"{hash_hex}+{resource_address}+{username}" diff --git a/runhouse/rns/rns_client.py b/runhouse/rns/rns_client.py index ec5f79d3b..36743c31e 100644 --- a/runhouse/rns/rns_client.py +++ b/runhouse/rns/rns_client.py @@ -1,4 +1,3 @@ -import hashlib import importlib import json import logging @@ -147,6 +146,10 @@ def current_folder(self, value): def token(self): return self._configs.token + @property + def base_cluster_token(self): + return self._configs.cluster_token + @property def username(self): return self._configs.get("username", None) @@ -202,12 +205,10 @@ def request_headers( ) -> Union[dict, None]: """Returns the authentication headers to use for requests made to Den or to a cluster. - If the request is being made to Den, we simply construct the request headers with the user's existing - Runhouse token. - - If the request is being made to (or from) a cluster, we generate a new unique token to prevent exposing the - user's original Runhouse token on the cluster. This new token is based on the user's existing Den token and - the Den address of the resource (or cluster API) they are attempting to access. + We generate a new unique token to prevent exposing the user's original Runhouse token on the cluster. + This new token is based on the user's existing Den token and the Den address of the resource (or cluster API) + they are attempting to access. Where relevant, we also store this token on the cluster in order to authenticate + cluster owners. For example, if userA tries to access a function on a cluster that was shared with them by userB, we generate a new token containing userA's Den token and top level directory associated with the @@ -238,8 +239,9 @@ def request_headers( return None if headers is None: - # Use the default headers (i.e. the user's original Den token) - headers: dict = self._configs.request_headers + # Use the default cluster headers, which includes the hash of the user's den token + # We can use this to authenticate requests to a cluster + Den + headers: dict = self._configs.cluster_request_headers if not headers: # TODO: allow this? means we failed to load token from configs @@ -265,18 +267,22 @@ def request_headers( "Failed to extract token from request auth header. Expected in format: Bearer " ) - hashed_token = self.cluster_token(den_token, resource_address) + hashed_token = self.cluster_token_from_resource_address( + den_token, resource_address + ) return {"Authorization": f"Bearer {hashed_token}"} - def cluster_token(self, den_token: str, resource_address: str): + def cluster_token_from_resource_address( + self, den_token: str, resource_address: str + ): if "/" in resource_address: # If provided as a full rns address, extract the top level directory resource_address = self.base_folder(resource_address) - hash_input = (den_token + resource_address).encode("utf-8") - hash_hex = hashlib.sha256(hash_input).hexdigest() - return f"{hash_hex}+{resource_address}+{self._configs.username}" + return self._configs._get_or_create_cluster_token( + den_token, resource_address, username=self._configs.username + ) def resource_request_payload(self, payload) -> dict: payload = remove_null_values_from_dict(payload) diff --git a/runhouse/servers/caddy/config.py b/runhouse/servers/caddy/config.py index 15c05b459..90e0a45ed 100644 --- a/runhouse/servers/caddy/config.py +++ b/runhouse/servers/caddy/config.py @@ -53,8 +53,8 @@ def __init__( self.domain = domain self.force_reinstall = force_reinstall - # To expose the server to the internet, set address to the public IP, otherwise leave it as localhost - self.address = address or "localhost" + # To expose the server to the internet, set address to the public IP, otherwise set as localhost + self.address = address self.ssl_cert_path = Path(ssl_cert_path).expanduser() if ssl_cert_path else None self.ssl_key_path = Path(ssl_key_path).expanduser() if ssl_key_path else None diff --git a/runhouse/servers/cluster_servlet.py b/runhouse/servers/cluster_servlet.py index 5d751b58f..ef30cc57b 100644 --- a/runhouse/servers/cluster_servlet.py +++ b/runhouse/servers/cluster_servlet.py @@ -118,7 +118,10 @@ async def aresource_access_level( # they have access to everything if configs.token and ( configs.token == token - or rns_client.cluster_token(configs.token, resource_uri) == token + or rns_client.cluster_token_from_resource_address( + configs.token, resource_uri + ) + == token ): return ResourceAccess.WRITE return self._auth_cache.lookup_access_level(token, resource_uri) diff --git a/runhouse/servers/http/auth.py b/runhouse/servers/http/auth.py index 06c7ab5e1..102fb6e09 100644 --- a/runhouse/servers/http/auth.py +++ b/runhouse/servers/http/auth.py @@ -3,7 +3,10 @@ from runhouse.globals import rns_client from runhouse.rns.utils.api import load_resp_content, ResourceAccess -from runhouse.servers.http.http_utils import username_from_token +from runhouse.servers.http.http_utils import ( + get_username_from_cluster_token, + username_from_token, +) logger = logging.getLogger(__name__) @@ -28,7 +31,7 @@ def lookup_access_level( self, token: str, resource_uri: str, refresh_cache=True ) -> Union[str, None]: """Get the access level of a particular resource for a user""" - if token is None: + if token is None or resource_uri is None: return # Also add this user to the username cache @@ -84,15 +87,12 @@ async def averify_cluster_access( from runhouse.globals import configs, obj_store # The logged-in user always has full access to the cluster. This is especially important if they flip on - # Den Auth without saving the cluster. We may need to generate a subtoken here to check. - if configs.token: - if configs.token == token: - return True - if ( - cluster_uri - and rns_client.cluster_token(configs.token, cluster_uri) == token - ): - return True + # Den Auth without saving the cluster. + username = get_username_from_cluster_token(token) + cluster_token = configs.load_cluster_token_from_file(username=username) + if cluster_token: + # If cluster token is saved down in file: "cluster_owners.yaml" + return True cluster_access_level = await obj_store.aresource_access_level(token, cluster_uri) diff --git a/runhouse/servers/http/http_server.py b/runhouse/servers/http/http_server.py index 0317eb268..f43a7bc08 100644 --- a/runhouse/servers/http/http_server.py +++ b/runhouse/servers/http/http_server.py @@ -960,18 +960,24 @@ async def main(): ######################################## # Handling args that could be specified in the # cluster_config.json or via CLI args + + # Throw warnings if there are mismatches between the values (CLI vs cluster_config.json) + # Don't throw warnings if the config file does not exist or has not been set ######################################## # Den URL cluster_config["api_server_url"] = api_server_url # Server port - if parse_args.port is not None and parse_args.port != cluster_config.get( - "server_port" + config_server_port = cluster_config.get("server_port") + if ( + parse_args.port is not None + and config_server_port is not None + and parse_args.port != config_server_port ): logger.warning( f"CLI provided server port: {parse_args.port} is different from the server port specified in " - f"cluster_config.json: {cluster_config.get('server_port')}. Prioritizing CLI provided port." + f"cluster_config.json: {config_server_port}. Prioritizing CLI provided port." ) port_arg = ( @@ -982,12 +988,16 @@ async def main(): cluster_config["server_port"] = port_arg # Den auth enabled - if hasattr( - parse_args, "use_den_auth" - ) and parse_args.use_den_auth != cluster_config.get("den_auth", False): + config_den_auth = cluster_config.get("den_auth") + if ( + hasattr(parse_args, "use_den_auth") + and parse_args.use_den_auth + and config_den_auth is not None + and parse_args.use_den_auth != config_den_auth + ): logger.warning( f"CLI provided den_auth: {parse_args.use_den_auth} is different from the den_auth specified in " - f"cluster_config.json: {cluster_config.get('den_auth')}. Prioritizing CLI provided den_auth." + f"cluster_config.json: {config_den_auth}. Prioritizing CLI provided den_auth." ) den_auth = getattr(parse_args, "use_den_auth", False) or cluster_config.get( @@ -999,15 +1009,17 @@ async def main(): await obj_store.aenable_den_auth() # Telemetry enabled - if hasattr( - parse_args, "use_local_telemetry" - ) and parse_args.use_local_telemetry != cluster_config.get( - "use_local_telemetry", False + config_telemetry = cluster_config.get("use_local_telemetry") + if ( + hasattr(parse_args, "use_local_telemetry") + and parse_args.use_local_telemetry + and config_telemetry is not None + and parse_args.use_local_telemetry != config_telemetry ): logger.warning( f"CLI provided use_local_telemetry: {parse_args.use_local_telemetry} is different from the " f"use_local_telemetry specified in cluster_config.json: " - f"{cluster_config.get('use_local_telemetry')}. Prioritizing CLI provided use_local_telemetry." + f"{config_telemetry}. Prioritizing CLI provided use_local_telemetry." ) use_local_telemetry = getattr( @@ -1055,12 +1067,16 @@ async def main(): ) # Host - if parse_args.host is not None and parse_args.host != cluster_config.get( - "server_host" + config_host = cluster_config.get("server_host") + if ( + parse_args.host is not None + and parse_args.host + and config_host is not None + and parse_args.host != config_host ): logger.warning( f"CLI provided server_host: {parse_args.host} is different from the server_host specified in " - f"cluster_config.json: {cluster_config.get('server_host')}. Prioritizing CLI provided server_host." + f"cluster_config.json: {config_host}. Prioritizing CLI provided server_host." ) host = parse_args.host or cluster_config.get("server_host") or DEFAULT_SERVER_HOST @@ -1080,7 +1096,7 @@ async def main(): if certs_address is not None: cluster_config["ips"] = [certs_address] else: - cluster_config["ips"] = ["0.0.0.0"] + cluster_config["ips"] = [DEFAULT_SERVER_HOST] config_conn = cluster_config.get("server_connection_type") @@ -1167,11 +1183,11 @@ async def main(): # proxy to forward requests from port 80 (HTTP) or 443 (HTTPS) to the app's port. if use_caddy: logger.debug("Using Caddy as a reverse proxy") - if certs_address is None and domain is None: - raise ValueError( - "Must provide the server address or domain to configure Caddy. No address or domain found in the " - "server start command (--certs-address or --domain) or in the cluster config YAML saved on the cluster." + if certs_address is None and domain is None and use_https: + logger.warning( + "No server address or domain provided for configuring Caddy with HTTPS. Caddy will use localhost." ) + certs_address = "localhost" cc = CaddyConfig( address=certs_address, diff --git a/runhouse/servers/http/http_utils.py b/runhouse/servers/http/http_utils.py index 9262454f3..df78f97e7 100644 --- a/runhouse/servers/http/http_utils.py +++ b/runhouse/servers/http/http_utils.py @@ -204,6 +204,13 @@ def get_token_from_request(request): return auth_headers.split("Bearer ")[-1] if auth_headers else None +def get_username_from_cluster_token(token: str): + if "+" not in token: + return None + + return token.split("+")[-1] + + def auth_headers_from_request(request): return request.headers.get("Authorization", "") diff --git a/runhouse/servers/obj_store.py b/runhouse/servers/obj_store.py index 9ab13579f..ebce4b7d0 100644 --- a/runhouse/servers/obj_store.py +++ b/runhouse/servers/obj_store.py @@ -472,7 +472,10 @@ async def ahas_resource_access(self, token: str, resource_uri=None) -> bool: return True if ( resource_uri - and rns_client.cluster_token(configs.token, resource_uri) == token + and rns_client.cluster_token_from_resource_address( + configs.token, resource_uri + ) + == token ): return True diff --git a/tests/README.md b/tests/README.md index 623759dc5..2dad4451f 100644 --- a/tests/README.md +++ b/tests/README.md @@ -21,7 +21,9 @@ do this, so ignore warnings that the imports are unused). Runhouse uses Sky (which uses ssh) to communicate with the clusters it runs on (e.g. AWS EC2). Sky generates a keypair for you locally the first time you use it to communicate with a cluster, and for local container tests -we take this key from `~/.ssh/sky-key` and copy it to the Docker containers such that Sky can communicate with them. You can override the choice of keypair by adding the following line to your `~/.rh/config.yaml`: `default_keypair: `. +we take this key from `~/.ssh/sky-key` and copy it to the Docker containers such that Sky can communicate with them. +You can override the choice of keypair by adding the following line to +your `~/.rh/config.yaml`: `default_keypair: `. To run a single test file with a given level, use a command like this: ```bash diff --git a/tests/conftest.py b/tests/conftest.py index 4b1737481..bd13692c4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -79,7 +79,7 @@ class TestLevels(str, enum.Enum): MAXIMAL = "maximal" -DEFAULT_LEVEL = TestLevels.UNIT +DEFAULT_LEVEL = TestLevels.LOCAL TEST_LEVEL_HIERARCHY = { TestLevels.UNIT: 0, diff --git a/tests/fixtures/docker_cluster_fixtures.py b/tests/fixtures/docker_cluster_fixtures.py index 086112952..cfa1507d8 100644 --- a/tests/fixtures/docker_cluster_fixtures.py +++ b/tests/fixtures/docker_cluster_fixtures.py @@ -10,7 +10,6 @@ import pytest import runhouse as rh -import yaml from runhouse.constants import DEFAULT_HTTP_PORT, DEFAULT_HTTPS_PORT, DEFAULT_SSH_PORT from runhouse.globals import rns_client @@ -276,14 +275,7 @@ def set_up_local_cluster( if rh_cluster.default_env.name == rh.Env.DEFAULT_NAME: rh.env( - reqs=["pytest", "httpx", "pytest_asyncio", "pandas"], - working_dir=None, - setup_cmds=[ - f"mkdir -p ~/.rh; touch ~/.rh/config.yaml; " - f"echo '{yaml.safe_dump(config)}' > ~/.rh/config.yaml" - ] - if logged_in - else False, + reqs=["pytest", "httpx", "pytest_asyncio", "pandas"], working_dir=None ).to(rh_cluster) def cleanup(): diff --git a/tests/test_den/test_rns.py b/tests/test_den/test_rns.py index b8c93b5c5..cd1110074 100644 --- a/tests/test_den/test_rns.py +++ b/tests/test_den/test_rns.py @@ -1,11 +1,16 @@ +import shlex +import subprocess from pathlib import Path import pytest import runhouse as rh + +import yaml from runhouse.globals import rns_client +@pytest.mark.level("unit") def test_find_working_dir(tmp_path): starting_dir = Path(tmp_path, "subdir/subdir/subdir/subdir") d = rns_client.locate_working_dir(cwd=str(starting_dir)) @@ -28,6 +33,30 @@ def test_find_working_dir(tmp_path): assert d in str(Path(tmp_path, "subdir/subdir")) +@pytest.mark.level("unit") +def test_save_and_load_cluster_token(): + cluster_token = "abcde123" + username = "test-username" + + path_to_file = Path(rh.configs.CLUSTER_TOKEN_PATH).expanduser() + + try: + user_data = {username: {"rns_address": username, "token": cluster_token}} + yaml_data = yaml.dump(user_data, default_flow_style=False, allow_unicode=True) + escaped_yaml_data = shlex.quote(yaml_data) + + token_cmd = f"mkdir -p ~/.rh && echo {escaped_yaml_data} >> {path_to_file}" + subprocess.run(token_cmd, shell=True, check=True) + + cluster_token_reloaded = rh.configs.load_cluster_token_from_file(username) + assert cluster_token_reloaded + + finally: + if path_to_file.exists(): + path_to_file.unlink() + + +@pytest.mark.skip() def test_set_folder(tmp_path): rh.set_folder("~/tests") rh.folder(name="bert_ft").save() @@ -39,9 +68,10 @@ def test_set_folder(tmp_path): rh.set_folder("@") +@pytest.mark.skip() def test_rns_path(tmp_path): rh.set_folder("~") - assert rh.folder("tests").rns_address == "~/tests" + assert rh.folder(name="tests").rns_address == "~/tests" rh.set_folder("@") assert ( @@ -78,6 +108,7 @@ def test_ls(): rh.set_folder("@") +@pytest.mark.skip() def test_from_name(ondemand_aws_cluster): f = rh.folder(name="~/tests/bert_ft") assert f.path diff --git a/tests/test_resources/test_resource_sharing.py b/tests/test_resources/test_resource_sharing.py index 960bd4f10..c15f69a84 100644 --- a/tests/test_resources/test_resource_sharing.py +++ b/tests/test_resources/test_resource_sharing.py @@ -71,7 +71,9 @@ def test_calling_shared_resource(self, resource): assert return_codes[0][0] == 0 # Call function with current token via CURL - cluster_token = rns_client.cluster_token(current_token, cluster.rns_address) + cluster_token = rns_client.cluster_token_from_resource_address( + current_token, cluster.rns_address + ) res = self.call_func_with_curl( cluster, resource.name, cluster_token, **{"a": 1, "b": 2} ) @@ -126,7 +128,9 @@ def test_use_resource_apis(self, resource): # Reset back to valid token and confirm we can call function again rh.configs.token = current_token - cluster_token = rns_client.cluster_token(current_token, cluster.rns_address) + cluster_token = rns_client.cluster_token_from_resource_address( + current_token, cluster.rns_address + ) res = self.call_func_with_curl( cluster, resource.name, cluster_token, **{"a": 1, "b": 2} @@ -170,7 +174,9 @@ def test_calling_resource_with_cluster_write_access(self, resource): ) # Confirm user can still call the function with write access to the cluster - cluster_token = rns_client.cluster_token(current_token, cluster.rns_address) + cluster_token = rns_client.cluster_token_from_resource_address( + current_token, cluster.rns_address + ) res = self.call_func_with_curl( cluster, resource.name, @@ -205,7 +211,9 @@ def test_calling_resource_with_no_cluster_access(self, resource): ), f"Failed to remove access to the cluster for user: {current_username}: {resp.text}" # Confirm current user can still call the function (which they still have explicit access to) - cluster_token = rns_client.cluster_token(current_token, cluster.rns_address) + cluster_token = rns_client.cluster_token_from_resource_address( + current_token, cluster.rns_address + ) res = self.call_func_with_curl( cluster, resource.name, cluster_token, **{"a": 1, "b": 2} ) @@ -259,7 +267,9 @@ def test_calling_resource_with_cluster_read_access(self, resource): cluster.enable_den_auth(flush=True) # Confirm user can no longer call the function with read only access to the cluster and no function access - cluster_token = rns_client.cluster_token(current_token, cluster.rns_address) + cluster_token = rns_client.cluster_token_from_resource_address( + current_token, cluster.rns_address + ) res = self.call_func_with_curl( cluster, resource.name, cluster_token, **{"a": 1, "b": 2} ) diff --git a/tests/test_servers/test_http_server.py b/tests/test_servers/test_http_server.py index e146d7f7b..08f5a1698 100644 --- a/tests/test_servers/test_http_server.py +++ b/tests/test_servers/test_http_server.py @@ -43,9 +43,9 @@ class TestHTTPServerDocker: LOCAL = { "cluster": [ "docker_cluster_pk_ssh_den_auth", - "docker_cluster_pk_ssh_no_auth", - "docker_cluster_pk_tls_den_auth", # Represents public app use case - "docker_cluster_pk_http_exposed", # Represents within VPC use case + # "docker_cluster_pk_ssh_no_auth", + # "docker_cluster_pk_tls_den_auth", # Represents public app use case + # "docker_cluster_pk_http_exposed", # Represents within VPC use case ] } diff --git a/tests/utils.py b/tests/utils.py index d2cbb13a4..52ec802c1 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -6,7 +6,6 @@ import pytest import runhouse as rh -import yaml from runhouse.globals import rns_client from runhouse.servers.obj_store import ObjStore, RaySetupOption @@ -93,12 +92,6 @@ def test_env(logged_in=False): return rh.env( reqs=["pytest", "httpx", "pytest_asyncio", "pandas"], working_dir=None, - setup_cmds=[ - f"mkdir -p ~/.rh; touch ~/.rh/config.yaml; " - f"echo '{yaml.safe_dump(rh.configs.defaults_cache)}' > ~/.rh/config.yaml" - ] - if logged_in - else False, name="base_env", )