Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove region_name and secrets_store parameters #79

Merged
merged 3 commits into from
Mar 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions python/hsml/client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ def init(
host=None,
port=None,
project=None,
region_name=None,
secrets_store=None,
hostname_verification=None,
trust_store_path=None,
api_key_file=None,
Expand All @@ -47,8 +45,6 @@ def init(
host,
port,
project,
region_name,
secrets_store,
hostname_verification,
trust_store_path,
api_key_file,
Expand Down
6 changes: 1 addition & 5 deletions python/hsml/client/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,5 @@ class ModelServingException(Exception):
class ExternalClientError(TypeError):
"""Raised when external client cannot be initialized due to missing arguments."""

def __init__(self, missing_argument):
message = (
"{0} cannot be of type NoneType, {0} is a non-optional "
"argument to connect to hopsworks from an external environment."
).format(missing_argument)
def __init__(self, message):
super().__init__(message)
175 changes: 34 additions & 141 deletions python/hsml/client/hopsworks/external.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,159 +15,78 @@
#

import os
import boto3
import base64
import json
import requests

from hsml.client import auth, exceptions
from hsml.client.hopsworks import base as hopsworks


class Client(hopsworks.Client):
DEFAULT_REGION = "default"
SECRETS_MANAGER = "secretsmanager"
PARAMETER_STORE = "parameterstore"
LOCAL_STORE = "local"

def __init__(
self,
host,
port,
project,
region_name,
secrets_store,
hostname_verification,
trust_store_path,
api_key_file,
api_key_value,
):
"""Initializes a client in an external environment such as AWS Sagemaker."""
if not host:
raise exceptions.ExternalClientError("host")
raise exceptions.ExternalClientError(
"host cannot be of type NoneType, host is a non-optional "
"argument to connect to hopsworks from an external environment."
)
if not project:
raise exceptions.ExternalClientError("project")
raise exceptions.ExternalClientError(
"project cannot be of type NoneType, project is a non-optional "
"argument to connect to hopsworks from an external environment."
)

self._host = host
self._port = port
self._base_url = "https://" + self._host + ":" + str(self._port)
self._project_name = project
self._region_name = region_name or self.DEFAULT_REGION

if api_key_value is not None:
api_key = api_key_value
elif api_key_file is not None:
file = None
if os.path.exists(api_key_file):
try:
file = open(api_key_file, mode="r")
api_key = file.read()
finally:
file.close()
else:
raise IOError(
"Could not find api key file on path: {}".format(api_key_file)
)
else:
api_key = self._get_secret(secrets_store, "api-key", api_key_file)
raise exceptions.ExternalClientError(
"Either api_key_file or api_key_value must be set when connecting to"
" hopsworks from an external environment."
)

self._auth = auth.ApiKeyAuth(api_key)

self._session = requests.session()
self._connected = True
self._verify = self._get_verify(self._host, trust_store_path)

project_info = self._get_project_info(self._project_name)
self._project_id = str(project_info["projectId"])
if self._project_name is not None:
project_info = self._get_project_info(self._project_name)
self._project_id = str(project_info["projectId"])
else:
self._project_id = None

self._cert_key = None
self._cert_folder_base = None

def _close(self):
"""Closes a client and deletes certificates."""
if self._cert_folder_base is None:
# On external Spark clients (Databricks, Spark Cluster),
# certificates need to be provided before the Spark application starts.
return

# Clean up only on AWS
self._cleanup_file(self._get_jks_key_store_path())
self._cleanup_file(self._get_jks_trust_store_path())
self._cleanup_file(os.path.join(self._cert_folder, "material_passwd"))

try:
# delete project level
os.rmdir(self._cert_folder)
# delete host level
os.rmdir(os.path.dirname(self._cert_folder))
# on AWS base dir will be empty, and can be deleted otherwise raises OSError
os.rmdir(self._cert_folder_base)
except OSError:
pass
"""Closes a client."""
self._connected = False

def _get_jks_trust_store_path(self):
return self._trust_store_path

def _get_jks_key_store_path(self):
return self._key_store_path

def _get_secret(self, secrets_store, secret_key=None, api_key_file=None):
"""Returns secret value from the AWS Secrets Manager or Parameter Store.

:param secrets_store: the underlying secrets storage to be used, e.g. `secretsmanager` or `parameterstore`
:type secrets_store: str
:param secret_key: key for the secret value, e.g. `api-key`, `cert-key`, `trust-store`, `key-store`, defaults to None
:type secret_key: str, optional
:param api_key_file: path to a file containing an api key, defaults to None
:type api_key_file: str, optional
:raises ExternalClientError: `api_key_file` needs to be set for local mode
:raises UnknownSecretStorageError: Provided secrets storage not supported
:return: secret
:rtype: str
"""
if secrets_store == self.SECRETS_MANAGER:
return self._query_secrets_manager(secret_key)
elif secrets_store == self.PARAMETER_STORE:
return self._query_parameter_store(secret_key)
elif secrets_store == self.LOCAL_STORE:
if not api_key_file:
raise exceptions.ExternalClientError(
"api_key_file needs to be set for local mode"
)
with open(api_key_file) as f:
return f.readline().strip()
else:
raise exceptions.UnknownSecretStorageError(
"Secrets storage " + secrets_store + " is not supported."
)

def _query_secrets_manager(self, secret_key):
secret_name = "hopsworks/role/" + self._assumed_role()
args = {"service_name": "secretsmanager"}
region_name = self._get_region()
if region_name:
args["region_name"] = region_name
client = boto3.client(**args)
get_secret_value_response = client.get_secret_value(SecretId=secret_name)
return json.loads(get_secret_value_response["SecretString"])[secret_key]

def _assumed_role(self):
client = boto3.client("sts")
response = client.get_caller_identity()
# arns for assumed roles in SageMaker follow the following schema
# arn:aws:sts::123456789012:assumed-role/my-role-name/my-role-session-name
local_identifier = response["Arn"].split(":")[-1].split("/")
if len(local_identifier) != 3 or local_identifier[0] != "assumed-role":
raise Exception(
"Failed to extract assumed role from arn: " + response["Arn"]
)
return local_identifier[1]

def _get_region(self):
if self._region_name != self.DEFAULT_REGION:
return self._region_name
else:
return None

def _query_parameter_store(self, secret_key):
args = {"service_name": "ssm"}
region_name = self._get_region()
if region_name:
args["region_name"] = region_name
client = boto3.client(**args)
name = "/hopsworks/role/" + self._assumed_role() + "/type/" + secret_key
return client.get_parameter(Name=name, WithDecryption=True)["Parameter"][
"Value"
]

def _get_project_info(self, project_name):
"""Makes a REST call to Hopsworks to get all metadata of the provided project.

Expand All @@ -178,32 +97,6 @@ def _get_project_info(self, project_name):
"""
return self._send_request("GET", ["project", "getProjectInfo", project_name])

def _get_credentials(self, project_id):
"""Makes a REST call to hopsworks for getting the project user certificates needed to connect to services such as Hive

:param project_id: id of the project
:type project_id: int
:return: JSON response with credentials
:rtype: dict
"""
return self._send_request("GET", ["project", project_id, "credentials"])

def _write_b64_cert_to_bytes(self, b64_string, path):
"""Converts a b64 encoded certificate to a bytes file.

:param b64_string: b64 encoded string of certificate
:type b64_string: str
:param path: path where file is saved, including file name. e.g. /path/key-store.jks
:type path: str
"""

with open(path, "wb") as f:
cert_b64 = base64.b64decode(b64_string)
f.write(cert_b64)

def _cleanup_file(self, file_path):
"""Removes local files with `file_path`."""
try:
os.remove(file_path)
except OSError:
pass
@property
def host(self):
return self._host
53 changes: 8 additions & 45 deletions python/hsml/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@
from hsml import client
from hsml.core import model_api, model_registry_api, model_serving_api

AWS_DEFAULT_REGION = "default"
HOPSWORKS_PORT_DEFAULT = 443
SECRETS_STORE_DEFAULT = "parameterstore"
HOSTNAME_VERIFICATION_DEFAULT = True


Expand All @@ -45,20 +43,19 @@ class Connection:
```

!!! hint "Save API Key as File"
To get started quickly, without saving the Hopsworks API in a secret storage,
you can simply create a file with the previously created Hopsworks API Key and
place it on the environment from which you wish to connect to the Hopsworks
Model Registry.
To get started quickly, you can simply create a file with the previously
created Hopsworks API Key and place it on the environment from which you
wish to connect to Hopsworks.

You can then connect by simply passing the path to the key file when
instantiating a connection:

```python hl_lines="6"
import hsml
conn = hsml.connection(
'my_instance', # DNS of your Model Registry instance
'my_instance', # DNS of your Hopsworks instance
443, # Port to reach your Hopsworks instance, defaults to 443
'my_project', # Name of your Hopsworks Model Registry project
'my_project', # Name of your Hopsworks project
api_key_file='modelregistry.key', # The file containing the API key generated above
hostname_verification=True # Disable for self-signed certificates
)
Expand All @@ -79,19 +76,13 @@ class Connection:
project: The name of the project to connect to. When running on Hopsworks, this
defaults to the project from where the client is run from.
Defaults to `None`.
region_name: The name of the AWS region in which the required secrets are
stored, defaults to `"default"`.
secrets_store: The secrets storage to be used, either `"secretsmanager"`,
`"parameterstore"` or `"local"`, defaults to `"parameterstore"`.
hostname_verification: Whether or not to verify Hopsworks certificate, defaults
to `True`.
trust_store_path: Path on the file system containing the Hopsworks certificates,
defaults to `None`.
api_key_file: Path to a file containing the API Key, if provided,
`secrets_store` will be ignored, defaults to `None`.
api_key_value: API Key as string, if provided, `secrets_store` will be ignored,
however, this should be used with care, especially if the used notebook or
job script is accessible by multiple parties. Defaults to `None`.
api_key_file: Path to a file containing the API Key, defaults to `None`.
api_key_value: API Key as string. This should be used with care,
especially if the used notebook or job script is accessible by multiple parties. Defaults to `None`.

# Returns
`Connection`. Connection handle to perform operations on a Hopsworks project.
Expand All @@ -102,8 +93,6 @@ def __init__(
host: str = None,
port: int = HOPSWORKS_PORT_DEFAULT,
project: str = None,
region_name: str = AWS_DEFAULT_REGION,
secrets_store: str = SECRETS_STORE_DEFAULT,
hostname_verification: bool = HOSTNAME_VERIFICATION_DEFAULT,
trust_store_path: str = None,
api_key_file: str = None,
Expand All @@ -112,8 +101,6 @@ def __init__(
self._host = host
self._port = port
self._project = project
self._region_name = region_name
self._secrets_store = secrets_store
self._hostname_verification = hostname_verification
self._trust_store_path = trust_store_path
self._api_key_file = api_key_file
Expand Down Expand Up @@ -175,8 +162,6 @@ def connect(self):
self._host,
self._port,
self._project,
self._region_name,
self._secrets_store,
self._hostname_verification,
self._trust_store_path,
self._api_key_file,
Expand Down Expand Up @@ -210,8 +195,6 @@ def connection(
host: str = None,
port: int = HOPSWORKS_PORT_DEFAULT,
project: str = None,
region_name: str = AWS_DEFAULT_REGION,
secrets_store: str = SECRETS_STORE_DEFAULT,
hostname_verification: bool = HOSTNAME_VERIFICATION_DEFAULT,
trust_store_path: str = None,
api_key_file: str = None,
Expand All @@ -222,8 +205,6 @@ def connection(
host,
port,
project,
region_name,
secrets_store,
hostname_verification,
trust_store_path,
api_key_file,
Expand Down Expand Up @@ -257,24 +238,6 @@ def project(self):
def project(self, project):
self._project = project

@property
def region_name(self):
return self._region_name

@region_name.setter
@not_connected
def region_name(self, region_name):
self._region_name = region_name

@property
def secrets_store(self):
return self._secrets_store

@secrets_store.setter
@not_connected
def secrets_store(self, secrets_store):
self._secrets_store = secrets_store

@property
def hostname_verification(self):
return self._hostname_verification
Expand Down