Skip to content

Commit

Permalink
Don't create run ids if dependencies for the data store are missing, i.e. …
Browse files Browse the repository at this point in the history
…fail fast (Netflix#1666)

* add decorator for s3 deps

* add comments

* check before creation of run id

* decorate __init__ correctly

* linting
  • Loading branch information
madhur-ob committed Jan 18, 2024
1 parent 9fb0402 commit 9dc7b17
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 19 deletions.
2 changes: 1 addition & 1 deletion metaflow/plugins/azure/azure_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ class MetaflowAzureResourceError(MetaflowException):


class MetaflowAzurePackageError(MetaflowException):
headline = "Missing required packages azure-identity and azure-storage-blob"
headline = "Missing required packages 'azure-identity' and 'azure-storage-blob'"
3 changes: 1 addition & 2 deletions metaflow/plugins/datastores/azure_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,9 +248,8 @@ def list_content_single(self, path):
class AzureStorage(DataStoreStorage):
TYPE = "azure"

@check_azure_deps
def __init__(self, root=None):
# cannot decorate __init__... invoke it with dummy decoratee
check_azure_deps(lambda: 0)
super(AzureStorage, self).__init__(root)
self._tmproot = ARTIFACT_LOCALROOT
self._default_scope_token = None
Expand Down
3 changes: 1 addition & 2 deletions metaflow/plugins/datastores/gs_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,8 @@ def load_bytes_single(self, tmpdir, key):
class GSStorage(DataStoreStorage):
TYPE = "gs"

@check_gs_deps
def __init__(self, root=None):
# cannot decorate __init__... invoke it with dummy decoratee
check_gs_deps(lambda: 0)
super(GSStorage, self).__init__(root)
self._tmproot = ARTIFACT_LOCALROOT
self._root_client = None
Expand Down
3 changes: 2 additions & 1 deletion metaflow/plugins/datastores/s3_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from itertools import starmap

from metaflow.plugins.datatools.s3.s3 import S3, S3Client, S3PutObject
from metaflow.plugins.datatools.s3.s3 import S3, S3Client, S3PutObject, check_s3_deps
from metaflow.metaflow_config import DATASTORE_SYSROOT_S3, ARTIFACT_LOCALROOT
from metaflow.datastore.datastore_storage import CloseAfterUse, DataStoreStorage

Expand All @@ -18,6 +18,7 @@
class S3Storage(DataStoreStorage):
TYPE = "s3"

@check_s3_deps
def __init__(self, root=None):
super(S3Storage, self).__init__(root)
self.s3_client = S3Client()
Expand Down
35 changes: 24 additions & 11 deletions metaflow/plugins/datatools/s3/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,25 @@
if TYPE_CHECKING:
from metaflow.client import Run

try:
import boto3
from boto3.s3.transfer import TransferConfig

DOWNLOAD_FILE_THRESHOLD = 2 * TransferConfig().multipart_threshold
DOWNLOAD_MAX_CHUNK = 2 * 1024 * 1024 * 1024 - 1
boto_found = True
except:
boto_found = False
def _check_and_init_s3_deps():
    """Verify that the S3 dependencies are importable.

    Raises
    ------
    MetaflowException
        If 'boto3' (or its transfer submodule) is not installed.
    """
    try:
        # Imports are performed purely as an availability check; the names
        # themselves are intentionally unused here.
        import boto3
        from boto3.s3.transfer import TransferConfig
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError, so a single
        # except clause covers both failure modes.
        raise MetaflowException("You need to install 'boto3' in order to use S3.")


def check_s3_deps(func):
    """Decorator that checks S3 dependencies (as needed for the AWS S3
    storage backend) before invoking the wrapped callable. This includes
    boto3.

    Parameters
    ----------
    func : Callable
        The function (or method, e.g. an ``__init__``) to wrap.

    Returns
    -------
    Callable
        A wrapper that verifies dependencies, then delegates to *func*.
    """
    # Local import keeps this block self-contained; functools is stdlib.
    from functools import wraps

    @wraps(func)  # preserve __name__/__doc__/signature of the decorated func
    def _inner_func(*args, **kwargs):
        # Fail fast with a helpful error before doing any real work.
        _check_and_init_s3_deps()
        return func(*args, **kwargs)

    return _inner_func


TEST_INJECT_RETRYABLE_FAILURES = int(
Expand Down Expand Up @@ -498,6 +508,7 @@ class S3(object):
def get_root_from_config(cls, echo, create_on_absent=True):
return DATATOOLS_S3ROOT

@check_s3_deps
def __init__(
self,
tmproot: str = TEMPDIR,
Expand All @@ -508,9 +519,6 @@ def __init__(
encryption: Optional[str] = S3_SERVER_SIDE_ENCRYPTION,
**kwargs
):
if not boto_found:
raise MetaflowException("You need to install 'boto3' in order to use S3.")

if run:
# 1. use a (current) run ID with optional customizations
if DATATOOLS_S3ROOT is None:
Expand Down Expand Up @@ -875,6 +883,11 @@ def get(
`S3Object`
An S3Object corresponding to the object requested.
"""
from boto3.s3.transfer import TransferConfig

DOWNLOAD_FILE_THRESHOLD = 2 * TransferConfig().multipart_threshold
DOWNLOAD_MAX_CHUNK = 2 * 1024 * 1024 * 1024 - 1

url, r = self._url_and_range(key)
src = urlparse(url)

Expand Down
2 changes: 1 addition & 1 deletion metaflow/plugins/gcp/gs_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@


class MetaflowGSPackageError(MetaflowException):
headline = "Missing required packages google-cloud-storage google-auth"
headline = "Missing required packages 'google-cloud-storage' and 'google-auth'"
2 changes: 1 addition & 1 deletion metaflow/plugins/gcp/gs_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def _check_and_init_gs_deps():


def check_gs_deps(func):
"""The decorated function checks GS dependencies (as needed for Azure storage backend). This includes
"""The decorated function checks GS dependencies (as needed for Google Cloud storage backend). This includes
various GCP SDK packages, as well as a Python version of >=3.7
"""

Expand Down

0 comments on commit 9dc7b17

Please sign in to comment.