diff --git a/mlflow/data/__init__.py b/mlflow/data/__init__.py
index b5472f9dd1e29..368a4cbd5b084 100644
--- a/mlflow/data/__init__.py
+++ b/mlflow/data/__init__.py
@@ -1,7 +1,8 @@
 import sys
+from contextlib import suppress
 from typing import Union
 
-from mlflow.data import dataset_registry, meta_dataset
+from mlflow.data import dataset_registry
 from mlflow.data import sources as mlflow_data_sources
 from mlflow.data.dataset import Dataset
 from mlflow.data.dataset_source import DatasetSource
@@ -12,6 +13,10 @@
 from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
 from mlflow.utils.annotations import experimental
 
+with suppress(ImportError):
+    # Suppressing ImportError to pass mlflow-skinny testing.
+    from mlflow.data import meta_dataset  # noqa: F401
+
 
 @experimental
 def get_source(dataset: Union[DatasetEntity, DatasetInput, Dataset]) -> DatasetSource:
@@ -47,7 +52,7 @@ def get_source(dataset: Union[DatasetEntity, DatasetInput, Dataset]) -> DatasetS
     return dataset_source
 
 
-__all__ = ["get_source", "meta_dataset"]
+__all__ = ["get_source"]
 
 
 def _define_dataset_constructors_in_current_module():
diff --git a/mlflow/data/meta_dataset.py b/mlflow/data/meta_dataset.py
index 877bf0658c3fc..cc8f97db042fd 100644
--- a/mlflow/data/meta_dataset.py
+++ b/mlflow/data/meta_dataset.py
@@ -1,5 +1,5 @@
-import json
 import hashlib
+import json
 from typing import Any, Dict, Optional
 
 from mlflow.data.dataset import Dataset
diff --git a/mlflow/data/pandas_dataset.py b/mlflow/data/pandas_dataset.py
index 35f1f47521aaa..e56da90e91172 100644
--- a/mlflow/data/pandas_dataset.py
+++ b/mlflow/data/pandas_dataset.py
@@ -70,7 +70,7 @@ def _compute_digest(self) -> str:
         """
         return compute_pandas_digest(self._df)
 
-    def _to_dict(self) -> Dict[str, str]:
+    def to_dict(self) -> Dict[str, str]:
         """Create config dictionary for the dataset."""
         schema = json.dumps({"mlflow_colspec": self.schema.to_dict()}) if self.schema else None
         config = super().to_dict()
diff --git a/tests/data/test_meta_dataset.py b/tests/data/test_meta_dataset.py
index 76ae2e6dc6757..8d7563cc1ed34 100644
--- a/tests/data/test_meta_dataset.py
+++ b/tests/data/test_meta_dataset.py
@@ -1,12 +1,13 @@
 import json
+
 import pytest
 
-from mlflow.data.http_dataset_source import HTTPDatasetSource
 from mlflow.data.delta_dataset_source import DeltaDatasetSource
+from mlflow.data.http_dataset_source import HTTPDatasetSource
 from mlflow.data.huggingface_dataset_source import HuggingFaceDatasetSource
 from mlflow.data.meta_dataset import MetaDataset
-from mlflow.types.schema import Schema, ColSpec
 from mlflow.types import DataType
+from mlflow.types.schema import ColSpec, Schema
 
 
 @pytest.mark.parametrize(
@@ -24,7 +25,7 @@ def test_create_meta_dataset_from_source(dataset_source_class, path):
     json_str = dataset.to_json()
     parsed_json = json.loads(json_str)
 
-    assert parsed_json["digest"] != None
+    assert parsed_json["digest"] is not None
     assert path in parsed_json["source"]
     assert parsed_json["source_type"] == dataset_source_class._get_source_type()
 
@@ -50,7 +51,7 @@ def test_create_meta_dataset_from_source_with_schema(dataset_source_class, path)
     json_str = dataset.to_json()
    parsed_json = json.loads(json_str)
 
-    assert parsed_json["digest"] != None
+    assert parsed_json["digest"] is not None
     assert path in parsed_json["source"]
     assert parsed_json["source_type"] == dataset_source_class._get_source_type()
     assert json.loads(parsed_json["schema"])["mlflow_colspec"] == schema.to_dict()