Stabilize dataset logging APIs (#11229)
Signed-off-by: dbczumar <corey.zumar@databricks.com>
Signed-off-by: mlflow-automation <mlflow-automation@users.noreply.github.com>
Co-authored-by: mlflow-automation <mlflow-automation@users.noreply.github.com>
dbczumar and mlflow-automation committed Feb 27, 2024
1 parent e02987f · commit 8faf9c3
Showing 23 changed files with 1 addition and 89 deletions.
2 changes: 0 additions & 2 deletions mlflow/data/__init__.py
@@ -11,14 +11,12 @@
from mlflow.entities import DatasetInput
from mlflow.exceptions import MlflowException
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
from mlflow.utils.annotations import experimental

with suppress(ImportError):
    # Suppressing ImportError to pass mlflow-skinny testing.
    from mlflow.data import meta_dataset  # noqa: F401


@experimental
def get_source(dataset: Union[DatasetEntity, DatasetInput, Dataset]) -> DatasetSource:
"""Obtains the source of the specified dataset or dataset input.
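With the @experimental gate removed, mlflow.data.get_source behaves as a stable API. A minimal usage sketch (the "data.csv" path is an illustrative assumption):

import mlflow
import pandas as pd

# Log a pandas DataFrame as a tracked dataset, then recover its source.
df = pd.read_csv("data.csv")  # assumes a local data.csv exists
dataset = mlflow.data.from_pandas(df, source="data.csv")

with mlflow.start_run():
    mlflow.log_input(dataset, context="training")

# get_source accepts a Dataset, DatasetEntity, or DatasetInput.
source = mlflow.data.get_source(dataset)
print(source.to_json())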
2 changes: 0 additions & 2 deletions mlflow/data/artifact_dataset_sources.py
@@ -8,7 +8,6 @@
from mlflow.exceptions import MlflowException
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
from mlflow.store.artifact.artifact_repository_registry import get_registered_artifact_repositories
from mlflow.utils.annotations import experimental
from mlflow.utils.uri import is_local_uri


@@ -83,7 +82,6 @@ def _create_dataset_source_for_artifact_repo(scheme: str, dataset_source_name: str):

    DatasetForArtifactRepoSourceType = TypeVar(dataset_source_name)

    @experimental
    class ArtifactRepoSource(FileSystemDatasetSource):
        def __init__(self, uri: str):
            self._uri = uri
2 changes: 0 additions & 2 deletions mlflow/data/code_dataset_source.py
@@ -1,10 +1,8 @@
from typing import Any, Dict

from mlflow.data.dataset_source import DatasetSource
from mlflow.utils.annotations import experimental


@experimental
class CodeDatasetSource(DatasetSource):
    def __init__(
        self,
2 changes: 0 additions & 2 deletions mlflow/data/dataset.py
@@ -4,10 +4,8 @@

from mlflow.data.dataset_source import DatasetSource
from mlflow.entities import Dataset as DatasetEntity
from mlflow.utils.annotations import experimental


@experimental
class Dataset:
"""
Represents a dataset for use with MLflow Tracking, including the name, digest (hash),
3 changes: 0 additions & 3 deletions mlflow/data/dataset_source.py
@@ -2,10 +2,7 @@
from abc import abstractmethod
from typing import Any, Dict

from mlflow.utils.annotations import experimental


@experimental
class DatasetSource:
"""
Represents the source of a dataset used in MLflow Tracking, providing information such as
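DatasetSource is the abstract base class that the concrete sources in this commit subclass. A hedged sketch of a custom subclass; the method set mirrors MLflow's built-in sources (e.g. CodeDatasetSource), and the "demo" scheme plus the DemoDatasetSource name are purely illustrative:

from typing import Any, Dict

from mlflow.data.dataset_source import DatasetSource


class DemoDatasetSource(DatasetSource):
    def __init__(self, uri: str):
        self._uri = uri

    @staticmethod
    def _get_source_type() -> str:
        # Short identifier recorded alongside the logged dataset.
        return "demo"

    def load(self) -> str:
        # Materialize the referenced data locally; elided in this sketch.
        return self._uri

    @staticmethod
    def _can_resolve(raw_source: Any) -> bool:
        return isinstance(raw_source, str) and raw_source.startswith("demo:")

    @classmethod
    def _resolve(cls, raw_source: Any) -> "DemoDatasetSource":
        return cls(uri=raw_source)

    def to_dict(self) -> Dict[Any, Any]:
        return {"uri": self._uri}

    @classmethod
    def from_dict(cls, source_dict: Dict[Any, Any]) -> "DemoDatasetSource":
        return cls(uri=source_dict["uri"])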
2 changes: 0 additions & 2 deletions mlflow/data/delta_dataset_source.py
@@ -11,7 +11,6 @@
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
from mlflow.utils._spark_utils import _get_active_spark_session
from mlflow.utils._unity_catalog_utils import get_full_name_from_sc
from mlflow.utils.annotations import experimental
from mlflow.utils.databricks_utils import get_databricks_host_creds, is_in_databricks_runtime
from mlflow.utils.proto_json_utils import message_to_json
from mlflow.utils.rest_utils import (
@@ -30,7 +29,6 @@
_logger = logging.getLogger(__name__)


@experimental
class DeltaDatasetSource(DatasetSource):
"""
Represents the source of a dataset stored at in a delta table.
2 changes: 0 additions & 2 deletions mlflow/data/filesystem_dataset_source.py
@@ -2,10 +2,8 @@
from typing import Any, Dict

from mlflow.data.dataset_source import DatasetSource
from mlflow.utils.annotations import experimental


@experimental
class FileSystemDatasetSource(DatasetSource):
"""
Represents the source of a dataset stored on a filesystem, e.g. a local UNIX filesystem,
3 changes: 0 additions & 3 deletions mlflow/data/huggingface_dataset.py
@@ -12,7 +12,6 @@
from mlflow.protos.databricks_pb2 import INTERNAL_ERROR, INVALID_PARAMETER_VALUE
from mlflow.types import Schema
from mlflow.types.utils import _infer_schema
from mlflow.utils.annotations import experimental

_logger = logging.getLogger(__name__)

@@ -22,7 +21,6 @@
import datasets


@experimental
class HuggingFaceDataset(Dataset, PyFuncConvertibleDatasetMixin):
"""
Represents a HuggingFace dataset for use with MLflow Tracking.
@@ -174,7 +172,6 @@ def to_evaluation_dataset(self, path=None, feature_names=None) -> EvaluationDataset:
)


@experimental
def from_huggingface(
    ds,
    path: Optional[str] = None,
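from_huggingface, now stable as well, wraps a Hugging Face dataset for logging. A minimal sketch, assuming the datasets package is installed and the "rotten_tomatoes" dataset is downloadable:

import mlflow
from datasets import load_dataset

ds = load_dataset("rotten_tomatoes", split="train")
dataset = mlflow.data.from_huggingface(ds, path="rotten_tomatoes")

with mlflow.start_run():
    mlflow.log_input(dataset, context="training")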
2 changes: 0 additions & 2 deletions mlflow/data/huggingface_dataset_source.py
@@ -1,13 +1,11 @@
from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional, Sequence, Union

from mlflow.data.dataset_source import DatasetSource
from mlflow.utils.annotations import experimental

if TYPE_CHECKING:
import datasets


@experimental
class HuggingFaceDatasetSource(DatasetSource):
"""Represents the source of a Hugging Face dataset used in MLflow Tracking."""

3 changes: 0 additions & 3 deletions mlflow/data/numpy_dataset.py
@@ -12,12 +12,10 @@
from mlflow.data.schema import TensorDatasetSchema
from mlflow.models.evaluation.base import EvaluationDataset
from mlflow.types.utils import _infer_schema
from mlflow.utils.annotations import experimental

_logger = logging.getLogger(__name__)


@experimental
class NumpyDataset(Dataset, PyFuncConvertibleDatasetMixin):
"""
Represents a NumPy dataset for use with MLflow Tracking.
@@ -153,7 +151,6 @@ def to_evaluation_dataset(self, path=None, feature_names=None) -> EvaluationDataset:
)


@experimental
def from_numpy(
    features: Union[np.ndarray, Dict[str, np.ndarray]],
    source: Union[str, DatasetSource] = None,
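A minimal from_numpy sketch; the source string is an illustrative placeholder rather than a resolvable URI:

import numpy as np

import mlflow

x = np.random.uniform(size=[100, 4])
y = np.random.randint(2, size=[100])
dataset = mlflow.data.from_numpy(x, targets=y, source="synthetic/demo")

with mlflow.start_run():
    mlflow.log_input(dataset, context="training")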
2 changes: 0 additions & 2 deletions mlflow/data/pandas_dataset.py
@@ -14,7 +14,6 @@
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
from mlflow.types import Schema
from mlflow.types.utils import _infer_schema
from mlflow.utils.annotations import experimental

_logger = logging.getLogger(__name__)

@@ -162,7 +161,6 @@ def to_evaluation_dataset(self, path=None, feature_names=None) -> EvaluationDataset:
)


@experimental
def from_pandas(
    df: pd.DataFrame,
    source: Union[str, DatasetSource] = None,
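A minimal from_pandas sketch; the "label" column and "demo.csv" source are illustrative assumptions. The dataset's digest and schema are inferred from the DataFrame:

import pandas as pd

import mlflow

df = pd.DataFrame({"feature": [1.0, 2.0, 3.0], "label": [0, 1, 0]})
dataset = mlflow.data.from_pandas(df, source="demo.csv", targets="label")

with mlflow.start_run():
    mlflow.log_input(dataset, context="training")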
4 changes: 0 additions & 4 deletions mlflow/data/spark_dataset.py
@@ -14,15 +14,13 @@
from mlflow.protos.databricks_pb2 import INTERNAL_ERROR, INVALID_PARAMETER_VALUE
from mlflow.types import Schema
from mlflow.types.utils import _infer_schema
from mlflow.utils.annotations import experimental

if TYPE_CHECKING:
import pyspark

_logger = logging.getLogger(__name__)


@experimental
class SparkDataset(Dataset, PyFuncConvertibleDatasetMixin):
"""
Represents a Spark dataset (e.g. data derived from a Spark Table / file directory or Delta
@@ -198,7 +196,6 @@ def to_evaluation_dataset(self, path=None, feature_names=None) -> EvaluationDataset:
)


@experimental
def load_delta(
    path: Optional[str] = None,
    table_name: Optional[str] = None,
@@ -258,7 +255,6 @@ def load_delta(
)


@experimental
def from_spark(
df: "pyspark.sql.DataFrame",
path: Optional[str] = None,
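A hedged sketch of the two Spark entry points, assuming an active Spark session with Delta support; the table name "main.demo.table" is a placeholder:

import mlflow

# Load a Delta table by name and log it as a dataset.
dataset = mlflow.data.load_delta(table_name="main.demo.table")

with mlflow.start_run():
    mlflow.log_input(dataset, context="training")

# Alternatively, wrap an existing pyspark DataFrame:
# df = spark.table("main.demo.table")
# dataset = mlflow.data.from_spark(df, table_name="main.demo.table")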
2 changes: 0 additions & 2 deletions mlflow/data/spark_dataset_source.py
@@ -3,10 +3,8 @@
from mlflow.data.dataset_source import DatasetSource
from mlflow.exceptions import MlflowException
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
from mlflow.utils.annotations import experimental


@experimental
class SparkDatasetSource(DatasetSource):
"""
Represents the source of a dataset stored in a spark table.
2 changes: 0 additions & 2 deletions mlflow/data/tensorflow_dataset.py
@@ -15,12 +15,10 @@
from mlflow.protos.databricks_pb2 import INTERNAL_ERROR, INVALID_PARAMETER_VALUE
from mlflow.types.schema import Schema
from mlflow.types.utils import _infer_schema
from mlflow.utils.annotations import experimental

_logger = logging.getLogger(__name__)


@experimental
class TensorFlowDataset(Dataset, PyFuncConvertibleDatasetMixin):
"""
Represents a TensorFlow dataset for use with MLflow Tracking.
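Alongside the TensorFlowDataset class, the module exposes a from_tensorflow helper (not shown in this hunk). A minimal sketch, assuming TensorFlow is installed; the source string is a placeholder:

import tensorflow as tf

import mlflow

x = tf.random.uniform([100, 4])
tf_ds = tf.data.Dataset.from_tensor_slices(x)
dataset = mlflow.data.from_tensorflow(tf_ds, source="synthetic/demo")

with mlflow.start_run():
    mlflow.log_input(dataset, context="training")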
2 changes: 0 additions & 2 deletions mlflow/entities/dataset.py
@@ -2,10 +2,8 @@

from mlflow.entities._mlflow_object import _MLflowObject
from mlflow.protos.service_pb2 import Dataset as ProtoDataset
from mlflow.utils.annotations import experimental


@experimental
class Dataset(_MLflowObject):
"""Dataset object associated with an experiment."""

2 changes: 0 additions & 2 deletions mlflow/entities/dataset_input.py
@@ -4,10 +4,8 @@
from mlflow.entities.dataset import Dataset
from mlflow.entities.input_tag import InputTag
from mlflow.protos.service_pb2 import DatasetInput as ProtoDatasetInput
from mlflow.utils.annotations import experimental


@experimental
class DatasetInput(_MLflowObject):
"""DatasetInput object associated with an experiment."""

4 changes: 0 additions & 4 deletions mlflow/entities/dataset_summary.py
@@ -1,7 +1,3 @@
from mlflow.utils.annotations import experimental


@experimental
class _DatasetSummary:
"""
DatasetSummary object.
2 changes: 0 additions & 2 deletions mlflow/entities/input_tag.py
@@ -1,9 +1,7 @@
from mlflow.entities._mlflow_object import _MLflowObject
from mlflow.protos.service_pb2 import InputTag as ProtoInputTag
from mlflow.utils.annotations import experimental


@experimental
class InputTag(_MLflowObject):
"""Input tag object associated with a dataset."""

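These entity classes back the low-level tracking client. A hedged sketch of logging a dataset input directly through MlflowClient; every field value below is an illustrative placeholder:

from mlflow import MlflowClient
from mlflow.entities import Dataset, DatasetInput, InputTag

dataset = Dataset(
    name="demo-dataset",
    digest="abc123",
    source_type="local",
    source='{"uri": "/tmp/data.csv"}',
)
dataset_input = DatasetInput(
    dataset=dataset,
    # "mlflow.data.context" is the conventional context tag key.
    tags=[InputTag(key="mlflow.data.context", value="training")],
)

client = MlflowClient()
run = client.create_run(experiment_id="0")
client.log_inputs(run.info.run_id, datasets=[dataset_input])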
26 changes: 0 additions & 26 deletions mlflow/java/client/src/main/java/org/mlflow/api/proto/Service.java

Some generated files are not rendered by default.
