Skip to content

Commit

Permalink
fix tests
Browse files Browse the repository at this point in the history
Signed-off-by: chenmoneygithub <chen.qian@databricks.com>
  • Loading branch information
chenmoneygithub committed Feb 23, 2024
1 parent da6f5c2 commit eaa5b0a
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 8 deletions.
9 changes: 7 additions & 2 deletions mlflow/data/__init__.py
@@ -1,7 +1,8 @@
import sys

Check failure on line 1 in mlflow/data/__init__.py

View workflow job for this annotation

GitHub Actions / lint

[*] Import block is un-sorted or un-formatted. Run `ruff --fix .` or comment `@mlflow-automation autoformat` to fix this error.
from typing import Union

from mlflow.data import dataset_registry, meta_dataset
from contextlib import suppress
from mlflow.data import dataset_registry
from mlflow.data import sources as mlflow_data_sources
from mlflow.data.dataset import Dataset
from mlflow.data.dataset_source import DatasetSource
Expand All @@ -12,6 +13,10 @@
from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
from mlflow.utils.annotations import experimental

with suppress(ImportError):
# Suppress ImportError so that mlflow-skinny, which omits optional data dependencies, can still import this module.
from mlflow.data import meta_dataset # noqa: F401


@experimental
def get_source(dataset: Union[DatasetEntity, DatasetInput, Dataset]) -> DatasetSource:
Expand Down Expand Up @@ -47,7 +52,7 @@ def get_source(dataset: Union[DatasetEntity, DatasetInput, Dataset]) -> DatasetS
return dataset_source


__all__ = ["get_source", "meta_dataset"]
__all__ = ["get_source"]


def _define_dataset_constructors_in_current_module():
Expand Down
2 changes: 1 addition & 1 deletion mlflow/data/meta_dataset.py
@@ -1,5 +1,5 @@
import json
import hashlib
import json
from typing import Any, Dict, Optional

from mlflow.data.dataset import Dataset
Expand Down
2 changes: 1 addition & 1 deletion mlflow/data/pandas_dataset.py
Expand Up @@ -70,7 +70,7 @@ def _compute_digest(self) -> str:
"""
return compute_pandas_digest(self._df)

def _to_dict(self) -> Dict[str, str]:
def to_dict(self) -> Dict[str, str]:
"""Create config dictionary for the dataset."""
schema = json.dumps({"mlflow_colspec": self.schema.to_dict()}) if self.schema else None
config = super().to_dict()
Expand Down
9 changes: 5 additions & 4 deletions tests/data/test_meta_dataset.py
@@ -1,12 +1,13 @@
import json

import pytest

from mlflow.data.http_dataset_source import HTTPDatasetSource
from mlflow.data.delta_dataset_source import DeltaDatasetSource
from mlflow.data.http_dataset_source import HTTPDatasetSource
from mlflow.data.huggingface_dataset_source import HuggingFaceDatasetSource
from mlflow.data.meta_dataset import MetaDataset
from mlflow.types.schema import Schema, ColSpec
from mlflow.types import DataType
from mlflow.types.schema import ColSpec, Schema


@pytest.mark.parametrize(
Expand All @@ -24,7 +25,7 @@ def test_create_meta_dataset_from_source(dataset_source_class, path):
json_str = dataset.to_json()
parsed_json = json.loads(json_str)

assert parsed_json["digest"] != None
assert parsed_json["digest"] is not None
assert path in parsed_json["source"]
assert parsed_json["source_type"] == dataset_source_class._get_source_type()

Expand All @@ -50,7 +51,7 @@ def test_create_meta_dataset_from_source_with_schema(dataset_source_class, path)
json_str = dataset.to_json()
parsed_json = json.loads(json_str)

assert parsed_json["digest"] != None
assert parsed_json["digest"] is not None
assert path in parsed_json["source"]
assert parsed_json["source_type"] == dataset_source_class._get_source_type()
assert json.loads(parsed_json["schema"])["mlflow_colspec"] == schema.to_dict()

0 comments on commit eaa5b0a

Please sign in to comment.