diff --git a/openml/_api/clients/http.py b/openml/_api/clients/http.py index b5cc6c64d..27eeaac22 100644 --- a/openml/_api/clients/http.py +++ b/openml/_api/clients/http.py @@ -89,6 +89,26 @@ def _key_to_path(self, key: str) -> Path: """ return self.path.joinpath(key) + def _get_body_filename_from_response(self, response: Response) -> str: + content_type = response.headers.get("Content-Type", "").lower() + + if "application/json" in content_type: + return "body.json" + + if "text/xml" in content_type: + return "body.xml" + + return "body.txt" + + def _get_body_filename_from_path(self, path: Path) -> str: + if (path / "body.json").exists(): + return "body.json" + + if (path / "body.xml").exists(): + return "body.xml" + + return "body.txt" + def load(self, key: str) -> Response: """ Load a cached HTTP response from disk. @@ -112,31 +132,26 @@ def load(self, key: str) -> Response: """ path = self._key_to_path(key) - if not path.exists(): - raise FileNotFoundError(f"Cache entry not found: {path}") - meta_path = path / "meta.json" - headers_path = path / "headers.json" - body_path = path / "body.bin" + meta_raw = meta_path.read_bytes() if meta_path.exists() else "{}" + meta = json.loads(meta_raw) - if not (meta_path.exists() and headers_path.exists() and body_path.exists()): - raise FileNotFoundError(f"Incomplete cache at {path}") - - with meta_path.open("r", encoding="utf-8") as f: - meta = json.load(f) - - with headers_path.open("r", encoding="utf-8") as f: - headers = json.load(f) + headers_path = path / "headers.json" + headers_raw = headers_path.read_bytes() if headers_path.exists() else "{}" + headers = json.loads(headers_raw) + body_path = path / self._get_body_filename_from_path(path) + if not body_path.exists(): + raise FileNotFoundError(f"Incomplete cache at {body_path}") body = body_path.read_bytes() response = Response() - response.status_code = meta["status_code"] - response.url = meta["url"] - response.reason = meta["reason"] response.headers = headers 
response._content = body - response.encoding = meta["encoding"] + response.status_code = meta.get("status_code") + response.url = meta.get("url") + response.reason = meta.get("reason") + response.encoding = meta.get("encoding") return response @@ -160,7 +175,9 @@ def save(self, key: str, response: Response) -> None: path = self._key_to_path(key) path.mkdir(parents=True, exist_ok=True) - (path / "body.bin").write_bytes(response.content) + body_filename = self._get_body_filename_from_response(response) + with (path / body_filename).open("wb") as f: + f.write(response.content) with (path / "headers.json").open("w", encoding="utf-8") as f: json.dump(dict(response.headers), f) diff --git a/openml/_api/resources/base/resources.py b/openml/_api/resources/base/resources.py index 04e631e9e..b9461afdf 100644 --- a/openml/_api/resources/base/resources.py +++ b/openml/_api/resources/base/resources.py @@ -1,6 +1,8 @@ from __future__ import annotations +import builtins from abc import abstractmethod +from collections.abc import Iterable from typing import TYPE_CHECKING, Any from openml.enums import ResourceType @@ -9,6 +11,8 @@ if TYPE_CHECKING: from openml.evaluations import OpenMLEvaluation + from openml.flows.flow import OpenMLFlow + from openml.setups.setup import OpenMLSetup class DatasetAPI(ResourceAPI): @@ -83,3 +87,24 @@ class SetupAPI(ResourceAPI): """Abstract API interface for setup resources.""" resource_type: ResourceType = ResourceType.SETUP + + @abstractmethod + def list( + self, + limit: int, + offset: int, + *, + setup: Iterable[int] | None = None, + flow: int | None = None, + tag: str | None = None, + ) -> list[OpenMLSetup]: ... + + @abstractmethod + def get(self, setup_id: int) -> OpenMLSetup: ... + + @abstractmethod + def exists( + self, + flow: OpenMLFlow, + param_settings: builtins.list[dict[str, Any]], + ) -> int | bool: ... 
diff --git a/openml/_api/resources/setup.py b/openml/_api/resources/setup.py index 2896d3d9f..57e466410 100644 --- a/openml/_api/resources/setup.py +++ b/openml/_api/resources/setup.py @@ -1,11 +1,280 @@ from __future__ import annotations +import builtins +from collections import OrderedDict +from collections.abc import Iterable +from typing import TYPE_CHECKING, Any + +import xmltodict + +from openml.setups.setup import OpenMLParameter, OpenMLSetup + from .base import ResourceV1API, ResourceV2API, SetupAPI +if TYPE_CHECKING: + from openml.flows.flow import OpenMLFlow + class SetupV1API(ResourceV1API, SetupAPI): - """Version 1 API implementation for setup resources.""" + """V1 XML API implementation for setups.""" + + @staticmethod + def _build_url( + limit: int, + offset: int, + *, + setup: Iterable[int] | None = None, + flow: int | None = None, + tag: str | None = None, + ) -> str: + """Construct an OpenML Setup API URL with filtering parameters. + + Parameters + ---------- + The setup argument that is a list is separated from the single value + filters which are put into the kwargs. + + limit : int + offset : int + setup : list(int), optional + flow : int, optional + tag : str, optional + + Returns + ------- + str + A relative API path suitable for an OpenML HTTP request. 
+ """ + api_call = "setup/list" + if limit is not None: + api_call += f"/limit/{limit}" + if offset is not None: + api_call += f"/offset/{offset}" + if setup is not None: + api_call += f"/setup/{','.join([str(int(i)) for i in setup])}" + if flow is not None: + api_call += f"/flow/{flow}" + if tag is not None: + api_call += f"/tag/{tag}" + + return api_call + + @staticmethod + def _parse_list_xml(xml_content: str) -> builtins.list[OpenMLSetup]: + """Helper function to parse API calls which are lists of setups""" + setups_dict = xmltodict.parse(xml_content, force_list=("oml:setup",)) + openml_uri = "http://openml.org/openml" + # Minimalistic check if the XML is useful + if "oml:setups" not in setups_dict: + raise ValueError( + f'Error in return XML, does not contain "oml:setups": {setups_dict!s}', + ) + + if "@xmlns:oml" not in setups_dict["oml:setups"]: + raise ValueError( + f'Error in return XML, does not contain "oml:setups"/@xmlns:oml: {setups_dict!s}', + ) + + if setups_dict["oml:setups"]["@xmlns:oml"] != openml_uri: + raise ValueError( + "Error in return XML, value of " + '"oml:seyups"/@xmlns:oml is not ' + f'"{openml_uri}": {setups_dict!s}', + ) + + assert isinstance(setups_dict["oml:setups"]["oml:setup"], list), type( + setups_dict["oml:setups"] + ) + + return [ + SetupV1API._create_setup({"oml:setup_parameters": setup_}) + for setup_ in setups_dict["oml:setups"]["oml:setup"] + ] + + @staticmethod + def _create_setup(result_dict: dict) -> OpenMLSetup: + """Turns an API xml result into a OpenMLSetup object (or dict)""" + setup_id = int(result_dict["oml:setup_parameters"]["oml:setup_id"]) + flow_id = int(result_dict["oml:setup_parameters"]["oml:flow_id"]) + + if "oml:parameter" not in result_dict["oml:setup_parameters"]: + return OpenMLSetup(setup_id, flow_id, parameters=None) + + xml_parameters = result_dict["oml:setup_parameters"]["oml:parameter"] + if isinstance(xml_parameters, dict): + xml_parameters = [xml_parameters] + if not isinstance(xml_parameters, 
list): + raise ValueError( + f"Expected None, list or dict, received something else: {type(xml_parameters)!s}", + ) + + parameters = { + int(xml_parameter["oml:id"]): SetupV1API._create_setup_parameter_from_xml(xml_parameter) + for xml_parameter in xml_parameters + } + return OpenMLSetup(setup_id, flow_id, parameters) + + @staticmethod + def _create_setup_parameter_from_xml(result_dict: dict[str, str]) -> OpenMLParameter: + """Create an OpenMLParameter object or a dictionary from an API xml result.""" + return OpenMLParameter( + input_id=int(result_dict["oml:id"]), + flow_id=int(result_dict["oml:flow_id"]), + flow_name=result_dict["oml:flow_name"], + full_name=result_dict["oml:full_name"], + parameter_name=result_dict["oml:parameter_name"], + data_type=result_dict["oml:data_type"], + default_value=result_dict["oml:default_value"], + value=result_dict["oml:value"], + ) + + @staticmethod + def _to_dict( + flow_id: int, openml_parameter_settings: builtins.list[dict[str, Any]] + ) -> OrderedDict: + """Convert a flow ID and a list of OpenML parameter settings to + a dictionary representation that can be serialized to XML. + + Parameters + ---------- + flow_id : int + ID of the flow. + openml_parameter_settings : list[dict[str, Any]] + A list of OpenML parameter settings. + + Returns + ------- + OrderedDict + A dictionary representation of the flow ID and parameter settings. + """ + # for convenience, this function (ab)uses the run object. 
+ xml: OrderedDict = OrderedDict() + xml["oml:run"] = OrderedDict() + xml["oml:run"]["@xmlns:oml"] = "http://openml.org/openml" + xml["oml:run"]["oml:flow_id"] = flow_id + xml["oml:run"]["oml:parameter_setting"] = openml_parameter_settings + + return xml + + def list( + self, + limit: int, + offset: int, + *, + setup: Iterable[int] | None = None, + flow: int | None = None, + tag: str | None = None, + ) -> builtins.list[OpenMLSetup]: + """Perform API call `/setup/list/{filters}` + + Parameters + ---------- + The setup argument that is a list is separated from the single value + filters which are put into the kwargs. + + limit : int + offset : int + setup : list(int), optional + flow : int, optional + tag : str, optional + + Returns + ------- + list + setups that match the filters, going from id to the OpenMLSetup object. + """ + api_call = SetupV1API._build_url(limit, offset, setup=setup, flow=flow, tag=tag) + setup_response = self._http.get(api_call) + xml_content = setup_response.text + + return SetupV1API._parse_list_xml(xml_content) + + def get(self, setup_id: int) -> OpenMLSetup: + """ + Downloads the setup (configuration) description from OpenML + and returns a structured object + + Parameters + ---------- + setup_id : int + The Openml setup_id + + Returns + ------- + OpenMLSetup + An initialized OpenMLSetup object parsed from the XML + """ + url_suffix = f"setup/{setup_id}" + setup_response = self._http.get(url_suffix, enable_cache=True) + xml_content = setup_response.text + result_dict = xmltodict.parse(xml_content) + + return SetupV1API._create_setup(result_dict) + + def exists( + self, + flow: OpenMLFlow, + param_settings: builtins.list[dict[str, Any]], + ) -> int | bool: + """ + Checks whether a hyperparameter configuration already exists on the server. + + Parameters + ---------- + flow : OpenMLFlow + The openml flow object. 
Should have flow id present for the main flow + and all subflows (i.e., it should be downloaded from the server by + means of flow.get, and not instantiated locally) + + param_settings : list + A list of dicts, where each dict has the following entries: + oml:name : str: The OpenML parameter name + oml:value : mixed: A representation of the parameter value + oml:component : int: flow id to which the parameter belongs + + Returns + ------- + setup_id : int + setup id iff exists, False otherwise + """ + if flow.flow_id is None: + raise ValueError("Flow must have a flow_id") + description = xmltodict.unparse( + SetupV1API._to_dict(flow.flow_id, param_settings), pretty=True + ) + file_elements = { + "description": ("description.arff", description), + } + + api_call = "setup/exists/" + setup_response = self._http.post(api_call, files=file_elements) + xml_content = setup_response.text + result_dict = xmltodict.parse(xml_content) + + setup_id = int(result_dict["oml:setup_exists"]["oml:id"]) + return setup_id if setup_id > 0 else False class SetupV2API(ResourceV2API, SetupAPI): - """Version 2 API implementation for setup resources.""" + """V2 JSON API implementation for setups.""" + + def list( + self, + limit: int, # noqa: ARG002 + offset: int, # noqa: ARG002 + *, + setup: Iterable[int] | None = None, # noqa: ARG002 + flow: int | None = None, # noqa: ARG002 + tag: str | None = None, # noqa: ARG002 + ) -> builtins.list[OpenMLSetup]: + self._not_supported(method="list") + + def get(self, setup_id: int) -> OpenMLSetup: # noqa: ARG002 + self._not_supported(method="get") + + def exists( + self, + flow: OpenMLFlow, # noqa: ARG002 + param_settings: builtins.list[dict[str, Any]], # noqa: ARG002 + ) -> int | bool: + self._not_supported(method="exists") diff --git a/openml/setups/functions.py b/openml/setups/functions.py index a24d3a456..0b1a4f375 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -1,22 +1,20 @@ # License: BSD 3-Clause from __future__ import annotations 
-from collections import OrderedDict from collections.abc import Iterable from functools import partial from itertools import chain -from pathlib import Path -from typing import Any, Literal +from typing import TYPE_CHECKING, Any, Literal import pandas as pd -import xmltodict import openml import openml.exceptions import openml.utils from openml.flows import OpenMLFlow, flow_exists -from .setup import OpenMLParameter, OpenMLSetup +if TYPE_CHECKING: + from .setup import OpenMLSetup def setup_exists(flow: OpenMLFlow) -> int: @@ -51,50 +49,8 @@ def setup_exists(flow: OpenMLFlow) -> int: ) openml_param_settings = flow.extension.obtain_parameter_values(flow) - description = xmltodict.unparse(_to_dict(flow.flow_id, openml_param_settings), pretty=True) - file_elements = { - "description": ("description.arff", description), - } # type: openml._api_calls.FILE_ELEMENTS_TYPE - result = openml._api_calls._perform_api_call( - "/setup/exists/", - "post", - file_elements=file_elements, - ) - result_dict = xmltodict.parse(result) - setup_id = int(result_dict["oml:setup_exists"]["oml:id"]) - return setup_id if setup_id > 0 else False - - -def _get_cached_setup(setup_id: int) -> OpenMLSetup: - """Load a run from the cache. - - Parameters - ---------- - setup_id : int - ID of the setup to be loaded. - - Returns - ------- - OpenMLSetup - The loaded setup object. - - Raises - ------ - OpenMLCacheException - If the setup file for the given setup ID is not cached. 
- """ - cache_dir = Path(openml.config.get_cache_directory()) - setup_cache_dir = cache_dir / "setups" / str(setup_id) - try: - setup_file = setup_cache_dir / "description.xml" - with setup_file.open(encoding="utf8") as fh: - setup_xml = xmltodict.parse(fh.read()) - return _create_setup_from_xml(setup_xml) - except OSError as e: - raise openml.exceptions.OpenMLCacheException( - f"Setup file for setup id {setup_id} not cached", - ) from e + return openml._backend.setup.exists(flow, openml_param_settings) def get_setup(setup_id: int) -> OpenMLSetup: @@ -111,21 +67,8 @@ def get_setup(setup_id: int) -> OpenMLSetup: ------- OpenMLSetup (an initialized openml setup object) """ - setup_dir = Path(openml.config.get_cache_directory()) / "setups" / str(setup_id) - setup_dir.mkdir(exist_ok=True, parents=True) - - setup_file = setup_dir / "description.xml" - - try: - return _get_cached_setup(setup_id) - except openml.exceptions.OpenMLCacheException: - url_suffix = f"/setup/{setup_id}" - setup_xml = openml._api_calls._perform_api_call(url_suffix, "get") - with setup_file.open("w", encoding="utf8") as fh: - fh.write(setup_xml) - - result_dict = xmltodict.parse(setup_xml) - return _create_setup_from_xml(result_dict) + setup: OpenMLSetup = openml._backend.setup.get(setup_id=setup_id) + return setup def list_setups( # noqa: PLR0913 @@ -160,7 +103,7 @@ def list_setups( # noqa: PLR0913 "Invalid output format selected. 
Only 'object', or 'dataframe' applicable.", ) - listing_call = partial(_list_setups, flow=flow, tag=tag, setup=setup) + listing_call = partial(openml._backend.setup.list, flow=flow, tag=tag, setup=setup) batches = openml.utils._list_all( listing_call, batch_size=1_000, # batch size for setups is lower @@ -175,77 +118,6 @@ def list_setups( # noqa: PLR0913 return pd.DataFrame.from_records(records, index="setup_id") -def _list_setups( - limit: int, - offset: int, - *, - setup: Iterable[int] | None = None, - flow: int | None = None, - tag: str | None = None, -) -> list[OpenMLSetup]: - """Perform API call `/setup/list/{filters}` - - Parameters - ---------- - The setup argument that is a list is separated from the single value - filters which are put into the kwargs. - - limit : int - offset : int - setup : list(int), optional - flow : int, optional - tag : str, optional - - Returns - ------- - The setups that match the filters, going from id to the OpenMLSetup object. - """ - api_call = "setup/list" - if limit is not None: - api_call += f"/limit/{limit}" - if offset is not None: - api_call += f"/offset/{offset}" - if setup is not None: - api_call += f"/setup/{','.join([str(int(i)) for i in setup])}" - if flow is not None: - api_call += f"/flow/{flow}" - if tag is not None: - api_call += f"/tag/{tag}" - - return __list_setups(api_call=api_call) - - -def __list_setups(api_call: str) -> list[OpenMLSetup]: - """Helper function to parse API calls which are lists of setups""" - xml_string = openml._api_calls._perform_api_call(api_call, "get") - setups_dict = xmltodict.parse(xml_string, force_list=("oml:setup",)) - openml_uri = "http://openml.org/openml" - # Minimalistic check if the XML is useful - if "oml:setups" not in setups_dict: - raise ValueError( - f'Error in return XML, does not contain "oml:setups": {setups_dict!s}', - ) - - if "@xmlns:oml" not in setups_dict["oml:setups"]: - raise ValueError( - f'Error in return XML, does not contain "oml:setups"/@xmlns:oml: 
{setups_dict!s}', - ) - - if setups_dict["oml:setups"]["@xmlns:oml"] != openml_uri: - raise ValueError( - "Error in return XML, value of " - '"oml:seyups"/@xmlns:oml is not ' - f'"{openml_uri}": {setups_dict!s}', - ) - - assert isinstance(setups_dict["oml:setups"]["oml:setup"], list), type(setups_dict["oml:setups"]) - - return [ - _create_setup_from_xml({"oml:setup_parameters": setup_}) - for setup_ in setups_dict["oml:setups"]["oml:setup"] - ] - - def initialize_model(setup_id: int, *, strict_version: bool = True) -> Any: """ Initialized a model based on a setup_id (i.e., using the exact @@ -278,69 +150,3 @@ def initialize_model(setup_id: int, *, strict_version: bool = True) -> Any: subflow.parameters[hyperparameter.parameter_name] = hyperparameter.value return flow.extension.flow_to_model(flow, strict_version=strict_version) - - -def _to_dict(flow_id: int, openml_parameter_settings: list[dict[str, Any]]) -> OrderedDict: - """Convert a flow ID and a list of OpenML parameter settings to - a dictionary representation that can be serialized to XML. - - Parameters - ---------- - flow_id : int - ID of the flow. - openml_parameter_settings : list[dict[str, Any]] - A list of OpenML parameter settings. - - Returns - ------- - OrderedDict - A dictionary representation of the flow ID and parameter settings. - """ - # for convenience, this function (ab)uses the run object. 
- xml: OrderedDict = OrderedDict() - xml["oml:run"] = OrderedDict() - xml["oml:run"]["@xmlns:oml"] = "http://openml.org/openml" - xml["oml:run"]["oml:flow_id"] = flow_id - xml["oml:run"]["oml:parameter_setting"] = openml_parameter_settings - - return xml - - -def _create_setup_from_xml(result_dict: dict) -> OpenMLSetup: - """Turns an API xml result into a OpenMLSetup object (or dict)""" - setup_id = int(result_dict["oml:setup_parameters"]["oml:setup_id"]) - flow_id = int(result_dict["oml:setup_parameters"]["oml:flow_id"]) - - if "oml:parameter" not in result_dict["oml:setup_parameters"]: - return OpenMLSetup(setup_id, flow_id, parameters=None) - - xml_parameters = result_dict["oml:setup_parameters"]["oml:parameter"] - if isinstance(xml_parameters, dict): - parameters = { - int(xml_parameters["oml:id"]): _create_setup_parameter_from_xml(xml_parameters), - } - elif isinstance(xml_parameters, list): - parameters = { - int(xml_parameter["oml:id"]): _create_setup_parameter_from_xml(xml_parameter) - for xml_parameter in xml_parameters - } - else: - raise ValueError( - f"Expected None, list or dict, received something else: {type(xml_parameters)!s}", - ) - - return OpenMLSetup(setup_id, flow_id, parameters) - - -def _create_setup_parameter_from_xml(result_dict: dict[str, str]) -> OpenMLParameter: - """Create an OpenMLParameter object or a dictionary from an API xml result.""" - return OpenMLParameter( - input_id=int(result_dict["oml:id"]), - flow_id=int(result_dict["oml:flow_id"]), - flow_name=result_dict["oml:flow_name"], - full_name=result_dict["oml:full_name"], - parameter_name=result_dict["oml:parameter_name"], - data_type=result_dict["oml:data_type"], - default_value=result_dict["oml:default_value"], - value=result_dict["oml:value"], - ) diff --git a/tests/conftest.py b/tests/conftest.py index 35d40809d..1359e6247 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -205,7 +205,7 @@ def _expected_static_cache_state(root_dir: Path) -> list[Path]: _c_root_dir = 
root_dir / "org" / "openml" / "test" res_paths = [root_dir, _c_root_dir] - for _d in ["datasets", "tasks", "runs", "setups"]: + for _d in ["datasets", "tasks", "runs"]: res_paths.append(_c_root_dir / _d) for _id in ["-1", "2"]: @@ -221,7 +221,6 @@ def _expected_static_cache_state(root_dir: Path) -> list[Path]: res_paths.append(_c_root_dir / "datasets" / "30" / "dataset_30.pq") res_paths.append(_c_root_dir / "runs" / "1" / "description.xml") - res_paths.append(_c_root_dir / "setups" / "1" / "description.xml") for _id in ["1", "3", "1882"]: tmp_p = _c_root_dir / "tasks" / _id @@ -232,6 +231,12 @@ def _expected_static_cache_state(root_dir: Path) -> list[Path]: ] ) + res_paths.extend([ + _c_root_dir / "api" / "v1" / "xml" / "setup", + _c_root_dir / "api" / "v1" / "xml" / "setup" / "1", + _c_root_dir / "api" / "v1" / "xml" / "setup" / "1" / "body.xml", + ]) + return res_paths diff --git a/tests/files/org/openml/test/setups/1/description.xml b/tests/files/org/openml/test/api/v1/xml/setup/1/body.xml similarity index 100% rename from tests/files/org/openml/test/setups/1/description.xml rename to tests/files/org/openml/test/api/v1/xml/setup/1/body.xml diff --git a/tests/test_api/test_http.py b/tests/test_api/test_http.py index 46d834b3d..570c62e62 100644 --- a/tests/test_api/test_http.py +++ b/tests/test_api/test_http.py @@ -104,10 +104,11 @@ def test_get_with_cache_creates_cache(http_client, cache, sample_url_v1, sample_ cache_key = cache.get_key(sample_url_v1, {}) cache_path = cache._key_to_path(cache_key) + body_filename = cache._get_body_filename_from_path(cache_path) assert (cache_path / "meta.json").exists() assert (cache_path / "headers.json").exists() - assert (cache_path / "body.bin").exists() + assert (cache_path / body_filename).exists() @pytest.mark.test_server() diff --git a/tests/test_api/test_setup.py b/tests/test_api/test_setup.py new file mode 100644 index 000000000..b8418521d --- /dev/null +++ b/tests/test_api/test_setup.py @@ -0,0 +1,111 @@ +# License: 
BSD 3-Clause +from __future__ import annotations + +import pytest +import uuid +import sklearn.tree +import sklearn.naive_bayes +import openml +from openml_sklearn import SklearnExtension +from openml.testing import TestBase + + +from openml._api import SetupV1API, SetupV2API +from openml.setups.setup import OpenMLSetup +from openml.exceptions import OpenMLNotSupportedError + +def get_sentinel(): + # Create a unique prefix for the flow. Necessary because the flow is + # identified by its name and external version online. Having a unique + # name allows us to publish the same flow in each test run + sentinel = uuid.uuid4().hex[:10] + return f"TEST{sentinel}" + +@pytest.fixture +def setup_v1(http_client_v1, minio_client) -> SetupV1API: + return SetupV1API(http=http_client_v1, minio=minio_client) + +@pytest.fixture +def setup_v2(http_client_v2, minio_client) -> SetupV2API: + return SetupV2API(http=http_client_v2, minio=minio_client) + + +@pytest.mark.test_server() +def test_v1_list(setup_v1): + setups = setup_v1.list(limit=10, offset=0) + + assert isinstance(setups, list) + assert len(setups) > 0 + assert all(isinstance(s, OpenMLSetup) for s in setups) + + +@pytest.mark.test_server() +def test_v1_get(setup_v1): + setup_id = 1 + setup = setup_v1.get(setup_id) + + assert isinstance(setup, OpenMLSetup) + assert setup.setup_id == setup_id + + +@pytest.mark.sklearn() +@pytest.mark.test_server() +def test_v1_exists_nonexisting_setup(setup_v1): + """Test exists() returns False when setup doesn't exist""" + # first publish a non-existing flow + sentinel = get_sentinel() + # because of the sentinel, we can not use flows that contain subflows + dectree = sklearn.tree.DecisionTreeClassifier() + flow = SklearnExtension().model_to_flow(dectree) + flow.name = f"{sentinel}{flow.name}" + flow.publish() + TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") + openml_param_settings = 
flow.extension.obtain_parameter_values(flow) + # although the flow exists (created as of previous statement), + # we can be sure there are no setups (yet) as it was just created + # and hasn't been run + setup_id = setup_v1.exists(flow, openml_param_settings) + assert not setup_id + + +@pytest.mark.sklearn() +@pytest.mark.test_server() +def test_v1_exists_existing_setup(setup_v1): + """Test exists() returns setup_id when setup exists""" + flow =SklearnExtension().model_to_flow( + sklearn.naive_bayes.GaussianNB() + ) + flow.name = f"{get_sentinel()}{flow.name}" + flow.publish() + TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") + openml_param_settings = flow.extension.obtain_parameter_values(flow) + # now run the flow on an easy task: + task = openml.tasks.get_task(115) + run = openml.runs.run_flow_on_task(flow, task) + # spoof flow id, otherwise the sentinel is ignored + run.flow_id = flow.flow_id + run.publish() + TestBase._mark_entity_for_removal("run", run.run_id) + TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {run.run_id}") + # download the run, as it contains the right setup id + run = openml.runs.get_run(run.run_id) + # execute the function we are interested in + setup_id = setup_v1.exists(flow, openml_param_settings) + assert setup_id == run.setup_id + + +def test_v2_list(setup_v2): + with pytest.raises(OpenMLNotSupportedError): + setup_v2.list(limit=10, offset=0) + + +def test_v2_get(setup_v2): + with pytest.raises(OpenMLNotSupportedError): + setup_v2.get(1) + + +def test_v2_exists(setup_v2): + with pytest.raises(OpenMLNotSupportedError): + setup_v2.exists(flow=None, param_settings=None) diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py index 30943ea70..0735925f2 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -183,9 +183,8 @@ def 
test_setuplist_offset(self): @pytest.mark.test_server() def test_get_cached_setup(self): openml.config.set_root_cache_directory(self.static_cache_dir) - openml.setups.functions._get_cached_setup(1) - def test_get_uncached_setup(self): - openml.config.set_root_cache_directory(self.static_cache_dir) - with pytest.raises(openml.exceptions.OpenMLCacheException): - openml.setups.functions._get_cached_setup(10) + with unittest.mock.patch("requests.sessions.Session.request") as mock_request: + setup = openml.setups.get_setup(1) + mock_request.assert_not_called() + assert setup is not None