From ca5846167b7a07cc2c0beaee67da8f841a245a20 Mon Sep 17 00:00:00 2001 From: Lennart Purucker Date: Sat, 15 Apr 2023 14:00:02 +0200 Subject: [PATCH 1/8] fix task ID for Iris task --- examples/30_extended/custom_flow_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/30_extended/custom_flow_.py b/examples/30_extended/custom_flow_.py index 513d445ba..40e7d270f 100644 --- a/examples/30_extended/custom_flow_.py +++ b/examples/30_extended/custom_flow_.py @@ -124,7 +124,7 @@ OrderedDict([("oml:name", "time"), ("oml:value", 120), ("oml:component", flow_id)]), ] -task_id = 1965 # Iris Task +task_id = 1200 # Iris Task task = openml.tasks.get_task(task_id) dataset_id = task.get_dataset().dataset_id From 8c57c4debf51d7bd6179ba9b7be4480ee201e476 Mon Sep 17 00:00:00 2001 From: Lennart Purucker Date: Sat, 15 Apr 2023 14:10:32 +0200 Subject: [PATCH 2/8] update comment on homepage --- doc/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/index.rst b/doc/index.rst index b8856e83b..da48194eb 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -30,7 +30,7 @@ Example ('estimator', tree.DecisionTreeClassifier()) ] ) - # Download the OpenML task for the german credit card dataset with 10-fold + # Download the OpenML task for the pendigits dataset with 10-fold # cross-validation. task = openml.tasks.get_task(32) # Run the scikit-learn model on the task. From 524c01cd10d3b6dd7f088c8d12894247ce0f724a Mon Sep 17 00:00:00 2001 From: Lennart Purucker Date: Sun, 16 Apr 2023 14:03:36 +0200 Subject: [PATCH 3/8] added additional documentation specific to the `components` parameter. 
--- examples/30_extended/custom_flow_.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/30_extended/custom_flow_.py b/examples/30_extended/custom_flow_.py index 40e7d270f..241f3e6eb 100644 --- a/examples/30_extended/custom_flow_.py +++ b/examples/30_extended/custom_flow_.py @@ -77,6 +77,8 @@ # you can use the Random Forest Classifier flow as a *subflow*. It allows for # all hyperparameters of the Random Classifier Flow to also be specified in your pipeline flow. # +# Note: you can currently only specify one subflow as part of the components. +# # In this example, the auto-sklearn flow is a subflow: the auto-sklearn flow is entirely executed as part of this flow. # This allows people to specify auto-sklearn hyperparameters used in this flow. # In general, using a subflow is not required. @@ -87,6 +89,8 @@ autosklearn_flow = openml.flows.get_flow(9313) # auto-sklearn 0.5.1 subflow = dict( components=OrderedDict(automl_tool=autosklearn_flow), + # If you do not want to reference a subflow, you can use the following: + # components=OrderedDict(), ) #################################################################################################### From da53346c779fac1cf4ffd4c40bae5e238371aa17 Mon Sep 17 00:00:00 2001 From: Lennart Purucker Date: Mon, 17 Apr 2023 11:21:12 +0200 Subject: [PATCH 4/8] add change to progress.rst --- doc/progress.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/progress.rst b/doc/progress.rst index 6b58213e5..b3c9279b8 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -9,6 +9,7 @@ Changelog 0.13.1 ~~~~~~ + * DOC #1241 #1229 #1231: Minor documentation fixes. * ADD #1028: Add functions to delete runs, flows, datasets, and tasks (e.g., ``openml.datasets.delete_dataset``). * ADD #1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server. 
* ADD #1180: Improve the error message when the checksum of a downloaded dataset does not match the checksum provided by the API. From ab72f5bebc3f9d5ed4bb28da0dfbdfa15d2c50c8 Mon Sep 17 00:00:00 2001 From: Lennart Purucker Date: Tue, 18 Apr 2023 12:42:18 +0200 Subject: [PATCH 5/8] Fix dataframe append being deprecated by replacing it with (backwards-compatible) pd.concat --- openml/utils.py | 2 +- tests/test_utils/test_utils.py | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/openml/utils.py b/openml/utils.py index 3c2fa876f..19f77f8c6 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -283,7 +283,7 @@ def _list_all(listing_call, output_format="dict", *args, **filters): if len(result) == 0: result = new_batch else: - result = result.append(new_batch, ignore_index=True) + result = pd.concat([result, new_batch], ignore_index=True) else: # For output_format = 'dict' or 'object' result.update(new_batch) diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index a5add31c8..789727108 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -18,6 +18,16 @@ def mocked_perform_api_call(call, request_method): def test_list_all(self): openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks) + openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks, output_format="dataframe") + + def test_list_all_with_multiple_batches(self): + res = openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks, output_format="dict", + batch_size=2000) + # Verify that test server state is still valid for this test to work as intended + # -> If the number of results is less than 2000, the test can not test the batching operation. 
+ assert len(res) > 2000 + openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks, output_format="dataframe", + batch_size=2000) @unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call) def test_list_all_few_results_available(self, _perform_api_call): @@ -98,7 +108,7 @@ def test__create_cache_directory(self, config_mock): os.chmod(subdir, 0o444) config_mock.return_value = subdir with self.assertRaisesRegex( - openml.exceptions.OpenMLCacheException, - r"Cannot create cache directory", + openml.exceptions.OpenMLCacheException, + r"Cannot create cache directory", ): openml.utils._create_cache_directory("ghi") From 478b65ee2ef8ee0065f1741ee77ea7ef3f6d5a75 Mon Sep 17 00:00:00 2001 From: Lennart Purucker Date: Tue, 18 Apr 2023 12:45:38 +0200 Subject: [PATCH 6/8] fix logging example and add new changes to progress.rst --- doc/progress.rst | 2 +- examples/30_extended/configure_logging.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/progress.rst b/doc/progress.rst index b3c9279b8..d546ac4bd 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -9,7 +9,7 @@ Changelog 0.13.1 ~~~~~~ - * DOC #1241 #1229 #1231: Minor documentation fixes. + * DOC #1241 #1229 #1231: Minor documentation fixes and resolve documentation examples not working. * ADD #1028: Add functions to delete runs, flows, datasets, and tasks (e.g., ``openml.datasets.delete_dataset``). * ADD #1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server. * ADD #1180: Improve the error message when the checksum of a downloaded dataset does not match the checksum provided by the API. 
diff --git a/examples/30_extended/configure_logging.py b/examples/30_extended/configure_logging.py index 2dae4047f..3d33f1546 100644 --- a/examples/30_extended/configure_logging.py +++ b/examples/30_extended/configure_logging.py @@ -37,8 +37,8 @@ import logging -openml.config.console_log.setLevel(logging.DEBUG) -openml.config.file_log.setLevel(logging.WARNING) +openml.config.set_console_log_level(logging.DEBUG) +openml.config.set_file_log_level(logging.WARNING) openml.datasets.get_dataset("iris") # Now the log level that was previously written to file should also be shown in the console. From 0a53e7458f6fd668752b4c7784a35d3ea4c78f14 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Apr 2023 10:46:45 +0000 Subject: [PATCH 7/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_utils/test_utils.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 789727108..ce7b5eb8a 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -18,16 +18,22 @@ def mocked_perform_api_call(call, request_method): def test_list_all(self): openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks) - openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks, output_format="dataframe") + openml.utils._list_all( + listing_call=openml.tasks.functions._list_tasks, output_format="dataframe" + ) def test_list_all_with_multiple_batches(self): - res = openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks, output_format="dict", - batch_size=2000) + res = openml.utils._list_all( + listing_call=openml.tasks.functions._list_tasks, output_format="dict", batch_size=2000 + ) # Verify that test server state is still valid for this test to work as intended # -> If the number of results is less than 
2000, the test can not test the batching operation. assert len(res) > 2000 - openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks, output_format="dataframe", - batch_size=2000) + openml.utils._list_all( + listing_call=openml.tasks.functions._list_tasks, + output_format="dataframe", + batch_size=2000, + ) @unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call) def test_list_all_few_results_available(self, _perform_api_call): @@ -108,7 +114,7 @@ def test__create_cache_directory(self, config_mock): os.chmod(subdir, 0o444) config_mock.return_value = subdir with self.assertRaisesRegex( - openml.exceptions.OpenMLCacheException, - r"Cannot create cache directory", + openml.exceptions.OpenMLCacheException, + r"Cannot create cache directory", ): openml.utils._create_cache_directory("ghi") From da9f1b17446b221433123f329e2a5f6205840ff8 Mon Sep 17 00:00:00 2001 From: Lennart Purucker Date: Tue, 18 Apr 2023 13:05:33 +0200 Subject: [PATCH 8/8] fix comment too long --- tests/test_utils/test_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index ce7b5eb8a..8558d27c8 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -27,7 +27,8 @@ def test_list_all_with_multiple_batches(self): listing_call=openml.tasks.functions._list_tasks, output_format="dict", batch_size=2000 ) # Verify that test server state is still valid for this test to work as intended - # -> If the number of results is less than 2000, the test can not test the batching operation. + # -> If the number of results is less than 2000, the test can not test the + # batching operation. assert len(res) > 2000 openml.utils._list_all( listing_call=openml.tasks.functions._list_tasks,