diff --git a/sktime/datasets/_data_io.py b/sktime/datasets/_data_io.py index d00c5e92c7c..a664dcdaf37 100644 --- a/sktime/datasets/_data_io.py +++ b/sktime/datasets/_data_io.py @@ -233,7 +233,10 @@ def _mkdir_if_not_exist(*path): return full_path -CLASSIF_URLS = ["https://timeseriesclassification.com/ClassificationDownloads"] +CLASSIF_URLS = [ + "https://timeseriesclassification.com/aeon-toolkit", # main mirror (UEA) + "https://github.com/sktime/sktime-datasets/raw/main/TSC", # backup mirror (sktime) +] def _load_dataset(name, split, return_X_y, return_type=None, extract_path=None): @@ -290,12 +293,15 @@ def _get_data_from(path): if extract_path is None: extract_path = os.path.join(MODULE, "local_data") + # in either case below, we need to ensure the directory exists + _mkdir_if_not_exist(extract_path) + + # check whether the dataset is already in the extract path from an earlier download if name in _list_available_datasets(extract_path): return _get_data_from(extract_path) # now we know the dataset is not in the download/cache path # so we need to download it - _mkdir_if_not_exist(extract_path) # download the dataset from CLASSIF_URLS # will try multiple mirrors if necessary diff --git a/sktime/datasets/tests/test_single_problem_loaders.py b/sktime/datasets/tests/test_single_problem_loaders.py index f90475f64ee..60c88658f3b 100644 --- a/sktime/datasets/tests/test_single_problem_loaders.py +++ b/sktime/datasets/tests/test_single_problem_loaders.py @@ -81,9 +81,6 @@ def test_load_numpy2d_multivariate_raises(loader): X, y = loader(return_type="numpy2d") -@pytest.mark.xfail( - reason="repeated upstream location failures, see 4754. xfail until fixed." -) def test_load_UEA(): """Test loading of a random subset of the UEA data, to check API.""" from sktime.datasets.tsc_dataset_names import multivariate, univariate