diff --git a/pyproject.toml b/pyproject.toml index 17a7ac14..550a02eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -152,8 +152,9 @@ per-file-ignores = [ [tool.mypy] mypy_path = "tsml_eval/" ignore_missing_imports = true +follow_imports = "silent" exclude = [ - "_wip", + "_wip/", # Ignore the publications symlinks and its contents "tsml_eval/publications/2023", ] diff --git a/tsml_eval/experiments/_get_clusterer.py b/tsml_eval/experiments/_get_clusterer.py index 3e52a372..5b968f85 100644 --- a/tsml_eval/experiments/_get_clusterer.py +++ b/tsml_eval/experiments/_get_clusterer.py @@ -30,7 +30,7 @@ distance_based_clusterers = [ "kmeans-euclidean", "kmeans-squared", - "kmeans-dtw", + ["kmeans-dtw", "timeserieskmeans"], "kmeans-ddtw", "kmeans-wdtw", "kmeans-wddtw", @@ -43,7 +43,7 @@ "kmeans-shape_dtw", "kmedoids-euclidean", "kmedoids-squared", - "kmedoids-dtw", + ["kmedoids-dtw", "timeserieskmedoids"], "kmedoids-ddtw", "kmedoids-wdtw", "kmedoids-wddtw", @@ -56,7 +56,7 @@ "kmedoids-shape_dtw", "clarans-euclidean", "clarans-squared", - "clarans-dtw", + ["clarans-dtw", "timeseriesclarans"], "clarans-ddtw", "clarans-wdtw", "clarans-wddtw", @@ -69,7 +69,7 @@ "clarans-shape_dtw", "clara-euclidean", "clara-squared", - "clara-dtw", + ["clara-dtw", "timeseriesclara"], "clara-ddtw", "clara-wdtw", "clara-wddtw", @@ -114,7 +114,7 @@ "kmeans-ssg-ba-msm", "kmeans-ssg-ba-adtw", "kmeans-ssg-ba-shape_dtw", - "som-dtw", + ["som-dtw", "elasticsom"], "som-ddtw", "som-wdtw", "som-wddtw", @@ -126,16 +126,8 @@ "som-adtw", "som-shape_dtw", "som-soft_dtw", - "ksc", - "kshape", - "timeserieskmeans", - "timeserieskmedoids", - "timeseriesclarans", - "timeseriesclara", - "elasticsom", - "kspectralcentroid", - "timeserieskshape", - "timeserieskernelkmeans", + ["kspectralcentroid", "ksc"], + ["timeserieskshape", "kshape"], ] feature_based_clusterers = [ ["catch22", "catch22clusterer"], @@ -423,7 +415,7 @@ def _set_clusterer_distance_based( random_state=random_state, **kwargs, ) - elif "kshape" 
in c or "timeserieskshape" in c: + elif "kshape" in c: return TimeSeriesKShape( init=init_algorithm, max_iter=50, @@ -432,7 +424,7 @@ def _set_clusterer_distance_based( random_state=random_state, **kwargs, ) - elif c == "timeserieskernelkmeans" or c == "kernelkmeans": + elif "timeserieskernelkmeans" in c: return TimeSeriesKernelKMeans( max_iter=50, n_init=10, diff --git a/tsml_eval/experiments/experiments.py b/tsml_eval/experiments/experiments.py index 8e1c224f..ac2b1e63 100644 --- a/tsml_eval/experiments/experiments.py +++ b/tsml_eval/experiments/experiments.py @@ -230,6 +230,9 @@ def run_classification_experiment( dataset_name, results_path, full_path=False, + first_line_classifier_name=( + f"{classifier_name} ({type(classifier).__name__})" + ), split="TRAIN", resample_id=resample_id, time_unit="MILLISECONDS", @@ -280,6 +283,9 @@ def run_classification_experiment( dataset_name, results_path, full_path=False, + first_line_classifier_name=( + f"{classifier_name} ({type(classifier).__name__})" + ), split="TEST", resample_id=resample_id, time_unit="MILLISECONDS", @@ -552,6 +558,7 @@ def run_regression_experiment( dataset_name, results_path, full_path=False, + first_line_regressor_name=f"{regressor_name} ({type(regressor).__name__})", split="TRAIN", resample_id=resample_id, time_unit="MILLISECONDS", @@ -597,6 +604,7 @@ def run_regression_experiment( dataset_name, results_path, full_path=False, + first_line_regressor_name=f"{regressor_name} ({type(regressor).__name__})", split="TEST", resample_id=resample_id, time_unit="MILLISECONDS", @@ -916,6 +924,7 @@ def run_clustering_experiment( dataset_name, results_path, full_path=False, + first_line_clusterer_name=f"{clusterer_name} ({type(clusterer).__name__})", split="TRAIN", resample_id=resample_id, time_unit="MILLISECONDS", @@ -960,6 +969,7 @@ def run_clustering_experiment( dataset_name, results_path, full_path=False, + first_line_clusterer_name=f"{clusterer_name} ({type(clusterer).__name__})", split="TEST", 
resample_id=resample_id, time_unit="MILLISECONDS", @@ -1197,6 +1207,7 @@ def run_forecasting_experiment( dataset_name, results_path, full_path=False, + first_line_forecaster_name=f"{forecaster_name} ({type(forecaster).__name__})", split="TEST", random_seed=random_seed, time_unit="MILLISECONDS", diff --git a/tsml_eval/experiments/tests/test_classification.py b/tsml_eval/experiments/tests/test_classification.py index e08ac75b..51275252 100644 --- a/tsml_eval/experiments/tests/test_classification.py +++ b/tsml_eval/experiments/tests/test_classification.py @@ -171,7 +171,7 @@ def test_run_classification_experiment_invalid_estimator(): def test_get_classifier_by_name(): """Test get_classifier_by_name method.""" - classifier_lists = [ + classifier_name_lists = [ _get_classifier.convolution_based_classifiers, _get_classifier.deep_learning_classifiers, _get_classifier.dictionary_based_classifiers, @@ -184,12 +184,14 @@ def test_get_classifier_by_name(): _get_classifier.vector_classifiers, ] + # filled by _check_set_method + classifier_list = [] classifier_dict = {} all_classifier_names = [] - - for classifier_list in classifier_lists: + for classifier_name_list in classifier_name_lists: _check_set_method( get_classifier_by_name, + classifier_name_list, classifier_list, classifier_dict, all_classifier_names, diff --git a/tsml_eval/experiments/tests/test_clustering.py b/tsml_eval/experiments/tests/test_clustering.py index 853e7f2b..57a1c832 100644 --- a/tsml_eval/experiments/tests/test_clustering.py +++ b/tsml_eval/experiments/tests/test_clustering.py @@ -173,7 +173,7 @@ def test_run_clustering_experiment_invalid_estimator(): def test_get_clusterer_by_name(): """Test get_clusterer_by_name method.""" - clusterer_lists = [ + clusterer_name_lists = [ _get_clusterer.deep_learning_clusterers, _get_clusterer.distance_based_clusterers, _get_clusterer.feature_based_clusterers, @@ -187,29 +187,30 @@ def test_get_clusterer_by_name(): "base_estimator", ] + clusterer_list = [] 
clusterer_dict = {} all_clusterer_names = [] - - for clusterer_list in clusterer_lists: - estimatorrs = _check_set_method( + for clusterer_name_list in clusterer_name_lists: + _check_set_method( get_clusterer_by_name, + clusterer_name_list, clusterer_list, clusterer_dict, all_clusterer_names, - return_estimator=True, ) - # Check that clusterers with estimator parameters which are likely to be - # a sub-estimator are not None so n_clusters can be set - for clusterer in estimatorrs: + # Check that clusterers with parameters which are likely to be + # a sub-estimator are not None so n_clusters can be set + for clusterers in clusterer_list: + for c in clusterers: for param_name in clusterer_non_default_params: - params = clusterer.get_params() + params = c.get_params() if param_name in params: assert params[param_name] is not None, ( f"Clusterers which have an estimator parameter i.e. " f"pipelines and deep learners must not have None as the " f"estimator. Found None for {param_name} in " - f"{clusterer.__class__.__name__}" + f"{c.__class__.__name__}" ) _check_set_method_results( @@ -230,6 +231,7 @@ def test_aeon_clusterers_available(): "ClustererPipeline", "SklearnClustererWrapper", # just missing + "TimeSeriesKernelKMeans", ] est = [e for e, _ in all_estimators(type_filter="clusterer")] diff --git a/tsml_eval/experiments/tests/test_data_transform.py b/tsml_eval/experiments/tests/test_data_transform.py index 60538976..851b366d 100644 --- a/tsml_eval/experiments/tests/test_data_transform.py +++ b/tsml_eval/experiments/tests/test_data_transform.py @@ -9,14 +9,15 @@ def test_get_data_transform_by_name(): """Test get_data_transform_by_name method.""" - transform_lists = [_get_data_transform.transformers] + transform_name_lists = [_get_data_transform.transformers] + transform_list = [] transform_dict = {} all_transform_names = [] - - for transform_list in transform_lists: + for transform_name_list in transform_name_lists: _check_set_method( get_data_transform_by_name, + 
transform_name_list, transform_list, transform_dict, all_transform_names, diff --git a/tsml_eval/experiments/tests/test_regression.py b/tsml_eval/experiments/tests/test_regression.py index 9cb25dde..9ce61e5b 100644 --- a/tsml_eval/experiments/tests/test_regression.py +++ b/tsml_eval/experiments/tests/test_regression.py @@ -170,7 +170,7 @@ def test_run_regression_experiment_invalid_estimator(): def test_get_regressor_by_name(): """Test get_regressor_by_name method.""" - regressor_lists = [ + regressor_name_lists = [ _get_regressor.convolution_based_regressors, _get_regressor.deep_learning_regressors, _get_regressor.distance_based_regressors, @@ -182,12 +182,13 @@ def test_get_regressor_by_name(): _get_regressor.vector_regressors, ] + regressor_list = [] regressor_dict = {} all_regressor_names = [] - - for regressor_list in regressor_lists: + for regressor_name_list in regressor_name_lists: _check_set_method( get_regressor_by_name, + regressor_name_list, regressor_list, regressor_dict, all_regressor_names, diff --git a/tsml_eval/publications/y2023/distance_based_clustering/tests/test_set_distance_clusterer.py b/tsml_eval/publications/y2023/distance_based_clustering/tests/test_set_distance_clusterer.py index 51b3166d..23f64e81 100644 --- a/tsml_eval/publications/y2023/distance_based_clustering/tests/test_set_distance_clusterer.py +++ b/tsml_eval/publications/y2023/distance_based_clustering/tests/test_set_distance_clusterer.py @@ -11,12 +11,13 @@ def test_set_distance_clusterer(): """Test set_distance_clusterer method.""" + clusterer_list = [] clusterer_dict = {} all_clusterer_names = [] - _check_set_method( _set_distance_clusterer, distance_based_clusterers, + clusterer_list, clusterer_dict, all_clusterer_names, ) diff --git a/tsml_eval/publications/y2023/rist_pipeline/tests/test_set_estimator.py b/tsml_eval/publications/y2023/rist_pipeline/tests/test_set_estimator.py index 78b4d776..e775bed3 100644 --- 
a/tsml_eval/publications/y2023/rist_pipeline/tests/test_set_estimator.py +++ b/tsml_eval/publications/y2023/rist_pipeline/tests/test_set_estimator.py @@ -13,12 +13,13 @@ def test_set_rist_classifier(): """Test set_rist_classifier method.""" + classifier_list = [] classifier_dict = {} all_classifier_names = [] - _check_set_method( _set_rist_classifier, rist_classifiers, + classifier_list, classifier_dict, all_classifier_names, ) @@ -38,12 +39,13 @@ def test_set_rist_classifier_invalid(): def test_set_rist_regressor(): """Test set_rist_regressors method.""" + regressor_list = [] regressor_dict = {} all_regressor_names = [] - _check_set_method( _set_rist_regressor, rist_regressors, + regressor_list, regressor_dict, all_regressor_names, ) diff --git a/tsml_eval/publications/y2023/tsc_bakeoff/tests/test_set_classifier.py b/tsml_eval/publications/y2023/tsc_bakeoff/tests/test_set_classifier.py index 2e407cc7..3f3665b2 100644 --- a/tsml_eval/publications/y2023/tsc_bakeoff/tests/test_set_classifier.py +++ b/tsml_eval/publications/y2023/tsc_bakeoff/tests/test_set_classifier.py @@ -11,12 +11,13 @@ def test_set_bakeoff_classifier(): """Test set_bakeoff_classifier method.""" + classifier_list = [] classifier_dict = {} all_classifier_names = [] - _check_set_method( _set_bakeoff_classifier, bakeoff_classifiers, + classifier_list, classifier_dict, all_classifier_names, ) diff --git a/tsml_eval/publications/y2023/tser_archive_expansion/tests/test_set_regressor.py b/tsml_eval/publications/y2023/tser_archive_expansion/tests/test_set_regressor.py index e82a8dc7..0d9151ea 100644 --- a/tsml_eval/publications/y2023/tser_archive_expansion/tests/test_set_regressor.py +++ b/tsml_eval/publications/y2023/tser_archive_expansion/tests/test_set_regressor.py @@ -11,12 +11,13 @@ def test_set_expansion_regressor(): """Test set_tser_exp_regressor method.""" + regressor_list = [] regressor_dict = {} all_regressor_names = [] - _check_set_method( _set_tser_exp_regressor, expansion_regressors, + 
regressor_list, regressor_dict, all_regressor_names, ) diff --git a/tsml_eval/testing/testing_utils.py b/tsml_eval/testing/testing_utils.py index 32b52b24..9bb28205 100644 --- a/tsml_eval/testing/testing_utils.py +++ b/tsml_eval/testing/testing_utils.py @@ -25,23 +25,25 @@ def _check_set_method( set_method, - estimator_sub_list, + estimator_name_list, + estimator_list, estimator_dict, all_estimator_names, - return_estimator=False, ): - estimators = [] - for estimator_names in estimator_sub_list: + for estimator_names in estimator_name_list: estimator_names = ( [estimator_names] if isinstance(estimator_names, str) else estimator_names ) + s_out = None for estimator_alias in estimator_names: + # no duplicate names assert ( estimator_alias not in all_estimator_names ), f"Estimator {estimator_alias} is duplicated" all_estimator_names.append(estimator_alias) + # all names should pass except for not installed soft dependencies try: out = set_method(estimator_alias) except ModuleNotFoundError as err: @@ -49,17 +51,44 @@ def _check_set_method( "optional dependency", "soft dependency", "python version", + "No module named 'xgboost'", ] - if any(s in str(err) for s in exempt_errors) or "." 
not in str(err): + if any(s in str(err) for s in exempt_errors): continue else: raise err assert out is not None, f"Estimator {estimator_alias} not found" + # data transformers can return multiple transforms if not isinstance(out, list): out = [out] + if s_out is None: + # make sure this set of names returns a unique estimator + for e in estimator_list: + if len(e) == len(out) and type(e[0]) is type(out[0]): + assert not all( + [ + str(out[i].get_params()) == str(e[i].get_params()) + for i in range(len(out)) + ] + ) + + s_out = out + estimator_list.append(out) + else: + # make sure all names in a set return the same estimators + assert len(out) == len(s_out) + assert all( + [ + str(out[i].get_params()) == str(s_out[i].get_params()) + for i in range(len(out)) + ] + ) + + # make sure output are estimators, and record if the class name matches + # an alias name for e in out: assert isinstance( e, BaseEstimator @@ -71,11 +100,6 @@ def _check_set_method( elif e_name not in estimator_dict: estimator_dict[e_name] = False - if return_estimator: - estimators.append(e) - if return_estimator: - return estimators - EXEMPT_ESTIMATOR_NAMES = [ "channelensembleregressor", diff --git a/tsml_eval/testing/tests/test_testing_utils.py b/tsml_eval/testing/tests/test_testing_utils.py index d727e9cb..f0c62974 100644 --- a/tsml_eval/testing/tests/test_testing_utils.py +++ b/tsml_eval/testing/tests/test_testing_utils.py @@ -15,6 +15,7 @@ def test_check_set_method_fail(): _check_set_method( _test_set_method_fail, ["a", "b", "c"], + [], {}, [], ) diff --git a/tsml_eval/utils/results_writing.py b/tsml_eval/utils/results_writing.py index 7c4b45e1..0f21e57b 100644 --- a/tsml_eval/utils/results_writing.py +++ b/tsml_eval/utils/results_writing.py @@ -23,6 +23,7 @@ def write_classification_results( dataset_name, file_path, full_path=True, + first_line_classifier_name=None, split=None, resample_id=None, time_unit="N/A", @@ -61,6 +62,10 @@ def write_classification_results( If True, results are 
written directly to the directory passed in file_path. If False, then a standard file structure using the classifier and dataset names is created and used to write the results file. + first_line_classifier_name : str or None, default=None + Alternative name for the classifier to be written to the file. If None, the + classifier_name is used. Useful if full_path is False and extra information is + wanted in the classifier name (i.e. an alias and class name) split : str or None, default=None Either None, 'TRAIN' or 'TEST'. Influences the result file name and first line of the file. @@ -133,6 +138,7 @@ file_path, predicted_probabilities=probabilities, full_path=full_path, + first_line_estimator_name=first_line_classifier_name, split=split, resample_id=resample_id, time_unit=time_unit, @@ -149,6 +155,7 @@ def write_regression_results( dataset_name, file_path, full_path=True, + first_line_regressor_name=None, split=None, resample_id=None, time_unit="N/A", @@ -183,6 +190,10 @@ If True, results are written directly to the directory passed in file_path. If False, then a standard file structure using the regressor and dataset names is created and used to write the results file. + first_line_regressor_name : str or None, default=None + Alternative name for the regressor to be written to the file. If None, the + regressor_name is used. Useful if full_path is False and extra information is + wanted in the regressor name (i.e. an alias and class name) split : str or None, default=None Either None, 'TRAIN' or 'TEST'. Influences the result file name and first line of the file. 
@@ -239,6 +250,7 @@ dataset_name, file_path, full_path=full_path, + first_line_estimator_name=first_line_regressor_name, split=split, resample_id=resample_id, time_unit=time_unit, @@ -256,6 +268,7 @@ def write_clustering_results( dataset_name, file_path, full_path=True, + first_line_clusterer_name=None, split=None, resample_id=None, time_unit="N/A", @@ -293,6 +306,10 @@ If True, results are written directly to the directory passed in file_path. If False, then a standard file structure using the clusterer and dataset names is created and used to write the results file. + first_line_clusterer_name : str or None, default=None + Alternative name for the clusterer to be written to the file. If None, the + clusterer_name is used. Useful if full_path is False and extra information is + wanted in the clusterer name (i.e. an alias and class name) split : str or None, default=None Either None, 'TRAIN' or 'TEST'. Influences the result file name and first line of the file. @@ -353,6 +370,7 @@ file_path, predicted_probabilities=cluster_probabilities, full_path=full_path, + first_line_estimator_name=first_line_clusterer_name, split=split, resample_id=resample_id, time_unit=time_unit, @@ -369,6 +387,7 @@ def write_forecasting_results( dataset_name, file_path, full_path=True, + first_line_forecaster_name=None, split=None, random_seed=None, time_unit="N/A", @@ -400,6 +419,10 @@ If True, results are written directly to the directory passed in file_path. If False, then a standard file structure using the forecaster and dataset names is created and used to write the results file. + first_line_forecaster_name : str or None, default=None + Alternative name for the forecaster to be written to the file. If None, the + forecaster_name is used. Useful if full_path is False and extra information is + wanted in the forecaster name (i.e. 
an alias and class name) split : str or None, default=None Either None, 'TRAIN' or 'TEST'. Influences the result file name and first line of the file. @@ -440,6 +463,7 @@ dataset_name, file_path, full_path=full_path, + first_line_estimator_name=first_line_forecaster_name, split=split, resample_id=random_seed, time_unit=time_unit, @@ -456,6 +480,7 @@ dataset_name, file_path, predicted_probabilities=None, + first_line_estimator_name=None, full_path=True, split=None, resample_id=None, @@ -488,6 +513,10 @@ If True, results are written directly to the directory passed in file_path. If False, then a standard file structure using the estimator and dataset names is created and used to write the results file. + first_line_estimator_name : str or None, default=None + Alternative name for the estimator to be written to the file. If None, the + estimator_name is used. Useful if full_path is False and extra information is + wanted in the estimator name (i.e. an alias and class name) split : str or None, default=None Either None, 'TRAIN' or 'TEST'. Influences the result file name and first line of the file. @@ -535,11 +564,14 @@ ) fname = fname.lower() if split == "" else fname + if first_line_estimator_name is None: + first_line_estimator_name = estimator_name + with open(f"{file_path}/{fname}.csv", "w") as file: # the first line of the output file is in the form of: first_line = ( f"{dataset_name}," - f"{estimator_name}," + f"{first_line_estimator_name}," f"{'No split' if split == '' else split.upper()}," f"{'None' if resample_id is None else resample_id}," f"{time_unit.upper()},"