From e4410d6ffa5c61060750663f990b898e817557c2 Mon Sep 17 00:00:00 2001 From: MaciekEO Date: Thu, 17 Feb 2022 12:54:04 +0100 Subject: [PATCH] Fix tests (#516) --- tests/tests_algorithms/test_catboost.py | 12 +- tests/tests_algorithms/test_lightgbm.py | 1 + .../{test_eda.py => disable_eda.py} | 0 .../tests_validation/test_validator_kfold.py | 344 ++++++++--------- .../tests_validation/test_validator_split.py | 364 +++++++++--------- 5 files changed, 355 insertions(+), 366 deletions(-) rename tests/tests_preprocessing/{test_eda.py => disable_eda.py} (100%) diff --git a/tests/tests_algorithms/test_catboost.py b/tests/tests_algorithms/test_catboost.py index 2126c772..2526f2c2 100644 --- a/tests/tests_algorithms/test_catboost.py +++ b/tests/tests_algorithms/test_catboost.py @@ -36,13 +36,13 @@ def setUpClass(cls): def test_reproduce_fit(self): metric = Metric({"name": "mse"}) prev_loss = None - for _ in range(3): + for _ in range(2): model = CatBoostAlgorithm(self.params) model.fit(self.X, self.y) y_predicted = model.predict(self.X) loss = metric(self.y, y_predicted) if prev_loss is not None: - assert_almost_equal(prev_loss, loss) + assert_almost_equal(prev_loss, loss, decimal=3) prev_loss = loss def test_get_metric_name(self): @@ -79,13 +79,13 @@ def setUpClass(cls): def test_reproduce_fit(self): metric = Metric({"name": "logloss"}) prev_loss = None - for _ in range(3): + for _ in range(2): model = CatBoostAlgorithm(self.params) model.fit(self.X, self.y) y_predicted = model.predict(self.X) loss = metric(self.y, y_predicted) if prev_loss is not None: - assert_almost_equal(prev_loss, loss) + assert_almost_equal(prev_loss, loss, decimal=3) prev_loss = loss def test_fit_predict(self): @@ -97,7 +97,7 @@ def test_fit_predict(self): y_predicted = cat.predict(self.X) loss = metric(self.y, y_predicted) if loss_prev is not None: - assert_almost_equal(loss, loss_prev) + assert_almost_equal(loss, loss_prev, decimal=3) loss_prev = loss def test_copy(self): @@ -137,7 +137,7 @@ def test_save_and_load(self): y_predicted = cat2.predict(self.X) loss2 = metric(self.y, y_predicted) - assert_almost_equal(loss, loss2) + assert_almost_equal(loss, loss2, decimal=3) def test_get_metric_name(self): model = CatBoostAlgorithm(self.params) diff --git a/tests/tests_algorithms/test_lightgbm.py b/tests/tests_algorithms/test_lightgbm.py index 2a233c3b..0d637680 100644 --- a/tests/tests_algorithms/test_lightgbm.py +++ b/tests/tests_algorithms/test_lightgbm.py @@ -37,6 +37,7 @@ def setUpClass(cls): "bagging_fraction": 0.8, "bagging_freq": 1, "seed": 1, + "early_stopping_rounds": 0, } def test_reproduce_fit(self): diff --git a/tests/tests_preprocessing/test_eda.py b/tests/tests_preprocessing/disable_eda.py similarity index 100% rename from tests/tests_preprocessing/test_eda.py rename to tests/tests_preprocessing/disable_eda.py diff --git a/tests/tests_validation/test_validator_kfold.py b/tests/tests_validation/test_validator_kfold.py index 6bf97bd1..b06206bd 100644 --- a/tests/tests_validation/test_validator_kfold.py +++ b/tests/tests_validation/test_validator_kfold.py @@ -5,165 +5,117 @@ import pandas as pd from supervised.utils.utils import dump_data from supervised.validation.validator_kfold import KFoldValidator +import tempfile class KFoldValidatorTest(unittest.TestCase): - def setUp(self): - self._results_path = "/tmp/k_fold_test" - os.mkdir(self._results_path) - - def tearDown(self): - shutil.rmtree(self._results_path, ignore_errors=True) - def test_create(self): + with tempfile.TemporaryDirectory() as results_path: + data = { + "X": pd.DataFrame( + np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), columns=["a", "b"] + ), + "y": pd.DataFrame(np.array([0, 0, 1, 1]), columns=["target"]), + } + + X_path = os.path.join(results_path, "X.data") + y_path = os.path.join(results_path, "y.data") + + dump_data(X_path, data["X"]) + dump_data(y_path, data["y"]) + + params = { + "shuffle": False, + "stratify": True, + "k_folds": 2, + "results_path": results_path, + "X_path": X_path, + "y_path": y_path, + } + vl = KFoldValidator(params) + + self.assertEqual(params["k_folds"], vl.get_n_splits()) + # for train, validation in vl.split(): + for k_fold in range(vl.get_n_splits()): + train, validation = vl.get_split(k_fold) - data = { - "X": pd.DataFrame( - np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), columns=["a", "b"] - ), - "y": pd.DataFrame(np.array([0, 0, 1, 1]), columns=["target"]), - } - - X_path = os.path.join(self._results_path, "X.data") - y_path = os.path.join(self._results_path, "y.data") - - dump_data(X_path, data["X"]) - dump_data(y_path, data["y"]) - - params = { - "shuffle": False, - "stratify": True, - "k_folds": 2, - "results_path": self._results_path, - "X_path": X_path, - "y_path": y_path, - } - vl = KFoldValidator(params) - - self.assertEqual(params["k_folds"], vl.get_n_splits()) - # for train, validation in vl.split(): - for k_fold in range(vl.get_n_splits()): - train, validation = vl.get_split(k_fold) - - X_train, y_train = train.get("X"), train.get("y") - X_validation, y_validation = validation.get("X"), validation.get("y") - - self.assertEqual(X_train.shape[0], 2) - self.assertEqual(y_train.shape[0], 2) - self.assertEqual(X_validation.shape[0], 2) - self.assertEqual(y_validation.shape[0], 2) + X_train, y_train = train.get("X"), train.get("y") + X_validation, y_validation = validation.get("X"), validation.get("y") - def test_missing_target_values(self): + self.assertEqual(X_train.shape[0], 2) + self.assertEqual(y_train.shape[0], 2) + self.assertEqual(X_validation.shape[0], 2) + self.assertEqual(y_validation.shape[0], 2) - data = { - "X": pd.DataFrame( - np.array([[1, 0], [2, 1], [3, 0], [4, 1], [5, 1], [6, 1]]), - columns=["a", "b"], - ), - "y": pd.DataFrame( - np.array(["a", "b", "a", "b", np.nan, np.nan]), columns=["target"] - ), - } - - X_path = os.path.join(self._results_path, "X.data") - y_path = os.path.join(self._results_path, "y.data") - - dump_data(X_path, data["X"]) - dump_data(y_path, data["y"]) - - params = { - "shuffle": False, - "stratify": True, - "k_folds": 2, - "results_path": self._results_path, - "X_path": X_path, - "y_path": y_path, - } - vl = KFoldValidator(params) - - self.assertEqual(params["k_folds"], vl.get_n_splits()) - - for k_fold in range(vl.get_n_splits()): - train, validation = vl.get_split(k_fold) - X_train, y_train = train.get("X"), train.get("y") - X_validation, y_validation = validation.get("X"), validation.get("y") - - self.assertEqual(X_train.shape[0], 3) - self.assertEqual(y_train.shape[0], 3) - self.assertEqual(X_validation.shape[0], 3) - self.assertEqual(y_validation.shape[0], 3) + def test_missing_target_values(self): + with tempfile.TemporaryDirectory() as results_path: + data = { + "X": pd.DataFrame( + np.array([[1, 0], [2, 1], [3, 0], [4, 1], [5, 1], [6, 1]]), + columns=["a", "b"], + ), + "y": pd.DataFrame( + np.array(["a", "b", "a", "b", np.nan, np.nan]), columns=["target"] + ), + } + + X_path = os.path.join(results_path, "X.data") + y_path = os.path.join(results_path, "y.data") + + dump_data(X_path, data["X"]) + dump_data(y_path, data["y"]) + + params = { + "shuffle": False, + "stratify": True, + "k_folds": 2, + "results_path": results_path, + "X_path": X_path, + "y_path": y_path, + } + vl = KFoldValidator(params) + + self.assertEqual(params["k_folds"], vl.get_n_splits()) - def test_create_with_target_as_labels(self): + for k_fold in range(vl.get_n_splits()): + train, validation = vl.get_split(k_fold) + X_train, y_train = train.get("X"), train.get("y") + X_validation, y_validation = validation.get("X"), validation.get("y") - data = { - "X": pd.DataFrame( - np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), columns=["a", "b"] - ), - "y": pd.DataFrame(np.array(["a", "b", "a", "b"]), columns=["target"]), - } - - X_path = os.path.join(self._results_path, "X.data") - y_path = os.path.join(self._results_path, "y.data") - - dump_data(X_path, data["X"]) - dump_data(y_path, data["y"]) - - params = { - "shuffle": True, - "stratify": True, - "k_folds": 2, - "results_path": self._results_path, - "X_path": X_path, - "y_path": y_path, - } - vl = KFoldValidator(params) - - self.assertEqual(params["k_folds"], vl.get_n_splits()) - - for k_fold in range(vl.get_n_splits()): - train, validation = vl.get_split(k_fold) - X_train, y_train = train.get("X"), train.get("y") - X_validation, y_validation = validation.get("X"), validation.get("y") - - self.assertEqual(X_train.shape[0], 2) - self.assertEqual(y_train.shape[0], 2) - self.assertEqual(X_validation.shape[0], 2) - self.assertEqual(y_validation.shape[0], 2) + self.assertEqual(X_train.shape[0], 3) + self.assertEqual(y_train.shape[0], 3) + self.assertEqual(X_validation.shape[0], 3) + self.assertEqual(y_validation.shape[0], 3) - def test_repeats(self): + def test_create_with_target_as_labels(self): + with tempfile.TemporaryDirectory() as results_path: + data = { + "X": pd.DataFrame( + np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), columns=["a", "b"] + ), + "y": pd.DataFrame(np.array(["a", "b", "a", "b"]), columns=["target"]), + } + + X_path = os.path.join(results_path, "X.data") + y_path = os.path.join(results_path, "y.data") + + dump_data(X_path, data["X"]) + dump_data(y_path, data["y"]) + + params = { + "shuffle": True, + "stratify": True, + "k_folds": 2, + "results_path": results_path, + "X_path": X_path, + "y_path": y_path, + } + vl = KFoldValidator(params) + + self.assertEqual(params["k_folds"], vl.get_n_splits()) - data = { - "X": pd.DataFrame( - np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), columns=["a", "b"] - ), - "y": pd.DataFrame(np.array([0, 0, 1, 1]), columns=["target"]), - } - - X_path = os.path.join(self._results_path, "X.data") - y_path = os.path.join(self._results_path, "y.data") - - dump_data(X_path, data["X"]) - dump_data(y_path, data["y"]) - - params = { - "shuffle": True, - "stratify": False, - "k_folds": 2, - "repeats": 10, - "results_path": self._results_path, - "X_path": X_path, - "y_path": y_path, - "random_seed": 1, - } - vl = KFoldValidator(params) - - self.assertEqual(params["k_folds"], vl.get_n_splits()) - self.assertEqual(params["repeats"], vl.get_repeats()) - - for repeat in range(vl.get_repeats()): for k_fold in range(vl.get_n_splits()): - train, validation = vl.get_split(k_fold, repeat) - + train, validation = vl.get_split(k_fold) X_train, y_train = train.get("X"), train.get("y") X_validation, y_validation = validation.get("X"), validation.get("y") @@ -172,32 +124,74 @@ def test_repeats(self): self.assertEqual(X_validation.shape[0], 2) self.assertEqual(y_validation.shape[0], 2) - def test_disable_repeats_when_disabled_shuffle(self): + def test_repeats(self): + with tempfile.TemporaryDirectory() as results_path: + data = { + "X": pd.DataFrame( + np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), columns=["a", "b"] + ), + "y": pd.DataFrame(np.array([0, 0, 1, 1]), columns=["target"]), + } + + X_path = os.path.join(results_path, "X.data") + y_path = os.path.join(results_path, "y.data") + + dump_data(X_path, data["X"]) + dump_data(y_path, data["y"]) + + params = { + "shuffle": True, + "stratify": False, + "k_folds": 2, + "repeats": 10, + "results_path": results_path, + "X_path": X_path, + "y_path": y_path, + "random_seed": 1, + } + vl = KFoldValidator(params) + + self.assertEqual(params["k_folds"], vl.get_n_splits()) + self.assertEqual(params["repeats"], vl.get_repeats()) + + for repeat in range(vl.get_repeats()): + for k_fold in range(vl.get_n_splits()): + train, validation = vl.get_split(k_fold, repeat) + + X_train, y_train = train.get("X"), train.get("y") + X_validation, y_validation = validation.get("X"), validation.get("y") + + self.assertEqual(X_train.shape[0], 2) + self.assertEqual(y_train.shape[0], 2) + self.assertEqual(X_validation.shape[0], 2) + self.assertEqual(y_validation.shape[0], 2) - data = { - "X": pd.DataFrame( - np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), columns=["a", "b"] - ), - "y": pd.DataFrame(np.array([0, 0, 1, 1]), columns=["target"]), - } - - X_path = os.path.join(self._results_path, "X.data") - y_path = os.path.join(self._results_path, "y.data") - - dump_data(X_path, data["X"]) - dump_data(y_path, data["y"]) - - params = { - "shuffle": False, - "stratify": False, - "k_folds": 2, - "repeats": 10, - "results_path": self._results_path, - "X_path": X_path, - "y_path": y_path, - "random_seed": 1, - } - vl = KFoldValidator(params) - - self.assertEqual(params["k_folds"], vl.get_n_splits()) - self.assertEqual(1, vl.get_repeats()) + def test_disable_repeats_when_disabled_shuffle(self): + with tempfile.TemporaryDirectory() as results_path: + data = { + "X": pd.DataFrame( + np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), columns=["a", "b"] + ), + "y": pd.DataFrame(np.array([0, 0, 1, 1]), columns=["target"]), + } + + X_path = os.path.join(results_path, "X.data") + y_path = os.path.join(results_path, "y.data") + + dump_data(X_path, data["X"]) + dump_data(y_path, data["y"]) + + params = { + "shuffle": False, + "stratify": False, + "k_folds": 2, + "repeats": 10, + "results_path": results_path, + "X_path": X_path, + "y_path": y_path, + "random_seed": 1, + } + vl = KFoldValidator(params) + + self.assertEqual(params["k_folds"], vl.get_n_splits()) + self.assertEqual(1, vl.get_repeats()) diff --git a/tests/tests_validation/test_validator_split.py b/tests/tests_validation/test_validator_split.py index acb4850b..8246ea35 100644 --- a/tests/tests_validation/test_validator_split.py +++ b/tests/tests_validation/test_validator_split.py @@ -5,210 +5,204 @@ import pandas as pd from supervised.utils.utils import dump_data from supervised.validation.validator_split import SplitValidator +import tempfile class SplitValidatorTest(unittest.TestCase): - def setUp(self): - self._results_path = "/tmp/split_test" - os.mkdir(self._results_path) + def test_create(self): + with tempfile.TemporaryDirectory() as results_path: + data = { + "X": pd.DataFrame( + np.array( + [[0, 0], [0, 1], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0], [1, 1]] + ), + columns=["a", "b"], + ), + "y": pd.DataFrame(np.array([0, 0, 1, 0, 1, 0, 1, 1]), columns=["target"]), + } + + X_path = os.path.join(results_path, "X.data") + y_path = os.path.join(results_path, "y.data") + + dump_data(X_path, data["X"]) + dump_data(y_path, data["y"]) + + params = { + "shuffle": False, + "stratify": False, + "train_ratio": 0.5, + "results_path": results_path, + "X_path": X_path, + "y_path": y_path, + } + vl = SplitValidator(params) + + self.assertEqual(1, vl.get_n_splits()) + # for train, validation in vl.split(): + for k_fold in range(vl.get_n_splits()): + train, validation = vl.get_split(k_fold) - def tearDown(self): - shutil.rmtree(self._results_path, ignore_errors=True) + X_train, y_train = train.get("X"), train.get("y") + X_validation, y_validation = validation.get("X"), validation.get("y") - def test_create(self): + self.assertEqual(X_train.shape[0], 4) + self.assertEqual(y_train.shape[0], 4) + self.assertEqual(X_validation.shape[0], 4) + self.assertEqual(y_validation.shape[0], 4) - data = { - "X": pd.DataFrame( - np.array( - [[0, 0], [0, 1], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0], [1, 1]] + def test_missing_target_values(self): + with tempfile.TemporaryDirectory() as results_path: + data = { + "X": pd.DataFrame( + np.array([[1, 0], [2, 1], [3, 0], [4, 1], [5, 1], [6, 1]]), + columns=["a", "b"], ), - columns=["a", "b"], - ), - "y": pd.DataFrame(np.array([0, 0, 1, 0, 1, 0, 1, 1]), columns=["target"]), - } - - X_path = os.path.join(self._results_path, "X.data") - y_path = os.path.join(self._results_path, "y.data") - - dump_data(X_path, data["X"]) - dump_data(y_path, data["y"]) - - params = { - "shuffle": False, - "stratify": False, - "train_ratio": 0.5, - "results_path": self._results_path, - "X_path": X_path, - "y_path": y_path, - } - vl = SplitValidator(params) - - self.assertEqual(1, vl.get_n_splits()) - # for train, validation in vl.split(): - for k_fold in range(vl.get_n_splits()): - train, validation = vl.get_split(k_fold) - - X_train, y_train = train.get("X"), train.get("y") - X_validation, y_validation = validation.get("X"), validation.get("y") - - self.assertEqual(X_train.shape[0], 4) - self.assertEqual(y_train.shape[0], 4) - self.assertEqual(X_validation.shape[0], 4) - self.assertEqual(y_validation.shape[0], 4) + "y": pd.DataFrame( + np.array(["a", "b", np.nan, "a", "b", np.nan]), columns=["target"] + ), + } - def test_missing_target_values(self): + X_path = os.path.join(results_path, "X.data") + y_path = os.path.join(results_path, "y.data") - data = { - "X": pd.DataFrame( - np.array([[1, 0], [2, 1], [3, 0], [4, 1], [5, 1], [6, 1]]), - columns=["a", "b"], - ), - "y": pd.DataFrame( - np.array(["a", "b", np.nan, "a", "b", np.nan]), columns=["target"] - ), - } - - X_path = os.path.join(self._results_path, "X.data") - y_path = os.path.join(self._results_path, "y.data") - - dump_data(X_path, data["X"]) - dump_data(y_path, data["y"]) - - params = { - "shuffle": False, - "stratify": False, - "train_ratio": 0.5, - "results_path": self._results_path, - "X_path": X_path, - "y_path": y_path, - } - vl = SplitValidator(params) - - self.assertEqual(1, vl.get_n_splits()) - - for k_fold in range(vl.get_n_splits()): - train, validation = vl.get_split(k_fold) - X_train, y_train = train.get("X"), train.get("y") - X_validation, y_validation = validation.get("X"), validation.get("y") - - self.assertEqual(X_train.shape[0], 3) - self.assertEqual(y_train.shape[0], 3) - self.assertEqual(X_validation.shape[0], 3) - self.assertEqual(y_validation.shape[0], 3) + dump_data(X_path, data["X"]) + dump_data(y_path, data["y"]) - def test_create_with_target_as_labels(self): + params = { + "shuffle": False, + "stratify": False, + "train_ratio": 0.5, + "results_path": results_path, + "X_path": X_path, + "y_path": y_path, + } + vl = SplitValidator(params) - data = { - "X": pd.DataFrame( - np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), columns=["a", "b"] - ), - "y": pd.DataFrame(np.array(["a", "b", "a", "b"]), columns=["target"]), - } - - X_path = os.path.join(self._results_path, "X.data") - y_path = os.path.join(self._results_path, "y.data") - - dump_data(X_path, data["X"]) - dump_data(y_path, data["y"]) - - params = { - "shuffle": True, - "stratify": True, - "train_ratio": 0.5, - "results_path": self._results_path, - "X_path": X_path, - "y_path": y_path, - } - vl = SplitValidator(params) - - self.assertEqual(1, vl.get_n_splits()) - - for k_fold in range(vl.get_n_splits()): - train, validation = vl.get_split(k_fold) - X_train, y_train = train.get("X"), train.get("y") - X_validation, y_validation = validation.get("X"), validation.get("y") - - self.assertEqual(X_train.shape[0], 2) - self.assertEqual(y_train.shape[0], 2) - self.assertEqual(X_validation.shape[0], 2) - self.assertEqual(y_validation.shape[0], 2) + self.assertEqual(1, vl.get_n_splits()) - def test_repeats(self): + for k_fold in range(vl.get_n_splits()): + train, validation = vl.get_split(k_fold) + X_train, y_train = train.get("X"), train.get("y") + X_validation, y_validation = validation.get("X"), validation.get("y") + + self.assertEqual(X_train.shape[0], 3) + self.assertEqual(y_train.shape[0], 3) + self.assertEqual(X_validation.shape[0], 3) + self.assertEqual(y_validation.shape[0], 3) - data = { - "X": pd.DataFrame( - np.array( - [[0, 0], [0, 1], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0], [1, 1]] + def test_create_with_target_as_labels(self): + with tempfile.TemporaryDirectory() as results_path: + data = { + "X": pd.DataFrame( + np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), columns=["a", "b"] ), - columns=["a", "b"], - ), - "y": pd.DataFrame(np.array([0, 0, 1, 0, 1, 0, 1, 1]), columns=["target"]), - } - - X_path = os.path.join(self._results_path, "X.data") - y_path = os.path.join(self._results_path, "y.data") - - dump_data(X_path, data["X"]) - dump_data(y_path, data["y"]) - - params = { - "shuffle": True, - "stratify": False, - "train_ratio": 0.5, - "results_path": self._results_path, - "X_path": X_path, - "y_path": y_path, - "repeats": 3, - } - vl = SplitValidator(params) - - self.assertEqual(1, vl.get_n_splits()) - self.assertEqual(3, vl.get_repeats()) - - cnt = 0 - for repeat in range(vl.get_repeats()): - for k_fold in range(vl.get_n_splits()): - train, validation = vl.get_split(k_fold, repeat) + "y": pd.DataFrame(np.array(["a", "b", "a", "b"]), columns=["target"]), + } + X_path = os.path.join(results_path, "X.data") + y_path = os.path.join(results_path, "y.data") + + dump_data(X_path, data["X"]) + dump_data(y_path, data["y"]) + + params = { + "shuffle": True, + "stratify": True, + "train_ratio": 0.5, + "results_path": results_path, + "X_path": X_path, + "y_path": y_path, + } + vl = SplitValidator(params) + + self.assertEqual(1, vl.get_n_splits()) + + for k_fold in range(vl.get_n_splits()): + train, validation = vl.get_split(k_fold) X_train, y_train = train.get("X"), train.get("y") X_validation, y_validation = validation.get("X"), validation.get("y") - self.assertEqual(X_train.shape[0], 4) - self.assertEqual(y_train.shape[0], 4) - self.assertEqual(X_validation.shape[0], 4) - self.assertEqual(y_validation.shape[0], 4) - cnt += 1 + self.assertEqual(X_train.shape[0], 2) + self.assertEqual(y_train.shape[0], 2) + self.assertEqual(X_validation.shape[0], 2) + self.assertEqual(y_validation.shape[0], 2) + + def test_repeats(self): + with tempfile.TemporaryDirectory() as results_path: + data = { + "X": pd.DataFrame( + np.array( + [[0, 0], [0, 1], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0], [1, 1]] + ), + columns=["a", "b"], + ), + "y": pd.DataFrame(np.array([0, 0, 1, 0, 1, 0, 1, 1]), columns=["target"]), + } - self.assertEqual(cnt, 3) + X_path = os.path.join(results_path, "X.data") + y_path = os.path.join(results_path, "y.data") - def test_disable_repeats_when_disabled_shuffle(self): + dump_data(X_path, data["X"]) + dump_data(y_path, data["y"]) + + params = { + "shuffle": True, + "stratify": False, + "train_ratio": 0.5, + "results_path": results_path, + "X_path": X_path, + "y_path": y_path, + "repeats": 3, + } + vl = SplitValidator(params) + + self.assertEqual(1, vl.get_n_splits()) + self.assertEqual(3, vl.get_repeats()) - data = { - "X": pd.DataFrame( - np.array( - [[0, 0], [0, 1], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0], [1, 1]] + cnt = 0 + for repeat in range(vl.get_repeats()): + for k_fold in range(vl.get_n_splits()): + train, validation = vl.get_split(k_fold, repeat) + + X_train, y_train = train.get("X"), train.get("y") + X_validation, y_validation = validation.get("X"), validation.get("y") + + self.assertEqual(X_train.shape[0], 4) + self.assertEqual(y_train.shape[0], 4) + self.assertEqual(X_validation.shape[0], 4) + self.assertEqual(y_validation.shape[0], 4) + cnt += 1 + + self.assertEqual(cnt, 3) + + def test_disable_repeats_when_disabled_shuffle(self): + with tempfile.TemporaryDirectory() as results_path: + data = { + "X": pd.DataFrame( + np.array( + [[0, 0], [0, 1], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0], [1, 1]] + ), + columns=["a", "b"], ), - columns=["a", "b"], - ), - "y": pd.DataFrame(np.array([0, 0, 1, 0, 1, 0, 1, 1]), columns=["target"]), - } - - X_path = os.path.join(self._results_path, "X.data") - y_path = os.path.join(self._results_path, "y.data") - - dump_data(X_path, data["X"]) - dump_data(y_path, data["y"]) - - params = { - "shuffle": False, - "stratify": False, - "train_ratio": 0.5, - "results_path": self._results_path, - "X_path": X_path, - "y_path": y_path, - "repeats": 3, - } - vl = SplitValidator(params) - - self.assertEqual(1, vl.get_n_splits()) - self.assertEqual(1, vl.get_repeats()) + "y": pd.DataFrame(np.array([0, 0, 1, 0, 1, 0, 1, 1]), columns=["target"]), + } + + X_path = os.path.join(results_path, "X.data") + y_path = os.path.join(results_path, "y.data") + + dump_data(X_path, data["X"]) + dump_data(y_path, data["y"]) + + params = { + "shuffle": False, + "stratify": False, + "train_ratio": 0.5, + "results_path": results_path, + "X_path": X_path, + "y_path": y_path, + "repeats": 3, + } + vl = SplitValidator(params) + + self.assertEqual(1, vl.get_n_splits()) + self.assertEqual(1, vl.get_repeats())