Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tests to ensure optional imports are optional #2116

Merged
merged 10 commits
Jun 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 21 additions & 6 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ jobs:
uses: actions/cache@v2
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ hashFiles('requirements*.txt') }}
key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt') }}
restore-keys: |
${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-
${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-

- name: Install dependencies
env:
Expand All @@ -84,7 +84,14 @@ jobs:
pip --version
python -m pip install -U pip
cmake --version
if [ $PYTORCH == "nightly" ]; then

if [ "$MARKERS" != "distributed" ]; then
# Skip distributed and hyperopt requirements to test optional imports
echo > requirements-temp && mv requirements-temp requirements_distributed.txt
echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt
fi

if [ "$PYTORCH" == "nightly" ]; then
cat requirements.txt | sed '/^torch[>=<]/d' > requirements-temp && mv requirements-temp requirements.txt
extra_index_url=https://download.pytorch.org/whl/nightly/cpu
pip install --pre torch torchtext torchvision torchaudio --extra-index-url $extra_index_url
Expand All @@ -93,17 +100,24 @@ jobs:
pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url
fi
pip install protobuf==3.20.1 # https://github.com/databrickslabs/dbx/issues/257
pip install ray==$RAY_VERSION

if [ "$MARKERS" == "distributed" ]; then
pip install ray==$RAY_VERSION
ray_expected=$(python -c "import ray; print(ray.__version__)")
fi

torch_expected=$(python -c "import torch; print(torch.__version__)")
ray_expected=$(python -c "import ray; print(ray.__version__)")

pip install --no-build-isolation --no-use-pep517 ConfigSpace # temporary fix: https://github.com/automl/ConfigSpace/issues/173
pip install '.[test]' --extra-index-url $extra_index_url
pip list

python -c "import torch; assert torch.__version__ == \"$torch_expected\", f\"torch {torch.__version__} != $torch_expected\""
python -c "import ray; assert ray.__version__ == \"$ray_expected\", f\"ray {ray.__version__} != $ray_expected\""
if [ "$MARKERS" == "distributed" ]; then
python -c "import ray; assert ray.__version__ == \"$ray_expected\", f\"ray {ray.__version__} != $ray_expected\""
else
python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\""
fi
shell: bash

- name: Install Neuropod backend
Expand All @@ -113,6 +127,7 @@ jobs:
shell: bash

- name: Reinstall Horovod if necessary
if: matrix.test-markers == 'distributed'
env:
HOROVOD_WITH_PYTORCH: 1
HOROVOD_WITHOUT_MPI: 1
Expand Down
3 changes: 1 addition & 2 deletions requirements_hyperopt.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@

# ray[default,tune]>=1.9.2,!=1.10 # TODO: remove
ray[default,tune]>=1.11.0
ray[default,tune]>=1.12.0


# required for Ray Tune Search Algorithm support for AutoML
Expand Down
9 changes: 7 additions & 2 deletions tests/integration_tests/test_class_imbalance_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,18 @@
import numpy as np
import pandas as pd
import pytest
import ray

from ludwig.api import LudwigModel
from ludwig.backend import LocalBackend
from ludwig.backend.ray import RayBackend
from tests.integration_tests.utils import create_data_set_to_use, spawn

try:
import ray

from ludwig.backend.ray import RayBackend
except ImportError:
ray = None

rs = np.random.RandomState(42)
RAY_BACKEND_CONFIG = {
"type": "ray",
Expand Down
11 changes: 0 additions & 11 deletions tests/integration_tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ def _prepare_hyperopt_data(csv_filename, config_filename):
return dataset_filename


@pytest.mark.distributed
def test_train_cli_dataset(csv_filename):
"""Test training using `ludwig train --dataset`."""
with tempfile.TemporaryDirectory() as tmpdir:
Expand All @@ -120,7 +119,6 @@ def test_train_cli_dataset(csv_filename):
_run_ludwig("train", dataset=dataset_filename, config=config_filename, output_directory=tmpdir)


@pytest.mark.distributed
def test_train_cli_training_set(csv_filename):
"""Test training using `ludwig train --training_set`."""
with tempfile.TemporaryDirectory() as tmpdir:
Expand Down Expand Up @@ -163,7 +161,6 @@ def test_train_cli_horovod(csv_filename):


@pytest.mark.skip(reason="Issue #1451: Use torchscript.")
@pytest.mark.distributed
def test_export_savedmodel_cli(csv_filename):
"""Test exporting Ludwig model to Tensorflows savedmodel format."""
with tempfile.TemporaryDirectory() as tmpdir:
Expand All @@ -178,7 +175,6 @@ def test_export_savedmodel_cli(csv_filename):


@pytest.mark.skip(reason="Issue #1451: Use torchscript.")
@pytest.mark.distributed
def test_export_neuropod_cli(csv_filename):
"""Test exporting Ludwig model to neuropod format."""
with tempfile.TemporaryDirectory() as tmpdir:
Expand All @@ -192,7 +188,6 @@ def test_export_neuropod_cli(csv_filename):
)


@pytest.mark.distributed
def test_experiment_cli(csv_filename):
"""Test experiment cli."""
with tempfile.TemporaryDirectory() as tmpdir:
Expand All @@ -201,7 +196,6 @@ def test_experiment_cli(csv_filename):
_run_ludwig("experiment", dataset=dataset_filename, config=config_filename, output_directory=tmpdir)


@pytest.mark.distributed
def test_predict_cli(csv_filename):
"""Test predict cli."""
with tempfile.TemporaryDirectory() as tmpdir:
Expand All @@ -216,7 +210,6 @@ def test_predict_cli(csv_filename):
)


@pytest.mark.distributed
def test_evaluate_cli(csv_filename):
"""Test evaluate cli."""
with tempfile.TemporaryDirectory() as tmpdir:
Expand All @@ -240,7 +233,6 @@ def test_hyperopt_cli(csv_filename):
_run_ludwig("hyperopt", dataset=dataset_filename, config=config_filename, output_directory=tmpdir)


@pytest.mark.distributed
def test_visualize_cli(csv_filename):
"""Test Ludwig 'visualize' cli."""
with tempfile.TemporaryDirectory() as tmpdir:
Expand All @@ -256,7 +248,6 @@ def test_visualize_cli(csv_filename):
)


@pytest.mark.distributed
def test_collect_summary_activations_weights_cli(csv_filename):
"""Test collect_summary cli."""
with tempfile.TemporaryDirectory() as tmpdir:
Expand All @@ -270,7 +261,6 @@ def test_collect_summary_activations_weights_cli(csv_filename):
assert "Parameters" in stdout


@pytest.mark.distributed
def test_synthesize_dataset_cli(csv_filename):
"""Test synthesize_data cli."""
with tempfile.TemporaryDirectory() as tmpdir:
Expand Down Expand Up @@ -298,7 +288,6 @@ def test_synthesize_dataset_cli(csv_filename):
)


@pytest.mark.distributed
def test_preprocess_cli(csv_filename):
"""Test preprocess `ludwig preprocess."""
with tempfile.TemporaryDirectory() as tmpdir:
Expand Down
3 changes: 0 additions & 3 deletions tests/integration_tests/test_contrib_comet.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,13 @@
import subprocess
import sys

import pytest

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logging.getLogger("ludwig").setLevel(logging.INFO)

TEST_SCRIPT = os.path.join(os.path.dirname(__file__), "scripts", "run_train_comet.py")


@pytest.mark.distributed
def test_contrib_experiment(csv_filename):
cmdline = [sys.executable, TEST_SCRIPT, "--csv-filename", csv_filename]
exit_code = subprocess.call(" ".join(cmdline), shell=True, env=os.environ.copy())
Expand Down
3 changes: 0 additions & 3 deletions tests/integration_tests/test_contrib_wandb.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,13 @@
import subprocess
import sys

import pytest

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logging.getLogger("ludwig").setLevel(logging.INFO)

TEST_SCRIPT = os.path.join(os.path.dirname(__file__), "scripts", "run_train_wandb.py")


@pytest.mark.distributed
def test_contrib_experiment(csv_filename, tmpdir):
wandb_dir = os.path.join(tmpdir, "results")
os.makedirs(wandb_dir, exist_ok=True)
Expand Down
1 change: 0 additions & 1 deletion tests/integration_tests/test_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ def run_experiment_with_encoder(encoder, csv_filename):
run_experiment(input_features, output_features, dataset=rel_path)


@pytest.mark.distributed
@pytest.mark.parametrize("encoder", HF_ENCODERS_SHORT)
def test_experiment_text_feature_HF(encoder, csv_filename):
run_experiment_with_encoder(encoder, csv_filename)
Expand Down
1 change: 0 additions & 1 deletion tests/integration_tests/test_graph_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
)


@pytest.mark.distributed
@pytest.mark.parametrize(
"output_features",
[
Expand Down
7 changes: 6 additions & 1 deletion tests/integration_tests/test_hyperopt.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from typing import Dict, Optional, Tuple

import pytest
import ray
import torch

from ludwig.constants import ACCURACY, RAY, TRAINER
Expand All @@ -29,6 +28,12 @@
from ludwig.utils.defaults import merge_with_defaults
from tests.integration_tests.utils import category_feature, generate_data, text_feature

try:
import ray
except ImportError:
ray = None


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logging.getLogger("ludwig").setLevel(logging.INFO)
Expand Down
10 changes: 8 additions & 2 deletions tests/integration_tests/test_hyperopt_ray.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,24 @@
import mlflow
import pandas as pd
import pytest
import ray
from mlflow.tracking import MlflowClient

from ludwig.constants import ACCURACY, TRAINER
from ludwig.contribs import MlflowCallback
from ludwig.hyperopt.execution import get_build_hyperopt_executor
from ludwig.hyperopt.results import RayTuneResults
from ludwig.hyperopt.run import hyperopt, update_hyperopt_params_with_defaults
from ludwig.hyperopt.sampling import get_build_hyperopt_sampler
from ludwig.utils.defaults import merge_with_defaults
from tests.integration_tests.utils import category_feature, generate_data, text_feature

try:
import ray

from ludwig.hyperopt.results import RayTuneResults
except ImportError:
ray = None


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logging.getLogger("ludwig").setLevel(logging.INFO)
Expand Down
16 changes: 11 additions & 5 deletions tests/integration_tests/test_hyperopt_ray_horovod.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,26 @@
from unittest.mock import patch

import pytest
import ray
from ray.tune.sync_client import get_sync_client

from ludwig.api import LudwigModel
from ludwig.backend.ray import RayBackend
from ludwig.callbacks import Callback
from ludwig.constants import ACCURACY, TRAINER
from ludwig.hyperopt.execution import _get_relative_checkpoints_dir_parts, RayTuneExecutor
from ludwig.hyperopt.results import RayTuneResults
from ludwig.hyperopt.run import hyperopt, update_hyperopt_params_with_defaults
from ludwig.hyperopt.sampling import get_build_hyperopt_sampler
from ludwig.utils.defaults import merge_with_defaults
from tests.integration_tests.utils import binary_feature, create_data_set_to_use, generate_data, number_feature, spawn

try:
import ray
from ray.tune.sync_client import get_sync_client

from ludwig.backend.ray import RayBackend
from ludwig.hyperopt.execution import _get_relative_checkpoints_dir_parts, RayTuneExecutor
from ludwig.hyperopt.results import RayTuneResults
except ImportError:
ray = None
RayTuneExecutor = object

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logging.getLogger("ludwig").setLevel(logging.INFO)
Expand Down
4 changes: 0 additions & 4 deletions tests/integration_tests/test_kfold_cv.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@
]


@pytest.mark.distributed
@pytest.mark.parametrize("features_to_use", FEATURES_TO_TEST)
def test_kfold_cv_cli(features_to_use: FeaturesToUse):
# k-fold cross validation cli
Expand Down Expand Up @@ -132,7 +131,6 @@ def test_kfold_cv_cli(features_to_use: FeaturesToUse):
assert key in cv_indices


@pytest.mark.distributed
def test_kfold_cv_api_from_file():
# k-fold_cross_validate api with config file
num_folds = 3
Expand Down Expand Up @@ -175,7 +173,6 @@ def test_kfold_cv_api_from_file():
assert key in kfold_split_indices


@pytest.mark.distributed
def test_kfold_cv_api_in_memory():
# k-fold_cross_validate api with in-memory config
num_folds = 3
Expand Down Expand Up @@ -231,7 +228,6 @@ def test_kfold_cv_api_in_memory():
]


@pytest.mark.distributed
@pytest.mark.parametrize("data_format", DATA_FORMATS_FOR_KFOLDS)
def test_kfold_cv_dataset_formats(data_format):
# k-fold_cross_validate api with in-memory config
Expand Down
1 change: 1 addition & 0 deletions tests/integration_tests/test_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def init_backend(backend: str):


@pytest.mark.parametrize("backend", ["local", "ray"])
@pytest.mark.distributed
def test_sample_ratio(backend, tmpdir):
num_examples = 100
sample_ratio = 0.25
Expand Down
Loading