Factor pytorch device setting code (#2068)

ludwig-ai · May 27, 2022 · d3419cc
1 parent aaead85
commit d3419cc
Show file tree

Hide file tree

Showing 28 changed files with 65 additions and 34 deletions.
diff --git a/ludwig/api.py b/ludwig/api.py
@@ -88,6 +88,7 @@
 from ludwig.utils.fs_utils import makedirs, open_file, path_exists, upload_output_directory
 from ludwig.utils.misc_utils import get_file_names, get_output_directory
 from ludwig.utils.print_utils import print_boxed
+from ludwig.utils.torch_utils import get_torch_device
 
 logger = logging.getLogger(__name__)
 
@@ -1399,7 +1400,7 @@ def load_weights(
         """
         if self.backend.is_coordinator():
             weights_save_path = os.path.join(model_dir, MODEL_WEIGHTS_FILE_NAME)
-            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+            device = torch.device(get_torch_device())
             self.model.load_state_dict(torch.load(weights_save_path, map_location=device))
 
         self.backend.sync_model(self.model)

diff --git a/ludwig/backend/ray.py b/ludwig/backend/ray.py
@@ -38,7 +38,7 @@
 from ludwig.models.predictor import BasePredictor, get_output_columns, Predictor, RemotePredictor
 from ludwig.models.trainer import BaseTrainer, RemoteTrainer, TrainerConfig
 from ludwig.utils.horovod_utils import initialize_horovod
-from ludwig.utils.torch_utils import initialize_pytorch
+from ludwig.utils.torch_utils import get_torch_device, initialize_pytorch
 
 _ray112 = LooseVersion(ray.__version__) >= LooseVersion("1.12")
 import ray.train as rt  # noqa: E402
@@ -185,7 +185,7 @@ def train_fn(
             )
 
         model = ray.get(model_ref)
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+        device = get_torch_device()
         model = model.to(device)
 
         trainer = RemoteTrainer(model=model, horovod=hvd, **executable_kwargs)
@@ -228,7 +228,7 @@ def tune_batch_size_fn(
             training_set_metadata,
         )
 
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+        device = get_torch_device()
         model = model.to(device)
 
         trainer = RemoteTrainer(model=model, horovod=hvd, **executable_kwargs)
@@ -261,7 +261,7 @@ def tune_learning_rate_fn(
             training_set_metadata,
         )
 
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+        device = get_torch_device()
         model = model.to(device)
 
         trainer = RemoteTrainer(model=model, horovod=hvd, **executable_kwargs)
@@ -530,7 +530,7 @@ def eval_fn(
         )
 
         model = ray.get(model_ref)
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+        device = get_torch_device()
         model = model.to(device)
 
         predictor = RemotePredictor(model=model, horovod=hvd, **predictor_kwargs)
@@ -659,7 +659,7 @@ def get_batch_infer_model(
         class BatchInferModel:
             def __init__(self):
                 model = ray.get(model_ref)
-                device = "cuda" if torch.cuda.is_available() else "cpu"
+                device = get_torch_device()
                 self.model = model.to(device)
 
                 self.output_columns = output_columns

diff --git a/ludwig/models/predictor.py b/ludwig/models/predictor.py
@@ -25,6 +25,7 @@
 from ludwig.utils.horovod_utils import return_first
 from ludwig.utils.print_utils import repr_ordered_dict
 from ludwig.utils.strings_utils import make_safe_filename
+from ludwig.utils.torch_utils import get_torch_device
 
 EXCLUDE_PRED_SET = {LOGITS, LAST_HIDDEN}
 SKIP_EVAL_METRICS = {"confusion_matrix", "roc_curve"}
@@ -68,7 +69,7 @@ def __init__(self, model: ECD, batch_size=128, horovod=None, **kwargs):
         self._batch_size = batch_size
         self._horovod = horovod
 
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = get_torch_device()
         self.model = model.to(self.device)
 
     def batch_predict(

diff --git a/ludwig/models/trainer.py b/ludwig/models/trainer.py
@@ -55,6 +55,7 @@
 from ludwig.utils.math_utils import exponential_decay, learning_rate_warmup, learning_rate_warmup_distributed
 from ludwig.utils.metric_utils import get_metric_names, TrainerMetric
 from ludwig.utils.misc_utils import set_random_seed
+from ludwig.utils.torch_utils import get_torch_device
 from ludwig.utils.trainer_utils import (
     get_final_steps_per_checkpoint,
     get_new_progress_tracker,
@@ -212,7 +213,7 @@ def __init__(
         self.callbacks = callbacks or []
         self.device = device
         if self.device is None:
-            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+            self.device = get_torch_device()
 
         self.model = model
         self.model = self.model.to(self.device)

diff --git a/ludwig/modules/embedding_modules.py b/ludwig/modules/embedding_modules.py
@@ -21,11 +21,11 @@
 from ludwig.constants import TYPE
 from ludwig.modules.initializer_modules import get_initializer
 from ludwig.utils.data_utils import load_pretrained_embeddings
-from ludwig.utils.torch_utils import LudwigModule
+from ludwig.utils.torch_utils import get_torch_device, LudwigModule
 
 logger = logging.getLogger(__name__)
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 def embedding_matrix(

diff --git a/ludwig/utils/torch_utils.py b/ludwig/utils/torch_utils.py
@@ -12,7 +12,13 @@
 from ludwig.utils.strings_utils import SpecialSymbol
 
 _TORCH_INIT_PARAMS: Optional[Tuple] = None
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+
+def get_torch_device():
+    return "cuda" if torch.cuda.is_available() else "cpu"
+
+
+DEVICE = get_torch_device()
 
 
 def sequence_length_2D(sequence: torch.Tensor) -> torch.Tensor:

diff --git a/tests/integration_tests/test_collect.py b/tests/integration_tests/test_collect.py
@@ -22,9 +22,10 @@
 from ludwig.api import LudwigModel
 from ludwig.collect import collect_activations, collect_weights, print_model_summary
 from ludwig.constants import TRAINER
+from ludwig.utils.torch_utils import get_torch_device
 from tests.integration_tests.utils import category_feature, ENCODERS, generate_data, sequence_feature, spawn
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 def _prepare_data(csv_filename):

diff --git a/tests/integration_tests/test_regularizers.py b/tests/integration_tests/test_regularizers.py
@@ -9,6 +9,7 @@
 from ludwig.constants import TRAINER
 from ludwig.data.preprocessing import preprocess_for_training
 from ludwig.utils.data_utils import read_csv
+from ludwig.utils.torch_utils import get_torch_device
 from tests.integration_tests.utils import (
     binary_feature,
     category_feature,
@@ -21,7 +22,7 @@
     set_feature,
 )
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 BATCH_SIZE = 32
 RANDOM_SEED = 42
 IMAGE_DIR = tempfile.mkdtemp()

diff --git a/tests/ludwig/combiners/test_combiners.py b/tests/ludwig/combiners/test_combiners.py
@@ -26,12 +26,13 @@
     TransformerCombinerConfig,
 )
 from ludwig.schema.utils import load_config
+from ludwig.utils.torch_utils import get_torch_device
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 logging.getLogger("ludwig").setLevel(logging.INFO)
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 BATCH_SIZE = 16
 SEQ_SIZE = 12
 HIDDEN_SIZE = 24

diff --git a/tests/ludwig/encoders/test_bag_encoders.py b/tests/ludwig/encoders/test_bag_encoders.py
@@ -4,8 +4,9 @@
 import torch
 
 from ludwig.encoders.bag_encoders import BagEmbedWeightedEncoder
+from ludwig.utils.torch_utils import get_torch_device
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 @pytest.mark.parametrize("vocab", [["a", "b", "c", "d", "e", "f", "g", "h"]])

diff --git a/tests/ludwig/encoders/test_category_encoders.py b/tests/ludwig/encoders/test_category_encoders.py
@@ -4,8 +4,9 @@
 import torch
 
 from ludwig.encoders.category_encoders import CategoricalEmbedEncoder, CategoricalSparseEncoder
+from ludwig.utils.torch_utils import get_torch_device
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 @pytest.mark.parametrize("vocab", [["red", "orange", "yellow", "green", "blue", "violet"], ["a", "b", "c"]])

diff --git a/tests/ludwig/encoders/test_date_encoders.py b/tests/ludwig/encoders/test_date_encoders.py
@@ -3,10 +3,11 @@
 import torch
 
 from ludwig.encoders.date_encoders import DateEmbed, DateWave
+from ludwig.utils.torch_utils import get_torch_device
 
 logger = logging.getLogger(__name__)
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 def test_date_embed():

diff --git a/tests/ludwig/encoders/test_h3_encoders.py b/tests/ludwig/encoders/test_h3_encoders.py
@@ -3,9 +3,10 @@
 import torch
 
 from ludwig.encoders import h3_encoders
+from ludwig.utils.torch_utils import get_torch_device
 
 logger = logging.getLogger(__name__)
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 def test_h3_embed():

diff --git a/tests/ludwig/encoders/test_sequence_encoders.py b/tests/ludwig/encoders/test_sequence_encoders.py
@@ -13,8 +13,9 @@
     StackedRNN,
     StackedTransformer,
 )
+from ludwig.utils.torch_utils import get_torch_device
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 @pytest.mark.parametrize("reduce_output", ["mean", "avg", "max", "last", "concat", "attention", None])

diff --git a/tests/ludwig/encoders/test_set_encoders.py b/tests/ludwig/encoders/test_set_encoders.py
@@ -4,8 +4,9 @@
 import torch
 
 from ludwig.encoders.set_encoders import SetSparseEncoder
+from ludwig.utils.torch_utils import get_torch_device
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 @pytest.mark.parametrize("vocab", [["a", "b", "c", "d", "e", "f", "g", "h"]])

diff --git a/tests/ludwig/features/test_audio_feature.py b/tests/ludwig/features/test_audio_feature.py
@@ -9,6 +9,7 @@
 from ludwig.backend import LOCAL_BACKEND
 from ludwig.constants import BACKFILL, PROC_COLUMN
 from ludwig.features.audio_feature import AudioFeatureMixin, AudioInputFeature
+from ludwig.utils.torch_utils import get_torch_device
 from tests.integration_tests.utils import audio_feature, category_feature, generate_data
 
 BATCH_SIZE = 2
@@ -17,7 +18,7 @@
 
 CHARS = ascii_uppercase + ascii_lowercase + digits
 VOCAB = ["".join(choice(CHARS) for _ in range(2)) for _ in range(256)]
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 @pytest.mark.parametrize("encoder", ["rnn", "stacked_cnn", "parallel_cnn", "stacked_parallel_cnn", "rnn", "cnnrnn"])

diff --git a/tests/ludwig/features/test_bag_feature.py b/tests/ludwig/features/test_bag_feature.py
@@ -6,6 +6,7 @@
 import torch
 
 from ludwig.features.bag_feature import BagInputFeature
+from ludwig.utils.torch_utils import get_torch_device
 
 BATCH_SIZE = 2
 SEQ_SIZE = 20
@@ -14,7 +15,7 @@
 
 CHARS = ascii_uppercase + ascii_lowercase + digits
 VOCAB = ["".join(choice(CHARS) for _ in range(2)) for _ in range(256)]
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 @pytest.fixture(scope="module")

diff --git a/tests/ludwig/features/test_binary_feature.py b/tests/ludwig/features/test_binary_feature.py
@@ -4,10 +4,11 @@
 import torch
 
 from ludwig.features.binary_feature import BinaryInputFeature, BinaryOutputFeature
+from ludwig.utils.torch_utils import get_torch_device
 
 BATCH_SIZE = 2
 BINARY_W_SIZE = 1
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 @pytest.fixture(scope="module")

diff --git a/tests/ludwig/features/test_category_feature.py b/tests/ludwig/features/test_category_feature.py
@@ -6,9 +6,10 @@
 
 from ludwig.features.category_feature import CategoryInputFeature
 from ludwig.models.ecd import build_single_input
+from ludwig.utils.torch_utils import get_torch_device
 
 BATCH_SIZE = 2
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 @pytest.fixture(scope="module")

diff --git a/tests/ludwig/features/test_number_feature.py b/tests/ludwig/features/test_number_feature.py
@@ -6,9 +6,10 @@
 
 from ludwig.features.number_feature import NumberInputFeature
 from ludwig.models.ecd import build_single_input
+from ludwig.utils.torch_utils import get_torch_device
 
 BATCH_SIZE = 2
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 @pytest.fixture(scope="module")

diff --git a/tests/ludwig/features/test_sequence_features.py b/tests/ludwig/features/test_sequence_features.py
@@ -8,9 +8,10 @@
 from ludwig.constants import LAST_HIDDEN, LOGITS
 from ludwig.features.sequence_feature import _SequencePreprocessing, SequenceInputFeature, SequenceOutputFeature
 from ludwig.features.text_feature import TextInputFeature, TextOutputFeature
+from ludwig.utils.torch_utils import get_torch_device
 from tests.integration_tests.utils import ENCODERS, sequence_feature
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 BATCH_SIZE = 8
 SEQ_SIZE = 6
 VOCAB_SIZE = 64

diff --git a/tests/ludwig/features/test_set_feature.py b/tests/ludwig/features/test_set_feature.py
@@ -6,9 +6,10 @@
 
 from ludwig.features.set_feature import SetInputFeature
 from ludwig.models.ecd import build_single_input
+from ludwig.utils.torch_utils import get_torch_device
 
 BATCH_SIZE = 2
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 @pytest.fixture(scope="module")

diff --git a/tests/ludwig/features/test_timeseries_feature.py b/tests/ludwig/features/test_timeseries_feature.py
@@ -4,12 +4,13 @@
 import torch
 
 from ludwig.features.timeseries_feature import TimeseriesInputFeature
+from ludwig.utils.torch_utils import get_torch_device
 
 SEQ_SIZE = 2
 TIMESERIES_W_SIZE = 1
 MAX_LEN = 7
 EMBEDDING_SIZE = 5
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 @pytest.fixture(scope="module")

diff --git a/tests/ludwig/modules/test_embedding_modules.py b/tests/ludwig/modules/test_embedding_modules.py
@@ -4,8 +4,9 @@
 import torch
 
 from ludwig.modules.embedding_modules import Embed, EmbedSequence, EmbedSet, EmbedWeighted, TokenAndPositionEmbedding
+from ludwig.utils.torch_utils import get_torch_device
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 @pytest.mark.parametrize("vocab", [["a", "b", "c"]])

diff --git a/tests/ludwig/modules/test_encoder.py b/tests/ludwig/modules/test_encoder.py
@@ -29,9 +29,10 @@
     StackedParallelCNN,
     StackedRNN,
 )
+from ludwig.utils.torch_utils import get_torch_device
 
 DROPOUT = 0.5
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 def create_encoder(encoder_type, **encoder_kwargs):

diff --git a/tests/ludwig/modules/test_fully_connected_modules.py b/tests/ludwig/modules/test_fully_connected_modules.py
@@ -4,9 +4,10 @@
 import torch
 
 from ludwig.modules.fully_connected_modules import FCLayer, FCStack
+from ludwig.utils.torch_utils import get_torch_device
 
 BATCH_SIZE = 2
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = get_torch_device()
 
 
 @pytest.mark.parametrize("input_size", [2, 3])