Skip to content

Commit

Permalink
[air] Fix broken release tests from head node sync deprecation (ray-project#37613)
Browse files Browse the repository at this point in the history

Signed-off-by: Justin Yu <justinvyu@anyscale.com>
Signed-off-by: e428265 <arvind.chandramouli@lmco.com>
  • Loading branch information
justinvyu authored and arvind-chandra committed Aug 31, 2023
1 parent 114455a commit adb9695
Show file tree
Hide file tree
Showing 4 changed files with 3 additions and 89 deletions.
9 changes: 0 additions & 9 deletions python/ray/train/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -246,15 +246,6 @@ py_test(
# Please keep these sorted alphabetically.
# --------------------------------------------------------------------

py_test(
name = "pytorch_pbt_failure",
size = "small",
srcs = ["tests/pytorch_pbt_failure.py"],
tags = ["team:ml", "exlusive", "no_main"],
deps = [":train_lib"],
args = ["--smoke-test"]
)

py_test(
name = "test_accelerate_trainer_gpu",
size = "large",
Expand Down
79 changes: 0 additions & 79 deletions python/ray/train/tests/pytorch_pbt_failure.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from ray import train
from ray.air import session
from ray.train.torch import TorchTrainer
-from ray.air.config import ScalingConfig
+from ray.air.config import RunConfig, ScalingConfig


def add_fake_labels(batch: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
Expand Down Expand Up @@ -108,6 +108,7 @@ def main(data_size_gb: int, num_epochs=2, num_workers=1, smoke_test: bool = Fals
scaling_config=ScalingConfig(
num_workers=num_workers, use_gpu=int(not smoke_test)
),
run_config=RunConfig(storage_path="/mnt/cluster_storage"),
)
trainer.fit()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
failure_config=FailureConfig(max_failures=-1),
checkpoint_config=CheckpointConfig(num_to_keep=10),
callbacks=[FailureInjectorCallback(time_between_checks=90), ProgressCallback()],
storage_path="/mnt/cluster_storage",
),
)

Expand Down

0 comments on commit adb9695

Please sign in to comment.