ray-project · amogkam · Jun 9, 2022 · May 16, 2022 · Jun 8, 2022 · Jun 8, 2022
@@ -49,7 +49,19 @@ py_test(
 )
 
 py_test(
-    name = "transformers_example",
+    name = "transformers_example_gpu",
+    size = "large",
+    main = "examples/transformers/transformers_example.py",
+    srcs = ["examples/transformers/transformers_example.py"],
+    tags = ["team:ml", "exclusive", "tune", "gpu_only"],
+    deps = [":train_lib"],
+    args = ["--model_name_or_path=bert-base-cased", "--task_name=mrpc",
+    "--max_length=32", "--per_device_train_batch_size=64",
+    "--max_train_steps=2", "--start_local", "--num_workers=2", "--use_gpu"]
+)
+
+py_test(
+    name = "transformers_example_cpu",
     size = "large",
     main = "examples/transformers/transformers_example.py",
     srcs = ["examples/transformers/transformers_example.py"],

@@ -19,6 +19,7 @@ available_node_types:
     ray.head.default:
         min_workers: 0
         max_workers: 0
+        resources: {}
         node_config:
             InstanceType: g3.8xlarge
             ImageId: latest_dlami
@@ -33,6 +34,7 @@ available_node_types:
     ray.worker.default:
         min_workers: 3
         max_workers: 3
+        resources: {}
         node_config:
             InstanceType: g3.8xlarge
             ImageId: latest_dlami

@@ -228,10 +228,14 @@ def parse_args():
 
 
 def train_func(config: Dict[str, Any]):
+    # Accelerator reads these environment variables for GPU placement.
+    os.environ["LOCAL_RANK"] = str(ray.train.local_rank())
+    os.environ["WORLD_SIZE"] = str(ray.train.world_size())
+
     args = config["args"]
     # Initialize the accelerator. We will let the accelerator handle device
     # placement for us in this example.
-    accelerator = Accelerator()
+    accelerator = Accelerator(cpu=not args.use_gpu)
     # Make one log on every process with the configuration for debugging.
     logging.basicConfig(
         format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",