10 changes: 5 additions & 5 deletions benchmarks/README.md
@@ -77,7 +77,7 @@ Disable autoboost selecting clock rate based on thermal, and power budget effect
Run the `experiment_runner.py` from the `pytorch` directory, which should be the
parent of the `xla` directory.

-The following example runs the alexnet benchmark on GPU through the
+The following example runs the alexnet benchmark on CPU through the
PyTorch/XLA-dynamo path and through the Inductor-dynamo path with 5 repetitions each.
The results will be stored in a JSON file (e.g. results.jsonl) in `experiment_results`.

@@ -88,7 +88,7 @@ python xla/benchmarks/experiment_runner.py \
--xla=PJRT --xla=None \
--test=eval --test=train \
--suite-name=torchbench \
---accelerator=cuda \
+--accelerator=cpu \
--output-dirname=experiment_results \
--repeat=5 \
--print-subprocess \
Expand Down Expand Up @@ -118,7 +118,7 @@ python xla/benchmarks/experiment_runner.py \
--suite-name=torchbench \
--progress-bar \
--model-config='{"model_name":"BERT_pytorch"}' \
--experiment-config='{"accelerator":"cuda","xla":"PJRT","xla_flags":null,"dynamo":"openxla","torch_xla2":null,"test":"train","keep_model_data_on_cuda":false,"enable_functionalization":false}' \
--experiment-config='{"accelerator":"cpu","xla":"PJRT","xla_flags":null,"dynamo":"openxla","torch_xla2":null,"test":"train","enable_functionalization":false}' \
--repeat 1
```
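
For reference, the `--experiment-config` value is a single JSON object, and after this change it no longer carries a `keep_model_data_on_cuda` key. A minimal decoding sketch, reusing the exact JSON from the command above; the sketch itself is illustrative and not part of the runner:

```python
# Decode the --experiment-config value shown above. The JSON string is
# copied verbatim from the command; the surrounding code is only a sketch.
import json

raw = ('{"accelerator":"cpu","xla":"PJRT","xla_flags":null,'
       '"dynamo":"openxla","torch_xla2":null,"test":"train",'
       '"enable_functionalization":false}')
experiment_config = json.loads(raw)
assert "keep_model_data_on_cuda" not in experiment_config
print(experiment_config["accelerator"])  # -> cpu
```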

@@ -135,13 +135,13 @@ works only for inference now.

```
cd pytorch
-PJRT_DEVICE=CUDA python3 new_xla/benchmarks/experiment_runner.py \
+PJRT_DEVICE=CPU python3 new_xla/benchmarks/experiment_runner.py \
--xla=PJRT \
--dynamo=openxla \
--test=eval \
--filter=BERT_pytorch$ \
--suite-name=torchbench \
---accelerator=cuda \
+--accelerator=cpu \
--progress-bar \
--output-dirname=/tmp/output \
--repeat=2 \
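Each run above appends one JSON record per experiment to the results file. A small post-processing sketch, assuming the default location from the first example; the record keys accessed here (`model`, `experiment`) are illustrative assumptions rather than a documented schema:

```python
# Scan the runner's JSONL output. The record keys below ("model",
# "experiment") are assumptions for illustration; print one raw record
# first to confirm the actual schema.
import json

with open("experiment_results/results.jsonl") as f:
    for line in f:
        record = json.loads(line)
        print(record.get("model"), record.get("experiment"))
```
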
27 changes: 7 additions & 20 deletions benchmarks/benchmark_experiment.py
@@ -20,13 +20,12 @@ def list_experiment_configs(self):

# Start with default config.
config_choices = {
"accelerator": ["cpu", "cuda", "tpu"],
"accelerator": ["cpu", "tpu"],
"xla": [None, "PJRT", "XRT"],
"xla_flags": [None],
"dynamo": [None, "inductor", "openxla"],
"torch_xla2": [None], # options only apply to torch_xla2
"test": ["eval", "train"],
"keep_model_data_on_cuda": [False],
"enable_functionalization": [False],
}

@@ -46,10 +45,6 @@ def list_experiment_configs(self):
if self._args.xla_flags:
config_choices["xla_flags"] = list(
map(parse_none_str, set(self._args.xla_flags)))
-if self._args.keep_model_data_on_cuda:
-config_choices["keep_model_data_on_cuda"] = [
-self._args.keep_model_data_on_cuda
-]
if self._args.enable_functionalization:
config_choices["enable_functionalization"] = [
self._args.enable_functionalization
@@ -85,7 +80,6 @@ def _is_available(self,
cfg_xla = experiment_config["xla"]
cfg_test = experiment_config["test"]
cfg_torch_xla2 = experiment_config["torch_xla2"]
-cfg_keep_model_data_on_cuda = experiment_config["keep_model_data_on_cuda"]

# Check that dynamo refers to an existing backend.
if cfg_dynamo is not None and cfg_dynamo not in dynamo.list_backends(
@@ -118,16 +112,16 @@
if cfg_accelerator == "tpu":
if cfg_xla is None:
return False
-elif cfg_accelerator in ("cpu", "cuda"):
+elif cfg_accelerator == "cpu":
if cfg_xla == "XRT":
return False
+elif cfg_accelerator == "cuda":
+if cfg_xla is not None:
+# PyTorch/XLA with CUDA backend is no longer supported.
+return False
else:
raise NotImplementedError

-# cfg_keep_model_data_on_cuda is only avaible when using dynamo
-if cfg_keep_model_data_on_cuda and cfg_dynamo != "openxla":
-return False

return True

def load_experiment(self,
@@ -140,15 +134,13 @@ def load_experiment(self,
test = experiment_config["test"]
batch_size = experiment_config.get("batch_size", self._args.batch_size)
torch_xla2 = experiment_config["torch_xla2"]
-keep_model_data_on_cuda = experiment_config["keep_model_data_on_cuda"]
enable_functionalization = experiment_config["enable_functionalization"]
return BenchmarkExperiment(
accelerator=accelerator,
xla=xla,
xla_flags=xla_flags,
dynamo=dynamo,
torch_xla2=torch_xla2,
-keep_model_data_on_cuda=keep_model_data_on_cuda,
test=test,
batch_size=batch_size,
enable_functionalization=enable_functionalization,
@@ -159,14 +151,12 @@ class BenchmarkExperiment:

def __init__(self, accelerator: str, xla: Optional[str],
xla_flags: Optional[str], dynamo: str, torch_xla2: bool,
-keep_model_data_on_cuda: bool, test: str, batch_size: str,
-enable_functionalization: bool):
+test: str, batch_size: str, enable_functionalization: bool):
self.accelerator = accelerator
self.xla = xla
self.xla_flags = xla_flags
self.dynamo = dynamo
self.torch_xla2 = torch_xla2
-self.keep_model_data_on_cuda = keep_model_data_on_cuda
self.test = test
self.batch_size = batch_size
self.accelerator_model = get_accelerator_model(self.accelerator)
@@ -191,8 +181,6 @@ def update_process_env(self, process_env: Dict[str, str]):
if is_xla_device_available("TPU"):
process_env["TPU_NUM_DEVICES"] = "1"
process_env["XRT_TPU_CONFIG"] = "localservice;0;localhost:51011"
-elif is_xla_device_available("CUDA"):
-process_env["GPU_NUM_DEVICES"] = "1"
elif self.xla is None:
# In non-xla CPU training experiments, an env var is still needed if an
# xla device exists, or there will be "Missing XLA configuration" error.
@@ -246,7 +234,6 @@ def to_dict(self):
d["xla_flags"] = self.xla_flags
d["dynamo"] = self.dynamo
d["torch_xla2"] = self.torch_xla2
d["keep_model_data_on_cuda"] = self.keep_model_data_on_cuda
d["test"] = self.test
d["batch_size"] = self.batch_size
d["enable_functionalization"] = self.enable_functionalization
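For context on the file above: the runner enumerates the cross product of `config_choices` and drops combinations rejected by `_is_available`. A simplified, self-contained sketch of that pattern; the real `_is_available` also checks dynamo backends, `torch_xla2`, and CLI overrides:

```python
# Enumerate-then-filter sketch mirroring list_experiment_configs /
# _is_available after this change. Illustrative only.
import itertools

config_choices = {
    "accelerator": ["cpu", "tpu"],
    "xla": [None, "PJRT", "XRT"],
    "dynamo": [None, "inductor", "openxla"],
    "test": ["eval", "train"],
}

def is_available(cfg):
    if cfg["accelerator"] == "tpu" and cfg["xla"] is None:
        return False  # TPU runs require an XLA runtime.
    if cfg["accelerator"] == "cpu" and cfg["xla"] == "XRT":
        return False  # The legacy XRT runtime is rejected on CPU.
    return True

experiments = [
    cfg
    for cfg in (dict(zip(config_choices, values))
                for values in itertools.product(*config_choices.values()))
    if is_available(cfg)
]
print(len(experiments))  # 24 of the 36 raw combinations survive.
```
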
11 changes: 1 addition & 10 deletions benchmarks/benchmark_model.py
@@ -103,7 +103,6 @@ def prepare_for_experiment(
else:
raise NotImplementedError

-keep_model_data_on_cuda = self.benchmark_experiment.keep_model_data_on_cuda
if self.benchmark_experiment.torch_xla2:
import torch_xla2.export
import torch_xla2
@@ -125,7 +124,7 @@
self.module = lambda *x: jax_func(weights, x)
self.example_inputs = move_to_device(
self.example_inputs, device, torch_xla2=True)
-elif not keep_model_data_on_cuda:
+else:
self.module = self.module.to(self.device)
self.example_inputs = move_to_device(
self.example_inputs, self.device, torch_xla2=False)
@@ -137,14 +136,6 @@
logger.info(f"Running torch.compile with opts {compilation_opts}")
self.model_iter_fn = torch.compile(self.model_iter_fn, **compilation_opts)

-if keep_model_data_on_cuda:
-
-def assert_func(t):
-assert t.device.type.lower(
-) == 'cuda', 'When keep_model_data_on_cuda is set, the input data should remain on the CUDA device.'
-
-pytree.tree_map_only(torch.Tensor, assert_func, self.example_inputs)

def pick_grad(self):
if self.benchmark_experiment.test == "eval":
return torch.no_grad()
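With `keep_model_data_on_cuda` gone, the `else:` branch above always moves the module and example inputs to the benchmark device. A rough sketch of what a pytree-based `move_to_device` helper can look like; the benchmarks' real helper may differ in signature and options:

```python
# Hedged sketch of a pytree-based device-move helper; not the benchmarks'
# actual move_to_device implementation.
import torch
from torch.utils import _pytree as pytree

def move_to_device(inputs, device):
    # Map only over tensors, leaving non-tensor leaves untouched.
    return pytree.tree_map_only(torch.Tensor, lambda t: t.to(device), inputs)
```
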
5 changes: 0 additions & 5 deletions benchmarks/experiment_runner.py
@@ -936,11 +936,6 @@ def __str__(self):
help="""Collect CUDA and CPU times per operation. This will also gather
CPU fallbacks.""",
)
-parser.add_argument(
-"--keep-model-data-on-cuda",
-action="store_true",
-help="""Whether to keep the model and data on CUDA and not to move to an XLA device. This is to be used with PyTorch/XLA dynamo. When set, PyTorch/XLA dynamo bridge move the model and data to the XLA device.""",
-)
parser.add_argument(
"--xla-flags",
type=str,