Enable neuron core allocation verification in the inf2 handler

pytorch · Jun 13, 2023 · ecc5e02 · ecc5e02
1 parent a64938c
commit ecc5e02
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 2 deletions.
diff --git a/examples/large_models/inferentia2/inf2_handler.py b/examples/large_models/inferentia2/inf2_handler.py
@@ -3,6 +3,7 @@
 from abc import ABC
 
 import torch
+import torch_neuronx
 import transformers
 from transformers import AutoTokenizer
 from transformers_neuronx.opt.model import OPTForSampling
@@ -42,6 +43,18 @@ def initialize(self, ctx):
 
         # allocate "tp_degree" number of neuron cores to the worker process
         os.environ["NEURON_RT_NUM_CORES"] = str(tp_degree)
+        try:
+            num_neuron_cores_available = (
+                torch_neuronx.xla_impl.data_parallel.device_count()
+            )
+            assert num_neuron_cores_available == int(tp_degree)
+        except (RuntimeError, AssertionError) as error:
+            raise RuntimeError(
+                "Required number of neuron cores for tp_degree "
+                + str(tp_degree)
+                + " are not available: "
+                + str(error)
+            )
 
         torch.manual_seed(seed)
         self.tokenizer = AutoTokenizer.from_pretrained(model_name, return_tensors="pt")

diff --git a/examples/large_models/inferentia2/requirements.txt b/examples/large_models/inferentia2/requirements.txt
@@ -1,3 +1,3 @@
-transformers
+torch-neuronx
 transformers-neuronx
-torch==1.13.1
+transformers