
Commit 6b1344e

Do not fail on lack of default precision set. (#6139)
I discovered that some models from the suite do not have a default precision set, so instead of failing the script we just log the case and do nothing, as no additional machinery should run for Inductor anyway. Additionally, I wrapped the exceptions in ValueError so the log message is not polluted with info about str not inheriting from the Exception class. @cota, note that this needs to be hooked "somewhere". Not sure where, as there was a revert in #6134, but in general it can safely be done prior to moving the model to the device.
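One possible hook point, per the note above, is inside set_up, just before the model is moved to the device. This is a minimal sketch of the ordering, not what this commit wires in; the names load_benchmark, self.test, and self.device are assumptions for illustration, not the actual attributes in benchmarks/torchbench_model.py:

  def set_up(self):
    benchmark = self.load_benchmark()  # hypothetical loader returning the torchbench benchmark
    # Set XLA_USE_FP16 / XLA_USE_BF16 before any tensor reaches the XLA device.
    self.apply_default_precision_config(self.test, benchmark)
    module, example_inputs = benchmark.get_module()
    self.module = module.to(self.device)  # safe: the env flags are already in place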
1 parent a2f80e4 commit 6b1344e

File tree

1 file changed: +16 -3 lines changed


benchmarks/torchbench_model.py

Lines changed: 16 additions & 3 deletions
@@ -204,23 +204,36 @@ def set_up(self):
     gc.collect()

   def apply_default_precision_config(self, test, benchmark):
+    """
+    Apply default precision config to XLA, if present.
+
+    Whenever a model has a default precision for cuda set
+    we need to set proper environment flags so XLA catches
+    the requird precision.
+
+    This function is a workaround. Proper solution requires
+    changes to the PT/XLA bridge so that the input shape
+    is properly inferred after issuing converts to `torch.nn.Module`.
+    """
     if test == "eval" and hasattr(benchmark, 'DEFAULT_EVAL_CUDA_PRECISION'):
       precision = benchmark.DEFAULT_EVAL_CUDA_PRECISION
     elif test == "train" and hasattr(benchmark, 'DEFAULT_TRAIN_CUDA_PRECISION'):
       precision = benchmark.DEFAULT_TRAIN_CUDA_PRECISION
     else:
-      raise f"Unkown test type {test}!"
+      logger.warning("No default precision set. No patching needed.")
+      return

     if precision == "fp16":
       os.environ['XLA_USE_FP16'] = '1'
     elif precision == "amp":
-      raise f"AMP for PT/XLA:GPU is not implemented yet for torchbench models"
+      raise ValueError(
+          f"AMP for PT/XLA:GPU is not implemented yet for torchbench models")
     elif precision == "bf16":
       os.environ['XLA_USE_BF16'] = '1'
     elif precision == "fp32":
       logger.warning("Sticking with the default fp32 precision.")
     else:
-      raise f"Unknown precision: {precision}"
+      raise ValueError(f"Unknown precision: {precision}")

   def pick_grad(self):
     # special case
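For context, the XLA_USE_FP16 / XLA_USE_BF16 flags take effect when tensors are placed on the XLA device, which is why the commit message suggests calling the helper before moving the model over. A minimal sketch of the effect, assuming torch_xla is installed and an XLA device is available (the Linear module is just an illustration, not from this commit):

  import os
  os.environ['XLA_USE_BF16'] = '1'  # must be set before the XLA device is used

  import torch
  import torch_xla.core.xla_model as xm

  device = xm.xla_device()
  model = torch.nn.Linear(4, 4).to(device)  # float32 params are kept as bf16 on the device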
