Update on "Extend SampleInput str representation with tensor data."
As in the title. The aim of this addition is to make it easier to debug certain CI failures that cannot be reproduced locally. For instance, we currently see messages like
```
Exception: Caused by sample input at index 0: SampleInput(input=Tensor[size=(20,), device="cuda:0", dtype=torch.float64], args=(), kwargs={}, broadcasts_input=False, name='')
```
which is not really useful without showing the actual sample data (all the other sample parameters can often be determined by other means). With the data included, a sample can be related to the `index` part of error messages such as:
```
Mismatched elements: 2 / 20 (10.0%)
Greatest absolute difference: nan at index (10,) (up to 1e-05 allowed)
Greatest relative difference: nan at index (10,) (up to 1e-07 allowed)
```
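For illustration only, here is a minimal sketch of how tensor data can be folded into such a string; the helper name `summarize_tensor` and the 20-element cap are assumptions made for this sketch, not the actual implementation in this PR:
```
import torch

def summarize_tensor(t: torch.Tensor, max_elems: int = 20) -> str:
    # Hypothetical helper: flatten the tensor and list at most `max_elems`
    # values so the repr stays readable even for large inputs.
    data = t.detach().flatten()[:max_elems].tolist()
    return (
        f'Tensor[size={tuple(t.shape)}, device="{t.device}", '
        f"dtype={t.dtype}, data={data}]"
    )

# Prints a line in the same spirit as the SampleInput repr shown above.
print(summarize_tensor(torch.arange(25, dtype=torch.int32).reshape(5, 5)))
```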

As an example of the usefulness of this PR, consider the following failure message:
```
inductor/test_torchinductor_opinfo.py::TestInductorOpInfoCPU::test_comprehensive_polygamma_polygamma_n_0_cpu_int32 ('RERUN', {'yellow': True}) [1.5510s] [ 70%]
inductor/test_torchinductor_opinfo.py::TestInductorOpInfoCPU::test_comprehensive_polygamma_polygamma_n_0_cpu_int32 ('RERUN', {'yellow': True}) [0.0473s] [ 70%]
inductor/test_torchinductor_opinfo.py::TestInductorOpInfoCPU::test_comprehensive_polygamma_polygamma_n_0_cpu_int32 FAILED [0.0493s] [ 70%]

==================================== RERUNS ====================================
__ TestInductorOpInfoCPU.test_comprehensive_polygamma_polygamma_n_0_cpu_int32 __
Traceback (most recent call last):
<snip>
AssertionError: Tensor-likes are not close!

Mismatched elements: 9 / 25 (36.0%)
Greatest absolute difference: inf at index (0, 0) (up to 1e-05 allowed), inf vs 20177651499008.0
Greatest relative difference: inf at index (0, 0) (up to 1.3e-06 allowed)

The above exception was the direct cause of the following exception:

<snip>
Exception: Caused by sample input at index 0: SampleInput(input=Tensor[size=(5, 5), device="cpu", dtype=torch.int32, data=[-8, 6, 9, 0, 0, 5, 5, 7, 6, 5, 1, -5, 2, -1, 8, -4, 0, -6, 3, -5]], args=(1), kwargs={}, broadcasts_input=False, name='')
```
from which we learn that the `torch.polygamma` result is actually correct, because `polygamma(0, -8) -> inf`, while the reference value used (20177651499008.0) is wrong (see #106692 for more details).
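For completeness, a quick local check of that sample value might look like the following sketch; it simply prints the torch result so it can be compared with the reference:
```
import torch

# polygamma of order 0 is digamma, which has poles at non-positive integers,
# so an inf result at x = -8 is expected rather than a numerical error.
x = torch.tensor(-8.0, dtype=torch.float64)
print(torch.polygamma(0, x))
```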





[ghstack-poisoned]
pearu committed Feb 10, 2024
2 parents 90afb08 + 3818325 commit 2d60eb2
Showing 3 changed files with 23 additions and 10 deletions.
10 changes: 9 additions & 1 deletion test/inductor/test_torchinductor_opinfo.py
@@ -350,6 +350,15 @@ def wrapper_noop_set_seed(op, *args, **kwargs):
("softmax", "cuda", f16): {"atol": 1e-4, "rtol": 0.02},
("_softmax_backward_data", "cuda", f16): {"atol": 0.008, "rtol": 0.002},
("special.log_ndtr", "cuda", f64): {"atol": 1e-6, "rtol": 1e-5},
("polygamma.polygamma_n_0", "cpu", f32): {"atol": 1e-3, "rtol": 1e-4},
("polygamma.polygamma_n_1", "cpu", f32): {"atol": 1e-3, "rtol": 1e-4},
("polygamma.polygamma_n_2", "cpu", f32): {"atol": 1e-3, "rtol": 1e-4},
("polygamma.polygamma_n_3", "cpu", f32): {"atol": 1e-3, "rtol": 1e-4},
("polygamma.polygamma_n_4", "cpu", f32): {"atol": 1e-3, "rtol": 1e-4},
("special.polygamma.special_polygamma_n_0", "cpu", f32): {
"atol": 1e-3,
"rtol": 1e-4,
},
("std_mean.unbiased", "cuda", f16): {"reference_in_float": True},
("uniform", "cuda"): {"reference_in_float": True},
# Following tests are failing with strict comparison but atol=1 is acceptable due to rounding errors
@@ -498,7 +507,6 @@ def test_comprehensive(self, device, dtype, op):
overridden_kwargs = inductor_override_kwargs[(op_name, device_type)]
elif (op_name, device_type, dtype) in inductor_override_kwargs:
overridden_kwargs = inductor_override_kwargs[(op_name, device_type, dtype)]

func = op.get_op()

def fn(*args, **kwargs):
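For context, the tolerance overrides added above are consulted as in the hunk shown: a `(op_name, device_type)` key is checked before the more specific `(op_name, device_type, dtype)` key. A simplified, self-contained sketch of that lookup (not the exact test code; dtype keys abbreviated to strings):
```
# Simplified sketch of the tolerance-override lookup used by the test.
inductor_override_kwargs = {
    ("polygamma.polygamma_n_0", "cpu", "f32"): {"atol": 1e-3, "rtol": 1e-4},
}

def resolve_overrides(op_name, device_type, dtype):
    if (op_name, device_type) in inductor_override_kwargs:
        return inductor_override_kwargs[(op_name, device_type)]
    elif (op_name, device_type, dtype) in inductor_override_kwargs:
        return inductor_override_kwargs[(op_name, device_type, dtype)]
    return {}

print(resolve_overrides("polygamma.polygamma_n_0", "cpu", "f32"))
```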
16 changes: 8 additions & 8 deletions torch/optim/lr_scheduler.py
@@ -492,7 +492,7 @@ class ConstantLR(LRScheduler):
>>> # lr = 0.025 if epoch == 2
>>> # lr = 0.025 if epoch == 3
>>> # lr = 0.05 if epoch >= 4
>>> scheduler = ConstantLR(self.opt, factor=0.5, total_iters=4)
>>> scheduler = ConstantLR(optimizer, factor=0.5, total_iters=4)
>>> for epoch in range(100):
>>> train(...)
>>> validate(...)
@@ -556,7 +556,7 @@ class LinearLR(LRScheduler):
>>> # lr = 0.0375 if epoch == 2
>>> # lr = 0.04375 if epoch == 3
>>> # lr = 0.05 if epoch >= 4
>>> scheduler = LinearLR(self.opt, start_factor=0.5, total_iters=4)
>>> scheduler = LinearLR(optimizer, start_factor=0.5, total_iters=4)
>>> for epoch in range(100):
>>> train(...)
>>> validate(...)
@@ -656,9 +656,9 @@ class SequentialLR(LRScheduler):
>>> # lr = 0.9 if epoch == 2
>>> # lr = 0.81 if epoch == 3
>>> # lr = 0.729 if epoch == 4
>>> scheduler1 = ConstantLR(self.opt, factor=0.1, total_iters=2)
>>> scheduler2 = ExponentialLR(self.opt, gamma=0.9)
>>> scheduler = SequentialLR(self.opt, schedulers=[scheduler1, scheduler2], milestones=[2])
>>> scheduler1 = ConstantLR(optimizer, factor=0.1, total_iters=2)
>>> scheduler2 = ExponentialLR(optimizer, gamma=0.9)
>>> scheduler = SequentialLR(optimizer, schedulers=[scheduler1, scheduler2], milestones=[2])
>>> for epoch in range(100):
>>> train(...)
>>> validate(...)
@@ -769,7 +769,7 @@ class PolynomialLR(LRScheduler):
>>> # lr = 0.00050 if epoch == 2
>>> # lr = 0.00025 if epoch == 3
>>> # lr = 0.0 if epoch >= 4
>>> scheduler = PolynomialLR(self.opt, total_iters=4, power=1.0)
>>> scheduler = PolynomialLR(optimizer, total_iters=4, power=1.0)
>>> for epoch in range(100):
>>> train(...)
>>> validate(...)
@@ -893,8 +893,8 @@ class ChainedScheduler(LRScheduler):
>>> # lr = 0.729 if epoch == 2
>>> # lr = 0.6561 if epoch == 3
>>> # lr = 0.59049 if epoch >= 4
>>> scheduler1 = ConstantLR(self.opt, factor=0.1, total_iters=2)
>>> scheduler2 = ExponentialLR(self.opt, gamma=0.9)
>>> scheduler1 = ConstantLR(optimizer, factor=0.1, total_iters=2)
>>> scheduler2 = ExponentialLR(optimizer, gamma=0.9)
>>> scheduler = ChainedScheduler([scheduler1, scheduler2])
>>> for epoch in range(100):
>>> train(...)
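The docstring changes above replace the undefined `self.opt` with the conventional `optimizer` name. A self-contained sketch of the corrected `ConstantLR` example (the model and training loop are placeholders):
```
import torch
from torch.optim.lr_scheduler import ConstantLR

model = torch.nn.Linear(4, 1)  # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
scheduler = ConstantLR(optimizer, factor=0.5, total_iters=4)

for epoch in range(6):
    optimizer.step()                        # training step elided
    print(epoch, scheduler.get_last_lr())   # 0.025 for epochs 0-3, 0.05 afterwards
    scheduler.step()
```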
7 changes: 6 additions & 1 deletion torch/testing/_internal/opinfo/definitions/special.py
@@ -65,7 +65,12 @@ def sample_inputs_i0_i1(op_info, device, dtype, requires_grad, **kwargs):

def sample_inputs_polygamma(op_info, device, dtype, requires_grad, **kwargs):
make_arg = partial(
make_tensor, device=device, dtype=dtype, requires_grad=requires_grad
make_tensor,
device=device,
# TODO: eliminate low after gh-106692 is fixed:
low=(1 if dtype in {torch.int32, torch.int64} else None),
dtype=dtype,
requires_grad=requires_grad,
)
tensor_shapes = ((S, S), ())
ns = (1, 2, 3, 4, 5)
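The `low` bound introduced above keeps integer samples strictly positive, away from the poles of polygamma at non-positive integers. A minimal sketch of the same idea with `make_tensor` (shape and dtype chosen for illustration):
```
import torch
from torch.testing import make_tensor

dtype = torch.int32
# Workaround mirrored from the diff: only integer dtypes get a positive
# lower bound; floating dtypes keep the default sampling range.
low = 1 if dtype in {torch.int32, torch.int64} else None
t = make_tensor((5, 5), device="cpu", dtype=dtype, low=low, requires_grad=False)
print(t.min() >= 1)  # True for integer dtypes with this bound
```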
