@@ -851,295 +851,6 @@ def test(self):
     self.assertEqualRel(x, xla_x.to_tensor(), rel_err=1e-3, abs_err=5)
 
 
-class TestGradients(XlaTestCase):
-
-  def checkGrad(self,
-                model,
-                inputs,
-                grad_outputs='random',
-                xla=True,
-                rel_err=1e-2,
-                abs_err=1e-5):
-    # Trace and symbolically differentiate
-    traced_model = torch.jit.trace(model, *inputs)
-    fwd = traced_model._get_method('forward')
-    xm.forward_passes(fwd.graph)
-
-    inputs_params = inputs + list(model.parameters())
-    inputs_params_buffers = inputs + list(fwd.initial_ivalues())
-
-    gradient = torch._C._jit_differentiate(fwd.graph)
-    xm.forward_passes(gradient.f)
-    xm.backward_passes(gradient.df)
-
-    ##############################################################
-    # Run forward and backward graphs via jit interpreter
-    exec_f = torch_xla._XLAC.GraphExecutor(gradient.f, False)
-    exec_df = torch_xla._XLAC.GraphExecutor(gradient.df, False)
-
-    # forward function
-    raw_outputs = exec_f(*inputs_params_buffers)
-    raw_outputs = xu.as_list(raw_outputs)
-    intermediate_outputs = [
-        raw_output for raw_output in raw_outputs[gradient.f_real_outputs:]
-        if isinstance(raw_output, torch.Tensor)
-    ]
-    outputs = raw_outputs[:gradient.f_real_outputs]
-
-    if grad_outputs == 'random':
-      grad_outputs = _random_like(outputs) + _zeros_like(intermediate_outputs)
-
-    raw_grad_outputs = []
-    raw_grad_outputs += grad_outputs
-    raw_grad_outputs += [
-        inputs_params_buffers[i] for i in gradient.df_input_captured_inputs
-    ]
-    raw_grad_outputs += [
-        raw_outputs[i] for i in gradient.df_input_captured_outputs
-    ]
-
-    grad_inputs = exec_df(*raw_grad_outputs)
-    grad_inputs = xu.as_list(grad_inputs)
-
-    ##############################################################
-    # backward with XLA
-    if xla:
-      xla_model = torch_xla._XLAC.XlaModule(traced_model)
-      inputs_xla = [torch_xla._XLAC.XLATensor(input) for input in inputs]
-      xla_model((tuple(inputs_xla)))
-      grads_output_xla = [
-          torch_xla._XLAC.XLATensor(grad_output)
-          for grad_output in grad_outputs[:gradient.f_real_outputs]
-      ]
-      xla_model.backward((tuple(grads_output_xla)))
-      grad_inputs_xla = [input_xla.grad.to_tensor() for input_xla in inputs_xla]
-      grad_inputs_xla.extend(
-          [p.grad.to_tensor() for p in xla_model.parameters()[0]])
-    ##############################################################
-    # forward + backward with regular autograd / torch
-    outputs_gt = model(*inputs)
-    outputs_gt = xu.as_list(outputs_gt)
-    grad_inputs_gt = torch.autograd.grad(
-        outputs_gt, inputs_params, grad_outputs, only_inputs=True)
-    for out_jit, out_autograd in zip(outputs, outputs_gt):
-      self.assertEqualRel(
-          out_jit, out_autograd, rel_err=rel_err, abs_err=abs_err)
-
-    for grad_input_jit, grad_input_autograd in zip(grad_inputs, grad_inputs_gt):
-      self.assertEqualRel(
-          grad_input_jit, grad_input_autograd, rel_err=rel_err, abs_err=abs_err)
-
-    # TODO: test buffers as well (running_mean, etc.)
-    if xla:
-      for i, (grad_input_jit,
-              grad_input_xla) in enumerate(zip(grad_inputs, grad_inputs_xla)):
-        self.assertEqualRel(grad_input_jit, grad_input_xla, rel_err, abs_err)
-
-  def test_avgpool(self):
-
-    class AvgPoolGrad(nn.Module):
-
-      def __init__(self, stride, padding, count_include_pad):
-        super(AvgPoolGrad, self).__init__()
-        self.stride = stride
-        self.padding = padding
-        self.count_include_pad = count_include_pad
-
-      def forward(self, x):
-        return F.avg_pool2d(x, 2, self.stride, self.padding, False,
-                            self.count_include_pad)
-
-    for stride in [1, 2]:
-      for padding in [0, 1]:
-        for count_include_pad in [False, True]:
-          model = AvgPoolGrad(stride, padding, count_include_pad)
-          inputs = [_gen_tensor(4, 1, 28, 28, requires_grad=True)]
-          self.checkGrad(model, inputs, xla=True)
-
-  def test_adaptive_avgpool(self):
-
-    class AdaptiveAvgPoolGrad(nn.Module):
-
-      def __init__(self, output_size):
-        super(AdaptiveAvgPoolGrad, self).__init__()
-        self.output_size = output_size
-
-      def forward(self, x):
-        return F.adaptive_avg_pool2d(x, self.output_size)
-
-    model = AdaptiveAvgPoolGrad((2, 3))
-    for scale in [1, 2]:
-      inputs = [_gen_tensor(10, 3, 2 * scale, 3 * scale, requires_grad=True)]
-      self.checkGrad(model, inputs, xla=True)
-
-  def test_threshold(self):
-
-    class ThresholdPoolGrad(nn.Module):
-
-      def __init__(self):
-        super(ThresholdPoolGrad, self).__init__()
-        self.threshold = nn.Threshold(0.4, 20)
-
-      def forward(self, x):
-        return self.threshold(x)
-
-    model = ThresholdPoolGrad()
-    inputs = [_gen_tensor(4, 2, requires_grad=True)]
-    self.checkGrad(model, inputs, xla=True)
-
-  def test_maxpool(self):
-
-    class MaxPoolGrad(nn.Module):
-
-      def forward(self, x):
-        return F.max_pool2d(x, 2)
-
-    model = MaxPoolGrad()
-    inputs = [_gen_tensor(4, 1, 28, 28, requires_grad=True)]
-    self.checkGrad(model, inputs, xla=True)
-
-  def test_tanh(self):
-
-    class TanhGrad(nn.Module):
-
-      def forward(self, x):
-        return torch.tanh(x)
-
-    model = TanhGrad()
-    inputs = [_gen_tensor(4, 2, requires_grad=True)]
-    self.checkGrad(model, inputs, xla=True)
-
-  def test_sigmoid(self):
-
-    class SigmoidGrad(nn.Module):
-
-      def forward(self, x):
-        return torch.sigmoid(x)
-
-    model = SigmoidGrad()
-    inputs = [_gen_tensor(4, 2, requires_grad=True)]
-    self.checkGrad(model, inputs, xla=True, rel_err=1e-2, abs_err=1e-2)
-
-  @unittest.skip(
-      'differentiation of prim::ListUnpack is not supported, or it is missing '
-      'necessary type information')
-  def test_chunk(self):
-
-    class ChunkGrad(nn.Module):
-
-      def forward(self, x):
-        return x.chunk(2, 1)
-
-    model = ChunkGrad()
-    inputs = [_gen_tensor(4, 4, requires_grad=True)]
-    self.checkGrad(model, inputs, xla=True)
-
-  @unittest.skip('bool value of Tensor with more than one value is ambiguous')
-  def test_lstm_cell(self):
-
-    class LSTMCellGrad(nn.Module):
-
-      def __init__(self):
-        super(LSTMCellGrad, self).__init__()
-        self.i2h = nn.Linear(3, 8)
-        self.h2h = nn.Linear(2, 8)
-
-      def forward(self, x, hx, cx):
-        gates = self.i2h(x) + self.h2h(hx)
-
-        ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
-        ingate = torch.sigmoid(ingate)
-        forgetgate = torch.sigmoid(forgetgate)
-        cellgate = torch.tanh(cellgate)
-        outgate = torch.sigmoid(outgate)
-
-        cy = (forgetgate * cx) + (ingate * cellgate)
-        hy = outgate * torch.tanh(cy)
-        return hy, cy
-
-    model = LSTMCellGrad()
-    inputs = [
-        _gen_tensor(4, 3, requires_grad=True),
-        _gen_tensor(4, 2, requires_grad=True),
-        _gen_tensor(4, 2, requires_grad=True)
-    ]
-    self.checkGrad(model, inputs, xla=True)
-
-  def test_conv2d(self):
-    if FLAGS.long_test:
-      config = [
-          [1, 7, 15, 32],  # ichans
-          [1, 4, 21, 32],  # ochans
-          [1, 2, 3, 5],  # size
-          [1, 2],  # stride
-          [0, 1],  # padding
-          [True, False],  # bias
-      ]
-    else:
-      config = [
-          [1, 5],  # ichans
-          [1, 4],  # ochans
-          [1, 3],  # size
-          [1],  # stride
-          [0],  # padding
-          [False],  # bias
-      ]
-    for ichans, ochans, size, stride, padding, bias in (
-        itertools.product(*config)):
-      # TODO: dilation, groups, transpose
-      model = nn.Conv2d(ichans, ochans, size, stride, padding, bias=bias)
-      inputs = [_gen_tensor(4, ichans, 28, 28, requires_grad=True)]
-      self.checkGrad(model, inputs, xla=True, abs_err=1e-3)
-
-  def test_batchnorm2d(self):
-    for chans in [1, 15, 32]:
-      for eps in [1e-5, 1e-3, 1e-2]:
-        # TODO: momentum, training, affine
-        model = nn.BatchNorm2d(chans, eps=eps)
-        inputs = [_gen_tensor(4, chans, 28, 28, requires_grad=True)]
-        self.checkGrad(model, inputs, xla=True)
-
-  def test_logsoftmax(self):
-    for dim in [0, 1]:  # todo test 3d as well
-      for batch in [1, 3, 4]:
-
-        class LSMGrad(nn.Module):
-
-          def forward(self, x):
-            return F.log_softmax(x, dim)
-
-        model = LSMGrad()
-        inputs = [_gen_tensor(batch, 9, requires_grad=True)]
-        self.checkGrad(model, inputs, xla=True)
-
-  def test_nll_loss(self):
-    input = _gen_tensor(3, 5, requires_grad=True)
-    target = torch.empty(3, dtype=torch.long).random_(5)
-    model = XlaNllLoss()
-    traced_model = torch.jit.trace(model, (input, target))
-    xla_model = torch_xla._XLAC.XlaModule(traced_model)
-    xla_inputs = [
-        torch_xla._XLAC.XLATensor(input),
-        torch_xla._XLAC.XLATensor(target)
-    ]
-    output_xla = xla_model((tuple(xla_inputs)))
-    xla_model.backward(*output_xla)
-    output = model(input, target)
-    output.backward()
-    self.assertEqual(input.grad.data, xla_inputs[0].grad.data.to_tensor())
-
-  def test_mnist(self):
-    model = XlaMNIST()
-    inputs = [_gen_tensor(4, 1, 28, 28, requires_grad=True)]
-    self.checkGrad(model, inputs, xla=True)
-
-  @unittest.skip('Disable until we figure out the precision issue')
-  def test_resnet(self):
-    model = torchvision.models.resnet18()
-    inputs = [_gen_tensor(4, 3, 224, 224, requires_grad=True)]
-    self.checkGrad(model, inputs, xla=False)
-
-
 class TestOptimizer(XlaTestCase):
 
   def test_inplace_add_mul(self):