diff --git a/aten/src/THCUNN/generic/SpatialDilatedConvolution.cu b/aten/src/THCUNN/generic/SpatialDilatedConvolution.cu
index f787323413b26..007b22e2444b3 100644
--- a/aten/src/THCUNN/generic/SpatialDilatedConvolution.cu
+++ b/aten/src/THCUNN/generic/SpatialDilatedConvolution.cu
@@ -16,8 +16,6 @@ static inline void THNN_(SpatialDilatedConvolution_shapeCheck)(
              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
   THArgCheck(dW > 0 && dH > 0, 11,
              "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
-  THArgCheck(THCTensor_(isContiguous)(state, weight), 4,
-             "weight tensor has to be contiguous");
   THArgCheck(!bias || THCTensor_(isContiguous)(state, bias), 5,
              "bias tensor has to be contiguous");
   THArgCheck(dilationW > 0 && dilationH > 0, 14,
diff --git a/test/common_nn.py b/test/common_nn.py
index 9011f4f661742..cbae4cb8f43fa 100644
--- a/test/common_nn.py
+++ b/test/common_nn.py
@@ -720,6 +720,9 @@ def __init__(self, *args, **kwargs):
         self.jacobian_input = kwargs.get('jacobian_input', True)
         self.should_test_cuda = kwargs.get('test_cuda', True)
         self.should_test_pickle = kwargs.get('pickle', True)
+        self.check_gradgrad = kwargs.get('check_gradgrad', True)
+        self.FIXME_no_cuda_gradgrad_comparison = \
+            kwargs.get('FIXME_no_cuda_gradgrad_comparison', False)
 
     def __call__(self, test_case):
         module = self.constructor(*self.constructor_args)
@@ -818,9 +821,10 @@ def test_cuda(self, test_case):
             gpu_output = test_case._forward(gpu_module, gpu_input)
             test_case.assertEqual(cpu_output, gpu_output, 2e-4)
 
+            # Run backwards on CPU and GPU and compare results
             for i in range(5):
                 cpu_output_t = cpu_output.data if isinstance(cpu_output, Variable) else cpu_output
-                cpu_gradOutput = cpu_output_t.clone().bernoulli_()
+                cpu_gradOutput = cpu_output_t.clone().normal_()
                 gpu_gradOutput = cpu_gradOutput.type('torch.cuda.FloatTensor')
                 cpu_gradInput = test_case._backward(cpu_module, cpu_input, cpu_output, cpu_gradOutput)
                 gpu_gradInput = test_case._backward(gpu_module, gpu_input, gpu_output, gpu_gradOutput)
@@ -828,6 +832,40 @@ def test_cuda(self, test_case):
                 for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1]):
                     test_case.assertEqual(cpu_d_p, gpu_d_p, 2e-4)
 
+            # Run double-backwards on CPU and GPU and compare results
+            if self.check_gradgrad and not self.FIXME_no_cuda_gradgrad_comparison:
+                cpu_output_t = cpu_output.data if isinstance(cpu_output, Variable) else cpu_output
+                cpu_gradOutput = Variable(cpu_output_t.clone().normal_(), requires_grad=True)
+                gpu_gradOutput = Variable(cpu_gradOutput.type('torch.cuda.FloatTensor').data, requires_grad=True)
+
+                cpu_gradInputs = torch.autograd.grad(
+                    cpu_module(cpu_input),
+                    (cpu_input,) + tuple(cpu_module.parameters()),
+                    cpu_gradOutput,
+                    create_graph=True)
+                gpu_gradInputs = torch.autograd.grad(
+                    gpu_module(gpu_input),
+                    (gpu_input,) + tuple(gpu_module.parameters()),
+                    gpu_gradOutput,
+                    create_graph=True)
+
+                # We mix output into the second backwards computation so that
+                # torch.autograd.grad doesn't complain that some inputs
+                # are unreachable (which can happen if you differentiate
+                # only with respect to the gradient).
+                cpu_gg = torch.autograd.grad(
+                    cpu_output.sum() + sum(map(lambda x: x.sum(), cpu_gradInputs)),
+                    (cpu_input, cpu_gradOutput) + tuple(cpu_module.parameters()),
+                    retain_graph=True)
+                gpu_gg = torch.autograd.grad(
+                    gpu_output.sum() + sum(map(lambda x: x.sum(), gpu_gradInputs)),
+                    (gpu_input, gpu_gradOutput) + tuple(gpu_module.parameters()),
+                    retain_graph=True)
+
+                test_case.assertEqual(cpu_gradInput, gpu_gradInput, 2e-4)
+                for cpu_d_p, gpu_d_p in zip(cpu_gg, gpu_gg):
+                    test_case.assertEqual(cpu_d_p, gpu_d_p, 2e-4)
+
             self.test_noncontig(test_case, gpu_module, gpu_input)
         except NotImplementedError:
             pass
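The double-backward comparison added above is the core of this patch: run the backward pass with create_graph=True on both devices, differentiate the resulting gradients a second time, and compare. The following is a minimal, self-contained sketch of the same idea written against the present-day torch API; the helper name compare_double_backward, the deepcopy-based module cloning, and the tolerance are illustrative assumptions, not part of the patch.

import copy
import torch

def compare_double_backward(cpu_module, input_shape, tol=2e-4):
    # Clone the module onto the GPU so both copies share identical parameters.
    gpu_module = copy.deepcopy(cpu_module).cuda()

    cpu_input = torch.randn(*input_shape, requires_grad=True)
    gpu_input = cpu_input.detach().cuda().requires_grad_()

    cpu_output = cpu_module(cpu_input)
    gpu_output = gpu_module(gpu_input)
    grad_output = torch.randn_like(cpu_output)

    # First-order gradients; create_graph=True records the backward pass so
    # the gradients are themselves differentiable.
    cpu_grads = torch.autograd.grad(
        cpu_output, (cpu_input,) + tuple(cpu_module.parameters()),
        grad_output, create_graph=True)
    gpu_grads = torch.autograd.grad(
        gpu_output, (gpu_input,) + tuple(gpu_module.parameters()),
        grad_output.cuda(), create_graph=True)

    # Second-order gradients. Mixing the output into the scalar keeps every
    # input reachable, the same trick used by the test harness above.
    cpu_gg = torch.autograd.grad(
        cpu_output.sum() + sum(g.sum() for g in cpu_grads),
        (cpu_input,) + tuple(cpu_module.parameters()))
    gpu_gg = torch.autograd.grad(
        gpu_output.sum() + sum(g.sum() for g in gpu_grads),
        (gpu_input,) + tuple(gpu_module.parameters()))

    for cpu_g, gpu_g in zip(cpu_gg, gpu_gg):
        assert torch.allclose(cpu_g, gpu_g.cpu(), atol=tol)

# e.g. compare_double_backward(torch.nn.Linear(4, 3), (2, 4))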
diff --git a/test/test_legacy_nn.py b/test/test_legacy_nn.py
index fd0f6bcd5b4ca..c08153368a3a5 100644
--- a/test/test_legacy_nn.py
+++ b/test/test_legacy_nn.py
@@ -15,6 +15,8 @@ class OldModuleTest(ModuleTest):
     def __init__(self, *args, **kwargs):
         super(OldModuleTest, self).__init__(*args, **kwargs)
         self.check_inplace = kwargs.get('check_inplace', False)
+        # Never check gradgrad for legacy NN
+        self.check_gradgrad = False
 
     def _do_test(self, test_case, module, input):
         # TODO: check update parameters
@@ -633,7 +635,7 @@ def _forward(self, module, input):
         with freeze_rng_state():
             return module.forward(input)
 
-    def _backward(self, module, input, output, grad_output):
+    def _backward(self, module, input, output, grad_output, create_graph=False):
        return module.backward(input, grad_output)
 
     def _forward_criterion(self, criterion, input, target):
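For the legacy tests the new create_graph keyword is accepted but ignored, because old-style nn modules do not have a differentiable backward; the new-style harness threads it through to output.backward (see the test_nn.py hunk below). A small sketch of what the flag does, using current torch and not code from the patch:

import torch

x = torch.randn(3, requires_grad=True)
y = (x ** 2).sum()

# create_graph=True records the backward pass itself, so x.grad carries a
# graph and can be differentiated again.
y.backward(create_graph=True)
second, = torch.autograd.grad(x.grad.sum(), x)
print(second)  # tensor([2., 2., 2.]) -- d/dx of dy/dx = d/dx (2x) = 2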
diff --git a/test/test_nn.py b/test/test_nn.py
index df341e50e2615..4073fcd83f284 100644
--- a/test/test_nn.py
+++ b/test/test_nn.py
@@ -110,6 +110,9 @@ def _do_test(self, test_case, module, input):
         module.__repr__()
 
         if self.check_inplace:
+            # check that the inplace variant of the module gives the same result
+            # as the out-of-place variant
+
             module_ip = self.constructor(*self.constructor_args, inplace=True)
 
             input_version = input._version
@@ -130,6 +133,9 @@ def _do_test(self, test_case, module, input):
             test_case.assertEqual(input.grad, input_ip.grad)
 
         if type(input.data) == torch.LongTensor and TEST_CUDA:
+            # check that cuda() moves module parameters to correct GPU device,
+            # and that float() casts parameters correctly
+
             input = input.cuda()
             module.float().cuda()
             module(input)
@@ -146,6 +152,8 @@ def _do_test(self, test_case, module, input):
                     test_case.assertEqual(type(p.data), torch.cuda.FloatTensor)
                     test_case.assertEqual(p.get_device(), 1)
         else:
+            # check that float()/double() casts work correctly
+
             # to float
             if type(input.data) != torch.LongTensor:
                 input = input.float()
@@ -164,6 +172,9 @@ def _do_test(self, test_case, module, input):
 
         # TODO: Hardshrink is lacking a CUDA implementation
         if TEST_CUDA and self.should_test_cuda and type(module) != nn.Hardshrink:
+            # check that cuda() moves module parameters to correct GPU device,
+            # and that float() casts parameters correctly
+
             # to GPU0
             input = input.float().cuda()
             module.float().cuda()
@@ -187,6 +198,7 @@ def _do_test(self, test_case, module, input):
                 test_case.assertEqual(type(p.data), torch.cuda.FloatTensor)
                 test_case.assertEqual(p.get_device(), 0)
 
+            # test that the forward of the module runs correctly without cuDNN
            if self.cudnn:
                 torch.backends.cudnn.enabled = False
                 try:
@@ -198,6 +210,7 @@ def _do_test(self, test_case, module, input):
                     torch.backends.cudnn.enabled = True
 
             if torch.cuda.device_count() >= 2:
+                # test that cross-GPU transfer works
                 # to GPU1
                 input = input.cuda(1)
                 module.cuda(1)
@@ -234,8 +247,8 @@ def _forward(self, module, input):
         with freeze_rng_state():
             return module(input)
 
-    def _backward(self, module, input, output, grad_output):
-        output.backward(grad_output, retain_graph=True)
+    def _backward(self, module, input, output, grad_output, create_graph=False):
+        output.backward(grad_output, retain_graph=True, create_graph=create_graph)
         if input.grad is None:
             return None
         return input.grad.data
@@ -4429,6 +4442,7 @@ def smoothl1loss_no_reduce_test():
         cudnn=True,
         check_eval=True,
         desc='affine',
+        FIXME_no_cuda_gradgrad_comparison=True,
     ),
     dict(
         module_name='BatchNorm1d',
@@ -4437,6 +4451,7 @@ def smoothl1loss_no_reduce_test():
         cudnn=True,
         check_eval=True,
         desc='3d_input',
+        FIXME_no_cuda_gradgrad_comparison=True,
     ),
     dict(
         module_name='BatchNorm1d',
@@ -4445,6 +4460,7 @@ def smoothl1loss_no_reduce_test():
         cudnn=True,
         check_eval=True,
         desc='not_affine',
+        FIXME_no_cuda_gradgrad_comparison=True,
     ),
     dict(
         module_name='BatchNorm1d',
@@ -4453,6 +4469,7 @@ def smoothl1loss_no_reduce_test():
         cudnn=True,
         check_eval=True,
         desc='3d_input_not_affine',
+        FIXME_no_cuda_gradgrad_comparison=True,
     ),
     dict(
         module_name='BatchNorm2d',
@@ -4460,6 +4477,7 @@ def smoothl1loss_no_reduce_test():
         input_size=(2, 3, 6, 6),
         cudnn=True,
         check_eval=True,
+        FIXME_no_cuda_gradgrad_comparison=True,
     ),
     dict(
         module_name='BatchNorm2d',
@@ -4468,6 +4486,7 @@ def smoothl1loss_no_reduce_test():
         cudnn=True,
         check_eval=True,
         desc='momentum',
+        FIXME_no_cuda_gradgrad_comparison=True,
     ),
     dict(
         module_name='BatchNorm2d',
@@ -4476,6 +4495,7 @@ def smoothl1loss_no_reduce_test():
         cudnn=True,
         check_eval=True,
         desc='not_affine',
+        FIXME_no_cuda_gradgrad_comparison=True,
     ),
     dict(
         module_name='BatchNorm3d',
@@ -4483,6 +4503,7 @@ def smoothl1loss_no_reduce_test():
         input_size=(2, 3, 4, 4, 4),
         cudnn=True,
         check_eval=True,
+        FIXME_no_cuda_gradgrad_comparison=True,
     ),
     dict(
         module_name='BatchNorm3d',
@@ -4491,6 +4512,7 @@ def smoothl1loss_no_reduce_test():
         cudnn=True,
         check_eval=True,
         desc='momentum',
+        FIXME_no_cuda_gradgrad_comparison=True,
     ),
     dict(
         module_name='BatchNorm3d',
@@ -4499,6 +4521,7 @@ def smoothl1loss_no_reduce_test():
         cudnn=True,
         check_eval=True,
         desc='not_affine',
+        FIXME_no_cuda_gradgrad_comparison=True,
     ),
     dict(
         module_name='Conv1d',
@@ -4571,6 +4594,7 @@ def smoothl1loss_no_reduce_test():
         input_size=(1, 3, 6),
         cudnn=True,
         desc='dilated',
+        FIXME_no_cuda_gradgrad_comparison=True,
     ),
     dict(
         fullname='ConvTranspose1d_groups',
@@ -4646,6 +4670,7 @@ def smoothl1loss_no_reduce_test():
         input_size=(1, 3, 6, 7),
         cudnn=True,
         desc='dilated',
+        FIXME_no_cuda_gradgrad_comparison=True,
     ),
     dict(
         module_name='ConvTranspose2d',
@@ -4659,6 +4684,7 @@ def smoothl1loss_no_reduce_test():
         constructor=lambda: nn.ConvTranspose2d(2, 4, (2, 3), groups=2),
         input_size=(1, 2, 4, 5),
         cudnn=True,
+        FIXME_no_cuda_gradgrad_comparison=True,
     ),
     dict(
         fullname='Conv2d_depthwise',
@@ -4840,6 +4866,7 @@ def smoothl1loss_no_reduce_test():
         constructor_args=(2, 3, (2, 3, 2)),
         cudnn=True,
         input_size=(1, 2, 4, 5, 4),
+        FIXME_no_cuda_gradgrad_comparison=True,
     ),
     dict(
         module_name='ConvTranspose3d',
@@ -4847,6 +4874,7 @@ def smoothl1loss_no_reduce_test():
         cudnn=True,
         input_size=(1, 2, 4, 5, 4),
         desc='dilated',
+        FIXME_no_cuda_gradgrad_comparison=True,
     ),
     dict(
         module_name='MaxPool3d',
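The entries above are module-test specs consumed by the harness in common_nn.py; check_gradgrad and FIXME_no_cuda_gradgrad_comparison simply gate the second-order checks for individual modules. The property being gated can be reproduced directly with torch.autograd's numerical checkers; the snippet below is an illustrative stand-alone check on nn.Linear, not code from the harness:

import torch
from torch.autograd import gradcheck, gradgradcheck

# Double precision keeps the finite-difference comparison stable.
module = torch.nn.Linear(4, 3).double()
x = torch.randn(2, 4, dtype=torch.double, requires_grad=True)

def fn(inp):
    return module(inp)

assert gradcheck(fn, (x,), eps=1e-6, atol=1e-4)      # first-order gradients
assert gradgradcheck(fn, (x,), eps=1e-6, atol=1e-4)  # second-order gradients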
@@ -859,7 +859,7 @@
   bias: grad.contiguous().view({grad.size(0), grad.size(1), -1}).sum(0).sum(1)
 
 - name: thnn_conv_depthwise2d_backward(Tensor grad_output, Tensor self, Tensor weight, IntList kernel_size, IntList stride, IntList padding, IntList dilation, std::array output_mask)
-  grad_output, self, weight: _convolution_double_backward(grads[0], grads[1], grads[2], grad_output, weight, self, stride, padding, dilation, false, {{0, 0}}, 1, false, false, false, grad_input_mask)
+  grad_output, self, weight: _convolution_double_backward(grads[0], grads[1], {}, grad_output, weight, self, stride, padding, dilation, false, {{0, 0}}, self.size(1), false, false, false, grad_input_mask)
 
 - name: thnn_conv3d_forward(Tensor self, Tensor weight, IntList kernel_size, Tensor bias, IntList stride, IntList padding)
   self, weight, bias: thnn_conv3d_backward(grad, self, weight, kernel_size, stride, padding, finput, fgrad_input, grad_input_mask)
diff --git a/tools/autograd/gen_autograd_functions.py b/tools/autograd/gen_autograd_functions.py
index a795aa95a6566..af90df60531e1 100644
--- a/tools/autograd/gen_autograd_functions.py
+++ b/tools/autograd/gen_autograd_functions.py
@@ -5,7 +5,7 @@
 # python_functions.h/cpp: Python bindings for the above classes
 #
 from .utils import nested_dict, CodeTemplate, write
-from .gen_variable_type import VIEW_FUNCTIONS, uses_grad, template_path
+from .gen_variable_type import VIEW_FUNCTIONS, uses_single_grad, template_path
 
 FUNCTIONS_H = CodeTemplate.from_file(template_path + '/Functions.h')
 FUNCTIONS_CPP = CodeTemplate.from_file(template_path + '/Functions.cpp')
@@ -127,7 +127,7 @@ def save_arg(arg, is_output):
 
     body = []
 
-    if uses_grad(func):
+    if uses_single_grad(func):
         body.append('auto& grad = grads[0];')
 
     def emit_derivative(derivative):
diff --git a/tools/autograd/gen_variable_type.py b/tools/autograd/gen_variable_type.py
index 16ef8a07e377d..2d36568fd123c 100644
--- a/tools/autograd/gen_variable_type.py
+++ b/tools/autograd/gen_variable_type.py
@@ -204,7 +204,7 @@ def split_name_params(prototype):
     return name, params.split(', ')
 
 
-def uses_grad(func):
+def uses_single_grad(func):
     if func is None:
         return False
     for derivative in func['derivatives']:
@@ -515,7 +515,7 @@ def emit_body_via_derived(declaration):
         # I don't think this is a good way to implement this, but
         # there doesn't seem to be a good place to mark things as
         # differentiable or non-differentiable at the moment.
-        if uses_grad(declaration.get('derivative')):
+        if uses_single_grad(declaration.get('derivative')):
             env['result'] = "std::get<0>(ret)" if len(declaration['returns']) > 1 else 'ret'
         else:
             env['result'] = CodeTemplate("{ ${outs} }").substitute(outs=diff_outs)
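The derivatives.yaml change corrects the double-backward formula for the depthwise convolution kernel: its backward has no bias gradient, so an undefined tensor ({}) is passed in that slot instead of grads[2], and the groups argument becomes self.size(1) (the input channel count), since a depthwise convolution is a grouped convolution with one group per input channel, rather than the hard-coded 1. A hedged stand-alone sketch of the second-order gradients this formula is meant to provide, written against current torch (shapes and names are illustrative, and the CPU path shown here does not exercise the CUDA depthwise kernel itself):

import torch

# A depthwise convolution is a grouped convolution with groups == in_channels.
channels = 8
x = torch.randn(2, channels, 16, 16, dtype=torch.double, requires_grad=True)
conv = torch.nn.Conv2d(channels, channels, kernel_size=3, padding=1,
                       groups=channels).double()

out = conv(x)
grad_out = torch.randn_like(out, requires_grad=True)

# First backward with create_graph=True, then a second backward; mixing the
# output into the scalar keeps every input reachable, as in the harness above.
grad_x, = torch.autograd.grad(out, x, grad_out, create_graph=True)
second = torch.autograd.grad(out.sum() + grad_x.sum(), (x, grad_out))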
diff --git a/tools/autograd/load_derivatives.py b/tools/autograd/load_derivatives.py
index e7f7ccf209e01..1a09bbee59ee8 100644
--- a/tools/autograd/load_derivatives.py
+++ b/tools/autograd/load_derivatives.py
@@ -56,6 +56,14 @@ def transform_return(r):
     formula, saved_inputs = saved_variables(formula, arguments)
     formula, saved_outputs = saved_variables(formula, returns)
 
+    # Check that the referenced gradients in the formula are in bounds
+    for i in used_gradient_indices(formula):
+        if i >= len(declaration['returns']):
+            raise RuntimeError(
+                "Out of bounds grads access: derivative formula for {} "
+                "used grads[{}], but the forward only returns {} outputs."
+                .format(declaration['name'], i, len(declaration['returns'])))
+
     return {
         'formula': formula,
         'output_indices': output_indices,
@@ -189,6 +197,23 @@ def get_signature(declaration, ignore_inplace=False):
     return '{}({})'.format(name, ', '.join(simple_types))
 
 
+GRAD_INDEX_REGEX = r'(?:^|\W)grads\[(\d+)\]'
+
+
+def used_gradient_indices(formula):
+    """Determine a list of gradient indices (the i in grads[i]) that
+    are used by the formula.
+
+    NB: references to 'grad' don't count as a gradient index (technically,
+    we should return 0 in this case, but it doesn't matter because 'grad'
+    can never be out of bounds).
+
+    >>> used_gradient_indices("foo(grads[0], grads[1])")
+    [0, 1]
+    """
+    return [int(i) for i in re.findall(GRAD_INDEX_REGEX, formula)]
+
+
 def saved_variables(formula, args):
     # find which arguments need to be saved
     saved = []
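The helper added above drives the new bounds check in the first hunk: it extracts every explicit grads[i] index from a derivative formula while deliberately ignoring the bare grad alias. A quick stand-alone demonstration; the num_outputs value and the driver loop are hypothetical, only the regex and helper mirror the hunk:

import re

GRAD_INDEX_REGEX = r'(?:^|\W)grads\[(\d+)\]'

def used_gradient_indices(formula):
    # Collect the i of every grads[i] reference in the formula.
    return [int(i) for i in re.findall(GRAD_INDEX_REGEX, formula)]

print(used_gradient_indices("foo(grads[0], grads[1])"))   # [0, 1]
print(used_gradient_indices("grad.contiguous().sum(0)"))  # [] -- bare 'grad' is ignored

# Hypothetical driver showing how the loader rejects an out-of-range reference.
num_outputs = 2
for i in used_gradient_indices("bar(grads[0], grads[2])"):
    if i >= num_outputs:
        raise RuntimeError(
            "Out of bounds grads access: grads[{}] used, but only {} outputs".format(i, num_outputs))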