Skip to content

Error while running max_unpool2d with xla: RuntimeError: torch_xla/csrc/tensor.cpp:817 : Check failed: xla_data != nullptr #2162

@Asar5

Description

@Asar5

❓ Questions and Help

Hi, I'm extremely new to this platform and I'm trying my hand at using Google Colab's TPU.
I'm trying to train a SegNet model on a TPU, and this is the error I get:

2020-06-03 03:25:28.796417: E tensorflow/compiler/xla/xla_client/tf_logging.cc:11] Check failed: xla_data != nullptr 
*** Begin stack trace ***
	tensorflow::CurrentStackTrace[abi:cxx11]()
	torch_xla::XLATensor::GatherTensorsXlaData(std::vector<torch_xla::XLATensor, std::allocator<torch_xla::XLATensor> > const&, absl::Span<unsigned long const>, absl::Span<std::shared_ptr<xla::ComputationClient::Data> const>)
	torch_xla::XLATensor::GetTensorsFused(std::vector<torch_xla::XLATensor, std::allocator<torch_xla::XLATensor> >*)
	torch_xla::XLATensor::GetTensors(std::vector<torch_xla::XLATensor, std::allocator<torch_xla::XLATensor> >*)
	torch_xla::bridge::XlaCreateTensorList(c10::ArrayRef<at::Tensor> const&)
	torch_xla::AtenXlaTypeDefault::max_unpool2d(at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>)
	c10::detail::wrap_kernel_functor_unboxed_<c10::detail::WrapRuntimeKernelFunctor_<at::Tensor (*)(at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>), at::Tensor, c10::guts::typelist::typelist<at::Tensor const&, at::Tensor const&, c10::ArrayRef<long> > >, at::Tensor (at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>)>::call(c10::OperatorKernel*, at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>)
	
	
	
	
	
	
	
	_PyEval_EvalFrameDefault
	
	
	
	_PyEval_EvalFrameDefault
	_PyFunction_FastCallDict
	
	PyObject_Call
	_PyEval_EvalFrameDefault
	
	_PyFunction_FastCallDict
	
	
	_PyObject_FastCallKeywords
	
	_PyEval_EvalFrameDefault
	
	
	_PyEval_EvalFrameDefault
	
	PyEval_EvalCode
	
	PyRun_FileExFlags
	PyRun_SimpleFileExFlags
	Py_Main
	main
	__libc_start_main
	_start
*** End stack trace ***

Traceback (most recent call last):
  File "train.py", line 128, in <module>
    train()
  File "train.py", line 57, in train
    predicted_tensor, softmaxed_tensor = model(input_tensor)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 550, in __call__
    result = self.forward(*input, **kwargs)
  File "/content/drive/My Drive/Bonus/PySegNet/model.py", line 278, in forward
    x_4d = F.max_unpool2d(x_4, indices_4, kernel_size=2, stride=2, output_size=dim_4_try)
  File "/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py", line 671, in max_unpool2d
    return torch._C._nn.max_unpool2d(input, indices, output_size)
RuntimeError: torch_xla/csrc/tensor.cpp:817 : Check failed: xla_data != nullptr 
*** Begin stack trace ***
	tensorflow::CurrentStackTrace[abi:cxx11]()
	torch_xla::XLATensor::GatherTensorsXlaData(std::vector<torch_xla::XLATensor, std::allocator<torch_xla::XLATensor> > const&, absl::Span<unsigned long const>, absl::Span<std::shared_ptr<xla::ComputationClient::Data> const>)
	torch_xla::XLATensor::GetTensorsFused(std::vector<torch_xla::XLATensor, std::allocator<torch_xla::XLATensor> >*)
	torch_xla::XLATensor::GetTensors(std::vector<torch_xla::XLATensor, std::allocator<torch_xla::XLATensor> >*)
	torch_xla::bridge::XlaCreateTensorList(c10::ArrayRef<at::Tensor> const&)
	torch_xla::AtenXlaTypeDefault::max_unpool2d(at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>)
	c10::detail::wrap_kernel_functor_unboxed_<c10::detail::WrapRuntimeKernelFunctor_<at::Tensor (*)(at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>), at::Tensor, c10::guts::typelist::typelist<at::Tensor const&, at::Tensor const&, c10::ArrayRef<long> > >, at::Tensor (at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>)>::call(c10::OperatorKernel*, at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>)
	
	
	
	
	
	
	
	_PyEval_EvalFrameDefault
	
	
	
	_PyEval_EvalFrameDefault
	_PyFunction_FastCallDict
	
	PyObject_Call
	_PyEval_EvalFrameDefault
	
	_PyFunction_FastCallDict
	
	
	_PyObject_FastCallKeywords
	
	_PyEval_EvalFrameDefault
	
	
	_PyEval_EvalFrameDefault
	
	PyEval_EvalCode
	
	PyRun_FileExFlags
	PyRun_SimpleFileExFlags
	Py_Main
	main
	__libc_start_main
	_start
*** End stack trace ***

Here's a snippet of the code in model.py where the error is showing up.

        # Encoder Stage - 5
        dim_4 = x_3.size()
        x_40 = F.relu(self.encoder_conv_40(x_3))
        x_41 = F.relu(self.encoder_conv_41(x_40))
        x_42 = F.relu(self.encoder_conv_42(x_41))
        dim_4_try = x_42.size()
        x_4, indices_4 = F.max_pool2d(x_42, kernel_size=2, stride=2, return_indices=True)

        # Decoder
        # print("22===> input of decode = ", x_4.shape)
        dim_d = x_4.size()

        # Decoder Stage - 5
        x_4d = F.max_unpool2d(x_4, indices_4, kernel_size=2, stride=2, output_size=dim_4_try)  # <-- this line raises the error
        x_42d = F.relu(self.decoder_convtr_42(x_4d))
        x_41d = F.relu(self.decoder_convtr_41(x_42d))
        x_40d = F.relu(self.decoder_convtr_40(x_41d))
        dim_4d = x_40d.size()

Can someone help me understand how to go about debugging this? Thanks!

Metadata

Metadata

Assignees

Labels

stale: Has not had recent activity

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions