-
Notifications
You must be signed in to change notification settings - Fork 560
Closed
Labels
stale: Has not had recent activity
Description
❓ Questions and Help
Hi, I'm extremely new to this platform and I'm trying my hand at using Google Colab's TPU.
I'm trying to train a SegNet model on a TPU, and this is the error I get:
2020-06-03 03:25:28.796417: E tensorflow/compiler/xla/xla_client/tf_logging.cc:11] Check failed: xla_data != nullptr
*** Begin stack trace ***
tensorflow::CurrentStackTrace[abi:cxx11]()
torch_xla::XLATensor::GatherTensorsXlaData(std::vector<torch_xla::XLATensor, std::allocator<torch_xla::XLATensor> > const&, absl::Span<unsigned long const>, absl::Span<std::shared_ptr<xla::ComputationClient::Data> const>)
torch_xla::XLATensor::GetTensorsFused(std::vector<torch_xla::XLATensor, std::allocator<torch_xla::XLATensor> >*)
torch_xla::XLATensor::GetTensors(std::vector<torch_xla::XLATensor, std::allocator<torch_xla::XLATensor> >*)
torch_xla::bridge::XlaCreateTensorList(c10::ArrayRef<at::Tensor> const&)
torch_xla::AtenXlaTypeDefault::max_unpool2d(at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>)
c10::detail::wrap_kernel_functor_unboxed_<c10::detail::WrapRuntimeKernelFunctor_<at::Tensor (*)(at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>), at::Tensor, c10::guts::typelist::typelist<at::Tensor const&, at::Tensor const&, c10::ArrayRef<long> > >, at::Tensor (at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>)>::call(c10::OperatorKernel*, at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>)
_PyEval_EvalFrameDefault
_PyEval_EvalFrameDefault
_PyFunction_FastCallDict
PyObject_Call
_PyEval_EvalFrameDefault
_PyFunction_FastCallDict
_PyObject_FastCallKeywords
_PyEval_EvalFrameDefault
_PyEval_EvalFrameDefault
PyEval_EvalCode
PyRun_FileExFlags
PyRun_SimpleFileExFlags
Py_Main
main
__libc_start_main
_start
*** End stack trace ***
Traceback (most recent call last):
File "train.py", line 128, in <module>
train()
File "train.py", line 57, in train
predicted_tensor, softmaxed_tensor = model(input_tensor)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/content/drive/My Drive/Bonus/PySegNet/model.py", line 278, in forward
**x_4d = F.max_unpool2d(x_4, indices_4, kernel_size=2, stride=2, output_size=dim_4_try)**
File "/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py", line 671, in max_unpool2d
return torch._C._nn.max_unpool2d(input, indices, output_size)
RuntimeError: torch_xla/csrc/tensor.cpp:817 : Check failed: xla_data != nullptr
*** Begin stack trace ***
tensorflow::CurrentStackTrace[abi:cxx11]()
torch_xla::XLATensor::GatherTensorsXlaData(std::vector<torch_xla::XLATensor, std::allocator<torch_xla::XLATensor> > const&, absl::Span<unsigned long const>, absl::Span<std::shared_ptr<xla::ComputationClient::Data> const>)
torch_xla::XLATensor::GetTensorsFused(std::vector<torch_xla::XLATensor, std::allocator<torch_xla::XLATensor> >*)
torch_xla::XLATensor::GetTensors(std::vector<torch_xla::XLATensor, std::allocator<torch_xla::XLATensor> >*)
torch_xla::bridge::XlaCreateTensorList(c10::ArrayRef<at::Tensor> const&)
torch_xla::AtenXlaTypeDefault::max_unpool2d(at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>)
c10::detail::wrap_kernel_functor_unboxed_<c10::detail::WrapRuntimeKernelFunctor_<at::Tensor (*)(at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>), at::Tensor, c10::guts::typelist::typelist<at::Tensor const&, at::Tensor const&, c10::ArrayRef<long> > >, at::Tensor (at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>)>::call(c10::OperatorKernel*, at::Tensor const&, at::Tensor const&, c10::ArrayRef<long>)
_PyEval_EvalFrameDefault
_PyEval_EvalFrameDefault
_PyFunction_FastCallDict
PyObject_Call
_PyEval_EvalFrameDefault
_PyFunction_FastCallDict
_PyObject_FastCallKeywords
_PyEval_EvalFrameDefault
_PyEval_EvalFrameDefault
PyEval_EvalCode
PyRun_FileExFlags
PyRun_SimpleFileExFlags
Py_Main
main
__libc_start_main
_start
*** End stack trace ***
Here's a snippet of the code in model.py where the error is showing up.
# Encoder Stage - 5
dim_4 = x_3.size()
x_40 = F.relu(self.encoder_conv_40(x_3))
x_41 = F.relu(self.encoder_conv_41(x_40))
x_42 = F.relu(self.encoder_conv_42(x_41))
dim_4_try = x_42.size()
x_4, indices_4 = F.max_pool2d(x_42, kernel_size=2, stride=2, return_indices=True)
# Decoder
# print("22===> input of decode = ", x_4.shape)
dim_d = x_4.size()
# Decoder Stage - 5
**x_4d = F.max_unpool2d(x_4, indices_4, kernel_size=2, stride=2, output_size=dim_4_try)**
x_42d = F.relu(self.decoder_convtr_42(x_4d))
x_41d = F.relu(self.decoder_convtr_41(x_42d))
x_40d = F.relu(self.decoder_convtr_40(x_41d))
dim_4d = x_40d.size()
Can someone help me understand how to go about debugging this? Thanks!
Metadata
Metadata
Assignees
Labels
stale: Has not had recent activity