Skip to content

An exception raised in a Multicore engine worker is uncatchable and causes a hang #334

@jacklovell

Description

@jacklovell

The following code causes a ValueError to be raised inside TargettedPixel._generate_rays, because the target's bounding sphere overlaps the pixel:

from raysect.core import Point3D, translate, SerialEngine
from raysect.optical import World
from raysect.optical.observer import TargettedPixel, RadiancePipeline0D
from raysect.optical.material import NullMaterial
from raysect.primitive import Box

world = World()

dx = 0.001
dy = 0.001
dz = 0.001
target = Box(lower=Point3D(-dx/2, -dy/2, -dz/2),
             upper=Point3D(dx/2, dy/2, dz/2),
             parent=world, material=NullMaterial())
target.transform = translate(0, 0, 0.8*dz)

pixel = TargettedPixel([target], targetted_path_prob=1.0, x_width=0.001, y_width=0.001,
                       spectral_bins=1, pixel_samples=10000, parent=world)
pixel.pipelines = [RadiancePipeline0D()]
pixel.render_engine = SerialEngine()

pixel.observe()

Running this results in the following:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/cherab/raysect_workflow_bug.py in <module>
     21 
---> 22 pixel.observe()

~/cherab/raysect/raysect/optical/observer/base/observer.pyx in raysect.optical.observer.base.observer._ObserverBase.observe()
    263     @cython.boundscheck(False)
    264     @cython.wraparound(False)
--> 265     cpdef observe(self):
    266         """ Ask this Camera to Observe its world. """
    267 

~/cherab/raysect/raysect/optical/observer/base/observer.pyx in raysect.optical.observer.base.observer._ObserverBase.observe()
    299         for slice_id, template in enumerate(templates):
    300 
--> 301             self.render_engine.run(
    302                 tasks, self._render_pixel, self._update_state,
    303                 render_args=(slice_id, template),

~/cherab/raysect/raysect/core/workflow.py in run(self, tasks, render, update, render_args, render_kwargs, update_args, update_kwargs)
    114 
    115         for task in tasks:
--> 116             result = render(task, *render_args, **render_kwargs)
    117             update(result, *update_args, **update_kwargs)
    118 

~/cherab/raysect/raysect/optical/observer/base/observer.pyx in raysect.optical.observer.base.observer._ObserverBase._render_pixel()
    361     @cython.boundscheck(False)
    362     @cython.wraparound(False)
--> 363     cpdef object _render_pixel(self, tuple task, int slice_id, Ray template):
    364         """
    365         - passed in are ray_template and pipeline object references

~/cherab/raysect/raysect/optical/observer/base/observer.pyx in raysect.optical.observer.base.observer._ObserverBase._render_pixel()
    388 
    389         # generate rays and obtain pixel processors from each pipeline
--> 390         rays = self._obtain_rays(task, template)
    391         pixel_processors = self._obtain_pixel_processors(task, slice_id)
    392 

~/cherab/raysect/raysect/optical/observer/base/observer.pyx in raysect.optical.observer.base.observer.Observer0D._obtain_rays()
    671         cdef int samples
    672         samples, = task
--> 673         return self._generate_rays(template, samples)
    674 
    675     cpdef double _obtain_sensitivity(self, tuple task):

~/cherab/raysect/raysect/optical/observer/nonimaging/targetted_pixel.pyx in raysect.optical.observer.nonimaging.targetted_pixel.TargettedPixel._generate_rays()
    279 
    280             if pdf <= 0:
--> 281                 raise ValueError('Ray direction probability is zero. The target object extends beyond the pixel horizon.')
    282 

ValueError: Ray direction probability is zero. The target object extends beyond the pixel horizon.

With the serial engine, this exception is correctly propagated to the caller of the observe method, and can be caught and handled.

However, with the Multicore engine the exception is raised inside the worker process, which prevents that task's results from being written to the result queue. The consumer process then waits indefinitely for results that never arrive, so the call hangs. The only way out of this hang is a keyboard interrupt:

Process Process-2:
Traceback (most recent call last):
  File "/usr/local/depot/Python-3.7/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/usr/local/depot/Python-3.7/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/jlovell/cherab/raysect/raysect/core/workflow.py", line 296, in _worker
    results.append(render(task, *args, **kwargs))
  File "raysect/optical/observer/base/observer.pyx", line 363, in raysect.optical.observer.base.observer._ObserverBase._render_pixel
    cpdef object _render_pixel(self, tuple task, int slice_id, Ray template):
  File "raysect/optical/observer/base/observer.pyx", line 390, in raysect.optical.observer.base.observer._ObserverBase._render_pixel
    rays = self._obtain_rays(task, template)
  File "raysect/optical/observer/base/observer.pyx", line 673, in raysect.optical.observer.base.observer.Observer0D._obtain_rays
    return self._generate_rays(template, samples)
  File "raysect/optical/observer/nonimaging/targetted_pixel.pyx", line 281, in raysect.optical.observer.nonimaging.targetted_pixel.TargettedPixel._generate_rays
    raise ValueError('Ray direction probability is zero. The target object extends beyond the pixel horizon.')
ValueError: Ray direction probability is zero. The target object extends beyond the pixel horizon.
^C---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
~/cherab/raysect_workflow_bug.py in <module>
     21 pixel.render_engine.processes = 1
     22 
---> 23 pixel.observe()

~/cherab/raysect/raysect/optical/observer/base/observer.pyx in raysect.optical.observer.base.observer._ObserverBase.observe()
    263     @cython.boundscheck(False)
    264     @cython.wraparound(False)
--> 265     cpdef observe(self):
    266         """ Ask this Camera to Observe its world. """
    267 

~/cherab/raysect/raysect/optical/observer/base/observer.pyx in raysect.optical.observer.base.observer._ObserverBase.observe()
    299         for slice_id, template in enumerate(templates):
    300 
--> 301             self.render_engine.run(
    302                 tasks, self._render_pixel, self._update_state,
    303                 render_args=(slice_id, template),

~/cherab/raysect/raysect/core/workflow.py in run(self, tasks, render, update, render_args, render_kwargs, update_args, update_kwargs)
    217         remaining = len(tasks)
    218         while remaining:
--> 219             results = result_queue.get()
    220             for result in results:
    221                 update(result, *update_args, **update_kwargs)

/usr/local/depot/Python-3.7/lib/python3.7/multiprocessing/queues.py in get(self)
    350     def get(self):
    351         with self._rlock:
--> 352             res = self._reader.recv_bytes()
    353         # unserialize the data after having released the lock
    354         return _ForkingPickler.loads(res)

/usr/local/depot/Python-3.7/lib/python3.7/multiprocessing/connection.py in recv_bytes(self, maxlength)
    214         if maxlength is not None and maxlength < 0:
    215             raise ValueError("negative maxlength")
--> 216         buf = self._recv_bytes(maxlength)
    217         if buf is None:
    218             self._bad_message_length()

/usr/local/depot/Python-3.7/lib/python3.7/multiprocessing/connection.py in _recv_bytes(self, maxsize)
    405 
    406     def _recv_bytes(self, maxsize=None):
--> 407         buf = self._recv(4)
    408         size, = struct.unpack("!i", buf.getvalue())
    409         if maxsize is not None and size > maxsize:

/usr/local/depot/Python-3.7/lib/python3.7/multiprocessing/connection.py in _recv(self, size, read)
    377         remaining = size
    378         while remaining > 0:
--> 379             chunk = read(handle, remaining)
    380             n = len(chunk)
    381             if n == 0:

KeyboardInterrupt: 

It would be nice to propagate exceptions from worker processes to the caller, either to allow error handling or to let the program terminate rather than hang. The hang is particularly unfortunate for long-running batch processes, where it may not be obvious that it has occurred.

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions