From 799ee784dca835d253efc714e0fd2be9f2b76c1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thibault=20Cl=C3=A9rice?= Date: Thu, 16 Feb 2023 10:38:05 +0100 Subject: [PATCH] Propagation of the --raise-on-error flag for raising non-blocking errors in blla segmentation Raises non-blocking errors instead of logging them when the flag is set. Without it, a polygonization failure on a single bad line is logged as a warning, so it no longer stops segmentation completely --- kraken/blla.py | 19 ++++++++++++++++--- kraken/kraken.py | 3 ++- kraken/lib/segmentation.py | 8 ++++++-- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/kraken/blla.py b/kraken/blla.py index 252930829..f10188134 100644 --- a/kraken/blla.py +++ b/kraken/blla.py @@ -164,6 +164,7 @@ def vec_lines(heatmap: torch.Tensor, scal_im: np.ndarray = None, suppl_obj: List[np.ndarray] = None, topline: Optional[bool] = False, + raise_on_error: bool = False, **kwargs) -> List[Dict[str, Any]]: r""" Computes lines from a stack of heatmaps, a class mapping, and scaling @@ -185,6 +186,8 @@ def vec_lines(heatmap: torch.Tensor, polygonization. topline: True for a topline, False for baseline, or None for a centerline.
+ raise_on_error: Raises error instead of logging them when they are + not-blocking Returns: A list of dictionaries containing the baselines, bounding polygons, and @@ -222,7 +225,13 @@ def vec_lines(heatmap: torch.Tensor, if reg_pol.contains(mid_point): suppl_obj.append(regions[reg_idx]) - pol = calculate_polygonal_environment(baselines=[bl[1]], im_feats=im_feats, suppl_obj=suppl_obj, topline=topline) + pol = calculate_polygonal_environment( + baselines=[bl[1]], + im_feats=im_feats, + suppl_obj=suppl_obj, + topline=topline, + raise_on_error=raise_on_error + ) if pol[0] is not None: lines.append((bl[0], bl[1], pol[0])) @@ -239,7 +248,8 @@ def segment(im: PIL.Image.Image, mask: Optional[np.ndarray] = None, reading_order_fn: Callable = polygonal_reading_order, model: Union[List[vgsl.TorchVGSLModel], vgsl.TorchVGSLModel] = None, - device: str = 'cpu') -> Dict[str, Any]: + device: str = 'cpu', + raise_on_error: bool = False) -> Dict[str, Any]: r""" Segments a page into text lines using the baseline segmenter. @@ -260,6 +270,8 @@ def segment(im: PIL.Image.Image, model: One or more TorchVGSLModel containing a segmentation model. If none is given a default model will be loaded. device: The target device to run the neural network on. 
+ raise_on_error: Raises error instead of logging them when they are + not-blocking Returns: A dictionary containing the text direction and under the key 'lines' a @@ -327,7 +339,8 @@ def segment(im: PIL.Image.Image, reading_order_fn=reading_order_fn, text_direction=text_direction, suppl_obj=suppl_obj, - topline=net.user_metadata['topline'] if 'topline' in net.user_metadata else False) + topline=net.user_metadata['topline'] if 'topline' in net.user_metadata else False, + raise_on_error=raise_on_error) if len(rets['cls_map']['baselines']) > 1: script_detection = True diff --git a/kraken/kraken.py b/kraken/kraken.py index 2b5c49cc1..362599729 100644 --- a/kraken/kraken.py +++ b/kraken/kraken.py @@ -158,7 +158,8 @@ def segmenter(legacy, model, text_direction, scale, maxcolseps, black_colseps, pad=pad, mask=mask) else: - res = blla.segment(im, text_direction, mask=mask, model=model, device=device) + res = blla.segment(im, text_direction, mask=mask, model=model, device=device, + raise_on_error=ctx.meta['raise_failed']) except Exception: if ctx.meta['raise_failed']: raise diff --git a/kraken/lib/segmentation.py b/kraken/lib/segmentation.py index 0f6f32758..6f6459ac5 100644 --- a/kraken/lib/segmentation.py +++ b/kraken/lib/segmentation.py @@ -639,7 +639,8 @@ def calculate_polygonal_environment(im: PIL.Image.Image = None, suppl_obj: Sequence[Sequence[Tuple[int, int]]] = None, im_feats: np.ndarray = None, scale: Tuple[int, int] = None, - topline: bool = False): + topline: bool = False, + raise_on_error: bool = False): """ Given a list of baselines and an input image, calculates a polygonal environment around each baseline. @@ -666,6 +667,8 @@ def calculate_polygonal_environment(im: PIL.Image.Image = None, be offset upwards, if set to True, baselines are on the top and will be offset downwards. If set to None, no offset will be applied. + raise_on_error: Raises error instead of logging them when they are + not-blocking Returns: List of lists of coordinates. 
If no polygonization could be compute for a baseline `None` is returned instead. @@ -727,7 +730,8 @@ def calculate_polygonal_environment(im: PIL.Image.Image = None, im_feats, bounds)) except Exception as e: - raise + if raise_on_error: + raise logger.warning(f'Polygonizer failed on line {idx}: {e}') polygons.append(None)