diff --git a/doctr/datasets/cord.py b/doctr/datasets/cord.py
index fb2f774d88..ab890f556b 100644
--- a/doctr/datasets/cord.py
+++ b/doctr/datasets/cord.py
@@ -28,6 +28,7 @@ class CORD(VisionDataset):
     Args:
         train: whether the subset should be the training one
         sample_transforms: composable transformations that will be applied to each image
+        rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
         **kwargs: keyword arguments from `VisionDataset`.
     """
     TRAIN = ('https://github.com/mindee/doctr/releases/download/v0.1.1/cord_train.zip',
@@ -53,6 +54,7 @@ def __init__(
         self.train = train
         self.sample_transforms = sample_transforms
         for img_path in os.listdir(self.root):
+            # File existence check
             if not os.path.exists(os.path.join(self.root, img_path)):
                 raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_path)}")
             stem = Path(img_path).stem
@@ -64,23 +66,25 @@ def __init__(
                         if len(word["text"]) > 0:
                             x = word["quad"]["x1"], word["quad"]["x2"], word["quad"]["x3"], word["quad"]["x4"]
                             y = word["quad"]["y1"], word["quad"]["y2"], word["quad"]["y3"], word["quad"]["y4"]
-                            if not rotated_bbox:
-                                # Reduce 8 coords to 4
-                                left, right = min(x), max(x)
-                                top, bot = min(y), max(y)
-                                _targets.append((word["text"], [left, top, right, bot]))
-                            else:
+                            if rotated_bbox:
                                 x, y, w, h, alpha = fit_rbbox(np.array([
                                     [x[0], y[0]],
                                     [x[1], y[1]],
                                     [x[2], y[2]],
                                     [x[3], y[3]],
-                                ], np.float32))
-                                _targets.append((word["text"], [x, y, w, h, alpha]))
+                                ], dtype=np.float32))
+                                box = [x, y, w, h, alpha]
+                            else:
+                                # Reduce 8 coords to 4
+                                box = [min(x), min(y), max(x), max(y)]
+                            _targets.append((word['text'], box))
 
             text_targets, box_targets = zip(*_targets)
 
-            self.data.append((img_path, dict(boxes=np.asarray(box_targets, dtype=np.int), labels=text_targets)))
+            self.data.append((
+                img_path,
+                dict(boxes=np.asarray(box_targets, dtype=np.int).clip(min=0), labels=text_targets)
+            ))
 
     def extra_repr(self) -> str:
         return f"train={self.train}"
diff --git a/doctr/datasets/detection.py b/doctr/datasets/detection.py
index 308286f1f1..ce992a948f 100644
--- a/doctr/datasets/detection.py
+++ b/doctr/datasets/detection.py
@@ -27,6 +27,7 @@ class DetectionDataset(AbstractDataset):
         img_folder: folder with all the images of the dataset
         label_folder: folder with all the corresponding labels (stem needs to be identical)
         sample_transforms: composable transformations that will be applied to each image
+        rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
     """
     def __init__(
         self,
@@ -40,17 +41,18 @@ def __init__(
         self.data: List[Tuple[str, Dict[str, Any]]] = []
         for img_path in os.listdir(self.root):
+            # File existence check
             if not os.path.exists(os.path.join(self.root, img_path)):
                 raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_path)}")
             with open(os.path.join(label_folder, img_path + '.json'), 'rb') as f:
                 boxes = json.load(f)
 
             bboxes = np.asarray(boxes["boxes_1"] + boxes["boxes_2"] + boxes["boxes_3"], dtype=np.float32)
-            if not rotated_bbox:
-                # Switch to xmin, ymin, xmax, ymax
-                bboxes = np.concatenate((bboxes.min(axis=1), bboxes.max(axis=1)), axis=1)
-            else:
+            if rotated_bbox:
                 # Switch to rotated rects
                 bboxes = np.asarray([list(fit_rbbox(box)) for box in bboxes], dtype=np.float32)
+            else:
+                # Switch to xmin, ymin, xmax, ymax
+                bboxes = np.concatenate((bboxes.min(axis=1), bboxes.max(axis=1)), axis=1)
 
             is_ambiguous = [False] * (len(boxes["boxes_1"]) + len(boxes["boxes_2"])) + [True] * len(boxes["boxes_3"])
             self.data.append((img_path, dict(boxes=bboxes, flags=np.asarray(is_ambiguous))))
diff --git a/doctr/datasets/funsd.py b/doctr/datasets/funsd.py
index 2763319ef5..ac242376ab 100644
--- a/doctr/datasets/funsd.py
+++ b/doctr/datasets/funsd.py
@@ -27,6 +27,7 @@ class FUNSD(VisionDataset):
     Args:
         train: whether the subset should be the training one
         sample_transforms: composable transformations that will be applied to each image
+        rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
         **kwargs: keyword arguments from `VisionDataset`.
     """
 
@@ -53,6 +54,7 @@ def __init__(
         self.root = os.path.join(self._root, subfolder, 'images')
         self.data: List[Tuple[str, Dict[str, Any]]] = []
         for img_path in os.listdir(self.root):
+            # File existence check
             if not os.path.exists(os.path.join(self.root, img_path)):
                 raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_path)}")
             stem = Path(img_path).stem
diff --git a/doctr/datasets/ocr.py b/doctr/datasets/ocr.py
index 3148de9ffa..f73072bd0a 100644
--- a/doctr/datasets/ocr.py
+++ b/doctr/datasets/ocr.py
@@ -24,6 +24,7 @@ class OCRDataset(AbstractDataset):
         img_folder: local path to image folder (all jpg at the root)
         label_file: local path to the label file
         sample_transforms: composable transformations that will be applied to each image
+        rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
         **kwargs: keyword arguments from `VisionDataset`.
     """
 
@@ -47,6 +48,7 @@ def __init__(
         for file_dic in data:
             # Get image path
            img_name = Path(os.path.basename(file_dic["raw-archive-filepath"])).stem + '.jpg'
+            # File existence check
            if not os.path.exists(os.path.join(self.root, img_name)):
                raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_name)}")
 
@@ -60,15 +62,14 @@ def __init__(
             for box in file_dic["coordinates"]:
                 if rotated_bbox:
                     x, y, w, h, alpha = fit_rbbox(np.asarray(box, dtype=np.float32))
+                    box = [x, y, w, h, alpha]
                     is_valid.append(w > 0 and h > 0)
-                    if is_valid[-1]:
-                        box_targets.append([x, y, w, h, alpha])
                 else:
                     xs, ys = zip(*box)
                     box = [min(xs), min(ys), max(xs), max(ys)]
                     is_valid.append(box[0] < box[2] and box[1] < box[3])
-                    if is_valid[-1]:
-                        box_targets.append(box)
+                if is_valid[-1]:
+                    box_targets.append(box)
 
             text_targets = [word for word, _valid in zip(file_dic["string"], is_valid) if _valid]
             self.data.append((img_name, dict(boxes=np.asarray(box_targets, dtype=np.float32), labels=text_targets)))
diff --git a/doctr/datasets/recognition.py b/doctr/datasets/recognition.py
index a77a102e7b..53e1c96120 100644
--- a/doctr/datasets/recognition.py
+++ b/doctr/datasets/recognition.py
@@ -39,6 +39,7 @@ def __init__(
         with open(labels_path) as f:
             labels = json.load(f)
         for img_path in os.listdir(self.root):
+            # File existence check
             if not os.path.exists(os.path.join(self.root, img_path)):
                 raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_path)}")
             label = labels.get(img_path)
diff --git a/doctr/datasets/sroie.py b/doctr/datasets/sroie.py
index b2449d0866..93b223557f 100644
--- a/doctr/datasets/sroie.py
+++ b/doctr/datasets/sroie.py
@@ -27,6 +27,7 @@ class SROIE(VisionDataset):
     Args:
         train: whether the subset should be the training one
         sample_transforms: composable transformations that will be applied to each image
+        rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
         **kwargs: keyword arguments from `VisionDataset`.
     """
 
@@ -55,6 +56,7 @@ def __init__(
        self.root = os.path.join(self._root, 'images')
        self.data: List[Tuple[str, Dict[str, Any]]] = []
        for img_path in os.listdir(self.root):
+            # File existence check
            if not os.path.exists(os.path.join(self.root, img_path)):
                raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_path)}")
            stem = Path(img_path).stem