Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Fixed CORD dataset and refactored dataset constructors #299

Merged
merged 4 commits into from
Jun 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 13 additions & 9 deletions doctr/datasets/cord.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class CORD(VisionDataset):
Args:
train: whether the subset should be the training one
sample_transforms: composable transformations that will be applied to each image
rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
**kwargs: keyword arguments from `VisionDataset`.
"""
TRAIN = ('https://github.com/mindee/doctr/releases/download/v0.1.1/cord_train.zip',
Expand All @@ -53,6 +54,7 @@ def __init__(
self.train = train
self.sample_transforms = sample_transforms
for img_path in os.listdir(self.root):
# File existence check
if not os.path.exists(os.path.join(self.root, img_path)):
raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_path)}")
stem = Path(img_path).stem
Expand All @@ -64,23 +66,25 @@ def __init__(
if len(word["text"]) > 0:
x = word["quad"]["x1"], word["quad"]["x2"], word["quad"]["x3"], word["quad"]["x4"]
y = word["quad"]["y1"], word["quad"]["y2"], word["quad"]["y3"], word["quad"]["y4"]
if not rotated_bbox:
# Reduce 8 coords to 4
left, right = min(x), max(x)
top, bot = min(y), max(y)
_targets.append((word["text"], [left, top, right, bot]))
else:
if rotated_bbox:
x, y, w, h, alpha = fit_rbbox(np.array([
[x[0], y[0]],
[x[1], y[1]],
[x[2], y[2]],
[x[3], y[3]],
], np.float32))
_targets.append((word["text"], [x, y, w, h, alpha]))
], dtype=np.float32))
box = [x, y, w, h, alpha]
else:
# Reduce 8 coords to 4
box = [min(x), min(y), max(x), max(y)]
_targets.append((word['text'], box))

text_targets, box_targets = zip(*_targets)

self.data.append((img_path, dict(boxes=np.asarray(box_targets, dtype=np.int), labels=text_targets)))
self.data.append((
img_path,
dict(boxes=np.asarray(box_targets, dtype=np.int).clip(min=0), labels=text_targets)
))

def extra_repr(self) -> str:
return f"train={self.train}"
10 changes: 6 additions & 4 deletions doctr/datasets/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class DetectionDataset(AbstractDataset):
img_folder: folder with all the images of the dataset
label_folder: folder with all the corresponding labels (stem needs to be identical)
sample_transforms: composable transformations that will be applied to each image
rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
"""
def __init__(
self,
Expand All @@ -40,17 +41,18 @@ def __init__(

self.data: List[Tuple[str, Dict[str, Any]]] = []
for img_path in os.listdir(self.root):
# File existence check
if not os.path.exists(os.path.join(self.root, img_path)):
raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_path)}")
with open(os.path.join(label_folder, img_path + '.json'), 'rb') as f:
boxes = json.load(f)
bboxes = np.asarray(boxes["boxes_1"] + boxes["boxes_2"] + boxes["boxes_3"], dtype=np.float32)
if not rotated_bbox:
# Switch to xmin, ymin, xmax, ymax
bboxes = np.concatenate((bboxes.min(axis=1), bboxes.max(axis=1)), axis=1)
else:
if rotated_bbox:
# Switch to rotated rects
bboxes = np.asarray([list(fit_rbbox(box)) for box in bboxes], dtype=np.float32)
else:
# Switch to xmin, ymin, xmax, ymax
bboxes = np.concatenate((bboxes.min(axis=1), bboxes.max(axis=1)), axis=1)

is_ambiguous = [False] * (len(boxes["boxes_1"]) + len(boxes["boxes_2"])) + [True] * len(boxes["boxes_3"])
self.data.append((img_path, dict(boxes=bboxes, flags=np.asarray(is_ambiguous))))
Expand Down
2 changes: 2 additions & 0 deletions doctr/datasets/funsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class FUNSD(VisionDataset):
Args:
train: whether the subset should be the training one
sample_transforms: composable transformations that will be applied to each image
rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
**kwargs: keyword arguments from `VisionDataset`.
"""

Expand All @@ -53,6 +54,7 @@ def __init__(
self.root = os.path.join(self._root, subfolder, 'images')
self.data: List[Tuple[str, Dict[str, Any]]] = []
for img_path in os.listdir(self.root):
# File existence check
if not os.path.exists(os.path.join(self.root, img_path)):
raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_path)}")
stem = Path(img_path).stem
Expand Down
9 changes: 5 additions & 4 deletions doctr/datasets/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class OCRDataset(AbstractDataset):
img_folder: local path to image folder (all jpg at the root)
label_file: local path to the label file
sample_transforms: composable transformations that will be applied to each image
rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
**kwargs: keyword arguments from `VisionDataset`.
"""

Expand All @@ -47,6 +48,7 @@ def __init__(
for file_dic in data:
# Get image path
img_name = Path(os.path.basename(file_dic["raw-archive-filepath"])).stem + '.jpg'
# File existence check
if not os.path.exists(os.path.join(self.root, img_name)):
raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_name)}")

Expand All @@ -60,15 +62,14 @@ def __init__(
for box in file_dic["coordinates"]:
if rotated_bbox:
x, y, w, h, alpha = fit_rbbox(np.asarray(box, dtype=np.float32))
box = [x, y, w, h, alpha]
is_valid.append(w > 0 and h > 0)
if is_valid[-1]:
box_targets.append([x, y, w, h, alpha])
else:
xs, ys = zip(*box)
box = [min(xs), min(ys), max(xs), max(ys)]
is_valid.append(box[0] < box[2] and box[1] < box[3])
if is_valid[-1]:
box_targets.append(box)
if is_valid[-1]:
box_targets.append(box)

text_targets = [word for word, _valid in zip(file_dic["string"], is_valid) if _valid]
self.data.append((img_name, dict(boxes=np.asarray(box_targets, dtype=np.float32), labels=text_targets)))
1 change: 1 addition & 0 deletions doctr/datasets/recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def __init__(
with open(labels_path) as f:
labels = json.load(f)
for img_path in os.listdir(self.root):
# File existence check
if not os.path.exists(os.path.join(self.root, img_path)):
raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_path)}")
label = labels.get(img_path)
Expand Down
2 changes: 2 additions & 0 deletions doctr/datasets/sroie.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class SROIE(VisionDataset):
Args:
train: whether the subset should be the training one
sample_transforms: composable transformations that will be applied to each image
rotated_bbox: whether polygons should be considered as rotated bounding box (instead of straight ones)
**kwargs: keyword arguments from `VisionDataset`.
"""

Expand Down Expand Up @@ -55,6 +56,7 @@ def __init__(
self.root = os.path.join(self._root, 'images')
self.data: List[Tuple[str, Dict[str, Any]]] = []
for img_path in os.listdir(self.root):
# File existence check
if not os.path.exists(os.path.join(self.root, img_path)):
raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_path)}")
stem = Path(img_path).stem
Expand Down