open-mmlab · gaotongxiao · Nov 2, 2022 · Oct 24, 2022 · Oct 24, 2022 · Oct 26, 2022
diff --git a/.dev_scripts/covignore.cfg b/.dev_scripts/covignore.cfg
@@ -13,3 +13,6 @@ mmocr/models/textdet/detectors/mmdet_wrapper.py
 
 # It will be removed after KieVisualizer and TextSpotterVisualizer
 mmocr/visualization/visualize.py
+
+# Add tests for data preparers later
+mmocr/datasets/preparers
diff --git a/configs/kie/_base_/datasets/wildreceipt-openset.py b/configs/kie/_base_/datasets/wildreceipt-openset.py
@@ -1,4 +1,4 @@
-wildreceipt_openset_data_root = 'data/kie/wildreceipt/'
+wildreceipt_openset_data_root = 'data/wildreceipt/'
 
 wildreceipt_openset_train = dict(
     type='WildReceiptDataset',

diff --git a/configs/kie/_base_/datasets/wildreceipt.py b/configs/kie/_base_/datasets/wildreceipt.py
@@ -1,4 +1,4 @@
-wildreceipt_data_root = 'data/kie/wildreceipt/'
+wildreceipt_data_root = 'data/wildreceipt/'
 
 wildreceipt_train = dict(
     type='WildReceiptDataset',

diff --git a/configs/textdet/_base_/datasets/icdar2015.py b/configs/textdet/_base_/datasets/icdar2015.py
@@ -1,17 +1,17 @@
-ic15_det_data_root = 'data/det/icdar2015'
+ic15_det_data_root = 'data/icdar2015'
 
 ic15_det_train = dict(
     type='OCRDataset',
     data_root=ic15_det_data_root,
-    ann_file='instances_training.json',
+    ann_file='textdet_train.json',
     data_prefix=dict(img_path='imgs/'),
     filter_cfg=dict(filter_empty_gt=True, min_size=32),
     pipeline=None)
 
 ic15_det_test = dict(
     type='OCRDataset',
     data_root=ic15_det_data_root,
-    ann_file='instances_test.json',
+    ann_file='textdet_test.json',
     data_prefix=dict(img_path='imgs/'),
     test_mode=True,
     pipeline=None)
diff --git a/configs/textdet/_base_/datasets/totaltext.py b/configs/textdet/_base_/datasets/totaltext.py
@@ -0,0 +1,17 @@
+ic15_det_data_root = 'data/totaltext'
+
+ic15_det_train = dict(
+    type='OCRDataset',
+    data_root=ic15_det_data_root,
+    ann_file='textdet_train.json',
+    data_prefix=dict(img_path='imgs/'),
+    filter_cfg=dict(filter_empty_gt=True, min_size=32),
+    pipeline=None)
+
+ic15_det_test = dict(
+    type='OCRDataset',
+    data_root=ic15_det_data_root,
+    ann_file='textdet_test.json',
+    data_prefix=dict(img_path='imgs/'),
+    test_mode=True,
+    pipeline=None)
diff --git a/configs/textrecog/_base_/datasets/icdar2015.py b/configs/textrecog/_base_/datasets/icdar2015.py
@@ -1,15 +1,17 @@
-ic15_rec_data_root = 'data/rec/icdar_2015/'
+ic15_rec_data_root = 'data/icdar2015/'
 
 ic15_rec_train = dict(
     type='OCRDataset',
     data_root=ic15_rec_data_root,
-    ann_file='train_labels.json',
+    ann_file='textrecog_train.json',
+    data_prefix=dict(img_path='crops/'),
     test_mode=False,
     pipeline=None)
 
 ic15_rec_test = dict(
     type='OCRDataset',
     data_root=ic15_rec_data_root,
-    ann_file='test_labels.json',
+    ann_file='textrecog_test.json',
+    data_prefix=dict(img_path='crops/'),
     test_mode=True,
     pipeline=None)
diff --git a/configs/textrecog/_base_/datasets/totaltext.py b/configs/textrecog/_base_/datasets/totaltext.py
@@ -0,0 +1,17 @@
+ic15_rec_data_root = 'data/totaltext/'
+
+ic15_rec_train = dict(
+    type='OCRDataset',
+    data_root=ic15_rec_data_root,
+    ann_file='textrecog_train.json',
+    data_prefix=dict(img_path='crops/'),
+    test_mode=False,
+    pipeline=None)
+
+ic15_rec_test = dict(
+    type='OCRDataset',
+    data_root=ic15_rec_data_root,
+    ann_file='textrecog_test.json',
+    data_prefix=dict(img_path='crops/'),
+    test_mode=True,
+    pipeline=None)
diff --git a/dataset_zoo/icdar2015/metafile.yml b/dataset_zoo/icdar2015/metafile.yml
@@ -0,0 +1,29 @@
+Name: 'Incidental Scene Text IC15'
+Paper:
+  Title: ICDAR 2015 Competition on Robust Reading
+  URL: https://rrc.cvc.uab.es/files/short_rrc_2015.pdf
+  Venue: ICDAR
+  Year: '2015'
+  BibTeX: '@inproceedings{karatzas2015icdar,
+  title={ICDAR 2015 competition on robust reading},
+  author={Karatzas, Dimosthenis and Gomez-Bigorda, Lluis and Nicolaou, Anguelos and Ghosh, Suman and Bagdanov, Andrew and Iwamura, Masakazu and Matas, Jiri and Neumann, Lukas and Chandrasekhar, Vijay Ramaseshan and Lu, Shijian and others},
+  booktitle={2015 13th international conference on document analysis and recognition (ICDAR)},
+  pages={1156--1160},
+  year={2015},
+  organization={IEEE}}'
+Data:
+  Website: https://rrc.cvc.uab.es/?ch=4
+  Language:
+    - English
+  Scene:
+    - Natural Scene
+  Granularity:
+    - Word
+  Tasks:
+    - textdet
+    - textrecog
+    - textspotting
+  License:
+    Type: CC BY 4.0
+    Link: https://creativecommons.org/licenses/by/4.0/
+  Format: .txt
diff --git a/dataset_zoo/icdar2015/sample_anno.md b/dataset_zoo/icdar2015/sample_anno.md
@@ -0,0 +1,19 @@
+**Text Detection**
+
+```text
+# x1,y1,x2,y2,x3,y3,x4,y4,trans
+
+377,117,463,117,465,130,378,130,Genaxis Theatre
+493,115,519,115,519,131,493,131,[06]
+374,155,409,155,409,170,374,170,###
+```
+
+**Text Recognition**
+
+```text
+# img_name, "text"
+
+word_1.png, "Genaxis Theatre"
+word_2.png, "[06]"
+word_3.png, "62-03"
+```
diff --git a/dataset_zoo/icdar2015/textdet.py b/dataset_zoo/icdar2015/textdet.py
@@ -0,0 +1,51 @@
+data_root = 'data/icdar2015'
+cache_path = 'data/cache'
+
+data_obtainer = dict(
+    type='NaiveDataObtainer',
+    cache_path=cache_path,
+    data_root=data_root,
+    files=[
+        dict(
+            url='https://rrc.cvc.uab.es/downloads/ch4_training_images.zip',
+            save_name='ic15_textdet_train_img.zip',
+            md5='c51cbace155dcc4d98c8dd19d378f30d',
+            split=['train'],
+            content=['image'],
+            mapping=[['ic15_textdet_train_img', 'imgs/train']]),
+        dict(
+            url='https://rrc.cvc.uab.es/downloads/ch4_test_images.zip',
+            save_name='ic15_textdet_test_img.zip',
+            md5='97e4c1ddcf074ffcc75feff2b63c35dd',
+            split=['test'],
+            content=['image'],
+            mapping=[['ic15_textdet_test_img', 'imgs/test']]),
+        dict(
+            url='https://rrc.cvc.uab.es/downloads/'
+            'ch4_training_localization_transcription_gt.zip',
+            save_name='ic15_textdet_train_gt.zip',
+            md5='3bfaf1988960909014f7987d2343060b',
+            split=['train'],
+            content=['annotation'],
+            mapping=[['ic15_textdet_train_gt', 'annotations/train']]),
+        dict(
+            url='https://rrc.cvc.uab.es/downloads/'
+            'Challenge4_Test_Task4_GT.zip',
+            save_name='ic15_textdet_test_gt.zip',
+            md5='8bce173b06d164b98c357b0eb96ef430',
+            split=['test'],
+            content=['annotation'],
+            mapping=[['ic15_textdet_test_gt', 'annotations/test']]),
+    ])
+
+data_converter = dict(
+    type='TextDetDataConverter',
+    splits=['train', 'test'],
+    data_root=data_root,
+    gatherer=dict(
+        type='pair_gather',
+        suffixes=['.jpg', '.JPG'],
+        rule=[r'img_(\d+)\.([jJ][pP][gG])', r'gt_img_\1.txt']),
+    parser=dict(type='ICDAR2015TextDetAnnParser'),
+    dumper=dict(type='JsonDumper'),
+    delete=['annotations', 'ic15_textdet_test_img', 'ic15_textdet_train_img'])
diff --git a/dataset_zoo/icdar2015/textrecog.py b/dataset_zoo/icdar2015/textrecog.py
@@ -0,0 +1,42 @@
+data_root = 'data/icdar2015'
+cache_path = 'data/cache'
+
+data_obtainer = dict(
+    type='NaiveDataObtainer',
+    cache_path=cache_path,
+    data_root=data_root,
+    files=[
+        dict(
+            url='https://rrc.cvc.uab.es/downloads/'
+            'ch4_training_word_images_gt.zip',
+            save_name='ic15_textrecog_train_img_gt.zip',
+            md5='600caf8c6a64a3dcf638839820edcca9',
+            split=['train'],
+            content=['image', 'annotation'],
+            mapping=[[
+                'ic15_textrecog_train_img_gt/gt.txt', 'annotations/train.txt'
+            ], ['ic15_textrecog_train_img_gt', 'crops/train']]),
+        dict(
+            url='https://rrc.cvc.uab.es/downloads/ch4_test_word_images_gt.zip',
+            save_name='ic15_textrecog_test_img.zip',
+            md5='d7a71585f4cc69f89edbe534e7706d5d',
+            split=['test'],
+            content=['image'],
+            mapping=[['ic15_textrecog_test_img', 'crops/test']]),
+        dict(
+            url='https://rrc.cvc.uab.es/downloads/'
+            'Challenge4_Test_Task3_GT.txt',
+            save_name='ic15_textrecog_test_gt.txt',
+            md5='d7a71585f4cc69f89edbe534e7706d5d',
+            split=['test'],
+            content=['annotation'],
+            mapping=[['ic15_textrecog_test_gt.txt', 'annotations/test.txt']])
+    ])
+
+data_converter = dict(
+    type='TextRecogDataConverter',
+    splits=['train', 'test'],
+    data_root=data_root,
+    gatherer=dict(type='mono_gather', mapping="f'{split}.txt'"),
+    parser=dict(type='ICDAR2015TextRecogAnnParser'),
+    dumper=dict(type='JsonDumper'))
diff --git a/dataset_zoo/icdar2015/textspotting.py b/dataset_zoo/icdar2015/textspotting.py
@@ -0,0 +1,3 @@
+_base_ = ['textdet.py']
+
+data_converter = dict(type='TextSpottingDataConverter')
diff --git a/dataset_zoo/totaltext/metafile.yml b/dataset_zoo/totaltext/metafile.yml
@@ -0,0 +1,30 @@
+Name: 'Total Text'
+Paper:
+  Title: "Total-Text: Towards Orientation Robustness in Scene Text Detection"
+  URL: https://link.springer.com/article/10.1007/s10032-019-00334-z
+  Venue: IJDAR
+  Year: '2020'
+  BibTeX: '@article{CK2019,
+  author = {Chee Kheng Chng and Chee Seng Chan and Chenglin Liu},
+  title = {Total-Text: Towards Orientation Robustness in Scene Text Detection},
+  journal = {International Journal on Document Analysis and Recognition (IJDAR)},
+  volume = {23},
+  pages = {31-52},
+  year = {2020},
+  doi = {10.1007/s10032-019-00334-z}}'
+Data:
+  Website: https://github.com/cs-chan/Total-Text-Dataset
+  Language:
+    - English
+  Scene:
+    - Natural Scene
+  Granularity:
+    - Word
+  Tasks:
+    - textdet
+    - textrecog
+    - textspotting
+  License:
+    Type: BSD-3
+    Link: https://github.com/cs-chan/Total-Text-Dataset/blob/master/LICENSE
+  Format: .txt
diff --git a/dataset_zoo/totaltext/sample_anno.md b/dataset_zoo/totaltext/sample_anno.md
@@ -0,0 +1,6 @@
+**Text Detection/Spotting**
+
+```text
+x: [[259 313 389 427 354 302]], y: [[542 462 417 459 507 582]], ornt: [u'c'], transcriptions: [u'PAUL']
+x: [[400 478 494 436]], y: [[398 380 448 465]], ornt: [u'#'], transcriptions: [u'#']
+```
diff --git a/dataset_zoo/totaltext/textdet.py b/dataset_zoo/totaltext/textdet.py
@@ -0,0 +1,39 @@
+data_root = 'data/totaltext'
+cache_path = 'data/cache'
+
+data_obtainer = dict(
+    type='NaiveDataObtainer',
+    cache_path=cache_path,
+    data_root=data_root,
+    files=[
+        dict(
+            url='https://universityofadelaide.box.com/shared/static/'
+            '8xro7hnvb0sqw5e5rxm73tryc59j6s43.zip',
+            save_name='totaltext.zip',
+            md5='5b56d71a4005a333cf200ff35ce87f75',
+            split=['train', 'test'],
+            content=['image'],
+            mapping=[['totaltext/Images/Train', 'imgs/train'],
+                     ['totaltext/Images/Test', 'imgs/test']]),
+        dict(
+            url='https://universityofadelaide.box.com/shared/static/'
+            '2vmpvjb48pcrszeegx2eznzc4izan4zf.zip',
+            save_name='txt_format.zip',
+            md5='97e4c1ddcf074ffcc75feff2b63c35dd',
+            split=['train', 'test'],
+            content=['annotation'],
+            mapping=[['txt_format/Train', 'annotations/train'],
+                     ['txt_format/Test', 'annotations/test']]),
+    ])
+
+data_converter = dict(
+    type='TextDetDataConverter',
+    splits=['train', 'test'],
+    data_root=data_root,
+    gatherer=dict(
+        type='pair_gather',
+        suffixes=['.jpg', '.JPG'],
+        rule=[r'img(\d+)\.([jJ][pP][gG])', r'poly_gt_img\1.txt']),
+    parser=dict(type='TotaltextTextDetAnnParser', data_root=data_root),
+    dumper=dict(type='JsonDumper'),
+    delete=['totaltext', 'txt_format', 'annotations'])
diff --git a/dataset_zoo/totaltext/textrecog.py b/dataset_zoo/totaltext/textrecog.py
@@ -0,0 +1,3 @@
+_base_ = ['textdet.py']
+
+data_converter = dict(type='TextRecogCropConverter')
diff --git a/dataset_zoo/totaltext/textspotting.py b/dataset_zoo/totaltext/textspotting.py
@@ -0,0 +1,3 @@
+_base_ = ['textdet.py']
+
+data_converter = dict(type='TextSpottingDataConverter')
diff --git a/dataset_zoo/wildreceipt/kie.py b/dataset_zoo/wildreceipt/kie.py
@@ -0,0 +1,32 @@
+data_root = 'data/wildreceipt'
+cache_path = 'data/cache'
+
+data_obtainer = dict(
+    type='NaiveDataObtainer',
+    cache_path=cache_path,
+    data_root=data_root,
+    files=[
+        dict(
+            url='https://download.openmmlab.com/mmocr/data/wildreceipt.tar',
+            save_name='wildreceipt.tar',
+            md5='2a2c4a1b4777fb4fe185011e17ad46ae',
+            split=['train', 'test'],
+            content=['image', 'annotation'],
+            mapping=[
+                ['wildreceipt/wildreceipt/class_list.txt', 'class_list.txt'],
+                ['wildreceipt/wildreceipt/dict.txt', 'dict.txt'],
+                ['wildreceipt/wildreceipt/test.txt', 'test.txt'],
+                ['wildreceipt/wildreceipt/train.txt', 'train.txt'],
+                ['wildreceipt/wildreceipt/image_files', 'image_files'],
+            ]),
+    ])
+
+data_converter = dict(
+    type='WildReceiptConverter',
+    splits=['train', 'test'],
+    data_root=data_root,
+    gatherer=dict(
+        type='mono_gather', mapping="f'{split}.txt'", ann_path=data_root),
+    parser=dict(type='WildreceiptKIEAnnParser', data_root=data_root),
+    dumper=dict(type='WildreceiptOpensetDumper'),
+    delete=['wildreceipt'])
diff --git a/dataset_zoo/wildreceipt/metafile.yml b/dataset_zoo/wildreceipt/metafile.yml
@@ -0,0 +1,30 @@
+Name: 'WildReceipt'
+Paper:
+  Title: "Spatial Dual-Modality Graph Reasoning for Key Information Extraction"
+  URL: https://arxiv.org/abs/2103.14470
+  Venue: arXiv
+  Year: '2021'
+  BibTeX: '@article{sun2021spatial,
+  title={Spatial Dual-Modality Graph Reasoning for Key Information Extraction},
+  author={Sun, Hongbin and Kuang, Zhanghui and Yue, Xiaoyu and Lin, Chenhao and Zhang, Wayne},
+  journal={arXiv preprint arXiv:2103.14470},
+  year={2021}
+}
+'
+Data:
+  Website: https://download.openmmlab.com/mmocr/data/wildreceipt.tar
+  Language:
+    - English
+  Scene:
+    - Receipt
+  Granularity:
+    - Word
+  Tasks:
+    - kie
+    - textdet
+    - textrecog
+    - textspotting
+  License:
+    Type: N/A
+    Link: N/A
+  Format: .txt