Skip to content

Commit

Permalink
Fix Roboflow format: Disable roboflow_tfrecord when tf is not install…
Browse files Browse the repository at this point in the history
…ed (#1130)

<!-- Contributing guide:
https://github.com/openvinotoolkit/datumaro/blob/develop/CONTRIBUTING.md
-->

### Summary

<!--
Resolves #111 and #222.
Depends on #1000 (for series of dependent commits).

This PR introduces this capability to make the project better in this
and that.

- Added this feature
- Removed that feature
- Fixed the problem #1234
-->

### How to test
<!-- Describe the testing procedure for reviewers, if changes are
not fully covered by unit tests or manual testing can be complicated.
-->

### Checklist
<!-- Put an 'x' in all the boxes that apply -->
- [x] I have added unit tests to cover my changes.
- [ ] I have added integration tests to cover my changes.
- [x] I have added the description of my changes into
[CHANGELOG](https://github.com/openvinotoolkit/datumaro/blob/develop/CHANGELOG.md).
- [ ] I have updated the
[documentation](https://github.com/openvinotoolkit/datumaro/tree/develop/docs)
accordingly

### License

- [ ] I submit _my code changes_ under the same [MIT
License](https://github.com/openvinotoolkit/datumaro/blob/develop/LICENSE)
that covers the project.
  Feel free to contact the maintainers if that's a concern.
- [ ] I have updated the license header for each file (see an example
below).

```python
# Copyright (C) 2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
```
  • Loading branch information
wonjuleee committed Aug 29, 2023
1 parent 8ffe5f1 commit d19ec3e
Show file tree
Hide file tree
Showing 12 changed files with 264 additions and 72 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/openvinotoolkit/datumaro/pull/1089>)
- Support video annotation import/export
(<https://github.com/openvinotoolkit/datumaro/pull/1124>)
- Add multiframework (PyTorch, TensorFlow) converter
(<https://github.com/openvinotoolkit/datumaro/pull/1125>)
- Add SAM OVMS and Triton server Docker image builders
(<https://github.com/openvinotoolkit/datumaro/pull/1129>)

Expand All @@ -29,6 +31,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Bug fixes
- Fix bugs for Tile transform
(<https://github.com/openvinotoolkit/datumaro/pull/1123>)
- Disable Roboflow TFRecord format when TensorFlow is not installed
(<https://github.com/openvinotoolkit/datumaro/pull/1130>)

## 27/07/2023 - Release 1.4.1
### Bug fixes
Expand Down
27 changes: 0 additions & 27 deletions src/datumaro/plugins/data_formats/roboflow/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import errno
import os
import os.path as osp
import re
from typing import Dict, List, Optional, Union

from defusedxml import ElementTree
Expand All @@ -26,8 +25,6 @@
from datumaro.components.media import Image, ImageFromFile
from datumaro.plugins.data_formats.coco.base import _CocoBase
from datumaro.plugins.data_formats.coco.format import CocoImporterType, CocoTask
from datumaro.plugins.data_formats.tf_detection_api.base import TfDetectionApiBase
from datumaro.plugins.data_formats.tf_detection_api.format import TfrecordImporterType
from datumaro.plugins.data_formats.voc.base import VocBase
from datumaro.plugins.data_formats.voc.format import VocImporterType, VocTask
from datumaro.plugins.data_formats.yolo.base import YoloUltralyticsBase
Expand Down Expand Up @@ -319,27 +316,3 @@ def _load_items(self, path):
)

return items


class RoboflowTfrecord(TfDetectionApiBase):
    """Dataset base for Roboflow TFRecord exports.

    Construction is forwarded to ``TfDetectionApiBase`` with the importer type
    pinned to ``TfrecordImporterType.roboflow``; the only behavior defined here
    is how the label map text is parsed.
    """

    def __init__(
        self,
        path: str,
        *,
        subset: Optional[str] = None,
        ctx: Optional[ImportContext] = None,
    ):
        super().__init__(
            path=path,
            subset=subset,
            tfrecord_importer_type=TfrecordImporterType.roboflow,
            ctx=ctx,
        )

    @staticmethod
    def _parse_labelmap(text):
        """Extract ``{label name: id}`` pairs from label map text.

        An entry is a quoted ``name`` immediately followed by an integer
        ``id``. Besides the ``name: "x", id: 1`` form, single-quoted names and
        entries without a comma between the two fields are accepted, since
        protobuf text format permits both.

        Args:
            text: Contents of the label map file.

        Returns:
            Dict mapping each label name to its integer id. When a name occurs
            more than once, the last id wins.
        """
        entry_pattern = re.compile(
            r"""name:\s*(?:"([^"]+)"|'([^']+)')\s*,?\s*id:\s*(\d+)"""
        )

        # Exactly one of the two name groups is non-empty for every match.
        return {
            double_quoted or single_quoted: int(label_id)
            for double_quoted, single_quoted, label_id in entry_pattern.findall(text)
        }
80 changes: 80 additions & 0 deletions src/datumaro/plugins/data_formats/roboflow/base_tfrecord.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Copyright (C) 2023 Intel Corporation
#
# SPDX-License-Identifier: MIT

import os
import re
from typing import Optional

from datumaro.components.importer import ImportContext, Importer
from datumaro.components.lazy_plugin import extra_deps
from datumaro.plugins.data_formats.tf_detection_api.base import TfDetectionApiBase
from datumaro.plugins.data_formats.tf_detection_api.format import TfrecordImporterType
from datumaro.util.tf_util import has_feature
from datumaro.util.tf_util import import_tf as _import_tf

tf = _import_tf()


@extra_deps("tensorflow")
class RoboflowTfrecordImporter(Importer):
    """Importer that locates ``.tfrecord`` sources for ``roboflow_tfrecord``."""

    @classmethod
    def find_sources(cls, path):
        """Return one ``roboflow_tfrecord`` source per subset directory.

        ``.tfrecord`` files are searched recursively under ``path``. A file is
        skipped when ``has_feature`` reports that its records carry an
        ``image/source_id`` string feature. The subset name is the name of the
        directory containing the file; if several accepted files share a
        directory name, the last one found is kept.

        Args:
            path: Root location to search.

        Returns:
            List of source dicts with ``url``, ``format`` and
            ``options["subset"]`` keys; empty when nothing is found.
        """
        candidates = cls._find_sources_recursive(
            path=path,
            ext=".tfrecord",
            extractor_name="roboflow_tfrecord",
        )
        if len(candidates) == 0:
            return []

        # NOTE(review): presumably this feature marks plain TF Detection API
        # records rather than Roboflow exports -- confirm.
        rejected_feature = {
            "image/source_id": tf.io.FixedLenFeature([], tf.string),
        }

        url_by_subset = {}
        for candidate in candidates:
            url = candidate["url"]
            if not has_feature(path=url, feature=rejected_feature):
                parent_dir_name = os.path.dirname(url).split(os.sep)[-1]
                url_by_subset[parent_dir_name] = url

        return [
            {
                "url": url,
                "format": "roboflow_tfrecord",
                "options": {
                    "subset": subset,
                },
            }
            for subset, url in url_by_subset.items()
        ]


class RoboflowTfrecordBase(TfDetectionApiBase):
    """Dataset base for Roboflow TFRecord exports.

    Construction is forwarded to ``TfDetectionApiBase`` with the importer type
    pinned to ``TfrecordImporterType.roboflow``; the only behavior defined here
    is how the label map text is parsed.
    """

    def __init__(
        self,
        path: str,
        *,
        subset: Optional[str] = None,
        ctx: Optional[ImportContext] = None,
    ):
        super().__init__(
            path=path,
            subset=subset,
            tfrecord_importer_type=TfrecordImporterType.roboflow,
            ctx=ctx,
        )

    @staticmethod
    def _parse_labelmap(text):
        """Extract ``{label name: id}`` pairs from label map text.

        An entry is a quoted ``name`` immediately followed by an integer
        ``id``. Besides the ``name: "x", id: 1`` form, single-quoted names and
        entries without a comma between the two fields are accepted, since
        protobuf text format permits both.

        Args:
            text: Contents of the label map file.

        Returns:
            Dict mapping each label name to its integer id. When a name occurs
            more than once, the last id wins.
        """
        entry_pattern = re.compile(
            r"""name:\s*(?:"([^"]+)"|'([^']+)')\s*,?\s*id:\s*(\d+)"""
        )

        # Exactly one of the two name groups is non-empty for every match.
        return {
            double_quoted or single_quoted: int(label_id)
            for double_quoted, single_quoted, label_id in entry_pattern.findall(text)
        }
46 changes: 7 additions & 39 deletions src/datumaro/plugins/data_formats/roboflow/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from datumaro.components.errors import DatasetImportError
from datumaro.components.format_detection import FormatDetectionConfidence, FormatDetectionContext
from datumaro.components.importer import Importer
from datumaro.components.lazy_plugin import extra_deps
from datumaro.components.merge.extractor_merger import ExtractorMerger


Expand All @@ -32,7 +31,7 @@ def detect(

@classmethod
def find_sources(cls, path):
subset_paths = glob(osp.join(path, "*", cls.ANN_FILE_NAME), recursive=True)
subset_paths = glob(osp.join(path, "**", cls.ANN_FILE_NAME), recursive=True)

sources = []
for subset_path in subset_paths:
Expand Down Expand Up @@ -103,7 +102,7 @@ def _filter_ann_file(fpath: str):
dirname=cls.ANN_DIR_NAME,
file_filter=_filter_ann_file,
filename="**/*",
max_depth=1,
max_depth=2,
recursive=True,
)
if len(sources) == 0:
Expand All @@ -115,20 +114,20 @@ def _filter_ann_file(fpath: str):
def find_sources(cls, path: str) -> List[Dict[str, Any]]:
    """Return one source per subset directory found under ``path``.

    Each annotation source reported by ``cls._get_sources`` contributes its
    containing directory: the directory's name becomes the subset name and
    the directory itself becomes the source URL, so subsets nested below
    ``path`` keep their real location.

    Args:
        path: Root location handed to ``cls._get_sources``.

    Returns:
        List of source dicts with ``url``, ``format`` (``cls.FORMAT``) and
        ``options["subset"]`` keys, one per distinct subset name. When two
        directories share a name, the last one seen wins.
    """
    subsets = {}
    for source in cls._get_sources(path):
        subset_dir = osp.dirname(source["url"])
        subsets[subset_dir.split(os.sep)[-1]] = subset_dir

    return [
        {
            "url": url,
            "format": cls.FORMAT,
            "options": {
                "subset": subset,
            },
        }
        for subset, url in subsets.items()
    ]
Expand Down Expand Up @@ -169,7 +168,7 @@ def find_sources(cls, path: str) -> List[Dict[str, Any]]:

sources = [
{
"url": osp.join(path, subset),
"url": osp.dirname(osp.dirname(urls[0])),
"format": cls.FORMAT,
"options": {
"subset": subset,
Expand Down Expand Up @@ -211,34 +210,3 @@ class RoboflowCreateMlImporter(RoboflowCocoImporter):
class RoboflowMulticlassImporter(RoboflowCocoImporter):
FORMAT = "roboflow_multiclass"
ANN_FILE_NAME = "_classes.csv"


@extra_deps("tensorflow")
class RoboflowTfrecord(Importer):
    """Importer that locates ``cells.tfrecord`` sources for ``roboflow_tfrecord``."""

    FORMAT = "roboflow_tfrecord"

    @classmethod
    def find_sources(cls, path):
        """Return one source per directory holding a matching ``.tfrecord`` file.

        The recursive search is restricted to ``filename="cells"`` with the
        ``.tfrecord`` extension. The subset name is the name of the directory
        containing the file; a later file replaces an earlier one with the
        same subset name.

        Args:
            path: Root location to search.

        Returns:
            List of source dicts with ``url``, ``format`` and
            ``options["subset"]`` keys; empty when nothing is found.
        """
        found = cls._find_sources_recursive(
            path=path,
            ext=".tfrecord",
            extractor_name="roboflow_tfrecord",
            filename="cells",
        )
        if len(found) == 0:
            return []

        url_by_subset = {}
        for entry in found:
            url = entry["url"]
            parent_dir_name = osp.dirname(url).split(os.sep)[-1]
            url_by_subset[parent_dir_name] = url

        return [
            {
                "url": url,
                "format": cls.FORMAT,
                "options": {
                    "subset": subset,
                },
            }
            for subset, url in url_by_subset.items()
        ]
29 changes: 28 additions & 1 deletion src/datumaro/plugins/data_formats/tf_detection_api/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from datumaro.components.lazy_plugin import extra_deps
from datumaro.components.media import Image
from datumaro.util.image import decode_image, lazy_image
from datumaro.util.tf_util import has_feature
from datumaro.util.tf_util import import_tf as _import_tf

from .format import DetectionApiPath, TfrecordImporterType
Expand Down Expand Up @@ -201,4 +202,30 @@ def _parse_tfrecord_file(self, filepath, subset, images_dir):
class TfDetectionApiImporter(Importer):
    """Importer that locates ``.tfrecord`` sources for ``tf_detection_api``."""

    @classmethod
    def find_sources(cls, path):
        """Return the ``.tfrecord`` files under ``path`` that look like this format.

        A file is kept only when ``has_feature`` reports that its records
        carry an ``image/source_id`` string feature. Kept files are keyed by
        the file-name component just before the extension, so a later file
        with the same key replaces an earlier one.

        Args:
            path: Root location to search recursively.

        Returns:
            List of source dicts with ``url`` and ``format`` keys; empty when
            no file qualifies.
        """
        sources = cls._find_sources_recursive(
            path=path,
            ext=".tfrecord",
            extractor_name="tf_detection_api",
        )
        if not sources:
            return []

        desired_feature = {
            "image/source_id": tf.io.FixedLenFeature([], tf.string),
        }

        subsets = {}
        for source in sources:
            url = source["url"]
            if has_feature(path=url, feature=desired_feature):
                subset_name = osp.basename(url).split(".")[-2]
                subsets[subset_name] = url

        return [
            {
                "url": url,
                "format": "tf_detection_api",
            }
            for url in subsets.values()
        ]
4 changes: 2 additions & 2 deletions src/datumaro/plugins/specs.json
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@
"extra_deps": []
},
{
"import_path": "datumaro.plugins.data_formats.roboflow.base.RoboflowTfrecord",
"import_path": "datumaro.plugins.data_formats.roboflow.base_tfrecord.RoboflowTfrecordBase",
"plugin_name": "roboflow_tfrecord",
"plugin_type": "DatasetBase",
"extra_deps": [
Expand Down Expand Up @@ -748,7 +748,7 @@
"extra_deps": []
},
{
"import_path": "datumaro.plugins.data_formats.roboflow.importer.RoboflowTfrecord",
"import_path": "datumaro.plugins.data_formats.roboflow.base_tfrecord.RoboflowTfrecordImporter",
"plugin_name": "roboflow_tfrecord",
"plugin_type": "Importer",
"extra_deps": [
Expand Down
20 changes: 19 additions & 1 deletion src/datumaro/util/tf_util.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2019-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT


import logging as log

enable_tf_check = False


Expand Down Expand Up @@ -90,3 +92,19 @@ def import_tf(check=None):
pass

return tf


def has_feature(path: str, feature) -> bool:
    """Report whether the first record of a TFRecord file parses with ``feature``.

    Only the first record is inspected: it alone decides the answer, so the
    rest of the file is not read or parsed.

    Args:
        path: Location of the TFRecord file.
        feature: Feature specification dict passed to
            ``tf.io.parse_single_example``.

    Returns:
        ``True`` if the first record parses with ``feature``; ``False`` if
        parsing fails or the file holds no records.
    """
    tf = import_tf()

    for record in tf.data.TFRecordDataset(path):
        try:
            tf.io.parse_single_example(record, feature)
        except Exception as e:
            # Best-effort probe: any parse failure means "feature not present".
            log.warning("Dataset doesn't have a feature: %s", e)
            return False
        return True

    # No records at all.
    return False
10 changes: 10 additions & 0 deletions tests/assets/roboflow_dataset/tfrecord/train/label_map.pbtxt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
item {
name: 'label_0'
id: 1
display_name: 'label_0'
}
item {
name: 'label_1'
id: 2
display_name: 'label_1'
}
Binary file not shown.
10 changes: 10 additions & 0 deletions tests/assets/roboflow_dataset/tfrecord/val/label_map.pbtxt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
item {
name: 'label_0'
id: 1
display_name: 'label_0'
}
item {
name: 'label_1'
id: 2
display_name: 'label_1'
}
Binary file not shown.
Loading

0 comments on commit d19ec3e

Please sign in to comment.