Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance import performance for built-in plugins #1031

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## \[Unreleased\]
### New features
### Enhancements
- Enhance import performance for built-in plugins
(<https://github.com/openvinotoolkit/datumaro/pull/1031>)
### Bug fixes

## 26/05/2023 - Release 1.3.1
### Bug fixes
Expand Down
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ include README.md
include requirements-core.txt
include requirements-default.txt
include 3rd-party.txt
include datumaro/plugins/specs.json
15 changes: 8 additions & 7 deletions contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
- [Design document](https://openvinotoolkit.github.io/datumaro/latest/docs/explanation/architecture)
- [Developer manual](https://openvinotoolkit.github.io/datumaro/latest/docs/reference/datumaro_module)

## Installation
## Installation for developers

### Prerequisites

Expand All @@ -23,20 +23,21 @@ python -m virtualenv venv
. venv/bin/activate
```

Then install all dependencies:

Install Datumaro with optional dependencies:
``` bash
pip install -r requirements.txt
cd /path/to/the/cloned/repo/
pip install -e ".[default,tf]"
```

Install Datumaro:
Then install test dependencies:

``` bash
pip install -e /path/to/the/cloned/repo/
pip install -r tests/requirements.txt
```

**Optional dependencies**

These components are only required for plugins and not installed by default:
Developers should install the following optional components to run our tests:

- OpenVINO
- Accuracy Checker
Expand Down
10 changes: 6 additions & 4 deletions datumaro/cli/commands/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import os.path as osp

from datumaro.components.dataset import Dataset
from datumaro.components.project import Environment
from datumaro.components.environment import DEFAULT_ENVIRONMENT
from datumaro.util.os_util import make_file_name

from ..util import MultilineFormatter
Expand All @@ -17,8 +17,10 @@


def build_parser(parser_ctor=argparse.ArgumentParser):
builtin_readers = sorted(set(Environment().importers) | set(Environment().extractors))
builtin_writers = sorted(Environment().exporters)
builtin_readers = sorted(
set(DEFAULT_ENVIRONMENT.importers) | set(DEFAULT_ENVIRONMENT.extractors)
)
builtin_writers = sorted(DEFAULT_ENVIRONMENT.exporters)

parser = parser_ctor(
help="Convert an existing dataset to another format",
Expand Down Expand Up @@ -98,7 +100,7 @@ def get_sensitive_args():


def convert_command(args):
env = Environment()
env = DEFAULT_ENVIRONMENT

try:
exporter = env.exporters[args.output_format]
Expand Down
4 changes: 2 additions & 2 deletions datumaro/cli/commands/detect_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from datumaro.cli.util import MultilineFormatter
from datumaro.cli.util.project import load_project
from datumaro.components.environment import Environment
from datumaro.components.environment import DEFAULT_ENVIRONMENT
from datumaro.components.errors import ProjectNotFoundError
from datumaro.components.format_detection import RejectionReason
from datumaro.util import dump_json_file
Expand Down Expand Up @@ -77,7 +77,7 @@ def detect_format_command(args):
if project is not None:
env = project.env
else:
env = Environment()
env = DEFAULT_ENVIRONMENT

report = {"rejected_formats": {}}

Expand Down
6 changes: 3 additions & 3 deletions datumaro/cli/commands/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
import sys
from typing import Dict

from datumaro.components.environment import DEFAULT_ENVIRONMENT
from datumaro.components.extractor_tfds import (
AVAILABLE_TFDS_DATASETS,
TFDS_EXTRACTOR_AVAILABLE,
TfdsDatasetRemoteMetadata,
)
from datumaro.components.project import Environment
from datumaro.util import dump_json
from datumaro.util.os_util import make_file_name

Expand Down Expand Up @@ -48,7 +48,7 @@ def build_parser(parser_ctor=argparse.ArgumentParser):


def build_get_subparser(subparsers: argparse._SubParsersAction):
builtin_writers = sorted(Environment().exporters)
builtin_writers = sorted(DEFAULT_ENVIRONMENT.exporters)
if TFDS_EXTRACTOR_AVAILABLE:
available_datasets = ", ".join(f"tfds:{name}" for name in AVAILABLE_TFDS_DATASETS)
else:
Expand Down Expand Up @@ -137,7 +137,7 @@ def get_sensitive_args():


def download_command(args):
env = Environment()
env = DEFAULT_ENVIRONMENT

if args.dataset_id.startswith("tfds:"):
if TFDS_EXTRACTOR_AVAILABLE:
Expand Down
4 changes: 2 additions & 2 deletions datumaro/cli/commands/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import os.path as osp

from datumaro.components.dataset import DEFAULT_FORMAT
from datumaro.components.environment import Environment
from datumaro.components.environment import DEFAULT_ENVIRONMENT
from datumaro.components.errors import ProjectNotFoundError
from datumaro.components.hl_ops import HLOps
from datumaro.components.merge.intersect_merge import IntersectMerge
Expand Down Expand Up @@ -232,7 +232,7 @@ def merge_command(args):
if project is not None:
env = project.env
else:
env = Environment()
env = DEFAULT_ENVIRONMENT

try:
exporter = env.exporters[args.format]
Expand Down
4 changes: 2 additions & 2 deletions datumaro/cli/commands/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import os
import os.path as osp

from datumaro.components.environment import Environment
from datumaro.components.environment import DEFAULT_ENVIRONMENT
from datumaro.components.errors import ProjectNotFoundError
from datumaro.util.scope import scope_add, scoped

Expand Down Expand Up @@ -131,7 +131,7 @@ def patch_command(args):
if project is not None:
env = project.env
else:
env = Environment()
env = DEFAULT_ENVIRONMENT

target_dataset, _project = parse_full_revpath(args.target, project)
if _project is not None:
Expand Down
6 changes: 3 additions & 3 deletions datumaro/cli/commands/require_project/modification/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import argparse
import logging as log

from datumaro.components.environment import Environment
from datumaro.components.environment import DEFAULT_ENVIRONMENT
from datumaro.components.errors import ProjectNotFoundError
from datumaro.util.scope import on_error_do, scope_add, scoped

Expand All @@ -21,7 +21,7 @@


def build_parser(parser_ctor=argparse.ArgumentParser):
env = Environment()
env = DEFAULT_ENVIRONMENT
builtins = sorted(set(env.extractors) | set(env.importers))

parser = parser_ctor(
Expand Down Expand Up @@ -125,7 +125,7 @@ def add_command(args):
if project is not None:
env = project.env
else:
env = Environment()
env = DEFAULT_ENVIRONMENT

fmt = args.format
if fmt in env.importers:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import os
import os.path as osp

from datumaro.components.environment import Environment
from datumaro.components.environment import DEFAULT_ENVIRONMENT
from datumaro.components.errors import ProjectNotFoundError
from datumaro.components.project import ProjectBuildTargets
from datumaro.util.os_util import make_file_name
Expand All @@ -24,7 +24,7 @@


def build_parser(parser_ctor=argparse.ArgumentParser):
builtins = sorted(Environment().exporters)
builtins = sorted(DEFAULT_ENVIRONMENT.exporters)

parser = parser_ctor(
help="Export project",
Expand Down Expand Up @@ -143,7 +143,7 @@ def export_command(args):
if project is not None:
env = project.env
else:
env = Environment()
env = DEFAULT_ENVIRONMENT

try:
exporter = env.exporters[args.format]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import logging as log
import os

from datumaro.components.environment import Environment
from datumaro.components.environment import DEFAULT_ENVIRONMENT
from datumaro.components.errors import ProjectNotFoundError
from datumaro.util.scope import on_error_do, scope_add, scoped

Expand All @@ -21,7 +21,7 @@


def build_parser(parser_ctor=argparse.ArgumentParser):
env = Environment()
env = DEFAULT_ENVIRONMENT
builtins = sorted(set(env.extractors) | set(env.importers))

parser = parser_ctor(
Expand Down Expand Up @@ -131,7 +131,7 @@ def import_command(args):
if project is not None:
env = project.env
else:
env = Environment()
env = DEFAULT_ENVIRONMENT

fmt = args.format
if fmt in env.importers:
Expand Down
6 changes: 3 additions & 3 deletions datumaro/cli/commands/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import os
import os.path as osp

from datumaro.components.environment import Environment
from datumaro.components.environment import DEFAULT_ENVIRONMENT
from datumaro.components.errors import ProjectNotFoundError
from datumaro.components.project import ProjectBuildTargets
from datumaro.util import str_to_bool
Expand All @@ -19,7 +19,7 @@


def build_parser(parser_ctor=argparse.ArgumentParser):
builtins = sorted(Environment().transforms)
builtins = sorted(DEFAULT_ENVIRONMENT.transforms)

parser = parser_ctor(
help="Transform project",
Expand Down Expand Up @@ -166,7 +166,7 @@ def transform_command(args):
if project is not None:
env = project.env
else:
env = Environment()
env = DEFAULT_ENVIRONMENT

try:
transform = env.transforms[args.transform]
Expand Down
4 changes: 2 additions & 2 deletions datumaro/cli/commands/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import argparse
import logging as log

from datumaro.components.environment import Environment
from datumaro.components.environment import DEFAULT_ENVIRONMENT
from datumaro.components.errors import ProjectNotFoundError
from datumaro.components.validator import TaskType
from datumaro.util import dump_json_file
Expand Down Expand Up @@ -113,7 +113,7 @@ def validate_command(args):
if project is not None:
env = project.env
else:
env = Environment()
env = DEFAULT_ENVIRONMENT

try:
validator_type = env.validators[args.task]
Expand Down
3 changes: 2 additions & 1 deletion datumaro/components/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
from datumaro.components.merge import DEFAULT_MERGE_POLICY
from datumaro.components.progress_reporting import NullProgressReporter, ProgressReporter
from datumaro.components.transformer import ItemTransform, Transform
from datumaro.plugins.transforms import ProjectLabels
from datumaro.util import is_method_redefined
from datumaro.util.log_utils import logging_disabled
from datumaro.util.meta_file_util import load_hash_key
Expand Down Expand Up @@ -626,6 +625,8 @@ def update(self, source: Union[DatasetPatch, IDataset, Iterable[DatasetItem]]):
else:
self.put(source.data.get(*item_id))
elif isinstance(source, IDataset):
from datumaro.plugins.transforms import ProjectLabels

for item in ProjectLabels(
source, self.categories().get(AnnotationType.label, LabelCategories())
):
Expand Down
Loading