Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Refactor] Refactor fileio but without breaking bc #533

Merged
merged 19 commits into from
Sep 26, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions mmengine/fileio/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,26 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .backends import register_backend
from .file_client import (BaseStorageBackend, FileClient, HardDiskBackend,
HTTPBackend, LmdbBackend, MemcachedBackend,
PetrelBackend)
from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler
from .io import dump, load, register_handler
from .io import (copy_if_symlink_fails, copyfile, copyfile_from_local,
copyfile_to_local, copytree, copytree_from_local,
copytree_to_local, dump, exists, generate_presigned_url,
get_bytes, get_file_backend, get_local_path, get_text, isdir,
isfile, join_path, list_dir_or_file, load, put_bytes,
put_text, register_handler, rmfile, rmtree)
from .parse import dict_from_file, list_from_file

__all__ = [
'BaseStorageBackend', 'FileClient', 'PetrelBackend', 'MemcachedBackend',
'LmdbBackend', 'HardDiskBackend', 'HTTPBackend', 'load', 'dump',
'register_handler', 'BaseFileHandler', 'JsonHandler', 'PickleHandler',
'YamlHandler', 'list_from_file', 'dict_from_file'
'LmdbBackend', 'HardDiskBackend', 'HTTPBackend', 'copy_if_symlink_fails',
'copyfile', 'copyfile_from_local', 'copyfile_to_local', 'copytree',
'copytree_from_local', 'copytree_to_local', 'exists',
'generate_presigned_url', 'get_bytes', 'get_file_backend',
'get_local_path', 'get_text', 'isdir', 'isfile', 'join_path',
'list_dir_or_file', 'put_bytes', 'put_text', 'rmfile', 'rmtree', 'load',
'dump', 'register_handler', 'BaseFileHandler', 'JsonHandler',
'PickleHandler', 'YamlHandler', 'list_from_file', 'dict_from_file',
'register_backend'
]
14 changes: 14 additions & 0 deletions mmengine/fileio/backends/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .base import BaseStorageBackend
from .http_backend import HTTPBackend
from .lmdb_backend import LmdbBackend
from .local_backend import LocalBackend
from .memcached_backend import MemcachedBackend
from .petrel_backend import PetrelBackend
from .registry_utils import backends, prefix_to_backends, register_backend

__all__ = [
'BaseStorageBackend', 'LocalBackend', 'HTTPBackend', 'LmdbBackend',
'MemcachedBackend', 'PetrelBackend', 'register_backend', 'backends',
'prefix_to_backends'
]
25 changes: 25 additions & 0 deletions mmengine/fileio/backends/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABCMeta, abstractmethod


class BaseStorageBackend(metaclass=ABCMeta):
    """Abstract base class shared by all storage backends.

    Every concrete backend has to override two apis:

    - :meth:`get_bytes()` reads the file as a byte stream.
    - :meth:`get_text()` reads the file as texts.
    """

    @property
    def name(self):
        """str: Class name of the concrete backend instance."""
        backend_cls = self.__class__
        return backend_cls.__name__

    @abstractmethod
    def get_bytes(self, filepath):
        """Read ``filepath`` as a byte stream (must be overridden)."""

    @abstractmethod
    def get_text(self, filepath):
        """Read ``filepath`` as text (must be overridden)."""
74 changes: 74 additions & 0 deletions mmengine/fileio/backends/http_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import tempfile
from contextlib import contextmanager
from pathlib import Path
from typing import Generator, Union
from urllib.request import urlopen

from .base import BaseStorageBackend


class HTTPBackend(BaseStorageBackend):
    """HTTP and HTTPS storage backend."""

    def get_bytes(self, filepath: str) -> bytes:
        """Read bytes from a given ``filepath``.

        Args:
            filepath (str): Path to read data.

        Returns:
            bytes: Expected bytes object.

        Examples:
            >>> backend = HTTPBackend()
            >>> backend.get_bytes('http://path/of/file')
            b'hello world'
        """
        # Use the response as a context manager so the underlying connection
        # is closed deterministically instead of waiting for garbage
        # collection.
        with urlopen(filepath) as response:
            return response.read()

    def get_text(self, filepath: str, encoding: str = 'utf-8') -> str:
        """Read text from a given ``filepath``.

        Args:
            filepath (str): Path to read data.
            encoding (str): The encoding format used to decode the content
                read from ``filepath``. Defaults to 'utf-8'.

        Returns:
            str: Expected text reading from ``filepath``.

        Examples:
            >>> backend = HTTPBackend()
            >>> backend.get_text('http://path/of/file')
            'hello world'
        """
        with urlopen(filepath) as response:
            return response.read().decode(encoding)

    @contextmanager
    def get_local_path(
            self, filepath: str) -> Generator[Union[str, Path], None, None]:
        """Download a file from ``filepath`` to a temporary local path.

        ``get_local_path`` is decorated by
        :meth:`contextlib.contextmanager`. It can be called with ``with``
        statement, and when exits from the ``with`` statement, the temporary
        path will be released.

        Args:
            filepath (str): Download a file from ``filepath``.

        Yields:
            str: Path of the temporary local file holding the downloaded
            content.

        Examples:
            >>> backend = HTTPBackend()
            >>> # After exiting from the ``with`` clause,
            >>> # the path will be removed
            >>> with backend.get_local_path('http://path/of/file') as path:
            ...     # do something here
        """
        # Create the temporary file *before* entering ``try`` so that the
        # ``finally`` clause never references an unbound name when the
        # creation itself fails.
        tmp_file = tempfile.NamedTemporaryFile(delete=False)
        try:
            # The inner ``with`` closes the handle even when the download
            # raises, so the file is never removed while still open.
            with tmp_file:
                tmp_file.write(self.get_bytes(filepath))
            yield tmp_file.name
        finally:
            os.remove(tmp_file.name)
80 changes: 80 additions & 0 deletions mmengine/fileio/backends/lmdb_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Copyright (c) OpenMMLab. All rights reserved.
from pathlib import Path
from typing import Union

from .base import BaseStorageBackend


class LmdbBackend(BaseStorageBackend):
    """Lmdb storage backend.

    Args:
        db_path (str): Lmdb database path.
        readonly (bool): Lmdb environment parameter. If True, disallow any
            write operations. Defaults to True.
        lock (bool): Lmdb environment parameter. If False, when concurrent
            access occurs, do not lock the database. Defaults to False.
        readahead (bool): Lmdb environment parameter. If False, disable the OS
            filesystem readahead mechanism, which may improve random read
            performance when a database is larger than RAM. Defaults to False.
        **kwargs: Extra keyword arguments forwarded to ``lmdb.open``.

    Attributes:
        db_path (str): Lmdb database path.
    """

    def __init__(self,
                 db_path,
                 readonly=True,
                 lock=False,
                 readahead=False,
                 **kwargs):
        # Fail early with an actionable message when lmdb is not installed;
        # the module itself is only needed later, in ``_get_client``.
        try:
            import lmdb  # noqa: F401
        except ImportError:
            raise ImportError(
                'Please run "pip install lmdb" to enable LmdbBackend.')

        self.db_path = str(db_path)
        self.readonly = readonly
        self.lock = lock
        self.readahead = readahead
        self.kwargs = kwargs
        # The environment is opened lazily on the first ``get_bytes`` call.
        self._client = None

    def get_bytes(self, filepath: Union[str, Path]) -> bytes:
        """Get values according to the filepath.

        Args:
            filepath (str or Path): Here, filepath is the lmdb key.

        Returns:
            bytes: Expected bytes object.

        Examples:
            >>> backend = LmdbBackend('path/to/lmdb')
            >>> backend.get_bytes('key')
            b'hello world'
        """
        if self._client is None:
            self._client = self._get_client()

        filepath = str(filepath)
        with self._client.begin(write=False) as txn:
            # Keys are looked up as ASCII-encoded bytes.
            value_buf = txn.get(filepath.encode('ascii'))
        return value_buf

    def get_text(self, filepath, encoding=None):
        """Not supported by this backend.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def _get_client(self):
        """Open the lmdb environment with the options given at init time."""
        import lmdb

        return lmdb.open(
            self.db_path,
            readonly=self.readonly,
            lock=self.lock,
            readahead=self.readahead,
            **self.kwargs)

    def __del__(self):
        # ``_client`` stays ``None`` until the first read, and does not exist
        # at all when ``__init__`` raised before assigning it, so only close
        # an environment that was actually opened.
        client = getattr(self, '_client', None)
        if client is not None:
            client.close()
zhouzaida marked this conversation as resolved.
Show resolved Hide resolved
Loading