Add force_type option to from_url #218

Merged · 2 commits · Oct 1, 2021
54 changes: 46 additions & 8 deletions pfio/v2/fs.py
@@ -283,7 +283,7 @@ def open_url(url: str, mode: str = 'r', **kwargs) -> 'IOBase':
f.read()

.. note:: Some FS resources won't be closed when using this
functionality.
functionality. See ``from_url`` for keyword arguments.

Returns:
a FileObject that must be closed.
@@ -298,22 +298,65 @@ def from_url(url: str, **kwargs) -> 'FS':
def from_url(url: str, **kwargs) -> 'FS':
'''Factory pattern implementation, creates FS from URI

If ``force_type`` is set to an archive type rather than a
scheme, the suffix of the URL path is ignored and the
specified archive format is tried by opening the blob file.

If ``force_type`` is set to a scheme type, the FS is built
from it accordingly. The URL path is expected to be a
directory for file systems, or a path prefix for S3.

Arguments:
url (str): A URL string compliant with RFC 1738.

force_type (str): Force the type of FS to be returned:
one of "zip", "hdfs", "s3", or "file". Default is
``"file"``.

.. note:: Some FS resources won't be closed when using this
functionality.

'''
parsed = urlparse(url)
force_type = kwargs.pop('force_type', None)

if parsed.scheme:
scheme = parsed.scheme
else:
scheme = 'file' # Default is local

if parsed.path.endswith('.zip'):
# When ``force_type`` is defined, it must be equal to the given scheme.
if force_type is not None and force_type != "zip":
if force_type != scheme:
raise ValueError("URL scheme mismatch with forced type")

# force_type \ suffix | .zip | other
# --------------------+---------+------
# zip | ok | try zip
# (other) | try dir | try dir
# None | try zip | try dir
if force_type == 'zip':
dirname, filename = os.path.split(parsed.path)
fs = _from_scheme(scheme, dirname, kwargs, bucket=parsed.netloc)
fs = fs.open_zip(filename)

elif force_type is None:
if parsed.path.endswith('.zip'):
dirname, filename = os.path.split(parsed.path)
fs = _from_scheme(scheme, dirname, kwargs, bucket=parsed.netloc)
fs = fs.open_zip(filename)
else:
dirname = parsed.path
fs = _from_scheme(scheme, dirname, kwargs, bucket=parsed.netloc)

else:
dirname = parsed.path
fs = _from_scheme(scheme, dirname, kwargs, bucket=parsed.netloc)

return fs


def _from_scheme(scheme, dirname, kwargs, bucket=None):
if scheme == 'file':
from .local import Local
fs = Local(dirname, **kwargs)
@@ -322,15 +365,10 @@ def from_url(url: str, **kwargs) -> 'FS':
fs = Hdfs(dirname, **kwargs)
elif scheme == 's3':
from .s3 import S3

fs = S3(bucket=parsed.netloc, prefix=dirname, **kwargs)

fs = S3(bucket=bucket, prefix=dirname, **kwargs)
else:
raise RuntimeError("Scheme {} is not supported".format(scheme))

if parsed.path.endswith('.zip'):
return fs.open_zip(filename)

return fs


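Below is a minimal usage sketch of the new ``force_type`` keyword, mirroring the tests in this PR; ``dataset.zip`` and its member ``inner.txt`` are hypothetical names used only for illustration.

```python
from pfio.v2 import Local, Zip, from_url

# Force plain file-system semantics on the current directory.
with from_url('.', force_type='file') as fs:
    assert isinstance(fs, Local)

# With force_type='zip' the blob is opened as a zip archive
# regardless of its suffix ('dataset.zip' is a hypothetical path).
with from_url('dataset.zip', force_type='zip') as fs:
    assert isinstance(fs, Zip)
    with fs.open('inner.txt') as fp:  # hypothetical archive member
        print(fp.read())

# Forcing a scheme that contradicts the URL raises ValueError.
try:
    from_url('.', force_type='hdfs')
except ValueError:
    pass
```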
2 changes: 2 additions & 0 deletions pfio/v2/hdfs.py
@@ -194,6 +194,8 @@ def __init__(self, cwd=None):
else:
self.cwd = os.path.join(self.cwd, cwd)

assert self.isdir('')

def _get_principal_name(self):
# get the default principal name from `klist` cache
principal_name = _get_principal_name_from_klist()
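The added assertion means ``Hdfs`` now fails fast when its working directory does not exist. A sketch of the effect, assuming a reachable, Kerberos-configured HDFS cluster (not exercised in this PR's tests) and hypothetical paths:

```python
from pfio.v2.hdfs import Hdfs

# cwd pointing at an existing HDFS directory: the new assert passes.
fs = Hdfs(cwd='datasets')    # hypothetical existing directory

# cwd pointing at a missing path (or a regular file): the new
# ``assert self.isdir('')`` fails at construction time.
Hdfs(cwd='no/such/dir')      # raises AssertionError
```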
3 changes: 3 additions & 0 deletions pfio/v2/local.py
@@ -50,6 +50,9 @@ def __init__(self, cwd=None):
else:
self._cwd = cwd

if not self.isdir(''):
raise ValueError('{} must be a directory'.format(self._cwd))

@property
def cwd(self):
if self._cwd:
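A short sketch of the stricter ``Local`` constructor, assuming ``/tmp`` exists; the file path is hypothetical.

```python
from pfio.v2 import Local

fs = Local('/tmp')                     # an existing directory: OK

try:
    Local('/tmp/not-a-directory.txt')  # a regular file or missing path
except ValueError as e:
    print(e)                           # "... must be a directory"
```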
54 changes: 53 additions & 1 deletion tests/v2_tests/test_fs.py
@@ -5,11 +5,12 @@
import os
import tempfile

import pytest
from moto import mock_s3
from parameterized import parameterized

from pfio.testing import ZipForTest, randstring
from pfio.v2 import S3, Local, from_url, lazify, open_url
from pfio.v2 import S3, Local, Zip, from_url, lazify, open_url


@contextlib.contextmanager
@@ -118,6 +119,57 @@ def test_factory_open():
with open_url('s3://foobar/boom/bom.txt', 'r') as fp:
assert 'hello' == fp.read()

with from_url('s3://foobar/') as fs:
assert isinstance(fs, S3)

with from_url('s3://foobar/path/') as fs:
assert isinstance(fs, S3)


def test_force_type():
with from_url(".", force_type='file') as fs:
assert isinstance(fs, Local)

with pytest.raises(ValueError):
from_url(".", force_type='hdfs')

with pytest.raises(ValueError):
from_url(".", force_type='s3')

with pytest.raises(ValueError):
from_url(".", force_type='foobar')

with tempfile.TemporaryDirectory() as d:
zipfilename = os.path.join(d, "test.zip")
ZipForTest(zipfilename)

with from_url(zipfilename, force_type='zip') as fs:
assert isinstance(fs, Zip)

# Without forced type, try to open according to the suffix
with from_url(zipfilename) as fs:
assert isinstance(fs, Zip)

with pytest.raises(ValueError):
# If type 'file' is forced, the target path should be a
# directory regardless of the suffix
from_url(zipfilename, force_type='file')

testfilename = os.path.join(d, "test.txt")
with open_url(testfilename, 'w') as fp:
fp.write('hello')

with open_url(testfilename, 'r', force_type='file') as fp:
assert 'hello' == fp.read()

with pytest.raises(ValueError):
with open_url(testfilename, 'r', force_type='hdfs'):
pass

with pytest.raises(IsADirectoryError):
with open_url(testfilename, 'r', force_type='zip'):
pass


@parameterized.expand(["s3", "local"])
@mock_s3