Skip to content

Commit

Permalink
Rewrite how schemeless.relative_to() works
Browse files Browse the repository at this point in the history
Previously there was specialization code in the file scheme
that tried to work with file and scheme-less URIs. This code
worked except it failed to understand that we need S3 URIs
to be the parent for a scheme-less child.

Change the logic so that file is now a completely standard
implementation using the base class, and schemeless converts
the parameters to handle the different scenarios before calling
the base implementation.
  • Loading branch information
timj committed Aug 6, 2021
1 parent 6890496 commit a7969bf
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 58 deletions.
8 changes: 8 additions & 0 deletions python/lsst/daf/butler/core/_butlerUri/_butlerUri.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,14 @@ def relative_to(self, other: ButlerURI) -> Optional[str]:
Returns `None` if there is no parent child relationship.
Scheme and netloc must match.
"""
# Scheme-less absolute other is treated as if it's a file scheme.
# Scheme-less relative other can only return non-None if self
# is also scheme-less relative and that is handled specifically
# in a subclass.
if not other.scheme and other.isabs():
other = other.abspath()

# Scheme-less self is handled elsewhere.
if self.scheme != other.scheme or self.netloc != other.netloc:
return None

Expand Down
58 changes: 0 additions & 58 deletions python/lsst/daf/butler/core/_butlerUri/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@

from typing import (
TYPE_CHECKING,
cast,
Iterator,
List,
Optional,
Expand Down Expand Up @@ -104,63 +103,6 @@ def _as_local(self) -> Tuple[str, bool]:
"""
return self.ospath, False

def relative_to(self, other: ButlerURI) -> Optional[str]:
"""Return the relative path from this URI to the other URI.
Parameters
----------
other : `ButlerURI`
URI to use to calculate the relative path. Must be a parent
of this URI.
Returns
-------
subpath : `str`
The sub path of this URI relative to the supplied other URI.
Returns `None` if there is no parent child relationship.
Scheme and netloc must match but for file URIs schemeless
is also used. If this URI is a relative URI but the other is
absolute, it is assumed to be in the parent completely unless it
starts with ".." (in which case the path is combined and tested).
If both URIs are relative, the relative paths are compared
for commonality.
Notes
-----
By definition a relative path will be relative to the enclosing
absolute parent URI. It will be returned unchanged if it does not
use a parent directory specification.
"""
# We know self is a file so check the other. Anything other than
# file or schemeless means by definition these have no paths in common
if other.scheme and other.scheme != "file":
return None

# for case where both URIs are relative use the normal logic
# where a/b/c.txt and a/b/ returns c.txt.
if not self.isabs() and not other.isabs():
return super().relative_to(other)

# if we have a relative path convert it to absolute
# relative to the supplied parent. This is solely to handle
# the case where the relative path includes ".." but somehow
# then goes back inside the directory of the parent
if not self.isabs():
childUri = other.join(self.path)
return childUri.relative_to(other)

# By this point if the schemes are identical we can use the
# base class implementation.
if self.scheme == other.scheme:
return super().relative_to(other)

# if one is schemeless and the other is not the base implementation
# will fail so we need to fix that -- they are both absolute so
# forcing to file is fine.
# Use a cast to convince mypy that other has to be a ButlerFileURI
# in order to get to this part of the code.
return self.abspath().relative_to(cast(ButlerFileURI, other).abspath())

def read(self, size: int = -1) -> bytes:
"""Return the entire content of the file as bytes."""
with open(self.ospath, "rb") as fh:
Expand Down
54 changes: 54 additions & 0 deletions python/lsst/daf/butler/core/_butlerUri/schemeless.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,60 @@ def abspath(self) -> ButlerURI:
return ButlerURI(str(self), forceAbsolute=True, forceDirectory=self.isdir(),
isTemporary=self.isTemporary)

def relative_to(self, other: ButlerURI) -> Optional[str]:
"""Return the relative path from this URI to the other URI.
Parameters
----------
other : `ButlerURI`
URI to use to calculate the relative path.
Returns
-------
subpath : `str`
The sub path of this URI relative to the supplied other URI.
Returns `None` if there is no parent child relationship.
If this URI is a relative URI but the other is
absolute, it is assumed to be in the parent completely unless it
starts with ".." (in which case the path is combined and tested).
If both URIs are relative, the relative paths are compared
for commonality.
Notes
-----
By definition a relative path will be relative to the enclosing
absolute parent URI. It will be returned unchanged if it does not
use a parent directory specification.
"""
# In some scenarios below a new derived child URI needs to be created
# to convert from scheme-less to absolute URI.
child = None

if not self.isabs() and not other.isabs():
# Both are schemeless relative. Use parent implementation
# rather than trying to convert both to file: first since schemes
# match.
pass
elif not self.isabs() and other.isabs():
# Append child to other. This can account for .. in child path.
child = other.join(self.path)
elif self.isabs() and not other.isabs():
# Finding common paths is not possible if the parent is
# relative and the child is absolute.
return None
elif self.isabs() and other.isabs():
# Both are absolute so convert schemeless to file
# if necessary.
child = self.abspath()
if not other.scheme:
other = other.abspath()
else:
raise RuntimeError(f"Unexpected combination of {child}.relative_to({other}).")

if child is None:
return super().relative_to(other)
return child.relative_to(other)

@classmethod
def _fixupPathUri(cls, parsed: urllib.parse.ParseResult, root: Optional[Union[str, ButlerURI]] = None,
forceAbsolute: bool = False,
Expand Down
23 changes: 23 additions & 0 deletions tests/test_uri.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,29 @@ def testRelative(self):
child = ButlerURI("../c/e/f/g.txt", forceAbsolute=False)
self.assertEqual(child.relative_to(parent), "e/f/g.txt")

# Test non-file root with relative path.
child = ButlerURI("e/f/g.txt", forceAbsolute=False)
parent = ButlerURI("s3://hello/a/b/c/")
self.assertEqual(child.relative_to(parent), "e/f/g.txt")

# Test with different netloc
child = ButlerURI("http://my.host/a/b/c.txt")
parent = ButlerURI("http://other.host/a/")
self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

# Schemeless absolute child.
# Schemeless absolute URI is constructed using root= parameter.
parent = ButlerURI("file:///a/b/c/")
child = ButlerURI("d/e.txt", root=parent)
self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

parent = ButlerURI("c/", root="/a/b/")
self.assertEqual(child.relative_to(parent), "d/e.txt", f"{child}.relative_to({parent})")

# Absolute schemeless child with relative parent will always fail.
parent = ButlerURI("d/e.txt", forceAbsolute=False)
self.assertIsNone(child.relative_to(parent), f"{child}.relative_to({parent})")

def testParents(self):
"""Test of splitting and parent walking."""
parent = ButlerURI(self.tmpdir, forceDirectory=True, forceAbsolute=True)
Expand Down

0 comments on commit a7969bf

Please sign in to comment.