Skip to content

Commit

Permalink
Rewrite Location and LocationFactory to be more correct
Browse files Browse the repository at this point in the history
Previously we were a bit loose with our definition of
URI.  This rewrite changes the API to Location such that
it now takes a URI ParseResult as the root of the datastore
and a relative path reflecting the position in the datastore.
This means that we no longer have to deal with strange
relative path URIs that are non-standard for files.

This change removes fromUri from the LocationFactory
API since it makes no sense to define a root URI for the
datastore and then allow another URI to be provided.
No butler code called fromUri.
It also changes the Location constructor but
only LocationFactory was calling it.

There are some shenanigans with trying to deal
with the difference between a posixpath and an os.path
but given that I am testing this on a posix system
there might be some inconsistencies still.

Tests have been added.
  • Loading branch information
timj committed May 30, 2019
1 parent aedce88 commit 19609f5
Show file tree
Hide file tree
Showing 2 changed files with 242 additions and 48 deletions.
222 changes: 174 additions & 48 deletions python/lsst/daf/butler/core/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,40 +25,129 @@
import os.path
import urllib
import posixpath
from pathlib import Path

if os.sep == posixpath.sep:
IS_POSIX = True
else:
IS_POSIX = False


def os2posix(path):
"""Convert a local path description to a POSIX path description.
Parameters
----------
path : `str`
Path using the local path separator.
Returns
-------
posix : `str`
Path using POSIX path separator
"""
if IS_POSIX:
return path

# Split on OS path separator but have to restore root directory
# Untested on Windows
ospaths = path.split(os.sep)
if ospaths[0] == "":
ospaths[0] = Path(path).root

return posixpath.join(*ospaths)


def posix2os(path):
"""Convert a POSIX path description to a local path description.
Parameters
----------
path : `str`
Path using the POSIX path separator.
Returns
-------
ospath : `str`
Path using OS path separator
"""
if IS_POSIX:
return path

# Have to restore the root directory after splitting
posixpaths = path.split(posixpath.sep)
if posixpaths[0] == "":
posixpaths[0] = posixpath.sep
return os.path.join(*posixpaths)


class Location:
"""Identifies a location in the `Datastore`.
"""Identifies a location within the `Datastore`.
Parameters
----------
datastoreRootUri : `urllib.parse.ParseResult`
Base URI for this datastore, must include an absolute path.
path : `str`
Relative path within datastore. Assumed to be using the local
path separator if a ``file`` scheme is being used for the URI,
else a POSIX separator.
"""

__slots__ = ("_datastoreRoot", "_uri")
__slots__ = ("_datastoreRootUri", "_path")

def __init__(self, datastoreRootUri, path):
if not isinstance(datastoreRootUri, urllib.parse.ParseResult):
raise ValueError("Datastore root must be a URI ParseResult instance")

if not posixpath.isabs(datastoreRootUri.path):
raise ValueError(f"Supplied URI must be an absolute path (given {datastoreRootUri}).")

if not datastoreRootUri.scheme:
datastoreRootUri = datastoreRootUri._replace(scheme="file")

self._datastoreRootUri = datastoreRootUri

if self._datastoreRootUri.scheme == "file":
pathModule = os.path
else:
pathModule = posixpath

def __init__(self, datastoreRoot, uri):
self._datastoreRoot = datastoreRoot
self._uri = urllib.parse.urlparse(uri)
if pathModule.isabs(path):
raise ValueError("Path within datastore must be relative not absolute")

self._path = path

def __str__(self):
return self.uri

@property
def uri(self):
"""URI corresponding to location.
"""URI corresponding to fully-specified location in datastore.
"""
return self._uri.geturl()
uriPath = os2posix(self.path)
return self._datastoreRootUri._replace(path=uriPath).geturl()

@property
def path(self):
"""Path corresponding to location.
This path includes the root of the `Datastore`.
This path includes the root of the `Datastore`, but does not include
non-path components of the root URI. If a file URI scheme is being
used the path will be returned with the local OS path separator.
"""
return self._uri.path
if self._datastoreRootUri.scheme != "file":
return posixpath.join(self._datastoreRootUri.path, self.pathInStore)
else:
return os.path.join(posix2os(self._datastoreRootUri.path), self.pathInStore)

@property
def pathInStore(self):
"""Path corresponding to location relative to `Datastore` root.
Uses the same path separator as supplied to the object constructor.
"""
return self._uri.path.split(self._datastoreRoot)[-1].lstrip(os.sep)
return self._path

def updateExtension(self, ext):
"""Update the file extension associated with this `Location`.
Expand All @@ -71,64 +160,99 @@ def updateExtension(self, ext):
"""
if ext is None:
return
path, _ = os.path.splitext(self._uri.path)

path, _ = os.path.splitext(self.pathInStore)

# Ensure that we have a leading "." on file extension (and we do not
# try to modify the empty string)
if ext and not ext.startswith("."):
ext = "." + ext

parts = list(self._uri)
parts[2] = path + ext
self._uri = urllib.parse.urlparse(urllib.parse.urlunparse(parts))
self._path = path + ext


class LocationFactory:
"""Factory for `Location` instances.
The factory is constructed from the root location of the datastore.
This location can be a path on the file system (absolute or relative)
or as a URI.
Parameters
----------
datastoreRoot : `str`
Root location of the `Datastore` either as a path in the local
filesystem or as a URI. File scheme URIs can be used. If a local
filesystem path is used without URI scheme, it will be converted
to an absolute path and any home directory indicators expanded.
If a file scheme is used with a relative path, the path will
be treated as a posixpath but then converted to an absolute path.
"""

def __init__(self, datastoreRoot):
"""Constructor
parsed = urllib.parse.urlparse(datastoreRoot)
parsed = self._fixupFileUri(parsed)
self._datastoreRootUri = parsed

Parameters
----------
datastoreRoot : `str`
Root location of the `Datastore` in the filesystem. Assumed
relative to current directory unless absolute.
"""
self._datastoreRoot = os.path.abspath(os.path.expanduser(datastoreRoot))

def fromUri(self, uri):
"""Factory function to create a `Location` from a URI.
@staticmethod
def _fixupFileUri(parsed, root=None):
"""Fix up relative paths in file URI instances.
Parameters
----------
uri : `str`
A valid Universal Resource Identifier.
parsed : `~urllib.parse.ParseResult`
The result from parsing a URI using `urllib.parse`.
root : `str`, optional
Path to use as root when converting relative to absolute.
If `None`, it will be the current working directory.
Returns
location : `Location`
The equivalent `Location`.
-------
modified : `~urllib.parse.ParseResult`
Update result if a file URI is being handled.
Notes
-----
If a file URI is supplied with a relative path, the returned result
will be converted to an absolute path. If the supplied result has
no scheme defined, ``file`` scheme will be assumed and the path
will be converted to an absolute path.
Relative paths are explicitly not supported by RFC8089 but `urllib`
does accept URIs of the form `file:relative/path.ext`, but they need
to be turned into absolute paths before they can be used.
"""
if uri is None or not isinstance(uri, str):
raise ValueError("URI must be a string and not {}".format(uri))

parsed = urllib.parse.urlparse(uri)
if parsed.scheme == 'file' and parsed.netloc:
# joining with an empty string produces an '/' we don't want
if not parsed.path:
path = posixpath.join(self._datastoreRoot, parsed.netloc)
if not parsed.scheme or parsed.scheme == "file":

if root is None:
root = os.path.abspath(os.path.curdir)

if not parsed.scheme:
# if there was no scheme this is a local OS file path
# which can support tilde expansion.
expandedPath = os.path.expanduser(parsed.path)
elif parsed.scheme == "file":
# file URI implies POSIX path separators so split as posix,
# then join as os, and convert to abspath. Do not handle
# home directories since `file` scheme is explicitly documented
# to not do tilde expansion.
expandedPath = posix2os(parsed.path)
else:
path = posixpath.join(self._datastoreRoot, parsed.netloc,
parsed.path.lstrip('/'))
# scheme, netloc, path, query, fragment, user, pass, host, port
uri = urllib.parse.urlunparse((parsed.scheme, '', path, '', '', parsed.fragment))
parsed = urllib.parse.urlparse(uri)
if posixpath.commonprefix([parsed.path, self._datastoreRoot]) != self._datastoreRoot:
raise ValueError((f'URI {uri} does not share the same datastore root '
f'used by this Location factory: {self._datastoreRoot}.'))
raise RuntimeError("Unexpectedly got confused by URI scheme")

return Location(self._datastoreRoot, uri)
if not os.path.isabs(expandedPath):
expandedPath = os.path.join(root, expandedPath)

# Recalculate the path by converting from os path to posixpath
uriPath = os2posix(expandedPath)

# ParseResult is a NamedTuple so _replace is standard API
parsed = parsed._replace(path=uriPath, scheme="file")

return parsed

def __str__(self):
return f"{self.__class__.__name__}@{self._datastoreRootUri.geturl()}"

def fromPath(self, path):
"""Factory function to create a `Location` from a POSIX path.
Expand All @@ -139,8 +263,10 @@ def fromPath(self, path):
A standard POSIX path, relative to the `Datastore` root.
Returns
-------
location : `Location`
The equivalent `Location`.
"""
uri = 'file://' + path
return self.fromUri(uri)
if os.path.isabs(path):
raise ValueError("LocationFactory path must be relative to datastore, not absolute.")
return Location(self._datastoreRootUri, path)
68 changes: 68 additions & 0 deletions tests/test_location.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import unittest
import os.path
import posixpath

from lsst.daf.butler import LocationFactory


class LocationTestCase(unittest.TestCase):
"""Tests for Location within datastore
"""

def testFileLocation(self):
root = os.path.abspath(os.path.curdir)
factory = LocationFactory(root)
print(f"Factory created: {factory}")

pathInStore = "relative/path/file.ext"
loc1 = factory.fromPath(pathInStore)

self.assertEqual(loc1.path, os.path.join(root, pathInStore))
self.assertEqual(loc1.pathInStore, pathInStore)
self.assertTrue(loc1.uri.startswith("file:///"))
self.assertTrue(loc1.uri.endswith("file.ext"))
loc1.updateExtension("fits")
self.assertTrue(loc1.uri.endswith("file.fits"))
loc1.updateExtension(None)
self.assertTrue(loc1.uri.endswith("file.fits"))
loc1.updateExtension("")
self.assertTrue(loc1.uri.endswith("file"))

def testHttpLocation(self):
root = "https://www.lsst.org/butler/datastore"
factory = LocationFactory(root)
print(f"Factory created: {factory}")

pathInStore = "relative/path/file.ext"
loc1 = factory.fromPath(pathInStore)

self.assertEqual(loc1.path, posixpath.join("/butler/datastore", pathInStore))
self.assertEqual(loc1.pathInStore, pathInStore)
self.assertTrue(loc1.uri.startswith("https://"))
self.assertTrue(loc1.uri.endswith("file.ext"))
loc1.updateExtension("fits")


if __name__ == "__main__":
unittest.main()

0 comments on commit 19609f5

Please sign in to comment.