Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

daf_persistence Tickets/dm 9039 #43

Merged
merged 35 commits into from
Feb 22, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
f820b99
remove commented-out merge block
n8pease Jan 18, 2017
a855a1f
add Butler._initV1Args
n8pease Jan 19, 2017
4900710
add test for PosixStorage class
n8pease Jan 19, 2017
27ccc56
add createRepoDatas loop
n8pease Jan 19, 2017
c70c766
add buildLookupList and getDefaultMapper
n8pease Jan 24, 2017
fa9fc28
add repo init
n8pease Jan 24, 2017
91315fd
change cfg 'legcyRepo' to 'v1Repo'
n8pease Jan 24, 2017
9f723f3
disallow initializing a repo with no mapper
n8pease Jan 24, 2017
069b7cd
default mapper get-fix and add assigner
n8pease Jan 24, 2017
2a9a09c
raise if default mapper needed but not determinable
n8pease Jan 27, 2017
f048e85
add support for tags
n8pease Jan 24, 2017
892661b
change 'legacy' repository to 'V1' repository
n8pease Jan 25, 2017
7243e5b
formalize handling of isV1Repository
n8pease Jan 25, 2017
437d6b6
do init the butler objectCache
n8pease Jan 26, 2017
f39779d
persist the repoCfg when initing a new non-legacy repo
n8pease Jan 26, 2017
f3ae397
move new and v1 repo tracking out of repoCfg
n8pease Jan 26, 2017
547ce4f
adjust TestMovedRepositoryCfg for updated butler internals
n8pease Jan 26, 2017
00fe670
allow setting cfg.mapper if it is currently None
n8pease Jan 26, 2017
ad89b3b
improve root/cfgRoot lookup, put repoCfg in named location
n8pease Jan 26, 2017
7275f9d
fix typo in repository test
n8pease Jan 26, 2017
d618f7a
copy args before modification and assert valid mode
n8pease Jan 27, 2017
f2b4291
fix getParentsList algorithm
n8pease Jan 27, 2017
6c24190
add overwrite proection to repoArgs.mapper
n8pease Jan 27, 2017
33b21dc
fix test ButlerSubsetTestCase.testSingleIteration
n8pease Jan 27, 2017
c576a08
don't get cfg for v1 repos
n8pease Jan 27, 2017
068a630
allow _parent to not be a symlink
n8pease Jan 28, 2017
17dbf56
fix repositoryArgs test
n8pease Jan 28, 2017
0f37bb0
use URI when setting root in depersisted cfg
n8pease Jan 28, 2017
bb73fef
fix test repositoryCfg
n8pease Jan 30, 2017
5a72607
in subLevel, allow multiple repos if mapper is the same
n8pease Jan 30, 2017
e3259eb
fixup - improve v1RepoExists
n8pease Feb 15, 2017
8016014
improve repoArgs cfg/root getter
n8pease Feb 15, 2017
e857e9b
fixup - raise if no mapper for Repository.map
n8pease Feb 15, 2017
718f58e
fixup - expand documentation about Storage.isPosix
n8pease Feb 15, 2017
a211d6c
improve excepion, add test for v1 api with v2 api
n8pease Feb 17, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
495 changes: 311 additions & 184 deletions python/lsst/daf/persistence/butler.py

Large diffs are not rendered by default.

22 changes: 12 additions & 10 deletions python/lsst/daf/persistence/butlerSubset.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,22 +240,24 @@ def subItems(self, level=None):
default lower level for the original ButlerSubset level and dataset
type is used.

As currently implemented, the default sublevels for all the
repositories used by this Butler instance must match for the Butler to
be able to select a default sublevel to get the subset.

@param level (str) the hierarchy level to descend to.
@returns (ButlerSubset) resulting from the lower-level query or () if
there is no lower level.
"""

if level is None:
mappers = []
for repoData in self.butlerSubset.butler._repos.all():
if repoData.repo._mapper not in mappers:
mappers.append(repoData.repo._mapper)
if len(mappers) != 1:
raise RuntimeError("Support for multiple repositories not yet implemented!")
mapper = mappers[0]

# todo: getDefaultSubLevel is not in the mapper API!
level = mapper.getDefaultSubLevel(self.butlerSubset.level)
levelSet = set()
for repoData in self.butlerSubset.butler._repos.all().values():
levelSet.add(repoData.repo._mapper.getDefaultSubLevel(
self.butlerSubset.level))
if len(levelSet) > 1:
raise RuntimeError(
"Support for multiple levels not implemented.")
level = levelSet.pop()
if level is None:
return ()
return self.butlerSubset.butler.subset(self.butlerSubset.datasetType,
Expand Down
62 changes: 46 additions & 16 deletions python/lsst/daf/persistence/posixStorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def _getRepositoryCfg(uri):
with open(loc, 'r') as f:
repositoryCfg = yaml.load(f)
if repositoryCfg.root is None:
repositoryCfg.root = parseRes.path
repositoryCfg.root = uri
return repositoryCfg

@staticmethod
Expand All @@ -80,25 +80,10 @@ def getRepositoryCfg(uri):
if repositoryCfg is not None:
return repositoryCfg

# if no repository cfg, is it a legacy repository?
parseRes = urllib.parse.urlparse(uri)
if repositoryCfg is None:
mapper = PosixStorage.getMapperClass(parseRes.path)
if mapper is not None:
repositoryCfg = RepositoryCfg(mapper=mapper,
root=parseRes.path,
mapperArgs=None,
parents=None,
isLegacyRepository=True,
policy=None)
return repositoryCfg

@staticmethod
def putRepositoryCfg(cfg, loc=None):
if cfg.isLegacyRepository:
# don't write cfgs to legacy repositories; they take care of themselves in other ways (e.g. by
# the _parent symlink)
return
if loc is None or cfg.root == loc:
# the cfg is at the root location of the repository so don't write root, let it be implicit in the
# location of the cfg.
Expand Down Expand Up @@ -160,6 +145,32 @@ def getMapperClass(root):

return None

@staticmethod
def getParentSymlinkPath(root):
"""For Butler V1 Repositories only, if a _parent symlink exists, get the location pointed to by the
symlink.

Parameters
----------
root : string
A path to the folder on the local filesystem.

Returns
-------
string or None
A path to the parent folder indicated by the _parent symlink, or None if there is no _parent
symlink at root.
"""
linkpath = os.path.join(root, '_parent')
if os.path.exists(linkpath):
try:
return os.readlink(os.path.join(root, '_parent'))
except OSError:
# some of the unit tests rely on a folder called _parent instead of a symlink to aother
# location. Allow that; return the path of that folder.
return os.path.join(root, '_parent')
return None

def mapperClass(self):
"""Get the class object for the mapper specified in the stored repository"""
return PosixStorage.getMapperClass(self.root)
Expand Down Expand Up @@ -317,6 +328,25 @@ def lookup(self, *args, **kwargs):
"""Perform a lookup in the registry"""
return self.registry.lookup(*args, **kwargs)

@staticmethod
def v1RepoExists(root):
"""Test if a Version 1 Repository exists.

Version 1 Repositories only exist in posix storages and do not have a RepositoryCfg file.
To "exist" the folder at root must exist and contain files or folders.

Parameters
----------
root : string
A path to a folder on the local filesystem.

Returns
-------
bool
True if the repository at root exists, else False.
"""
return os.path.exists(root) and bool(os.listdir(root))


Storage.registerStorageClass(scheme='', cls=PosixStorage)
Storage.registerStorageClass(scheme='file', cls=PosixStorage)
51 changes: 33 additions & 18 deletions python/lsst/daf/persistence/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,26 +34,37 @@ class RepositoryArgs(object):

def __init__(self, root=None, cfgRoot=None, mapper=None, mapperArgs=None, tags=None,
mode=None, policy=None):
self.root = root
self._root = root
self._cfgRoot = cfgRoot
self.mapper = mapper
self._mapper = mapper
self.mapperArgs = mapperArgs
self.tags = set(listify(tags))
self.mode = mode
self.policy = Policy(policy) if policy is not None else None
self.isLegacyRepository = False


def __repr__(self):
return "%s(root=%r, cfgRoot=%r, mapper=%r, mapperArgs=%r, tags=%s, mode=%r, policy=%s)" % (
self.__class__.__name__, self.root, self._cfgRoot, self.mapper, self.mapperArgs, self.tags,
self.__class__.__name__, self.root, self._cfgRoot, self._mapper, self.mapperArgs, self.tags,
self.mode, self.policy)

@property
def mapper(self):
return self._mapper

@mapper.setter
def mapper(self, mapper):
if mapper is not None and self._mapper:
raise RuntimeError("Explicity clear mapper (set to None) before changing its value.")
self._mapper = mapper

@property
def cfgRoot(self):
if self._cfgRoot is not None:
return self._cfgRoot
else:
return self.root
return self._cfgRoot if self._cfgRoot is not None else self.root

@property
def root(self):
return self._root if self._root is not None else self._cfgRoot

@staticmethod
def inputRepo(storage, tags=None):
Expand All @@ -78,15 +89,19 @@ class Repository(object):
"""Represents a repository of persisted data and has methods to access that data.
"""

def __init__(self, repositoryCfg):
'''Initialize a Repository with parameters input via config.
def __init__(self, repoData):
"""Initialize a Repository with parameters input via RepoData.

:param args: an instance of RepositoryArgs
:return:
'''
self._storage = Storage.makeFromURI(repositoryCfg.root)
self._mapperArgs = repositoryCfg.mapperArgs # keep for reference in matchesArgs
self._initMapper(repositoryCfg)
Parameters
----------
repoData : RepoData
Object that contains the parameters with which to init the Repository.
"""
self._storage = Storage.makeFromURI(repoData.cfg.root)
if repoData.isNewRepository and not repoData.isV1Repository:
self._storage.putRepositoryCfg(repoData.cfg, repoData.args.cfgRoot)
self._mapperArgs = repoData.cfg.mapperArgs # keep for reference in matchesArgs
self._initMapper(repoData.cfg)

def _initMapper(self, repositoryCfg):
'''Initialize and keep the mapper in a member var.
Expand Down Expand Up @@ -131,7 +146,7 @@ def _initMapper(self, repositoryCfg):

def __repr__(self):
return 'config(id=%s, storage=%s, parent=%s, mapper=%s, mapperArgs=%s, cls=%s)' % \
(self.id, self._storage, self.parent, self.mapper, self.mapperArgs, self.cls)
(self.id, self._storage, self.parent, self._mapper, self.mapperArgs, self.cls)

# todo want a way to make a repository read-only
def write(self, butlerLocation, obj):
Expand Down Expand Up @@ -179,7 +194,7 @@ def map(self, *args, **kwargs):
:return: The type of item is dependent on the mapper being used but is typically a ButlerLocation.
"""
if self._mapper is None:
return None
raise RuntimeError("No mapper assigned to Repository")
loc = self._mapper.map(*args, **kwargs)
if loc is None:
return None
Expand Down
25 changes: 11 additions & 14 deletions python/lsst/daf/persistence/repositoryCfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,12 @@
class RepositoryCfg(yaml.YAMLObject):
yaml_tag = u"!RepositoryCfg_v1"

def __init__(self, root, mapper, mapperArgs, parents, policy, isLegacyRepository=False):
def __init__(self, root, mapper, mapperArgs, parents, policy):
self._root = root
self._mapper = mapper
self._mapperArgs = mapperArgs
self._parents = iterify(parents)
self._policy = policy
self._isLegacyRepository = isLegacyRepository

@staticmethod
def v1Constructor(loader, node):
Expand All @@ -62,8 +61,7 @@ def v1Constructor(loader, node):
"""
d = loader.construct_mapping(node)
cfg = RepositoryCfg(root=d['_root'], mapper=d['_mapper'], mapperArgs=d['_mapperArgs'],
parents=d['_parents'], isLegacyRepository=d['_isLegacyRepository'],
policy=d.get('_policy', None))
parents=d['_parents'], policy=d.get('_policy', None))
return cfg

def __eq__(self, other):
Expand All @@ -72,8 +70,7 @@ def __eq__(self, other):
return self._root == other._root and \
self.mapper == other._mapper and \
self.mapperArgs == other._mapperArgs and \
self.parents == other._parents and \
self._isLegacyRepository == other._isLegacyRepository
self.parents == other._parents

def __ne__(self, other):
return not self.__eq__(other)
Expand All @@ -92,6 +89,12 @@ def root(self, root):
def mapper(self):
return self._mapper

@mapper.setter
def mapper(self, mapper):
if self._mapper != None:
raise RuntimeError("Should not set mapper over previous not-None value.")
self._mapper = mapper

@property
def mapperArgs(self):
return self._mapperArgs
Expand All @@ -110,10 +113,6 @@ def addParents(self, newParents):
if newParent not in self._parents:
self._parents.append(newParent)

@property
def isLegacyRepository(self):
return self._isLegacyRepository

@property
def policy(self):
return self._policy
Expand All @@ -124,7 +123,6 @@ def makeFromArgs(repositoryArgs, parents):
mapper=repositoryArgs.mapper,
mapperArgs=repositoryArgs.mapperArgs,
parents=parents,
isLegacyRepository=repositoryArgs.isLegacyRepository,
policy=repositoryArgs.policy)
return cfg

Expand Down Expand Up @@ -154,13 +152,12 @@ def matchesArgs(self, repositoryArgs):
return True

def __repr__(self):
return "%s(root=%r, mapper=%r, mapperArgs=%r, parents=%s, policy=%s, isLegacyRepository=%s)" % (
return "%s(root=%r, mapper=%r, mapperArgs=%r, parents=%s, policy=%s)" % (
self.__class__.__name__,
self._root,
self._mapper,
self._mapperArgs,
self._parents,
self._policy,
self._isLegacyRepository)
self._policy)

yaml.add_constructor(u"!RepositoryCfg_v1", RepositoryCfg.v1Constructor)
27 changes: 27 additions & 0 deletions python/lsst/daf/persistence/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,30 @@ def makeFromURI(uri):
else:
raise RuntimeError("No storage registered for scheme %s" % parseRes.scheme)
return ret

@staticmethod
def isPosix(uri):
"""Test if a URI is for a local filesystem storage.

This is mostly for backward compatibility; Butler V1 repositories were only ever on the local
filesystem. They may exist but not have a RepositoryCfg class. This enables conditional checking for a
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doc string length

V1 Repository.

This function treats 'file' and '' (no scheme) as posix storages, see
the class docstring for more details.

Parameters
----------
uri : string
URI to the root of a Repository.

Returns
-------
Bool
True if the URI is associated with a posix storage, else false.
"""
parseRes = urllib.parse.urlparse(uri)
if parseRes.scheme in ('file', ''):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add to the function documentation that you are checking for the explicit file scheme in the uri or interpreting the lack of a specific scheme to tacitly imply thefile scheme. I say this just because this line looks quite funny at first glance - it's not apparent that you are accepting the lack of a scheme at first glance.

return True
return False

4 changes: 2 additions & 2 deletions tests/butlerSubset.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,9 @@ def registerAliases(butler):
return butler

def testSingleIteration(self):
args = dafPersist.RepositoryArgs(mode='r', root=os.path.join(ROOT, 'butlerSubset'),
args = dafPersist.RepositoryArgs(mode='rw', root=os.path.join(ROOT, 'butlerSubset'),
mapper=ImgMapper())
butler = dafPersist.Butler(inputs=args)
butler = dafPersist.Butler(outputs=args)

ButlerSubsetTestCase.registerAliases(butler)

Expand Down
4 changes: 2 additions & 2 deletions tests/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,7 +666,7 @@ def test(self):
os.rename(os.path.join(ROOT, 'TestMovedRepositoryCfg/a/repositoryCfg.yaml'),
os.path.join(ROOT, 'TestMovedRepositoryCfg/b/repositoryCfg.yaml'))
butler = dp.Butler(inputs=os.path.join(ROOT, 'TestMovedRepositoryCfg/b'))
self.assertEqual(butler._repos.all()[0].cfg,
self.assertEqual(list(butler._repos.all().values())[0].cfg,
dp.RepositoryCfg(root=os.path.join(ROOT, 'TestMovedRepositoryCfg/b'),
mapper=MapperForTestWriting,
mapperArgs=None,
Expand Down Expand Up @@ -742,7 +742,7 @@ def test(self):
'TestOutputAlreadyHasParent/a')],
policy=None))

# load 'b' as 'read only' and make sure 'a' does not get used as an input.
# load 'b' as 'write only' and make sure 'a' does not get used as an input.
butler = dp.Butler(outputs=os.path.join(ROOT, 'TestOutputAlreadyHasParent/b'))
self.assertEqual(len(butler._repos.inputs()), 0)
self.assertEqual(len(butler._repos.outputs()), 1)
Expand Down