Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

daf_persistence: Tickets/dm 10340 #61

Merged
merged 15 commits into from
Jun 27, 2017
1 change: 1 addition & 0 deletions python/lsst/daf/persistence/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from .access import *
from .repositoryCfg import *
from .posixStorage import *
from .fmtPosixRepositoryCfg import *
from .mapper import *
from .repositoryMapper import *
from .repository import *
Expand Down
1,089 changes: 760 additions & 329 deletions python/lsst/daf/persistence/butler.py

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions python/lsst/daf/persistence/butlerExceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,11 @@ def __init__(self, message, datasetType, dataId, locations):
message += ' ' + str(location)
super(MultipleResults, self).__init__(message)
self.locations = locations


class ParentsMismatch(RuntimeError):
"""Raised when issues arise related to the list of parents in a RepositoryCfg not matching the expected
value.
"""
def __init__(self, message):
super(ParentsMismatch, self).__init__(message)
2 changes: 1 addition & 1 deletion python/lsst/daf/persistence/butlerSubset.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def subItems(self, level=None):

if level is None:
levelSet = set()
for repoData in self.butlerSubset.butler._repos.all().values():
for repoData in self.butlerSubset.butler._repos.all():
levelSet.add(repoData.repo._mapper.getDefaultSubLevel(
self.butlerSubset.level))
if len(levelSet) > 1:
Expand Down
121 changes: 121 additions & 0 deletions python/lsst/daf/persistence/fmtPosixRepositoryCfg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#
# LSST Data Management System
# Copyright 2017 LSST Corporation.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <http://www.lsstcorp.org/LegalNotices/>.
#

import copy
import errno
import fcntl
import yaml
import os
import urllib
from . import PosixStorage, RepositoryCfg, safeFileIo, ParentsMismatch
from lsst.log import Log


class RepositoryCfgPosixFormatter():

@classmethod
def write(cls, cfg, butlerLocation):
"""Serialize a RepositoryCfg to a location.

When the location is the same as cfg.root, the RepositoryCfg is to be written at the root location of
the repository. In that case, root is not written in the serialized cfg; it is implicit in the
location of the cfg. This allows the cfg to move from machine to machine without modification.

Parameters
----------
cfg : RepositoryCfg instance
The RepositoryCfg to be serialized.
butlerLocation : ButlerLocation
The location to write the RepositoryCfg.
"""
def setRoot(cfg, loc):
loc = os.path.split(loc)[0] # remove the `repoistoryCfg.yaml` file name
if loc is None or cfg.root == loc:
cfg = copy.copy(cfg)
loc = cfg.root
cfg.root = None
return cfg

# This class supports schema 'file' and also treats no schema as 'file'.
# Split the URI and take only the path; remove the schema from loc if it's there.
loc = butlerLocation.storage.root
parseRes = urllib.parse.urlparse(loc if loc is not None else cfg.root)
loc = os.path.join(parseRes.path, butlerLocation.getLocations()[0])
try:
with safeFileIo.SafeLockedFileForRead(loc) as f:
existingCfg = RepositoryCfgPosixFormatter._read(f, parseRes.path)
if existingCfg == cfg:
cfg.dirty = False
return
except IOError as e:
if e.errno != errno.ENOENT: # ENOENT is 'No such file or directory'
raise e
with safeFileIo.SafeLockedFileForWrite(loc) as f:
existingCfg = RepositoryCfgPosixFormatter._read(f, parseRes.path)
if existingCfg is None:
cfgToWrite = setRoot(cfg, loc)
else:
if existingCfg == cfg:
cfg.dirty = False
return
try:
existingCfg.extend(cfg)
cfgToWrite = setRoot(existingCfg, loc)
except ParentsMismatch as e:
raise RuntimeError("Can not extend existing repository cfg because: {}".format(e))
yaml.dump(cfgToWrite, f)
cfg.dirty = False

@classmethod
def _read(cls, fileObject, uri):
"""Get a persisted RepositoryCfg from an open file object.

Parameters
----------
fileObject : an open file object
the file that contains the RepositoryCfg.

Returns
-------
A RepositoryCfg instance or None
"""
repositoryCfg = yaml.load(fileObject)
if repositoryCfg is not None:
if repositoryCfg.root is None:
repositoryCfg.root = uri
return repositoryCfg

@classmethod
def read(cls, butlerLocation):
repositoryCfg = None
loc = butlerLocation.storage.root
fileLoc = os.path.join(loc, butlerLocation.getLocations()[0])
try:
with safeFileIo.SafeLockedFileForRead(fileLoc) as f:
repositoryCfg = RepositoryCfgPosixFormatter._read(f, loc)
except IOError as e:
if e.errno != errno.ENOENT: # ENOENT is 'No such file or directory'
raise e
return repositoryCfg


PosixStorage.registerFormatter(RepositoryCfg, RepositoryCfgPosixFormatter)
91 changes: 26 additions & 65 deletions python/lsst/daf/persistence/posixStorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,19 @@
import urllib.parse
import glob
import shutil
import fcntl

import yaml

from . import (LogicalLocation, Persistence, Policy, StorageList,
StorageInterface, Storage, safeFileIo, ButlerLocation,
NoRepositroyAtRoot)
StorageInterface, Storage, ButlerLocation,
NoRepositroyAtRoot, RepositoryCfg)
from lsst.log import Log
import lsst.pex.policy as pexPolicy
from .safeFileIo import SafeFilename, safeMakeDir
from future import standard_library
standard_library.install_aliases()


class PosixStorage(StorageInterface):
"""Defines the interface for a storage location on the local filesystem.

Expand Down Expand Up @@ -126,29 +126,6 @@ def absolutePath(fromPath, relativePath):
fromPath = os.path.realpath(fromPath)
return os.path.normpath(os.path.join(fromPath, relativePath))

@staticmethod
def _getRepositoryCfg(uri):
"""Get a persisted RepositoryCfg

Parameters
----------
uri : URI or path to a RepositoryCfg
Description

Returns
-------
A RepositoryCfg instance or None
"""
repositoryCfg = None
parseRes = urllib.parse.urlparse(uri)
loc = os.path.join(parseRes.path, 'repositoryCfg.yaml')
if os.path.exists(loc):
with open(loc, 'r') as f:
repositoryCfg = yaml.load(f)
if repositoryCfg.root is None:
repositoryCfg.root = uri
return repositoryCfg

@staticmethod
def getRepositoryCfg(uri):
"""Get a persisted RepositoryCfg
Expand All @@ -162,46 +139,31 @@ def getRepositoryCfg(uri):
-------
A RepositoryCfg instance or None
"""
repositoryCfg = PosixStorage._getRepositoryCfg(uri)
if repositoryCfg is not None:
return repositoryCfg

return repositoryCfg
storage = Storage.makeFromURI(uri)
formatter = storage._getFormatter(RepositoryCfg)
return formatter.read(ButlerLocation(pythonType=None,
cppType=None,
storageName=None,
locationList='repositoryCfg.yaml',
dataId={},
mapper=None,
storage=storage,
usedDataId=None,
datasetType=None))

@staticmethod
def putRepositoryCfg(cfg, loc=None):
"""Serialize a RepositoryCfg to a location.

When loc == cfg.root, the RepositoryCfg is to be writtenat the root
location of the repository. In that case, root is not written, it is
implicit in the location of the cfg. This allows the cfg to move from
machine to machine without modification.

Parameters
----------
cfg : RepositoryCfg instance
The RepositoryCfg to be serailized.
loc : None, optional
The location to write the RepositoryCfg. If loc is None, the
location will be read from the root parameter of loc.

Returns
-------
None
"""
if loc is None or cfg.root == loc:
cfg = copy.copy(cfg)
loc = cfg.root
cfg.root = None
# This class supports schema 'file' and also treats no schema as 'file'.
# Split the URI and take only the path; remove the schema fom loc if it's there.
parseRes = urllib.parse.urlparse(loc)
loc = parseRes.path
if not os.path.exists(loc):
os.makedirs(loc)
loc = os.path.join(loc, 'repositoryCfg.yaml')
with safeFileIo.FileForWriteOnceCompareSame(loc) as f:
yaml.dump(cfg, f)
storage = Storage.makeFromURI(cfg.root if loc is None else loc, create=True)
formatter = storage._getFormatter(type(cfg))
formatter.write(cfg, ButlerLocation(pythonType=None,
cppType=None,
storageName=None,
locationList='repositoryCfg.yaml',
dataId={},
mapper=None,
storage=storage,
usedDataId=None,
datasetType=None))

@staticmethod
def getMapperClass(root):
Expand All @@ -224,7 +186,7 @@ def getMapperClass(root):
if not (root):
return None

cfg = PosixStorage._getRepositoryCfg(root)
cfg = PosixStorage.getRepositoryCfg(root)
if cfg is not None:
return cfg.mapper

Expand Down Expand Up @@ -643,6 +605,5 @@ def storageExists(uri):
"""
return os.path.exists(PosixStorage._pathFromURI(uri))


Storage.registerStorageClass(scheme='', cls=PosixStorage)
Storage.registerStorageClass(scheme='file', cls=PosixStorage)
4 changes: 2 additions & 2 deletions python/lsst/daf/persistence/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ def __init__(self, repoData):
Object that contains the parameters with which to init the Repository.
"""
self._storage = Storage.makeFromURI(repoData.cfg.root)
if repoData.isNewRepository and not repoData.isV1Repository:
self._storage.putRepositoryCfg(repoData.cfg, repoData.args.cfgRoot)
if repoData.cfg.dirty and not repoData.isV1Repository:
self._storage.putRepositoryCfg(repoData.cfg, repoData.cfgRoot)
self._mapperArgs = repoData.cfg.mapperArgs # keep for reference in matchesArgs
self._initMapper(repoData)

Expand Down