Skip to content

Commit

Permalink
Merge pull request #240 from lsst/tickets/DM-23931
Browse files Browse the repository at this point in the history
DM-23931: Allow makeRepo to fail if config already exists
  • Loading branch information
timj committed Mar 20, 2020
2 parents fb2be8f + 1080b80 commit 641c825
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 14 deletions.
11 changes: 9 additions & 2 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,8 @@ def __init__(self, config: Union[Config, str, None] = None, *,
@staticmethod
def makeRepo(root: str, config: Union[Config, str, None] = None, standalone: bool = False,
createRegistry: bool = True, searchPaths: Optional[List[str]] = None,
forceConfigRoot: bool = True, outfile: Optional[str] = None) -> Config:
forceConfigRoot: bool = True, outfile: Optional[str] = None,
overwrite: bool = False) -> Config:
"""Create an empty data repository by adding a butler.yaml config
to a repository root directory.
Expand Down Expand Up @@ -226,6 +227,10 @@ def makeRepo(root: str, config: Union[Config, str, None] = None, standalone: boo
location rather than into the repository itself. Can be a URI
string. Can refer to a directory that will be used to write
``butler.yaml``.
overwrite : `bool`, optional
Create a new configuration file even if one already exists
in the specified output location. Default is to raise
an exception.
Returns
-------
Expand All @@ -238,6 +243,8 @@ def makeRepo(root: str, config: Union[Config, str, None] = None, standalone: boo
Raised if a ButlerConfig or ConfigSubset is passed instead of a
regular Config (as these subclasses would make it impossible to
support ``standalone=False``).
FileExistsError
Raised if the output config file already exists.
os.error
Raised if the directory does not exist, exists but is not a
directory, or cannot be created.
Expand Down Expand Up @@ -301,7 +308,7 @@ def makeRepo(root: str, config: Union[Config, str, None] = None, standalone: boo
configURI = outfile
else:
configURI = uri
config.dumpToUri(configURI)
config.dumpToUri(configURI, overwrite=overwrite)

# Create Registry and populate tables
Registry.fromConfig(config, create=createRegistry, butlerRoot=root)
Expand Down
54 changes: 44 additions & 10 deletions python/lsst/daf/butler/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,8 @@ def dump(self, output):
if data:
yaml.safe_dump(data, output, default_flow_style=False)

def dumpToUri(self, uri, updateFile=True, defaultFileName="butler.yaml"):
def dumpToUri(self, uri, updateFile=True, defaultFileName="butler.yaml",
overwrite=True):
"""Writes the config to location pointed to by given URI.
Currently supports 's3' and 'file' URI schemes.
Expand All @@ -794,53 +795,86 @@ def dumpToUri(self, uri, updateFile=True, defaultFileName="butler.yaml"):
defaultFileName : str, optional
The file name that will be appended to the target uri if updateFile is
True and uri does not end in a file name with an extension.
overwrite : bool, optional
If True the configuration will be written even if it already
exists at that location. If False, `FileExistsError` is raised
when a configuration already exists there.
"""
if isinstance(uri, str):
uri = ButlerURI(uri)

if not uri.scheme or uri.scheme == "file":
if os.path.isdir(uri.path) and updateFile:
uri = ButlerURI(os.path.join(uri.ospath, defaultFileName))
self.dumpToFile(uri.ospath)
self.dumpToFile(uri.ospath, overwrite=overwrite)
elif uri.scheme == "s3":
head, filename = posixpath.split(uri.path)
if "." not in filename:
uri.updateFile(defaultFileName)
self.dumpToS3File(uri.netloc, uri.relativeToPathRoot)
self.dumpToS3File(uri, overwrite=overwrite)
else:
raise ValueError(f"Unrecognized URI scheme: {uri.scheme}")

def dumpToFile(self, path, *, overwrite=True):
    """Write the config to a file.

    Parameters
    ----------
    path : `str`
        Path to the file to use for output.
    overwrite : `bool`, optional
        If `True` any existing file will be overwritten. If `False` the
        file is opened in exclusive-creation mode, so an existing file is
        left untouched.

    Raises
    ------
    FileExistsError
        Raised if the file already exists but ``overwrite`` is `False`.

    Notes
    -----
    The name of the config file is stored in the Config object.
    """
    # Mode "x" makes open() itself fail when the file exists, so there is
    # no gap between an existence check and the write.
    mode = "w" if overwrite else "x"
    with open(path, mode) as f:
        self.dump(f)
    # Only reached when the write above succeeded.
    self.configFile = path

def dumpToS3File(self, bucket, key):
def dumpToS3File(self, uri, *, overwrite=True):
"""Writes the config to a file in S3 Bucket.
Parameters
----------
bucketname: `str`
Name of the Bucket into which config will be written.
key : `str`
Path to the file to use for output, relative to the bucket.
uri : `ButlerURI`
S3 URI where the configuration should be stored.
overwrite : `bool`, optional
If False, a check will be made to see if the key already
exists.
Raises
------
FileExistsError
Raised if the configuration already exists at this location
and overwrite is set to `False`.
"""
if boto3 is None:
raise ModuleNotFoundError("Could not find boto3. "
"Are you sure it is installed?")

if uri.scheme != "s3":
raise ValueError(f"Must provide S3 URI not {uri}")

s3 = boto3.client("s3")

if not overwrite:
from .s3utils import s3CheckFileExists
if s3CheckFileExists(uri, client=s3)[0]:
raise FileExistsError(f"Config already exists at {uri}")

bucket = uri.netloc
key = uri.relativeToPathRoot

with io.StringIO() as stream:
self.dump(stream)
stream.seek(0)
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/dimensions/coordinate.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def standardize(mapping: Optional[Mapping[str, Any]] = None, *,
try:
values = tuple(d[name] for name in graph.required.names)
except KeyError as err:
raise KeyError(f"No value in data ID for required dimension {err}.") from err
raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err
return DataCoordinate(graph, values)

def byName(self) -> Dict[str, Any]:
Expand Down
6 changes: 5 additions & 1 deletion tests/test_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,7 @@ def testMakeRepo(self):
limited = Config(self.configFile)
butler1 = Butler(butlerConfig)
butlerConfig = Butler.makeRepo(self.root, standalone=True, createRegistry=False,
config=Config(self.configFile))
config=Config(self.configFile), overwrite=True)
full = Config(self.tmpConfigFile)
butler2 = Butler(butlerConfig)
# Butlers should have the same configuration regardless of whether
Expand All @@ -618,6 +618,10 @@ def testMakeRepo(self):
with self.assertRaises(ValueError):
Butler(butlerConfig)

with self.assertRaises(FileExistsError):
Butler.makeRepo(self.root, standalone=True, createRegistry=False,
config=Config(self.configFile), overwrite=False)

def testStringification(self):
butler = Butler(self.tmpConfigFile, run="ingest")
butlerStr = str(butler)
Expand Down
27 changes: 27 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
import contextlib
import collections
import itertools
import shutil
import tempfile

from lsst.daf.butler import ConfigSubset, Config

Expand Down Expand Up @@ -543,5 +545,30 @@ def testIncludeConfigs(self):
Config(os.path.join(self.configDir, "configIncludesEnv.yaml"))


class FileWriteConfigTestCase(unittest.TestCase):
    """Check writing a `Config` to disk, including the ``overwrite`` flag."""

    def setUp(self):
        # Each test writes into its own scratch directory.
        self.tmpdir = tempfile.mkdtemp()

    def tearDown(self):
        # Nothing to clean up if the directory has already gone.
        if not os.path.exists(self.tmpdir):
            return
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def testDump(self):
        """Round-trip a configuration through a file and test overwriting."""

        original = Config({"1": 2, "3": 4, "key3": 6, "dict": {"a": 1, "b": 2}})
        target = os.path.join(self.tmpdir, "test.yaml")

        # First write: the file does not exist yet.
        original.dumpToUri(target)

        reloaded = Config(target)
        self.assertEqual(reloaded, original)

        # Rewriting is allowed when explicitly requested...
        original.dumpToUri(target, overwrite=True)
        # ...and refused when it is not.
        with self.assertRaises(FileExistsError):
            original.dumpToUri(target, overwrite=False)


if __name__ == "__main__":
unittest.main()

0 comments on commit 641c825

Please sign in to comment.