Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-11987: ap_verify should allow output to non-empty repos #7

Merged
merged 2 commits into from
Nov 8, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
48 changes: 36 additions & 12 deletions python/lsst/ap/verify/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,36 @@
from .config import Config


def _nicecopy(src, dst):
"""Recursively copy a directory from src to dst, ignoring any files
that already exist.

Parameters
----------
src: `str`
The directory whose contents will be copied. Symbolic links will
be duplicated in `dst`, but will not be followed.
dst: `str`
The directory to which `src` and its contents will be copied.
"""
# Can't use exceptions to distinguish pre-existing directory from I/O failures until Python 3
if not os.path.exists(dst):
os.makedirs(dst)

for baseName in os.listdir(src):
old = os.path.join(src, baseName)
new = os.path.join(dst, baseName)

if not os.path.islink(old) and os.path.isdir(old):
_nicecopy(old, new)
elif not os.path.exists(new):
if os.path.islink(old):
target = os.readlink(old)
os.symlink(target, new)
else:
shutil.copy2(old, new)


class Dataset(object):
"""A dataset supported by ap_verify.

Expand Down Expand Up @@ -213,19 +243,13 @@ def _validate_package(self):
def make_output_repo(self, output_dir):
"""Set up a directory as an output repository compatible with this dataset.

If the directory already exists, any files required by the dataset will
be added if absent; otherwise the directory will remain unchanged.

Parameters
----------
output_dir: `str`
The directory where the output repository will be created. Must be
empty or non-existent.
The directory where the output repository will be created.
"""
if os.path.exists(output_dir):
if not os.path.isdir(output_dir):
raise IOError(output_dir + 'is not a directory')
if os.listdir(output_dir):
raise IOError(output_dir + 'is already occupied')

# copytree does not allow empty destination directories
shutil.rmtree(output_dir)

shutil.copytree(self._stub_input_repo, output_dir)
# shutil.copytree has wrong behavior for existing destinations, do it by hand
_nicecopy(self._stub_input_repo, output_dir)
5 changes: 4 additions & 1 deletion python/lsst/ap/verify/pipeline_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import json

import lsst.log
import lsst.daf.base as dafBase
import lsst.ap.pipe as ap_pipe
from lsst.verify import Job

Expand Down Expand Up @@ -289,7 +290,9 @@ def run_ap_pipe(dataset, working_repo, parsed_cmd_line, metrics_job):
"""
log = lsst.log.Log.getLogger('ap.verify.pipeline_driver.run_ap_pipe')

metadata = _ingest_raws(dataset, working_repo, metrics_job)
# Easiest way to defend against None return values
metadata = dafBase.PropertySet()
metadata.combine(_ingest_raws(dataset, working_repo, metrics_job))
metadata.combine(_ingest_calibs(dataset, working_repo, metrics_job))
log.info('Data ingested')

Expand Down
16 changes: 10 additions & 6 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,11 @@ def test_output(self):
'Output directory must have a _mapper file.')
finally:
if os.path.exists(test_dir):
shutil.rmtree(test_dir)
shutil.rmtree(test_dir, ignore_errors=True)

def test_bad_output(self):
"""Verify that a Dataset will not create an output directory if it is unsafe to do so.
def test_existing_output(self):
"""Verify that a Dataset can handle pre-existing output directories,
including directories made by external code.
"""
test_dir = tempfile.mkdtemp(dir=os.path.dirname(__file__))
output_dir = os.path.join(test_dir, 'badOut')
Expand All @@ -90,11 +91,14 @@ def test_bad_output(self):
with open(output, 'w') as dummy:
dummy.write('This is a test!')

with self.assertRaises(IOError):
self._testbed.make_output_repo(output_dir)
self._testbed.make_output_repo(output_dir)
self.assertTrue(os.path.exists(output_dir), 'Output directory must exist.')
self.assertTrue(os.listdir(output_dir), 'Output directory must not be empty.')
self.assertTrue(os.path.exists(os.path.join(output_dir, '_mapper')),
'Output directory must have a _mapper file.')
finally:
if os.path.exists(test_dir):
shutil.rmtree(test_dir)
shutil.rmtree(test_dir, ignore_errors=True)


class MemoryTester(lsst.utils.tests.MemoryTestCase):
Expand Down