Skip to content

Commit

Permalink
Chain template repo to input repo instead of linking.
Browse files Browse the repository at this point in the history
Changing Dataset.makeCompatibleRepo from a filesystem copy to a Butler
chaining runs afoul of a couple of Butler 2 bugs, but it overall
reduces the amount of low-level repository manipulation in ap_verify.
  • Loading branch information
kfindeisen committed Apr 21, 2018
1 parent ab5af45 commit 9fcd047
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 81 deletions.
49 changes: 15 additions & 34 deletions python/lsst/ap/verify/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from __future__ import absolute_import, division, print_function

import os
import shutil
from future.utils import raise_from

from lsst.daf.persistence import Butler
Expand All @@ -34,36 +33,6 @@
from .config import Config


def _nicecopy(src, dst):
"""Recursively copy a directory, ignoring any files that already exist at
the destination.
Parameters
----------
src : `str`
The directory whose contents will be copied. Symbolic links will
be duplicated in `dst`, but will not be followed.
dst : `str`
The directory to which `src` and its contents will be copied.
"""
# Can't use exceptions to distinguish pre-existing directory from I/O failures until Python 3
if not os.path.exists(dst):
os.makedirs(dst)

for baseName in os.listdir(src):
old = os.path.join(src, baseName)
new = os.path.join(dst, baseName)

if not os.path.islink(old) and os.path.isdir(old):
_nicecopy(old, new)
elif not os.path.exists(new):
if os.path.islink(old):
target = os.readlink(old)
os.symlink(target, new)
else:
shutil.copy2(old, new)


class Dataset(object):
"""A dataset supported by ``ap_verify``.
Expand Down Expand Up @@ -243,7 +212,7 @@ def _validatePackage(self):
raise RuntimeError('Dataset is missing reference catalog directory at ' + self.refcatsLocation)
if not os.path.exists(self._stubInputRepo):
raise RuntimeError('Dataset at ' + self.datasetRoot + 'is missing stub repo')
if not os.path.exists(os.path.join(self._stubInputRepo, '_mapper')):
if not _isRepo(self._stubInputRepo):
raise RuntimeError('Stub repo at ' + self._stubInputRepo + 'is missing mapper file')

def makeCompatibleRepo(self, repoDir):
Expand All @@ -257,5 +226,17 @@ def makeCompatibleRepo(self, repoDir):
repoDir : `str`
The directory where the output repository will be created.
"""
# shutil.copytree has wrong behavior for existing destinations, do it by hand
_nicecopy(self._stubInputRepo, repoDir)
if _isRepo(self.templateLocation):
# Stub repo is not a parent because can't mix v1 and v2 repositories in parents list
Butler(inputs=[{"root": self.templateLocation, "mode": "r"}],
outputs=[{"root": repoDir, "mode": "rw"}])
else:
Butler(inputs=[{"root": self._stubInputRepo, "mode": "r"}],
outputs=[{"root": repoDir, "mode": "rw"}])


def _isRepo(repoDir):
"""Test whether a directory has been set up as a repository.
"""
return os.path.exists(os.path.join(repoDir, '_mapper')) \
or os.path.exists(os.path.join(repoDir, 'repositoryCfg.yaml'))
49 changes: 6 additions & 43 deletions python/lsst/ap/verify/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,6 @@ def run(self, dataset, workspace):
self._ingestCalibs(dataset, workspace)
self._ingestDefects(dataset, workspace)
self._ingestRefcats(dataset, workspace)
self._ingestTemplates(dataset, workspace)

def _makeRepos(self, dataset, workspace):
"""Create empty repositories to ingest into.
Expand Down Expand Up @@ -260,7 +259,9 @@ def _doIngestCalibs(self, repo, calibRepo, calibDataFiles):
but may include flats, biases, darks, fringes, or sky. May contain
wildcards.
"""
args = [repo, "--calib", calibRepo, "--mode", "link", "--validity", str(self.config.calibValidity)]
# TODO: --output is workaround for DM-11668
args = [repo, "--calib", calibRepo, "--output", os.path.join(calibRepo, "dummy"),
"--mode", "link", "--validity", str(self.config.calibValidity)]
args.extend(calibDataFiles)
try:
_runIngestTask(self.calibIngester, args)
Expand Down Expand Up @@ -316,7 +317,9 @@ def _doIngestDefects(self, repo, calibRepo, defectTarball):
tarfile.open(defectTarball, "r").extractall(defectDir)
defectFiles = _findMatchingFiles(defectDir, ["*.*"])

defectargs = [repo, "--calib", calibRepo, "--calibType", "defect",
# TODO: --output is workaround for DM-11668
defectargs = [repo, "--calib", calibRepo, "--output", os.path.join(calibRepo, "dummy"),
"--calibType", "defect",
"--mode", "skip", "--validity", str(self.config.defectValidity)]
defectargs.extend(defectFiles)
try:
Expand Down Expand Up @@ -368,46 +371,6 @@ def _doIngestRefcats(self, repo, refcats):
refcatDir = os.path.join(repo, "ref_cats", refcatName)
tarfile.open(tarball, "r").extractall(refcatDir)

def _ingestTemplates(self, dataset, workspace):
"""Ingest the templates for use by LSST.
After this method returns, the data repository in ``workspace`` shall
contain the templates from ``dataset``. Butler operations on the
repository shall not be able to modify ``dataset`` or its template
repository.
Parameters
----------
dataset : `lsst.ap.verify.dataset.Dataset`
The dataset on which the pipeline will be run.
workspace : `lsst.ap.verify.workspace.Workspace`
The location containing all ingestion repositories.
"""
# TODO: this check will need to be rewritten when Butler directories change, ticket TBD
if os.path.exists(os.path.join(workspace.templateRepo, "deepCoadd")) \
or os.path.exists(os.path.join(workspace.templateRepo, "goodSeeingCoadd")):
self.log.info("Templates were previously ingested, skipping...")
else:
self.log.info("Ingesting templates...")
self._doIngestTemplates(workspace.templateRepo, dataset.templateLocation)
self.log.info("Templates are now visible to {0}".format(workspace.dataRepo))

def _doIngestTemplates(self, templateRepo, inputTemplates):
"""Ingest templates into the input repository.
Parameters
----------
templateRepo: `str`
The output repository location on disk for templates. Must exist.
inputTemplates: `str`
The input repository location where templates have been previously computed.
"""
# TODO: chain inputTemplates to templateRepo once DM-12662 resolved
for baseName in os.listdir(inputTemplates):
oldDir = os.path.abspath(os.path.join(inputTemplates, baseName))
if os.path.isdir(oldDir):
os.symlink(oldDir, os.path.join(templateRepo, baseName))


def ingestDataset(dataset, workspace):
"""Ingest the contents of a dataset into a Butler repository.
Expand Down
10 changes: 6 additions & 4 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,9 @@ def testOutput(self):
self._testbed.makeCompatibleRepo(outputDir)
self.assertTrue(os.path.exists(outputDir), 'Output directory must exist.')
self.assertTrue(os.listdir(outputDir), 'Output directory must not be empty.')
self.assertTrue(os.path.exists(os.path.join(outputDir, '_mapper')),
'Output directory must have a _mapper file.')
self.assertTrue(os.path.exists(os.path.join(outputDir, '_mapper')) or
os.path.exists(os.path.join(outputDir, 'repositoryCfg.yaml')),
'Output directory must have a _mapper or repositoryCfg.yaml file.')
finally:
if os.path.exists(testDir):
shutil.rmtree(testDir, ignore_errors=True)
Expand All @@ -107,8 +108,9 @@ def testExistingOutput(self):
self._testbed.makeCompatibleRepo(outputDir)
self.assertTrue(os.path.exists(outputDir), 'Output directory must exist.')
self.assertTrue(os.listdir(outputDir), 'Output directory must not be empty.')
self.assertTrue(os.path.exists(os.path.join(outputDir, '_mapper')),
'Output directory must have a _mapper file.')
self.assertTrue(os.path.exists(os.path.join(outputDir, '_mapper')) or
os.path.exists(os.path.join(outputDir, 'repositoryCfg.yaml')),
'Output directory must have a _mapper or repositoryCfg.yaml file.')
finally:
if os.path.exists(testDir):
shutil.rmtree(testDir, ignore_errors=True)
Expand Down

0 comments on commit 9fcd047

Please sign in to comment.