Skip to content

Commit

Permalink
Add Gen 3 repository.
Browse files Browse the repository at this point in the history
The repository passes basic consistency checks but, like the rest of
testdata, is not guaranteed to be useful for analysis.
  • Loading branch information
kfindeisen committed Jun 4, 2020
1 parent 046e974 commit e8b6a03
Show file tree
Hide file tree
Showing 18 changed files with 366 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
*.fits filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.fz filter=lfs diff=lfs merge=lfs -text
*.sqlite3 filter=lfs diff=lfs merge=lfs -text

# These look like fits files, but are really symbolic links
preloaded/calib/LSST-ImSim/bias !filter !diff !merge
preloaded/calib/LSST-ImSim/flat !filter !diff !merge
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ path | description
`config` | Dataset-specific configs to help Stack code work with this dataset.
`templates` | To be populated with `TemplateCoadd` images produced by a compatible version of the LSST pipelines. Must be organized as a filesystem-based Butler repo. Currently empty.
`repo` | A template for a Butler raw data repository. This directory must never be written to; instead, it should be copied to a separate location, and data ingested into the copy (this is handled automatically by `ap_verify`, see below). Currently contains the appropriate `obs_lsst` `_mapper` file.
`preloaded` | A Gen 3 Butler repository containing the data in `calib` and `refcats`.
`refcats` | A small Gaia reference catalog.
`scripts` | A custom script for generating the `preloaded` directory.
`dataIds.list` | List of dataIds in this repo. For use in running Tasks. Currently set to run all Ids.


Expand Down
3 changes: 3 additions & 0 deletions config/convertRepo_calibs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Config overrides for convert_gen2_repo_to_gen3.py

# Skip dataset types that this conversion pass must not handle. "ref_cat" and
# "defects" are converted separately using convertRepo_copied.py.
config.datasetIgnorePatterns.extend(["raw", "*Coadd_skyMap", "ref_cat", "defects"])
12 changes: 12 additions & 0 deletions config/convertRepo_copied.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@

# Config overrides for convert_gen2_repo_to_gen3.py

# This pass converts only the dataset types that convertRepo_calibs.py ignores.
config.datasetIncludePatterns = ["ref_cat", "defects"]

# Put every reference catalog into the shared "refcats" run.
config.refCats = ['gaia']
for refcat in config.refCats:
    config.runs[refcat] = "refcats"

# Already handled by the conversion pass that uses convertRepo_calibs.py
config.doRegisterInstrument = False
config.doWriteCuratedCalibrations = False
158 changes: 158 additions & 0 deletions config/export.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
description: Butler Data Repository Export
version: 0
data:
- type: dimension
element: instrument
records:
- name: LSST-ImSim
visit_max: 9999999999
exposure_max: 9999999999
detector_max: 999
class_name: lsst.obs.lsst.LsstImSim
- type: dimension
element: calibration_label
records:
- instrument: LSST-ImSim
name: unbounded
datetime_begin: 1969-12-31 23:59:51.999918
datetime_end: 2099-12-31 23:59:23
- instrument: LSST-ImSim
name: gen2/bias_2022-01-01_037
datetime_begin: 1994-08-16 23:59:31
datetime_end: 2049-05-18 23:59:23
- instrument: LSST-ImSim
name: gen2/flat_2022-08-06_037_i
datetime_begin: 1995-03-21 23:59:31
datetime_end: 2049-12-21 23:59:23
- type: dimension
element: detector
records:
- instrument: LSST-ImSim
id: 37
full_name: R11_S01
name_in_raft: S01
raft: R11
purpose: SCIENCE
- type: dimension
element: physical_filter
records:
- instrument: LSST-ImSim
name: i
abstract_filter: i
- type: dataset_type
name: camera
dimensions:
- instrument
- calibration_label
storage_class: Camera
- type: run
name: calib/LSST-ImSim
- type: dataset
dataset_type: camera
run: calib/LSST-ImSim
records:
- dataset_id:
- 1
data_id:
- instrument: LSST-ImSim
calibration_label: unbounded
path: calib/LSST-ImSim/camera/camera_unbounded_LSST-ImSim_calib_LSST-ImSim.fits
formatter: lsst.obs.base.fitsGenericFormatter.FitsGenericFormatter
- type: dataset_type
name: bias
dimensions:
- instrument
- calibration_label
- detector
storage_class: ExposureF
- type: run
name: calib/LSST-ImSim
- type: dataset
dataset_type: bias
run: calib/LSST-ImSim
records:
- dataset_id:
- 2
data_id:
- instrument: LSST-ImSim
calibration_label: gen2/bias_2022-01-01_037
detector: 37
path: calib/LSST-ImSim/bias/bias_gen2_bias_2022-01-01_037_37_LSST-ImSim_calib_LSST-ImSim.fits
formatter: lsst.obs.base.fitsExposureFormatter.FitsExposureFormatter
- type: dataset_type
name: flat
dimensions:
- abstract_filter
- instrument
- calibration_label
- detector
- physical_filter
storage_class: ExposureF
- type: run
name: calib/LSST-ImSim
- type: dataset
dataset_type: flat
run: calib/LSST-ImSim
records:
- dataset_id:
- 3
data_id:
- instrument: LSST-ImSim
calibration_label: gen2/flat_2022-08-06_037_i
detector: 37
physical_filter: i
path: calib/LSST-ImSim/flat/i/i/flat_i_i_gen2_flat_2022-08-06_037_i_37_LSST-ImSim_calib_LSST-ImSim.fits
formatter: lsst.obs.base.fitsExposureFormatter.FitsExposureFormatter
- type: dataset_type
name: gaia
dimensions:
- htm7
storage_class: SimpleCatalog
- type: run
name: refcats
- type: dataset
dataset_type: gaia
run: refcats
records:
- dataset_id:
- 4
data_id:
- htm7: 232347
path: refcats/gaia/gaia_232347_refcats.fits
formatter: lsst.obs.base.fitsGenericFormatter.FitsGenericFormatter
- dataset_id:
- 5
data_id:
- htm7: 232324
path: refcats/gaia/gaia_232324_refcats.fits
formatter: lsst.obs.base.fitsGenericFormatter.FitsGenericFormatter
- dataset_id:
- 6
data_id:
- htm7: 232376
path: refcats/gaia/gaia_232376_refcats.fits
formatter: lsst.obs.base.fitsGenericFormatter.FitsGenericFormatter
- dataset_id:
- 7
data_id:
- htm7: 232327
path: refcats/gaia/gaia_232327_refcats.fits
formatter: lsst.obs.base.fitsGenericFormatter.FitsGenericFormatter
- dataset_id:
- 8
data_id:
- htm7: 232326
path: refcats/gaia/gaia_232326_refcats.fits
formatter: lsst.obs.base.fitsGenericFormatter.FitsGenericFormatter
- dataset_id:
- 9
data_id:
- htm7: 232344
path: refcats/gaia/gaia_232344_refcats.fits
formatter: lsst.obs.base.fitsGenericFormatter.FitsGenericFormatter
- dataset_id:
- 10
data_id:
- htm7: 232345
path: refcats/gaia/gaia_232345_refcats.fits
formatter: lsst.obs.base.fitsGenericFormatter.FitsGenericFormatter
7 changes: 7 additions & 0 deletions preloaded/butler.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
datastore:
cls: lsst.daf.butler.datastores.posixDatastore.PosixDatastore
records:
table: posix_datastore_records
root: <butlerRoot>
registry:
db: sqlite:///<butlerRoot>/gen3.sqlite3
Git LFS file not shown
3 changes: 3 additions & 0 deletions preloaded/gen3.sqlite3
Git LFS file not shown
3 changes: 3 additions & 0 deletions preloaded/refcats/gaia/gaia_232324_refcats.fits
Git LFS file not shown
3 changes: 3 additions & 0 deletions preloaded/refcats/gaia/gaia_232326_refcats.fits
Git LFS file not shown
3 changes: 3 additions & 0 deletions preloaded/refcats/gaia/gaia_232327_refcats.fits
Git LFS file not shown
3 changes: 3 additions & 0 deletions preloaded/refcats/gaia/gaia_232344_refcats.fits
Git LFS file not shown
3 changes: 3 additions & 0 deletions preloaded/refcats/gaia/gaia_232345_refcats.fits
Git LFS file not shown
3 changes: 3 additions & 0 deletions preloaded/refcats/gaia/gaia_232347_refcats.fits
Git LFS file not shown
3 changes: 3 additions & 0 deletions preloaded/refcats/gaia/gaia_232376_refcats.fits
Git LFS file not shown
150 changes: 150 additions & 0 deletions scripts/add_gen3_repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#!/usr/bin/env python

"""Convert a Gen 2 dataset to a Gen 3 dataset.

By default, this creates a hybrid Gen 2/3 dataset with shared files. The
``--drop-gen2`` flag lets a dataset be permanently migrated to Gen 3 instead.
"""

import argparse
import os
import shutil
import tempfile

import lsst.log
import lsst.daf.butler as daf_butler
from lsst.obs.base.script import convert
import lsst.ap.verify as ap_verify


# HACK: ensure this script knows about this dataset by registering it under
# the name "test" (presumably what lets ``Dataset("test")`` in ``main`` be
# looked up -- confirm against ap_verify). This writes to a private attribute
# of the ap_verify config singleton, so it may break if those internals change.
ap_verify.config.Config.instance._allInfo['datasets.test'] = 'ap_verify_testdata'


class _Parser(argparse.ArgumentParser):
def __init__(self, **kwargs):
# super() causes problems with program name
argparse.ArgumentParser.__init__(
self,
description="Copy the test dataset's Gen 2 files into the Gen 3 format, overwriting a previous "
"copy if necessary. This creates a hybrid Gen 2/3 dataset unless the --drop-gen2 "
"flag is provided. DO NOT delete the Gen 2 files unless this flag has been used, "
"as the Gen 3 part of a hybrid dataset depends on them.\n\n"
"Assumes that the dataset's config directory has two configs for "
"obs.base.gen2to3.ConvertRepoTask: convertRepo_calibs.py and convertRepo_copied.py. "
"See ap_verify_dataset_template/config for examples.",
**kwargs)
self.add_argument("--drop-gen2", action="store_true",
help="Create a standalone Gen 3 repo instead of sharing files with Gen 2. "
"Intended for use only once ap_verify no longer supports Gen 2.")


def main():
    """Rebuild the test dataset's Gen 3 repository from its Gen 2 contents.

    The ``preloaded`` directory under the dataset root is deleted if it
    exists and recreated empty. The dataset is then ingested into a temporary
    Gen 2 workspace, converted into the new directory in two passes (one per
    config file), and finally the Gen 3 registry is exported.
    """
    args = _Parser().parse_args()
    log = lsst.log.Log.getLogger("add_gen3_repo")

    # Files may only be linked while the Gen 2 part of the dataset is kept
    mode = "copy" if args.drop_gen2 else "relsymlink"

    # To convert consistently, don't use any previous output
    dataset = ap_verify.dataset.Dataset("test")
    gen3_repo = os.path.join(dataset.datasetRoot, "preloaded")
    if os.path.exists(gen3_repo):
        log.warn("Clearing out %s and making it from scratch...", gen3_repo)
        shutil.rmtree(gen3_repo)
    os.makedirs(gen3_repo)

    log.info("Converting calibs...")
    with tempfile.TemporaryDirectory() as scratch_dir:
        workspace = ap_verify.workspace.Workspace(scratch_dir)
        ap_verify.ingestion.ingestDataset(dataset, workspace)

        gen2_repo = workspace.dataRepo
        gen2_calibs = workspace.calibRepo
        conversions = [
            # Files stored in the Gen 2 part of the dataset, can be safely linked
            ("convertRepo_calibs.py", mode),
            # Our refcats and defects are temporary files, and must not be linked
            ("convertRepo_copied.py", "copy"),
        ]
        for config_file, transfer in conversions:
            _migrate_gen2_to_gen3(dataset, gen2_repo, gen2_calibs, gen3_repo,
                                  mode=transfer, config_file=config_file)

    log.info("Exporting Gen 3 registry to configure new repos...")
    _export_for_copy(dataset, gen3_repo)


def _migrate_gen2_to_gen3(dataset, gen2_repo, gen2_calib_repo, gen3_repo, mode, config_file):
    """Convert a Gen 2 repository into a Gen 3 repository.

    Parameters
    ----------
    dataset : `lsst.ap.verify.dataset.Dataset`
        The dataset being migrated.
    gen2_repo, gen2_calib_repo : `str`
        The locations of the original repositories.
    gen3_repo : `str`
        The location of the Gen 3 repository. Must exist, but need not be
        initialized as a repository.
    mode : {'relsymlink', 'copy'}
        Whether the Gen 3 repo should contain symbolic links to the Gen 2
        datasets, or an independent copy.
    config_file : `str`
        The config file (in the dataset config directory) with a configuration
        for `~lsst.obs.base.gen2to3.ConvertRepoTask`.
    """
    instrument_class = _get_instrument_class(dataset.camera)
    config_path = os.path.join(dataset.configLocation, config_file)

    # No skymaps or reruns are converted; only the calib repo and the
    # per-pass config vary between calls.
    convert_options = dict(
        skymap_name=None,
        skymap_config=None,
        reruns=None,
        calibs=gen2_calib_repo,
        config_file=config_path,
        transfer=mode,
    )

    # Call the script instead of calling ConvertRepoTask directly, to
    # avoid manually having to do a lot of setup that may change in the future
    # calib/<instrument>, refcats, and skymaps collections created by default
    convert(gen3_repo, gen2_repo, instrument_class, **convert_options)


def _export_for_copy(dataset, repo):
    """Export a Gen 3 repository so that a dataset can make copies later.

    The export is written in YAML format to the dataset's config directory
    and covers every dataset of every type in every collection.

    Parameters
    ----------
    dataset : `lsst.ap.verify.dataset.Dataset`
        The dataset needing the ability to copy the repository.
    repo : `str`
        The location of the Gen 3 repository.
    """
    butler = daf_butler.Butler(repo)
    with butler.export(directory=dataset.configLocation, format="yaml") as exporter:
        # Ellipsis acts as a wildcard for both dataset types and collections
        everything = butler.registry.queryDatasets(datasetType=..., collections=..., expand=True)
        exporter.saveDatasets(everything)


def _get_instrument_class(instrument):
"""Convert a Gen 2 instrument name to a Gen 3 instrument class.
Parameters
----------
instrument : `str`
A name in the format returned by
`lsst.obs.base.CameraMapper.getCameraName`.
Returns
-------
instrumentClass : `str`
The fully-qualified `~lsst.obs.base.Instrument` class for the
corresponding instrument.
"""
classes = {
"decam": "lsst.obs.decam.DarkEnergyCamera",
"hsc": "lsst.obs.subaru.HyperSuprimeCam",
"imsim": "lsst.obs.lsst.LsstImSim",
}

try:
return classes[instrument]
except KeyError:
raise ValueError(f"Unsupported instrument {instrument}; consider adding it to the "
"ap_verify_testdata's add_gen3_repo.py script.")


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 comments on commit e8b6a03

Please sign in to comment.