Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Tickets/dm 8232 #58

Merged
merged 5 commits into from
Jan 4, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions python/lsst/meas/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from .astrometrySourceSelector import *
from .ingestIndexReferenceTask import *
from .loadIndexedReferenceObjects import *
from .indexerRegistry import *

from .version import *

Expand Down
14 changes: 12 additions & 2 deletions python/lsst/meas/algorithms/htmIndexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ def __init__(self, depth=8):
@param[in] depth depth of the hierarchy to construct
"""
self.htm = esutil.htm.HTM(depth)
# HACK need to call intersect first otherwise it segfaults
self.htm.intersect(1., 2., 0.00001)

def get_pixel_ids(self, ctrCoord, radius):
"""!Get all shards that touch a circular aperture
Expand All @@ -60,3 +58,15 @@ def index_points(self, ra_list, dec_list):
@param[out] A list of pixel ids
"""
return self.htm.lookup_id(ra_list, dec_list)

@staticmethod
def make_data_id(pixel_id, dataset_name):
"""!Make a data id. Meant to be overridden.
@param[in] pixel_id An identifier for the pixel in question.
@param[in] dataset_name Name of the dataset to use.
@param[out] dataId (dictionary)
"""
if pixel_id is None:
# NoneType doesn't format, so make dummy pixel
pixel_id = 0
return {'pixel_id': pixel_id, 'name':dataset_name}
46 changes: 46 additions & 0 deletions python/lsst/meas/algorithms/indexerRegistry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from __future__ import absolute_import, division
#
# LSST Data Management System
# Copyright 2016 LSST Corporation.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <http://www.lsstcorp.org/LegalNotices/>.
#
from lsst.pex.config import Config, makeRegistry, Field
from .htmIndexer import HtmIndexer

__all__ = ["IndexerRegistry"]

IndexerRegistry = makeRegistry(
"""Registry of indexing algorithms
"""
)

class HtmIndexerConfig(Config):
    """Configuration for the HTM (Hierarchical Triangular Mesh) indexer.

    Controls only the tree depth; deeper trees give smaller trixels.
    """
    # PEP 8: no spaces around '=' in keyword arguments.
    depth = Field(
        doc="""Depth of the HTM tree to make. Default is depth=7 which gives
        ~ 0.3 sq. deg. per trixel.""",
        dtype=int,
        default=7,
    )

def makeHtmIndexer(config):
    """Make an HtmIndexer from its configuration.

    @param[in] config  HtmIndexerConfig instance; only config.depth is used.
    @param[out] HtmIndexer built with the configured tree depth.
    """
    return HtmIndexer(depth=config.depth)

# Attach the config class so registry users can introspect/build the config,
# then register the factory under the name used in configuration files.
makeHtmIndexer.ConfigClass = HtmIndexerConfig
IndexerRegistry.register("HTM", makeHtmIndexer)
44 changes: 21 additions & 23 deletions python/lsst/meas/algorithms/ingestIndexReferenceTask.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@
import lsst.afw.coord as afwCoord
import lsst.afw.geom as afwGeom
from lsst.afw.image import fluxFromABMag, fluxErrFromABMagErr
from .htmIndexer import HtmIndexer as Indexer
from .indexerRegistry import IndexerRegistry
from .readTextCatalogTask import ReadTextCatalogTask

__all__ = ["IngestIndexedReferenceConfig", "IngestIndexedReferenceTask"]
__all__ = ["IngestIndexedReferenceConfig", "IngestIndexedReferenceTask", "DatasetConfig"]


class IngestReferenceRunner(pipeBase.TaskRunner):
Expand All @@ -58,17 +58,21 @@ def run(self, parsedCmd):
result=result,
)


class IngestIndexedReferenceConfig(pexConfig.Config):
class DatasetConfig(pexConfig.Config):
ref_dataset_name = pexConfig.Field(
dtype=str,
default='cal_ref_cat',
doc='String to pass to the butler to retrieve persisted files.',
)
level = pexConfig.Field(
dtype=int,
default=8,
doc='Default HTM level. Level 8 gives ~0.08 sq deg per trixel.',
indexer = IndexerRegistry.makeField(
default='HTM',
doc='Name of indexer algoritm to use. Default is HTM',
)

class IngestIndexedReferenceConfig(pexConfig.Config):
dataset_config = pexConfig.ConfigField(
dtype=DatasetConfig,
doc="Configuration for reading the ingested data",
)
file_reader = pexConfig.ConfigurableField(
target=ReadTextCatalogTask,
Expand Down Expand Up @@ -160,7 +164,7 @@ def __init__(self, *args, **kwargs):
"""
self.butler = kwargs.pop('butler')
pipeBase.Task.__init__(self, *args, **kwargs)
self.indexer = Indexer(self.config.level)
self.indexer = IndexerRegistry[self.config.dataset_config.indexer.name](self.config.dataset_config.indexer.active)
self.makeSubtask('file_reader')

def create_indexed_catalog(self, files):
Expand All @@ -177,27 +181,21 @@ def create_indexed_catalog(self, files):
if first:
schema, key_map = self.make_schema(arr.dtype)
# persist empty catalog to hold the master schema
dataId = self.make_data_id('master_schema')
self.butler.put(self.get_catalog(dataId, schema), self.config.ref_dataset_name,
dataId = self.indexer.make_data_id('master_schema', self.config.dataset_config.ref_dataset_name)
self.butler.put(self.get_catalog(dataId, schema), 'ref_cat',
dataId=dataId)
first = False
pixel_ids = set(index_list)
for pixel_id in pixel_ids:
dataId = self.make_data_id(pixel_id)
dataId = self.indexer.make_data_id(pixel_id, self.config.dataset_config.ref_dataset_name)
catalog = self.get_catalog(dataId, schema)
els = np.where(index_list == pixel_id)
for row in arr[els]:
record = catalog.addNew()
rec_num = self._fill_record(record, row, rec_num, key_map)
self.butler.put(catalog, self.config.ref_dataset_name, dataId=dataId)

@staticmethod
def make_data_id(pixel_id):
"""!Make a data id. Meant to be overridden.
@param[in] pixel_id An identifier for the pixel in question.
@param[out] dataId (dictionary)
"""
return {'pixel_id': pixel_id}
self.butler.put(catalog, 'ref_cat', dataId=dataId)
dataId = self.indexer.make_data_id(None, self.config.dataset_config.ref_dataset_name)
self.butler.put(self.config.dataset_config, 'ref_cat_config', dataId=dataId)

@staticmethod
def compute_coord(row, ra_name, dec_name):
Expand Down Expand Up @@ -284,8 +282,8 @@ def get_catalog(self, dataId, schema):
@param[in] schema Schema to use in catalog creation if the butler can't get it
@param[out] afwTable.SourceCatalog for the specified identifier
"""
if self.butler.datasetExists(self.config.ref_dataset_name, dataId=dataId):
return self.butler.get(self.config.ref_dataset_name, dataId=dataId)
if self.butler.datasetExists('ref_cat', dataId=dataId):
return self.butler.get('ref_cat', dataId=dataId)
return afwTable.SourceCatalog(schema)

def make_schema(self, dtype):
Expand Down
27 changes: 13 additions & 14 deletions python/lsst/meas/algorithms/loadIndexedReferenceObjects.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,28 +28,27 @@
import lsst.afw.table as afwTable
import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
from .indexerRegistry import IndexerRegistry
__all__ = ["LoadIndexedReferenceObjectsConfig", "LoadIndexedReferenceObjectsTask"]


class LoadIndexedReferenceObjectsConfig(LoadReferenceObjectsConfig):
ingest_config_name = pexConfig.Field(
ref_dataset_name = pexConfig.Field(
dtype=str,
default='IngestIndexedReferenceTask_config',
doc='Name of the config dataset used to ingest the reference'
)
default='cal_ref_cat',
doc='Name of the ingested reference dataset'
)


class LoadIndexedReferenceObjectsTask(LoadReferenceObjectsTask):
ConfigClass = LoadIndexedReferenceObjectsConfig
_DefaultName = 'LoadIndexedReferenceObjectsTask'

def __init__(self, butler, ingest_factory=IngestIndexedReferenceTask, *args, **kwargs):
def __init__(self, butler, *args, **kwargs):
LoadReferenceObjectsTask.__init__(self, *args, **kwargs)
ingest_config = butler.get(self.config.ingest_config_name, immediate=True)
ingester = ingest_factory(butler=butler, config=ingest_config)
self.indexer = ingester.indexer
self.make_data_id = ingester.make_data_id
self.ref_dataset_name = ingester.config.ref_dataset_name
dataset_config = butler.get("ref_cat_config", name=self.config.ref_dataset_name, immediate=True)
self.indexer = IndexerRegistry[dataset_config.indexer.name](dataset_config.indexer.active)
self.ref_dataset_name = dataset_config.ref_dataset_name
self.butler = butler

@pipeBase.timeMethod
Expand All @@ -70,7 +69,7 @@ def loadSkyCircle(self, ctrCoord, radius, filterName=None):
"""
id_list, boundary_mask = self.indexer.get_pixel_ids(ctrCoord, radius)
shards = self.get_shards(id_list)
refCat = self.butler.get(self.ref_dataset_name, dataId=self.make_data_id('master_schema'),
refCat = self.butler.get('ref_cat', dataId=self.indexer.make_data_id('master_schema', self.ref_dataset_name),
immediate=True)
self._addFluxAliases(refCat.schema)
fluxField = getRefFluxField(schema=refCat.schema, filterName=filterName)
Expand Down Expand Up @@ -113,9 +112,9 @@ def get_shards(self, id_list):
"""
shards = []
for pixel_id in id_list:
if self.butler.datasetExists(self.ref_dataset_name, dataId=self.make_data_id(pixel_id)):
shards.append(self.butler.get(self.ref_dataset_name,
dataId=self.make_data_id(pixel_id), immediate=True))
if self.butler.datasetExists('ref_cat', dataId=self.indexer.make_data_id(pixel_id, self.ref_dataset_name)):
shards.append(self.butler.get('ref_cat',
dataId=self.indexer.make_data_id(pixel_id, self.ref_dataset_name), immediate=True))
return shards

def _trim_to_circle(self, catalog_shard, ctrCoord, radius):
Expand Down
32 changes: 25 additions & 7 deletions tests/testHtmIndex.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import lsst.daf.persistence as dafPersist
from lsst.meas.algorithms import (IngestIndexedReferenceTask, LoadIndexedReferenceObjectsTask,
LoadIndexedReferenceObjectsConfig, getRefFluxField)
from lsst.meas.algorithms import IndexerRegistry
import lsst.utils

obs_test_dir = lsst.utils.getPackageDir('obs_test')
Expand Down Expand Up @@ -118,6 +119,10 @@ def setUpClass(cls):
cls.test_decs = [-90., -51., -30.1, 0., 27.3, 62., 90.]
cls.search_radius = 3. * afwGeom.degrees
cls.comp_cats = {} # dict of center coord: list of IDs of stars within cls.search_radius of center
config = IndexerRegistry['HTM'].ConfigClass()
# Match on disk comparison file
config.depth = 8
cls.indexer = IndexerRegistry['HTM'](config)
for ra in cls.test_ras:
for dec in cls.test_decs:
tupl = (ra, dec)
Expand All @@ -130,6 +135,8 @@ def setUpClass(cls):

cls.test_repo_path = cls.out_path+"/test_repo"
config = IngestIndexedReferenceTask.ConfigClass()
# To match on disk test data
config.dataset_config.indexer.active.depth = 8
config.ra_name = 'ra_icrs'
config.dec_name = 'dec_icrs'
config.mag_column_list = ['a', 'b']
Expand Down Expand Up @@ -166,10 +173,10 @@ def testSanity(self):

def testAgainstPersisted(self):
pix_id = 671901
data_id = IngestIndexedReferenceTask.make_data_id(pix_id)
dataset_name = IngestIndexedReferenceTask.ConfigClass().ref_dataset_name
self.assertTrue(self.test_butler.datasetExists(dataset_name, data_id))
ref_cat = self.test_butler.get(dataset_name, data_id)
dataset_name = IngestIndexedReferenceTask.ConfigClass().dataset_config.ref_dataset_name
data_id = self.indexer.make_data_id(pix_id, dataset_name)
self.assertTrue(self.test_butler.datasetExists('ref_cat', data_id))
ref_cat = self.test_butler.get('ref_cat', data_id)
ex1 = ref_cat.extract('*')
ex2 = self.test_cat.extract('*')
# compare sets as the order may be different
Expand Down Expand Up @@ -206,8 +213,8 @@ def testIngest(self):
default_config.dec_name = 'dec'
default_config.mag_column_list = ['a', 'b']
default_config.mag_err_column_map = {'a': 'a_err', 'b': 'b_err'}
default_config.ref_dataset_name = 'other_photo_astro_ref'
default_config.level = 10
default_config.dataset_config.ref_dataset_name = 'myrefcat'
default_config.dataset_config.indexer.active.depth = 10
default_config.is_photometric_name = 'is_phot'
default_config.is_resolved_name = 'is_res'
default_config.is_variable_name = 'is_var'
Expand All @@ -222,13 +229,24 @@ def testIngest(self):
args=[input_dir, "--output", self.out_path+"/output_override",
self.sky_catalog_file_delim], config=default_config)

# Test if we can get back the catalog with a non-standard dataset name
butler = dafPersist.Butler(self.out_path+"/output_override")
config = LoadIndexedReferenceObjectsConfig()
config.ref_dataset_name = "myrefcat"
loader = LoadIndexedReferenceObjectsTask(butler=butler, config=config)
# This location is known to have objects
tupl = (93.0, -90.0)
cent = make_coord(*tupl)
cat = loader.loadSkyCircle(cent, self.search_radius, filterName='a')
self.assertTrue(len(cat) > 0)

def testLoadIndexedReferenceConfig(self):
"""Make sure LoadIndexedReferenceConfig has needed fields."""
"""
Including at least one from the base class LoadReferenceObjectsConfig
"""
config = LoadIndexedReferenceObjectsConfig()
self.assertEqual(config.ingest_config_name, "IngestIndexedReferenceTask_config")
self.assertEqual(config.ref_dataset_name, "cal_ref_cat")
self.assertEqual(config.defaultFilter, "")

def testLoadSkyCircle(self):
Expand Down