Reorganization to split file reading from indexing
SimonKrughoff committed Mar 31, 2016
1 parent ee000d0 commit bbd76ce
Showing 4 changed files with 139 additions and 96 deletions.
4 changes: 2 additions & 2 deletions bin.src/ingestReferenceCatalog.py
@@ -1,3 +1,3 @@
#!/usr/bin/env python
from lsst.pipe.tasks.htmIndexEngine import HtmIndexEngine
HtmIndexEngine.parseAndRun()
from lsst.pipe.tasks.indexReferenceTask import IngestIndexedReferenceTask
IngestIndexedReferenceTask.parseAndRun()
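
For context, the renamed task keeps the same parseAndRun entry point the bin script calls, so it can also be driven from Python. A hedged sketch follows (the repository path, output path, config file, and catalog file names are hypothetical); it mirrors the parseAndRun call in tests/testHtmIndex.py further down.

from lsst.pipe.tasks.indexReferenceTask import IngestIndexedReferenceTask

# Sketch only: an input butler repository, an output repository, a config override
# file, and one or more text catalog files (the new "files" positional argument).
IngestIndexedReferenceTask.parseAndRun(
    args=["/path/to/input_repo",
          "--output", "/path/to/output_repo",
          "--configfile", "ingest_config.py",
          "ref_catalog.txt"])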
58 changes: 58 additions & 0 deletions python/lsst/pipe/tasks/htmIndexer.py
@@ -0,0 +1,58 @@
#
# LSST Data Management System
# Copyright 2008-2016 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
#
import esutil


class HtmIndexer(object):
def __init__(self, depth=8):
"""!Construct the indexer object
@param[in] depth depth of the hierarchy to construct
"""
self.htm = esutil.htm.HTM(depth)
# HACK need to call intersect first otherwise it segfaults
self.htm.intersect(1., 2., 0.00001)

def get_pixel_ids(self, ctrCoord, radius):
"""!Get all shards that touch a circular aperture
@param[in] ctrCoord afwCoord.Coord object of the center of the aperture
@param[in] radius afwGeom.Angle object of the aperture radius
@param[out] A tuple of the list of shard ids, pixel_id_list, and an iterable of booleans,
is_on_boundary, indicating whether each shard touches the boundary (True) or is fully contained (False).
"""
pixel_id_list = self.htm.intersect(ctrCoord.getRa().asDegrees(), ctrCoord.getDec().asDegrees(),
radius.asDegrees(), inclusive=True)
covered_pixel_id_list = self.htm.intersect(ctrCoord.getRa().asDegrees(),
ctrCoord.getDec().asDegrees(),
radius.asDegrees(), inclusive=False)
is_on_boundary = (pixel_id not in covered_pixel_id_list for pixel_id in pixel_id_list)
return pixel_id_list, is_on_boundary

def index_points(self, ra_list, dec_list):
"""!Generate trixel ids for each row in an input file
@param[in] ra_list List of RA coordinates in degrees
@param[in] dec_list List of Dec coordinates in degrees
@param[out] A list of pixel ids
"""
return self.htm.lookup_id(ra_list, dec_list)
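
A minimal usage sketch for the new indexer (not part of the diff), assuming the LSST afw coordinate and geometry primitives and esutil are set up; the center, radius, and coordinate values are illustrative.

import lsst.afw.coord as afwCoord
import lsst.afw.geom as afwGeom
from lsst.pipe.tasks.htmIndexer import HtmIndexer

indexer = HtmIndexer(depth=8)

# All shards overlapping a 0.2 degree aperture centered at (RA, Dec) = (10, -5) degrees.
center = afwCoord.Coord(afwGeom.Angle(10.0, afwGeom.degrees),
                        afwGeom.Angle(-5.0, afwGeom.degrees))
radius = afwGeom.Angle(0.2, afwGeom.degrees)
shard_ids, is_on_boundary = indexer.get_pixel_ids(center, radius)
boundary_shards = [s for s, on_edge in zip(shard_ids, is_on_boundary) if on_edge]

# Per-row trixel ids for arrays of RA/Dec in degrees, as used by create_indexed_catalog.
trixel_ids = indexer.index_points([10.0, 10.1], [-5.0, -5.05])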
165 changes: 75 additions & 90 deletions python/lsst/pipe/tasks/indexReferenceTask.py
@@ -25,17 +25,84 @@
import lsst.afw.coord as afwCoord
import lsst.afw.geom as afwGeom
from lsst.afw.image import fluxFromABMag, fluxErrFromABMagErr
from .htmIndexer import HtmIndexer as Indexer

import esutil
import numpy


class TextReaderConfig(pexConfig.Config):
header_lines = pexConfig.Field(
dtype=int,
default=0,
doc='Number of lines to skip when reading the text reference file.'
)
colnames = pexConfig.ListField(
dtype=str,
default=[],
doc="""An ordered list of column names to use in ingesting the catalog. With an empty
list, column names will be discovered from the first line after the skipped header lines."""
)
delimiter = pexConfig.Field(
dtype=str,
default=',',
doc='Delimiter to use when reading text reference files. Comma is default.'
)


class TextReaderTask(pipeBase.Task):
_DefaultName = 'TextReaderTask'
ConfigClass = TextReaderConfig

def readFile(self, filename):
names = True
if self.config.colnames:
names = self.config.colnames
arr = numpy.genfromtxt(filename, dtype=None, skip_header=self.config.header_lines,
delimiter=self.config.delimiter,
names=names)
# Just in case someone has only one line in the file.
return numpy.atleast_1d(arr)


class IngestReferenceRunner(pipeBase.TaskRunner):
"""!Task runner for the reference catalog ingester
Data IDs are ignored so the runner should just run the task on the parsed command.
"""
def run(self, parsedCmd):
"""!Run the task.
Several arguments need to be collected to send on to the task methods.
@param[in] parsedCmd Parsed command including command line arguments.
@param[out] Struct containing the result of the indexing.
"""
files = parsedCmd.files
butler = parsedCmd.butler
task = self.TaskClass(config=self.config, log=self.log, butler=butler)
task.writeConfig(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)

result = task.create_indexed_catalog(files)
if self.doReturnResults:
return pipeBase.Struct(
result = result,
)


class IngestIndexedReferenceConfig(pexConfig.Config):
ref_dataset_name = pexConfig.Field(
dtype=str,
default='cal_ref_cat',
doc='String to pass to the butler to retrieve persisted files.',
)
level = pexConfig.Field(
dtype=int,
default=8,
doc='Default HTM level. Level 8 gives ~0.08 sq deg per trixel.',
)
file_reader = pexConfig.ConfigurableField(
target=TextReaderTask,
doc='Task to use to read the files. Default is to expect text files.'
)
ra_name = pexConfig.Field(
dtype=str,
doc="Name of RA column",
@@ -80,27 +147,6 @@ class IngestIndexedReferenceConfig(pexConfig.Config):
default=[],
doc='Extra columns to add to the reference catalog.'
)
header_lines = pexConfig.Field(
dtype=int,
default=0,
doc='Number of lines to skip when reading the text reference file.'
)
colnames = pexConfig.ListField(
dtype=str,
default=[],
doc="""An ordered list of column names to use in ingesting the catalog. With an empty
list, column names will be discovered from the first line after the skipped header lines."""
)
delimiter = pexConfig.Field(
dtype=str,
default=',',
doc='Delimiter to use when reading text reference files. Comma is default.'
)
level = pexConfig.Field(
dtype=int,
default=8,
doc='Default HTM level. Level 8 gives ~0.08 sq deg per trixel.',
)

def validate(self):
pexConfig.Config.validate(self)
@@ -111,28 +157,6 @@ def validate(self):
raise ValueError("If magnitude errors are provided, all magnitudes must have an error column")


class IngestReferenceRunner(pipeBase.TaskRunner):
"""!Task runner for the reference catalog ingester
"""
def run(self, parsedCmd):
"""!Run the task.
Several arguments need to be collected to send on to the task methods.
@param[in] parsedCmd Parsed command including command line arguments.
@param[out] Struct containing the result of the indexing.
"""
files = parsedCmd.files
butler = parsedCmd.butler
task = self.TaskClass(config=self.config, log=self.log, butler=butler)
task.writeConfig(parsedCmd.butler, clobber=self.clobberConfig, doBackup=self.doBackup)

result = task.create_indexed_catalog(files)
if self.doReturnResults:
return pipeBase.Struct(
result = result,
)


class IngestIndexedReferenceTask(pipeBase.CmdLineTask):
"""!Class for both producing indexed reference catalogs and for loading them.
@@ -141,6 +165,7 @@ class IngestIndexedReferenceTask(pipeBase.CmdLineTask):
shards. In this case each shard contains the entries from the catalog in a single
HTM trixel
"""
canMultiprocess = False
ConfigClass = IngestIndexedReferenceConfig
RunnerClass = IngestReferenceRunner
_DefaultName = 'IngestIndexedReferenceTask'
@@ -150,8 +175,10 @@ class IngestIndexedReferenceTask(pipeBase.CmdLineTask):
@classmethod
def _makeArgumentParser(cls):
"""Create an argument parser
This overrides the original because we need the file arguments
"""
parser = pipeBase.ArgumentParser(name=cls._DefaultName)
parser = pipeBase.InputOnlyArgumentParser(name=cls._DefaultName)
parser.add_argument("files", nargs="+", help="Names of files to index")
return parser

@@ -162,7 +189,8 @@ def __init__(self, *args, **kwargs):
"""
self.butler = kwargs.pop('butler')
pipeBase.Task.__init__(self, *args, **kwargs)
self.indexer = HtmIndexer(self.config.level)
self.indexer = Indexer(self.config.level)
self.makeSubtask('file_reader')

def create_indexed_catalog(self, files):
"""!Index a set of files comprising a reference catalog. Outputs are persisted in the
@@ -173,14 +201,7 @@ def create_indexed_catalog(self, files):
rec_num = 0
first = True
for filename in files:
names = True
if self.config.colnames:
names = self.config.colnames
arr = numpy.genfromtxt(filename, dtype=None, skip_header=self.config.header_lines,
delimiter=self.config.delimiter,
names=names)
# Just in case someone has only one line in the file.
arr = numpy.atleast1d(arr)
arr = self.file_reader.readFile(filename)
index_list = self.indexer.index_points(arr[self.config.ra_name], arr[self.config.dec_name])
if first:
schema, key_map = self.make_schema(arr.dtype)
@@ -325,39 +346,3 @@ def add_field(name):
for col in self.config.extra_col_names:
key_map[col] = add_field(col)
return schema, key_map


class HtmIndexer(object):
def __init__(self, depth=8):
"""!Construct the indexer object
@param[in] depth depth of the hierarchy to construct
"""
self.htm = esutil.htm.HTM(depth)
# HACK need to call intersect first otherwise it segfaults
self.htm.intersect(1., 2., 0.00001)

def get_pixel_ids(self, ctrCoord, radius):
"""!Get all shards that touch a circular aperture
@param[in] ctrCoord afwCoord.Coord object of the center of the aperture
@param[in] radius afwGeom.Angle object of the aperture radius
@param[out] A tuple of the list of shard ids, pixel_id_list, and an iterable of booleans,
is_on_boundary, indicating whether each shard touches the boundary (True) or is fully contained (False).
"""
pixel_id_list = self.htm.intersect(ctrCoord.getRa().asDegrees(), ctrCoord.getDec().asDegrees(),
radius.asDegrees(), inclusive=True)
covered_pixel_id_list = self.htm.intersect(ctrCoord.getRa().asDegrees(),
ctrCoord.getDec().asDegrees(),
radius.asDegrees(), inclusive=False)
is_on_boundary = (pixel_id not in covered_pixel_id_list for pixel_id in pixel_id_list)
return pixel_id_list, is_on_boundary

def index_points(self, ra_list, dec_list):
"""!Generate trixel ids for each row in an input file
@param[in] ra_list List of RA coordinates in degrees
@param[in] dec_list List of Dec coordinates in degrees
@param[out] A list of pixel ids
"""
return self.htm.lookup_id(ra_list, dec_list)
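
Because header_lines, colnames, and delimiter now belong to the file_reader subtask, overrides go through that subtask (as the test changes below show). Here is a hedged sketch of a --configfile using only fields visible in this diff; the column names are placeholders.

# ingest_config.py -- sketch of a config override file for IngestIndexedReferenceTask.
config.ref_dataset_name = "cal_ref_cat"
config.level = 8                                  # HTM depth handed to the indexer
config.ra_name = "ra"
config.dec_name = "dec"
config.id_name = "id"
config.extra_col_names = ["val1", "val2", "val3"]
config.file_reader.header_lines = 1               # skip one header line
config.file_reader.delimiter = "|"                # pipe-delimited text
config.file_reader.colnames = ["id", "ra", "dec", "val1", "val2", "val3"]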
8 changes: 4 additions & 4 deletions tests/testHtmIndex.py
@@ -34,7 +34,7 @@
import lsst.afw.geom as afwGeom
import lsst.afw.coord as afwCoord
import lsst.daf.persistence as dafPersist
from lsst.pipe.tasks.htmIndexEngine import IngestIndexedReferenceTask
from lsst.pipe.tasks.indexReferenceTask import IngestIndexedReferenceTask
from lsst.pipe.tasks.loadIndexedReferenceObjects import LoadIndexedReferenceObjectsTask

obs_test_dir = lsst.utils.getPackageDir('obs_test')
@@ -179,10 +179,10 @@ def testIngest(self):
default_config.is_variable_name = 'is_var'
default_config.id_name = 'id'
default_config.extra_col_names = ['val1', 'val2', 'val3']
default_config.header_lines = 1
default_config.colnames = ['id', 'ra', 'dec', 'a', 'a_err', 'b', 'b_err', 'is_phot', 'is_res',
default_config.file_reader.header_lines = 1
default_config.file_reader.colnames = ['id', 'ra', 'dec', 'a', 'a_err', 'b', 'b_err', 'is_phot', 'is_res',
'is_var', 'val1', 'val2', 'val3']
default_config.delimiter = '|'
default_config.file_reader.delimiter = '|'
# this also tests changing the delimiter
IngestIndexedReferenceTask.parseAndRun(
args=[input_dir, "--output", self.out_path+"/output_override",

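Reading the ingested shards back is not touched by this commit and goes through LoadIndexedReferenceObjectsTask, which the test imports. The sketch below is an assumption: the constructor and loadSkyCircle signature follow the standard reference-loader interface and are not shown in this diff; paths and the filter name are hypothetical.

import lsst.afw.coord as afwCoord
import lsst.afw.geom as afwGeom
import lsst.daf.persistence as dafPersist
from lsst.pipe.tasks.loadIndexedReferenceObjects import LoadIndexedReferenceObjectsTask

# Assumed interface, hedged: a butler pointing at the ingest output repository.
butler = dafPersist.Butler("/path/to/output_repo")
loader = LoadIndexedReferenceObjectsTask(butler=butler)
center = afwCoord.IcrsCoord(afwGeom.Angle(10.0, afwGeom.degrees),
                            afwGeom.Angle(-5.0, afwGeom.degrees))
result = loader.loadSkyCircle(center, afwGeom.Angle(0.2, afwGeom.degrees), filterName="a")
ref_cat = result.refCat  # afw table of reference sources inside the circle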