lsst · r-owen · Jun 28, 2016 · Jun 22, 2016 · Jun 22, 2016 · Jun 22, 2016
diff --git a/python/lsst/pipe/tasks/htmIndexer.py b/python/lsst/pipe/tasks/htmIndexer.py
@@ -19,6 +19,7 @@
 # the GNU General Public License along with this program.  If not,
 # see <https://www.lsstcorp.org/LegalNotices/>.
 #
+from __future__ import absolute_import, division, print_function
 import esutil
 
 

diff --git a/python/lsst/pipe/tasks/indexReferenceTask.py b/python/lsst/pipe/tasks/indexReferenceTask.py
@@ -19,49 +19,17 @@
 # the GNU General Public License along with this program.  If not,
 # see <https://www.lsstcorp.org/LegalNotices/>.
 #
+from __future__ import absolute_import, division, print_function
+import numpy as np
+
 import lsst.pex.config as pexConfig
 import lsst.pipe.base as pipeBase
 import lsst.afw.table as afwTable
 import lsst.afw.coord as afwCoord
 import lsst.afw.geom as afwGeom
 from lsst.afw.image import fluxFromABMag, fluxErrFromABMagErr
 from .htmIndexer import HtmIndexer as Indexer
-
-import numpy
-
-
-class TextReaderConfig(pexConfig.Config):
-    header_lines = pexConfig.Field(
-        dtype=int,
-        default=0,
-        doc='Number of lines to skip when reading the text reference file.'
-    )
-    colnames = pexConfig.ListField(
-        dtype=str,
-        default=[],
-        doc="""An ordered list of column names to use in ingesting the catalog.  With an empty
-list, column names will be discovered from the first line after the skipped header lines."""
-    )
-    delimiter = pexConfig.Field(
-        dtype=str,
-        default=',',
-        doc='Delimiter to use when reading text reference files.  Comma is default.'
-    )
-
-
-class TextReaderTask(pipeBase.Task):
-    _DefaultName = 'TextReaderTask'
-    ConfigClass = TextReaderConfig
-
-    def readFile(self, filename):
-            names = True
-            if self.config.colnames:
-                names = self.config.colnames
-            arr = numpy.genfromtxt(filename, dtype=None, skip_header=self.config.header_lines,
-                                   delimiter=self.config.delimiter,
-                                   names=names)
-            # Just in case someone has only one line in the file.
-            return numpy.atleast_1d(arr)
+from .readCatalog import ReadTextCatalogTask
 
 
 class IngestReferenceRunner(pipeBase.TaskRunner):
@@ -84,7 +52,7 @@ def run(self, parsedCmd):
         result = task.create_indexed_catalog(files)
         if self.doReturnResults:
             return pipeBase.Struct(
-                result = result,
+                result=result,
             )
 
 
@@ -100,7 +68,7 @@ class IngestIndexedReferenceConfig(pexConfig.Config):
         doc='Default HTM level.  Level 8 gives ~0.08 sq deg per trixel.',
     )
     file_reader = pexConfig.ConfigurableField(
-        target=TextReaderTask,
+        target=ReadTextCatalogTask,
         doc='Task to use to read the files.  Default is to expect text files.'
     )
     ra_name = pexConfig.Field(
@@ -201,7 +169,7 @@ def create_indexed_catalog(self, files):
         rec_num = 0
         first = True
         for filename in files:
-            arr = self.file_reader.readFile(filename)
+            arr = self.file_reader.run(filename)
             index_list = self.indexer.index_points(arr[self.config.ra_name], arr[self.config.dec_name])
             if first:
                 schema, key_map = self.make_schema(arr.dtype)
@@ -214,7 +182,7 @@ def create_indexed_catalog(self, files):
             for pixel_id in pixel_ids:
                 dataId = self.make_data_id(pixel_id)
                 catalog = self.get_catalog(dataId, schema)
-                els = numpy.where(index_list == pixel_id)
+                els = np.where(index_list == pixel_id)
                 for row in arr[els]:
                     record = catalog.addNew()
                     rec_num = self._fill_record(record, row, rec_num, key_map)
@@ -230,7 +198,7 @@ def make_data_id(pixel_id):
 
     @staticmethod
     def compute_coord(row, ra_name, dec_name):
-        """!Create a afwCoord object from a numpy.array row
+        """!Create a afwCoord object from a np.array row
         @param[in] row  dict like object with ra/dec info in degrees
         @param[in] ra_name  name of RA key
         @param[in] dec_name  name of Dec key
@@ -310,7 +278,7 @@ def get_catalog(self, dataId, schema):
     def make_schema(self, dtype):
         """!Make the schema to use in constructing the persisted catalogs.
 
-        @param[in] dtype  A numpy.dtype to use in constructing the schema
+        @param[in] dtype  A np.dtype to use in constructing the schema
         @param[out] The schema for the output source catalog.
         @param[out] A map of catalog keys to use in filling the record
         """

diff --git a/python/lsst/pipe/tasks/readCatalog.py b/python/lsst/pipe/tasks/readCatalog.py
@@ -0,0 +1,211 @@
+#
+# LSST Data Management System
+# Copyright 2016 AURA/LSST.
+#
+# This product includes software developed by the
+# LSST Project (http://www.lsst.org/).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the LSST License Statement and
+# the GNU General Public License along with this program.  If not,
+# see <https://www.lsstcorp.org/LegalNotices/>.
+#
+from __future__ import absolute_import, division, print_function
+import numpy as np
+
+from astropy.io import fits
+from astropy.table import Table
+
+import lsst.pex.config as pexConfig
+import lsst.pipe.base as pipeBase
+
+__all__ = ["ReadTextCatalogConfig", "ReadTextCatalogTask", "ReadFitsCatalogConfig", "ReadFitsCatalogTask"]
+
+
+class ReadTextCatalogConfig(pexConfig.Config):
+    header_lines = pexConfig.Field(
+        dtype=int,
+        default=0,
+        doc='Number of lines to skip when reading the text reference file.'
+    )
+    colnames = pexConfig.ListField(
+        dtype=str,
+        default=[],
+        doc="An ordered list of column names to use in ingesting the catalog. "
+            "With an empty list, column names will be discovered from the first line "
+            "after the skipped header lines."
+    )
+    delimiter = pexConfig.Field(
+        dtype=str,
+        default=',',
+        doc='Delimiter to use when reading text reference files.  Comma is default.'
+    )
+
+## \addtogroup LSST_task_documentation
+## \{
+## \page ReadTextCatalogTask
+## \ref ReadTextCatalogTask_ "ReadTextCatalogTask"
+## \copybrief ReadTextCatalogTask
+## \}
+
+
+class ReadTextCatalogTask(pipeBase.Task):
+    """!Read an object catalog from a text file
+
+    @anchor ReadTextCatalogTask_
+
+    @section pipe_tasks_readTextCatalog_Contents  Contents
+
+     - @ref pipe_tasks_readTextCatalog_Purpose
+     - @ref pipe_tasks_readTextCatalog_Initialize
+     - @ref pipe_tasks_readTextCatalog_Config
+     - @ref pipe_tasks_readTextCatalog_Example
+
+    @section pipe_tasks_readTextCatalog_Purpose  Description
+
+    Read an object catalog from a text file. Designed to read foreign catalogs
+    so they can be written out in a form suitable for IngestIndexedReferenceTask.
+
+    @section pipe_tasks_readTextCatalog_Initialize  Task initialisation
+
+    @copydoc \_\_init\_\_
+
+    @section pipe_tasks_readTextCatalog_Config  Configuration parameters
+
+    See @ref ReadTextCatalogConfig
+
+    @section pipe_tasks_readTextCatalog_Example   A complete example of using ReadTextCatalogTask
+
+    Given a file named `table.csv` containing the following:
+
+        ra      dec     flux
+        5.5,    -45.2,  12453
+        19.6,   34.2,   32123
+
+    you can read this file with the following code:
+
+        from lsst.pipe.tasks.readCatalog import ReadTextCatalogTask
+        task = ReadTextCatalogTask()
+        catalogArray = task.run("table.csv")
+
+    The resulting `catalogArray` is a numpy structured array containing three fields
+    ("ra", "dec" and "flux") and two rows of data. For more complex cases,
+    config parameters allow you to specify the names of the columns (instead of using automatic discovery)
+    and set the number of rows to skip.
+    """
+    _DefaultName = 'readCatalog'
+    ConfigClass = ReadTextCatalogConfig
+
+    def run(self, filename):
+        """Read an object catalog from the specified text file
+
+        @param[in] filename  path to text file
+        @return a numpy structured array containing the specified columns
+        """
+        names = True
+        if self.config.colnames:
+            names = self.config.colnames
+        arr = np.genfromtxt(filename, dtype=None, skip_header=self.config.header_lines,
+                            delimiter=self.config.delimiter,
+                            names=names)
+        # Just in case someone has only one line in the file.
+        return np.atleast_1d(arr)
+
+
+class ReadFitsCatalogConfig(pexConfig.Config):
+    hdu = pexConfig.Field(
+        dtype=int,
+        default=1,
+        doc="HDU containing the desired binary table, 0-based but a binary table never occurs in HDU 0",
+    )
+    column_map = pexConfig.DictField(
+        doc="Mapping of input column name: output column name; each specified column must exist, "
+            "but additional columns in the input data are written using their original name. ",
+        keytype=str,
+        itemtype=str,
+        default={},
+    )
+
+## \addtogroup LSST_task_documentation
+## \{
+## \page ReadFitsCatalogTask
+## \ref ReadFitsCatalogTask_ "ReadFitsCatalogTask"
+## \copybrief ReadFitsCatalogTask
+## \}
+
+
+class ReadFitsCatalogTask(pipeBase.Task):
+    """!Read an object catalog from a FITS table
+
+    @anchor ReadFitsCatalogTask_
+
+    @section pipe_tasks_readFitsCatalog_Contents  Contents
+
+     - @ref pipe_tasks_readFitsCatalog_Purpose
+     - @ref pipe_tasks_readFitsCatalog_Initialize
+     - @ref pipe_tasks_readFitsCatalog_Config
+     - @ref pipe_tasks_readFitsCatalog_Example
+
+    @section pipe_tasks_readFitsCatalog_Purpose  Description
+
+    Read an object catalog from a FITS table. Designed to read foreign catalogs
+    so they can be written out in a form suitable for IngestIndexedReferenceTask.
+
+    @section pipe_tasks_readFitsCatalog_Initialize  Task initialisation
+
+    @copydoc \_\_init\_\_
+
+    @section pipe_tasks_readFitsCatalog_Config  Configuration parameters
+
+    See @ref ReadFitsCatalogConfig
+
+    @section pipe_tasks_readFitsCatalog_Example   A complete example of using ReadFitsCatalogTask
+
+    Run the following code from the main directory of pipe_tasks:
+
+        from lsst.pipe.tasks.readCatalog import ReadFitsCatalogTask
+        filePath = "tests/data/testReadFitsCatalog.fits"
+        task = ReadFitsCatalogTask()
+        catalogArray = task.run(filePath)
+
+    The resulting `catalogArray` is a numpy structured array containing fields such as "name", "ra" and "dec"
+    and a few rows of data. For more complicated cases config parameters allow you to rename columns
+    and choose which HDU to read.
+    """
+    _DefaultName = 'readCatalog'
+    ConfigClass = ReadFitsCatalogConfig
+
+    def run(self, filename):
+        """Read an object catalog from the specified FITS file
+
+        @param[in] filename  path to FITS file
+        @return a numpy structured array containing the specified columns
+        """
+        with fits.open(filename) as f:
+            hdu = f[self.config.hdu]
+            if hdu.data is None:
+                raise RuntimeError("No data found in %s HDU %s" % (filename, self.config.hdu))
+            if hdu.is_image:
+                raise RuntimeError("%s HDU %s is an image" % (filename, self.config.hdu))
+
+            if not self.config.column_map:
+                # take the data as it is
+                return hdu.data
+
+            # some columns need to be renamed; use astropy table
+            table = Table(hdu.data, copy=False)
+            missingnames = set(self.config.column_map.keys()) - set(table.colnames)
+            if missingnames:
+                raise RuntimeError("Columns %s in column_map were not found in %s" % (missingnames, filename))
+            for inname, outname in self.config.column_map.iteritems():
+                table.rename_column(inname, outname)
+            return np.array(table)  # convert the astropy table back to a numpy structured array
diff --git a/tests/config.py b/tests/config.py
@@ -1,31 +1,33 @@
 #!/usr/bin/env python
-# 
+#
 # LSST Data Management System
 # Copyright 2008, 2009, 2010 LSST Corporation.
-# 
+#
 # This product includes software developed by the
 # LSST Project (http://www.lsst.org/).
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
-# 
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
-# 
-# You should have received a copy of the LSST License Statement and 
-# the GNU General Public License along with this program.  If not, 
+#
+# You should have received a copy of the LSST License Statement and
+# the GNU General Public License along with this program.  If not,
 # see <http://www.lsstcorp.org/LegalNotices/>.
 #
 import unittest
 
 import lsst.utils.tests as utilsTests
 from lsst.pipe.tasks.calibrate import CalibrateConfig
 
+
 class CalibrateTestCase(unittest.TestCase):
+
     def testGood(self):
         conf = CalibrateConfig()
         conf.validate()
@@ -39,13 +41,15 @@ def testBad(self):
         with self.assertRaises(Exception):
             conf.invalidField = True
 
+
 def suite():
     utilsTests.init()
     suites = []
     suites += unittest.makeSuite(CalibrateTestCase)
     suites += unittest.makeSuite(utilsTests.MemoryTestCase)
     return unittest.TestSuite(suites)
 
+
 def run():
     """Run the tests"""
     utilsTests.run(suite())

diff --git a/tests/data/testReadFitsCatalog.fits b/tests/data/testReadFitsCatalog.fits
diff --git a/tests/data/testReadTextCatalog.csv b/tests/data/testReadTextCatalog.csv
@@ -0,0 +1,3 @@
+name, ra, dec, counts, flux, resolved
+object 1, -5.0, 10.0, 1000, 1.1, 1
+object 2, 45.0, 5.0, 2000, 1.2, 0