Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-13477: Move association math from DIAObjectCollection into AssociationTask #11

Merged
merged 2 commits into from
Feb 27, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
159 changes: 156 additions & 3 deletions python/lsst/ap/association/association.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,14 @@

from __future__ import absolute_import, division, print_function

import numpy as np

import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase
import lsst.afw.geom as afwGeom
import lsst.afw.table as afwTable
from .assoc_db_sqlite import AssociationDBSqliteTask
from .dia_object import DIAObject

__all__ = ["AssociationConfig", "AssociationTask"]

Expand Down Expand Up @@ -117,10 +121,21 @@ def associate_sources(self, dia_collection, dia_sources):
DIASources into.
dia_sources : lsst.afw.table.SourceCatalog
DIASources to associate into the DIAObjectCollection.

Returns
-------
lsst.pipe.base.Struct
struct containing:
* dia_collection: A DIAObjectCollectoin containing the new and
updated DIAObjects.
* updated_ids: id of the DIAObject in this DIAObjectCollection that
the given source matched.
"""
scores = dia_collection.score(
dia_sources, self.config.maxDistArcSeconds * afwGeom.arcseconds)
match_result = dia_collection.match(dia_sources, scores)

scores = self.score(
dia_collection, dia_sources,
self.config.maxDistArcSeconds * afwGeom.arcseconds)
match_result = self.match(dia_collection, dia_sources, scores)

self._add_association_meta_data(match_result)

Expand All @@ -129,6 +144,144 @@ def associate_sources(self, dia_collection, dia_sources):
updated_ids=match_result.updated_and_new_dia_object_ids,
)

def score(self, dia_collection, dia_source_catalog, max_dist):
""" Compute a quality score for each dia_source/dia_object pair
between this collection and an input diat_source catalog.

max_dist sets maximum separation in arcseconds to consider a
dia_source a possible match to a dia_object. If the pair is
beyond this distance no score is computed.

Parameters
----------
dia_object_collection : an lsst.ap.association.DIAObjectCollection
A DIAObjectCollection to score against dia_sources.
dia_source_catalog : an lsst.afw.SourceCatalog
A contiguous catalog of dia_sources to "score" based on distance
and (in the future) other metrics.
max_dist : lsst.afw.geom.Angle
Maximum allowed distance to compute a score for a given DIAObject
DIASource pair.

Returns
-------
lsst.pipe.base.Struct
struct containing:
* scores: array of floats of match quality
* obj_ids: id of the DIAObject in thisDIAObjectCollection that
the given source matched.
Default values for these arrays are
INF and -1 respectively for unassociated sources.
"""
if not dia_collection._is_valid_tree:
dia_collection.update_spatial_tree()

scores = np.ones(len(dia_source_catalog)) * np.inf
obj_ids = -1 * np.ones(len(dia_source_catalog), dtype=np.int)

if len(dia_collection.dia_objects) == 0:
return pipeBase.Struct(
scores=scores,
obj_ids=obj_ids)

for src_idx, dia_source in enumerate(dia_source_catalog):

src_point = dia_source.getCoord().getVector()
dist, obj_idx = dia_collection._spatial_tree.query(src_point)
if dist < max_dist.asRadians():
scores[src_idx] = dist
obj_ids[src_idx] = dia_collection.dia_objects[obj_idx].id

return pipeBase.Struct(
scores=scores,
obj_ids=obj_ids)

def match(self, dia_collection, dia_source_catalog, score_struct):
""" Append DIAsources to DIAObjects given a score and create new
DIAObjects in this collection from DIASources with poor scores.

Parameters
----------
dia_object_collection : an lsst.ap.association.DIAObjectCollection
A DIAObjectCollection to associate to dia_sources.
dia_source_catalog : an lsst.afw.SourceCatalog
A contiguous catalog of dia_sources for which the set of scores
has been computed on with DIAObjectCollection.score.
score_struct : lsst.pipe.base.Struct
struct containing:
* scores: array of floats of match quality
* obj_ids: id of the DIAObject in thisDIAObjectCollection that
the given source matched.
Default values for these arrays are
INF and -1 respectively for unassociated sources.

Returns
-------
pipeBase.Struct
A struct containing the following data:
* updated_and_new_dia_object_ids : list of ints specifying the ids
new and updated dia_objects in the collection.
* n_updated_dia_objects : number of previously know dia_objects with
newly associated DIASources.
* n_new_dia_objects : Number of newly created DIAObjects from
unassociated DIASources
* n_unupdated_dia_objects : number of previous DIAObjects that were
not associated to a new DIASource.
"""

n_previous_dia_objects = len(dia_collection.dia_objects)
used_dia_object = np.zeros(n_previous_dia_objects, dtype=np.bool)
used_dia_source = np.zeros(len(dia_source_catalog), dtype=np.bool)

updated_dia_objects = []
new_dia_objects = []

# We sort from best match to worst to effectively perform a
# "handshake" match where both the DIASources and DIAObjects agree
# their the best match. By sorting this way, scores with NaN (those
# sources that have no match and will create new DIAObjects) will be
# placed at the end of the array.
score_args = score_struct.scores.argsort(axis=None)
for score_idx in score_args:
if not np.isfinite(score_struct.scores[score_idx]):
# Thanks to the sorting the rest of the sources will be
# NaN for their score. We therefore exit the loop to append
# sources to a existing DIAObject, leaving these for
# the loop creating new objects.
break
dia_obj_idx = dia_collection._id_to_index[
score_struct.obj_ids[score_idx]]
if used_dia_object[dia_obj_idx]:
continue
used_dia_object[dia_obj_idx] = True
used_dia_source[score_idx] = True
updated_obj_id = score_struct.obj_ids[score_idx]
updated_dia_objects.append(updated_obj_id)

dia_collection.dia_objects[dia_obj_idx].append_dia_source(
dia_source_catalog[int(score_idx)])

# Argwhere returns a array shape (N, 1) so we access the index
# thusly to retrieve the value rather than the tuple.
for (src_idx,) in np.argwhere(np.logical_not(used_dia_source)):
tmp_src_cat = afwTable.SourceCatalog(dia_source_catalog.schema)
tmp_src_cat.append(dia_source_catalog[int(src_idx)])
dia_collection.append(DIAObject(tmp_src_cat))
new_dia_objects.append(
dia_collection.dia_objects[-1].id)

# Return the ids of the DIAObjects in this DIAObjectCollection that
# were updated or newly created.
n_updated_dia_objects = len(updated_dia_objects)
n_unassociated_dia_objects = \
n_previous_dia_objects - n_updated_dia_objects
updated_dia_objects.extend(new_dia_objects)
return pipeBase.Struct(
updated_and_new_dia_object_ids=updated_dia_objects,
n_updated_dia_objects=n_updated_dia_objects,
n_new_dia_objects=len(new_dia_objects),
n_unassociated_dia_objects=n_unassociated_dia_objects,)

def _add_association_meta_data(self, match_result):
""" Store summaries of the association step in the task metadata.

Expand Down
2 changes: 1 addition & 1 deletion tests/test_association_db_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def setUp(self):
self.metadata.setDouble("CD2_2", -5.10281493481982E-05)
self.metadata.setDouble("CD2_1", -8.27440751733828E-07)

self.wcs = afwImage.makeWcs(self.metadata)
self.wcs = afwGeom.makeSkyWcs(self.metadata)
self.exposure = afwImage.makeExposure(
afwImage.makeMaskedImageFromArrays(np.ones((1024, 1153))),
self.wcs)
Expand Down
62 changes: 61 additions & 1 deletion tests/test_association_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def setUp(self):
self.metadata.setDouble("CD2_2", -5.10281493481982E-05)
self.metadata.setDouble("CD2_1", -8.27440751733828E-07)

self.wcs = afwImage.makeWcs(self.metadata)
self.wcs = afwGeom.makeSkyWcs(self.metadata)
self.exposure = afwImage.makeExposure(
afwImage.makeMaskedImageFromArrays(np.ones((1024, 1153))),
self.wcs)
Expand Down Expand Up @@ -344,6 +344,66 @@ def test_associate_sources(self):
obj_idx - 1 + n_objects * n_sources_per_object)
self.assertEqual(output_dia_object.id, obj_idx + 5 + 4)

def test_score_and_match(self):
""" Test association between a set of sources and an existing
DIAObjectCollection.

This also tests that a DIASource that can't be associated within
tolerance is appended to the DIAObjectCollection as a new
DIAObject.
"""

assoc_task = AssociationTask()
# Create a set of DIAObjects that contain only one DIASource
n_objects = 5
n_sources_per_object = 1
dia_objects = create_test_dia_objects(
n_objects=n_objects,
n_sources=n_sources_per_object,
object_centers_degrees=[[0.04 * obj_idx, 0.04 * obj_idx]
for obj_idx in range(5)],
scatter_arcsec=-1)
dia_collection = DIAObjectCollection(dia_objects)

dia_sources = create_test_dia_sources(
n_sources=5,
start_id=5,
source_locs_deg=[
[0.04 * (src_idx + 1),
0.04 * (src_idx + 1)]
for src_idx in range(5)],
scatter_arcsec=-1)

score_struct = assoc_task.score(dia_collection,
dia_sources,
1.0 * afwGeom.arcseconds)
self.assertFalse(np.isfinite(score_struct.scores[-1]))
for src_idx in range(4):
# Our scores should be extremely close to 0 but not exactly so due
# to machine noise.
self.assertAlmostEqual(score_struct.scores[src_idx], 0.0,
places=16)

# After matching each DIAObject should now contain 2 DIASources
# except the last DIAObject in this collection which should be
# newly created during the matching step and contain only one
# DIASource.
match_result = dia_collection.match(dia_sources, score_struct)
updated_ids = match_result.updated_and_new_dia_object_ids
self.assertEqual(len(dia_collection.dia_objects), 6)
self.assertEqual(match_result.n_updated_dia_objects, 4)
self.assertEqual(match_result.n_new_dia_objects, 1)
self.assertEqual(match_result.n_unassociated_dia_objects, 1)

# We created a new DIAObject in the collection hence the last
# DIAObject in this collection is new and contains only one
# DIASource. All others contain two.
self.assertEqual(
dia_collection.get_dia_object(updated_ids[-1]).n_dia_sources, 1)
for obj_id in updated_ids[0:-1]:
self.assertEqual(
dia_collection.get_dia_object(obj_id).n_dia_sources, 2)


class MemoryTester(lsst.utils.tests.MemoryTestCase):
pass
Expand Down
60 changes: 0 additions & 60 deletions tests/test_dia_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,66 +142,6 @@ def test_append_and_update(self):

self.assertEqual(obj_collection.get_dia_object_ids(), [0, 1])

def test_score_and_match(self):
""" Test association between a set of sources and an existing
DIAObjectCollection.

This also tests that a DIASource that can't be associated within
tolerance is appended to the DIAObjectCollection as a new
DIAObject.
"""
# Create a set of DIAObjects that contain only one DIASource
obj_collection = DIAObjectCollection(
create_test_dia_objects(n_objects=4,
n_sources=1,
start_id=0,
start_angle_degrees=0.0,
scatter_arcsec=-1.))
# We create a set of sources that should associate to each of
# our current DIAObjects in the collection. We also create
# an extra DIASource that does not associate to any of the current
# DIAObjects to test the creation of a new DIAObject for this
# DIASource.
src_cat = create_test_dia_sources(5)
for src_idx, src in enumerate(src_cat):
edit_and_offset_source_record(
src,
src_idx + 4,
0.1 * src_idx,
0.1 * src_idx,
-1)
score_struct = obj_collection.score(
src_cat, 1.0 * afwGeom.arcseconds)

self.assertFalse(np.isfinite(score_struct.scores[-1]))
for src_idx in range(4):
# Our scores should be extremely close to 0 but not exactly so due
# to machine noise.
self.assertAlmostEqual(score_struct.scores[src_idx], 0.0,
places=16)

# After matching each DIAObject should now contain 2 DIASources
# except the last DIAObject in this collection which should be
# newly created during the matching step and contain only one
# DIASource.
match_result = obj_collection.match(src_cat, score_struct)
updated_ids = match_result.updated_and_new_dia_object_ids
self.assertEqual(len(obj_collection.dia_objects), 5)
self.assertEqual(match_result.n_updated_dia_objects, 4)
self.assertEqual(match_result.n_new_dia_objects, 1)
self.assertEqual(match_result.n_unassociated_dia_objects, 0)

for updated_idx, obj_id in enumerate(updated_ids):
if updated_idx == len(updated_ids) - 1:
# We created a new DIAObject in the collection hence the last
# DIAObject in this collection is new and contains only one
# DIASource.
self.assertEqual(
obj_collection.get_dia_object(obj_id).n_dia_sources, 1)
else:
self.assertEqual(
obj_collection.get_dia_object(obj_id).n_dia_sources, 2)

def test_empty_dia_collection(self):
""" Test the creation and appending to a empty DIAObjectCollection.
"""
Expand Down