Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-11041: Implement simple DIAObject to DIASource matching algorithm and flesh out DIACollection #3

Merged
merged 5 commits into from
Sep 7, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions python/lsst/ap/association/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,6 @@
from __future__ import absolute_import, division, print_function

from .version import *
from .association import *
from .dia_object import *
from .dia_collection import *
50 changes: 50 additions & 0 deletions python/lsst/ap/association/association.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#
# LSST Data Management System
# Copyright 2017 LSST/AURA.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like you truncated the start of the license here?

# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <http://www.lsstcorp.org/LegalNotices/>.
#

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

File docstring to say what this is supposed to do?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For my future reference this is something that should be included in every file yes?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, module-level docstrings should always exist, even if they are just a sentence:

https://developer.lsst.io/docs/py_docs.html#modules

""" A simple implementation of source association task for ap_verify.
"""

from __future__ import absolute_import, division, print_function

import lsst.pex.config as pexConfig
import lsst.pipe.base as pipeBase

__all__ = ["AssociationConfig", "AssociationTask"]

# TODO:
# To be finished in DM-11747


class AssociationConfig(pexConfig.Config):
pass


class AssociationTask(pipeBase.Task):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needs a class docstring to describe what the class is for. (is this just a stub?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now it is just a stub. The ticket to complete the class is DM-11747.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Put that that ticket as a TODO in the class docstring then.


ConfigClass = AssociationConfig
_DefaultName = "associate_sources"

def __init__(self, **kwargs):
pipeBase.Task.__init__(self, **kwargs)

def associate_sources(self):
pass
157 changes: 132 additions & 25 deletions python/lsst/ap/association/dia_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,19 @@
# see <http://www.lsstcorp.org/LegalNotices/>.
#

""" Define collections of DIAObjects and how to associate them with
DIASources.
"""

from __future__ import absolute_import, division, print_function

from scipy.spatial import cKDtree
import numpy as np
from scipy.spatial import cKDTree

import lsst.afw.table as afwTable
import lsst.pipe.base as pipeBase

import .dia_object
from .dia_object import DIAObject


class DIAObjectCollection(object):
Expand Down Expand Up @@ -65,7 +73,7 @@ def __init__(self, dia_objects):
self.dia_objects = dia_objects
self._id_to_index = {}
for idx, dia_object in enumerate(self.dia_objects):
self._id_to_index[dia_object.getId()] = idx
self._id_to_index[dia_object.id] = idx
self._is_updated = False
self._is_valid_tree = False
self.update_dia_objects()
Expand All @@ -82,21 +90,37 @@ def get_dia_object(self, id):
----------
id : int
id of the DIAObject to retrive


Return
------
A DIAObject
"""
return self.dia_objects[self._id_to_index[id]]

def get_dia_object_ids(self):
""" Retrive the ids of the DIAObjects stored in this collection.

Parameters
----------
id : int
id of the DIAObject to retrive

Return
------
A DIAObject
"""
return self.dia_objects[self._id_to_index[key]]
return self._id_to_index.keys()

def update_dia_objects(self, force=False):
""" Update the summary statistics of all DIAObjects in this
collection.

Loop through the DIAObjects that make up this DIAObjectCollection and
update them as needed. Optional `force` variable forces the DIAObjects
within the collelection to be updated regardless of their `is_updated`
state.
within the collelection to be updated regardless of their `is_updated
state. We set the the variable _is_updated to True after this is run
to assert that this method has been run and all summary statistics
in the DIAObejcts are valid for their current associated DIASources.

Parameters
----------
Expand All @@ -106,7 +130,8 @@ def update_dia_objects(self, force=False):

Returns
-------
None
bool
Successfully updated
"""
self._is_updated = False

Expand All @@ -115,24 +140,34 @@ def update_dia_objects(self, force=False):
dia_object.update()
self._is_valid_tree = False

self._is_updated
self._is_updated = True

return None
return self._is_updated
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As written, this will always return True, since you make _is_updated True immediately above. Is that the intention?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it is. I want this variable to reflect the whether this method was run or not.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see. A comment to that effect would be good.


def update_spatial_tree(self):
""" Update the internal searchable spatial tree on the DIAObjects.

Returns
-------
None
bool
Successfully updated
"""
self._is_valid_tree = False

# Create searchable spatial tree.
if not self._is_updated:
return self._is_valid_tree

xyzs = np.empty((len(self.dia_objects), 3))
for obj_idx in range(len(self.dia_objects)):
tmp_coord = self.dia_objects[obj_idx].dia_object_record.getCoord()
tmp_vect = tmp_coord.getVector()
xyzs[obj_idx, 0] = tmp_vect[0]
xyzs[obj_idx, 1] = tmp_vect[1]
xyzs[obj_idx, 2] = tmp_vect[2]
self._spatial_tree = cKDTree(xyzs)

self._is_valid_tree = True

return None
return self._is_valid_tree

def append(self, dia_object):
""" Add a new DIAObject to this collection.
Expand All @@ -150,12 +185,12 @@ def append(self, dia_object):
self._is_updated = False
self._is_valid_tree = False

self._id_to_index[dia_object.getId()] = len(self.dia_objects)
self._id_to_index[dia_object.id] = len(self.dia_objects)
self.dia_objects.append(dia_object)

return None

def score(self, dia_source_catalog, max_dist_arcsec):
def score(self, dia_source_catalog, max_dist):
""" Compute a quality score for each dia_source/dia_object pair
between this collection and an input diat_source catalog.

Expand All @@ -168,18 +203,38 @@ def score(self, dia_source_catalog, max_dist_arcsec):
dia_source_catalog : an lsst.afw.SourceCatalog
A contiguous catalog of dia_sources to "score" based on distance
and (in the future) other metrics.
max_dist_arcsec : float
max_dist : lsst.afw.geom.Angle
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yay units! 👍

Maximum allowed distance to compute a score for a given DIAObject
DIASource pair.

Returns
-------
(N DIAObject by M DIASource) float array of between DIAObjects and
DIASources.
lsst.pipe.base.Struct
struct containing:
* scores: array of floats of match quality
* indices: index in DIAObjectCollection that source matched to
Default values for these arrays are NaN and the number of
DIAObjects in this collection, respectively.
"""
pass
if not self._is_valid_tree:
self.update_spatial_tree()

scores = np.ones(len(dia_source_catalog)) * np.inf
obj_indices = np.ones(len(dia_source_catalog), dtype=np.int) * \
len(self.dia_objects)
for src_idx, dia_source in enumerate(dia_source_catalog):

def match(self, dia_source_catalog, scores):
src_point = dia_source.getCoord().getVector()
dist, obj_idx = self._spatial_tree.query(src_point)
if dist < max_dist.asRadians():
scores[src_idx] = dist
obj_indices[src_idx] = obj_idx

return pipeBase.Struct(
scores=scores,
indices=obj_indices)

def match(self, dia_source_catalog, score_struct):
""" Append DIAsources to DIAObjects given a score and create new
DIAObjects in this collection from DIASources with poor scores.

Expand All @@ -188,15 +243,67 @@ def match(self, dia_source_catalog, scores):
dia_source_catalog : an lsst.afw.SourceCatalog
A contiguous catalog of dia_sources for which the set of scores
has been computed on with DIAObjectCollection.score.
scores : (N DIAObject by M DIASource) float array
A ndarray containing the scores between each DIAObject and
each DIASource.
score_struct : lsst.pipe.base.Struct
struct containing:
* scores: array of floats of match quality
* indices: index in DIAObjectCollection that source matched to
Default values for these arrays are NaN and the number of
DIAObjects in this collection, respectively.

Returns
-------
Indices of newly updated and created DIAObjects
"""
pass

used_dia_object = np.zeros(len(self.dia_objects), dtype=np.bool)
used_dia_source = np.zeros(len(dia_source_catalog), dtype=np.bool)

n_previous_objects = len(self.dia_objects)

updated_and_new_dia_objects = []

# We sort from best match to worst to effectively perform a
# "handshake" match where both the DIASources and DIAObjects agree
# their the best match. By sorting this way, scores with NaN (those
# sources that have no match and will create new DIAObjects) will be
# placed at the end of the array.
score_args = score_struct.scores.argsort(axis=None)
for score_idx in score_args:
if not np.isfinite(score_struct.scores[score_idx]):
# Thanks to the sorting the rest of the sources will be
# NaN for their score. We therefore exit the loop to append
# sources to a existing DIAObject, leaving these for
# the loop creating new objects.
break
if used_dia_object[score_idx]:
continue
used_dia_object[score_struct.indices[score_idx]] = True
used_dia_source[score_idx] = True
updated_and_new_dia_objects.append(
score_struct.indices[score_idx])

dia_obj_idx = score_struct.indices[score_idx]
self.dia_objects[dia_obj_idx].append_dia_source(
dia_source_catalog[score_idx])

n_new_objects = 0
# Argwhere returns a array shape (N, 1) so we access the index
# thusly to retreve the value rather than the tuple.
for (src_idx,) in np.argwhere(np.logical_not(used_dia_source)):
tmp_src_cat = afwTable.SourceCatalog(dia_source_catalog.schema)
tmp_src_cat.append(dia_source_catalog[src_idx])
self.append(DIAObject(tmp_src_cat))
n_new_objects += 1

# Concatenate the indices of the DIAObjects that were matched with
# those that were appended. This produces a single array of the
# indices of DIAObjects in this collection that were updated or
# newly created in this matching process.
return np.concatenate(
[updated_and_new_dia_objects,
np.arange(n_previous_objects,
n_previous_objects + n_new_objects,
dtype=np.int)])

@property
def is_updated(self):
Expand Down
8 changes: 4 additions & 4 deletions python/lsst/ap/association/dia_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
# see <http://www.lsstcorp.org/LegalNotices/>.
#

from __future__ import absolute_import, division, print_function
""" Definitions for DIAObject and a minimal schema for them.
"""

import numpy as np
from __future__ import absolute_import, division, print_function

import lsst.afw.table as afwTable
import lsst.afw.geom as afwGeom
from lsst.afw.coord import averageCoord

__all__ = ["DIAObject", "make_minimal_dia_object_schema"]
Expand Down Expand Up @@ -189,7 +189,7 @@ def get_light_curve(self):
# Right now I'm making this the same as returning the
# dia_source_catalog.

raise NotImplimentedError(
raise NotImplementedError(
"Light curves not yet implimented. Use dia_source_catalog property"
"instead.")

Expand Down
8 changes: 8 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[flake8]
max-line-length = 110
ignore = E133, E226, E228, N802, N803, N806
exclude = __init__.py

[tool:pytest]
addopts = --flake8
flake8-ignore = E133 E226 E228 N802 N803 N806
2 changes: 1 addition & 1 deletion tests/SConscript
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# -*- python -*-
from lsst.sconsUtils import scripts
scripts.BasicSConscript.tests()
scripts.BasicSConscript.tests(pyList=[])