/
repeatability.py
316 lines (268 loc) · 12 KB
/
repeatability.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
# LSST Data Management System
# Copyright 2008-2019 AURA/LSST.
#
# This product includes software developed by the
# LSST Project (http://www.lsst.org/).
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the LSST License Statement and
# the GNU General Public License along with this program. If not,
# see <https://www.lsstcorp.org/LegalNotices/>.
import astropy.units as u
import math
import numpy as np
import random
from scipy.stats import norm
import lsst.pipe.base as pipeBase
from lsst.verify import Measurement, Datum
thousandDivSqrtTwo = 1000/math.sqrt(2)
def measurePhotRepeat(metric, filterName, *args, **kwargs):
"""Measurement of a photometric repeatability metric across a set of
observations.
Parameters
----------
metric : `lsst.verify.Metric`
A Metric to construct a Measurement for.
filterName : `str`
Name of filter used for all observations.
*args
Additional arguments to pass to `calcPhotRepeat`.
**kwargs
Additional keyword arguments to pass to `calcPhotRepeat`.
Returns
-------
measurement : `lsst.verify.Measurement`
Measurement of the repeatability and its associated metadata.
See also
--------
calcPhotRepeat: Computes statistics of magnitudes differences of sources across
multiple visits. This is the main computation function behind
repeatability measurement.
"""
results = calcPhotRepeat(*args, **kwargs)
datums = {}
datums['filter_name'] = Datum(filterName, label='filter',
description='Name of filter for this measurement')
datums['rms'] = Datum(results['rms'], label='RMS',
description='Photometric repeatability RMS of stellar pairs for '
'each random sampling')
datums['iqr'] = Datum(results['iqr'], label='IQR',
description='Photometric repeatability IQR of stellar pairs for '
'each random sample')
datums['magDiff'] = Datum(results['magDiff'], label='Delta mag',
description='Photometric repeatability differences magnitudes for '
'stellar pairs for each random sample')
datums['magMean'] = Datum(results['magMean'], label='mag',
description='Mean magnitude of pairs of stellar sources matched '
'across visits, for each random sample.')
return Measurement(metric, results['repeatability'], extras=datums)
def calcPhotRepeat(matches, magKey, numRandomShuffles=50):
"""Calculate the photometric repeatability of measurements across a set
of randomly selected pairs of visits.
Parameters
----------
matches : `lsst.afw.table.GroupView`
`~lsst.afw.table.GroupView` of sources matched between visits,
from MultiMatch, provided by
`lsst.validate.drp.matchreduce.build_matched_dataset`.
magKey : `lsst.afw.table` schema key
Magnitude column key in the ``groupView``.
E.g., ``magKey = allMatches.schema.find("slot_ModelFlux_mag").key``
where ``allMatches`` is the result of
`lsst.afw.table.MultiMatch.finish()`.
numRandomShuffles : int
Number of times to draw random pairs from the different observations.
Returns
-------
statistics : `dict`
Statistics to compute model_phot_rep. Fields are:
- ``model_phot_rep``: scalar `~astropy.unit.Quantity` of mean ``iqr``.
This is formally the model_phot_rep metric measurement.
- ``rms``: `~astropy.unit.Quantity` array in mmag of photometric
repeatability RMS across ``numRandomShuffles``.
Shape: ``(nRandomSamples,)``.
- ``iqr``: `~astropy.unit.Quantity` array in mmag of inter-quartile
range of photometric repeatability distribution.
Shape: ``(nRandomSamples,)``.
- ``magDiff``: `~astropy.unit.Quantity` array of magnitude differences
between pairs of sources. Shape: ``(nRandomSamples, nMatches)``.
- ``magMean``: `~astropy.unit.Quantity` array of mean magnitudes of
each pair of sources. Shape: ``(nRandomSamples, nMatches)``.
Notes
-----
We calculate differences for ``numRandomShuffles`` different random
realizations of the measurement pairs, to provide some estimate of the
uncertainty on our RMS estimates due to the random shuffling. This
estimate could be stated and calculated from a more formally derived
motivation but in practice 50 should be sufficient.
The LSST Science Requirements Document (LPM-17), or SRD, characterizes the
photometric repeatability by putting a requirement on the median RMS of
measurements of non-variable bright stars. This quantity is PA1, with a
design, minimum, and stretch goals of (5, 8, 3) millimag following LPM-17
as of 2011-07-06, available at http://ls.st/LPM-17. model_phot_rep is a
similar quantity measured for extended sources (almost entirely galaxies),
for which no requirement currently exists in the SRD.
This present routine calculates this quantity in two different ways:
1. RMS
2. interquartile range (IQR)
**The repeatability scalar measurement is the median of the IQR.**
This function also returns additional quantities of interest:
- the pair differences of observations of sources,
- the mean magnitude of each source
Examples
--------
Normally ``calcPhotRepeat`` is called by `measurePhotRepeat`, using
data from `lsst.validate.drp.matchreduce.build_matched_dataset`. Here's an
example of how to call ``calcPhotRepeat`` directly given the Butler output
repository generated by examples/runHscQuickTest.sh:
>>> import lsst.daf.persistence as dafPersist
>>> from lsst.afw.table import SourceCatalog, SchemaMapper, Field
>>> from lsst.afw.table import MultiMatch, SourceRecord, GroupView
>>> from lsst.validate.drp.repeatability import calcPhotRepeat
>>> from lsst.validate.drp.util import discoverDataIds
>>> import numpy as np
>>> repo = 'HscQuick/output'
>>> butler = dafPersist.Butler(repo)
>>> dataset = 'src'
>>> schema = butler.get(dataset + '_schema', immediate=True).schema
>>> visitDataIds = discoverDataIds(repo)
>>> mmatch = None
>>> for vId in visitDataIds:
... cat = butler.get('src', vId)
... calib = butler.get('calexp_photoCalib', vId)
... cat = calib.calibrateCatalog(cat, ['modelfit_CModel'])
... if mmatch is None:
... mmatch = MultiMatch(cat.schema,
... dataIdFormat={'visit': np.int32, 'ccd': np.int32},
... RecordClass=SourceRecord)
... mmatch.add(catalog=cat, dataId=vId)
...
>>> matchCat = mmatch.finish()
>>> allMatches = GroupView.build(matchCat)
>>> magKey = allMatches.schema.find('slot_ModelFlux_mag').key
>>> def matchFilter(cat):
>>> if len(cat) < 2:
>>> return False
>>> return np.isfinite(cat.get(magKey)).all()
>>> repeat = calcPhotRepeat(allMatches.where(matchFilter), magKey)
"""
mprSamples = [calcPhotRepeatSample(matches, magKey)
for _ in range(numRandomShuffles)]
rms = np.array([mpr.rms for mpr in mprSamples]) * u.mmag
iqr = np.array([mpr.iqr for mpr in mprSamples]) * u.mmag
magDiff = np.array([mpr.magDiffs for mpr in mprSamples]) * u.mmag
magMean = np.array([mpr.magMean for mpr in mprSamples]) * u.mag
repeat = np.mean(iqr)
return {'rms': rms, 'iqr': iqr, 'magDiff': magDiff, 'magMean': magMean, 'repeatability': repeat}
def calcPhotRepeatSample(matches, magKey):
"""Compute one realization of repeatability by randomly sampling pairs of
visits.
Parameters
----------
matches : `lsst.afw.table.GroupView`
`~lsst.afw.table.GroupView` of sources matched between visits,
from MultiMatch, provided by
`lsst.validate.drp.matchreduce.build_matched_dataset`.
magKey : `lsst.afw.table` schema key
Magnitude column key in the ``groupView``.
E.g., ``magKey = allMatches.schema.find("base_PsfFlux_mag").key``
where ``allMatches`` is the result of
`lsst.afw.table.MultiMatch.finish()`.
Returns
-------
metrics : `lsst.pipe.base.Struct`
Metrics of pairs of sources matched between two visits. Fields are:
- ``rms``: scalar RMS of differences of sources observed in this
randomly sampled pair of visits.
- ``iqr``: scalar inter-quartile range (IQR) of differences of sources
observed in a randomly sampled pair of visits.
- ``magDiffs`: array, shape ``(nMatches,)``, of magnitude differences
(mmag) for observed sources across a randomly sampled pair of visits.
- ``magMean``: array, shape ``(nMatches,)``, of mean magnitudes
of sources observed across a randomly sampled pair of visits.
See also
--------
calcPhotRepeat : A wrapper that repeatedly calls this function to build
the repeatability measurement.
"""
magDiffs = matches.aggregate(getRandomDiffRmsInMmags, field=magKey)
magMean = matches.aggregate(np.mean, field=magKey)
rms, iqr = computeWidths(magDiffs)
return pipeBase.Struct(rms=rms, iqr=iqr, magDiffs=magDiffs, magMean=magMean,)
def getRandomDiffRmsInMmags(array):
"""Calculate the RMS difference in mmag between a random pairing of
visits of a source.
Parameters
----------
array : `list` or `numpy.ndarray`
Magnitudes from which to select the pair [mag].
Returns
-------
rmsMmags : `float`
RMS difference in mmag from a random pair of visits.
Notes
-----
The LSST SRD recommends computing repeatability from a histogram of
magnitude differences for the same source measured on two visits
(using a median over the magDiffs to reject outliers).
Because we have N>=2 measurements for each source, we select a random
pair of visits for each source. We divide each difference by sqrt(2)
to obtain the RMS about the (unknown) mean magnitude,
instead of obtaining just the RMS difference.
See Also
--------
getRandomDiff : Get the difference between two randomly selected elements of an array.
Examples
--------
>>> mag = [24.2, 25.5]
>>> rms = getRandomDiffRmsInMmags(mag)
>>> print(rms)
212.132034
"""
return thousandDivSqrtTwo * getRandomDiff(array)
def getRandomDiff(array):
"""Get the difference between two randomly selected elements of an array.
Parameters
----------
array : `list` or `numpy.ndarray`
Input array.
Returns
-------
float or int
Difference between two random elements of the array.
"""
a, b = random.sample(range(len(array)), 2)
return array[a] - array[b]
def computeWidths(array):
"""Compute the RMS and the scaled inter-quartile range of an array.
Parameters
----------
array : `list` or `numpy.ndarray`
Array.
Returns
-------
rms : `float`
RMS
iqr : `float`
Scaled inter-quartile range (IQR, see *Notes*).
Notes
-----
We estimate the width of the histogram in two ways:
- using a simple RMS,
- using the interquartile range (IQR)
The IQR is scaled by the IQR/RMS ratio for a Gaussian such that it
if the array is Gaussian distributed, then the scaled IQR = RMS.
"""
# For scalars, math.sqrt is several times faster than numpy.sqrt.
rmsSigma = math.sqrt(np.mean(array**2))
iqrSigma = np.subtract.reduce(np.percentile(array, [75, 25])) / (norm.ppf(0.75)*2)
return rmsSigma, iqrSigma