Skip to content

Commit

Permalink
Merge pull request #12 from lsst/tickets/DM-16292
Browse files Browse the repository at this point in the history
DM-16292: Support merging of headers after sorting into date order
  • Loading branch information
timj committed Mar 19, 2019
2 parents e4a94b1 + 6126468 commit 6e72ed3
Show file tree
Hide file tree
Showing 13 changed files with 703 additions and 24 deletions.
1 change: 1 addition & 0 deletions python/astro_metadata_translator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# license that can be found in the LICENSE file.

from .observationInfo import *
from .observationGroup import *
from .translator import *
from .translators import *
from .headers import *
Expand Down
83 changes: 76 additions & 7 deletions python/astro_metadata_translator/headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,17 @@

__all__ = ("merge_headers",)

import logging
import itertools
import copy

from .translator import MetadataTranslator
from .translators import FitsTranslator

def merge_headers(headers, mode="overwrite"):
log = logging.getLogger(__name__)


def merge_headers(headers, mode="overwrite", sort=False, first=None, last=None):
"""Merge multiple headers into a single dict.
Given a list of dict-like data headers, combine them following the
Expand All @@ -39,12 +45,39 @@ def merge_headers(headers, mode="overwrite"):
(`None` if the key was not present). If the value is
identical in multiple headers but key is missing in
some, then the single identical header is stored.
sort : `bool`, optional
If `True`, sort the supplied headers into date order if possible.
This affects the resulting merged output depending on the requested
merge mode. An attempt will be made to extract a date from the
headers.
first : `list` or `tuple`, optional
Keys to retain even if they differ. For all modes excepting ``append``
(where it is ignored) the value in the merged header will always be
the value first encountered. This is usually to allow time-dependent
headers such as ``DATE-OBS`` and ``AZSTART`` to be retained to allow
the header to indicate the range of values. No exception is raised if
a key can not be found in a header since this allows a range of
expected headers to be listed covering multiple instruments.
last : `list` or `tuple`, optional
Keys to retain even if they differ. For all modes excepting ``append``
(where it is ignored) the value in the merged header will always be
the final value encountered. This is usually to allow time-dependent
headers such as ``DATE-END`` and ``AZEND`` to be retained to allow
the header to indicate the range of values. No exception is raised if
a key can not be found in a header since this allows a range of
expected headers to be listed covering multiple instruments.
Returns
-------
merged : `dict`
Single `dict` combining all the headers using the specified
combination mode.
Notes
-----
If ``first`` and ``last`` are supplied, the keys from ``first`` are
handled first, followed by the keys from ``last``. No check is made to
ensure that the keys do not overlap.
"""
if not headers:
raise ValueError("No headers supplied.")
Expand All @@ -53,22 +86,44 @@ def merge_headers(headers, mode="overwrite"):
# In python 3.7 dicts are guaranteed to retain order
headers = [h.toOrderedDict() if hasattr(h, "toOrderedDict") else h for h in headers]

# With a single header provided return a copy immediately
if len(headers) == 1:
return copy.deepcopy(headers[0])

if sort:
def key_func(hdr):
    """Sort key for a header: the value of ``to_datetime_begin()``.

    The translator class is chosen by
    ``MetadataTranslator.determine_translator``; if that raises
    `ValueError` the `FitsTranslator` is used instead.
    """
    try:
        chosen_class = MetadataTranslator.determine_translator(hdr)
    except ValueError:
        # Header was not claimed by a specific translator; try the
        # FITS translator
        chosen_class = FitsTranslator
    return chosen_class(hdr).to_datetime_begin()

headers = sorted(headers, key=key_func)

log.debug("Received %d headers for merging", len(headers))

# Pull out first header
first_hdr = headers.pop(0)

# Seed the merged header with a copy
merged = copy.deepcopy(first_hdr)

if mode == "overwrite":
merged = copy.deepcopy(headers.pop(0))
for h in headers:
merged.update(h)

elif mode == "first":
# Reversing the headers and using overwrite mode would result in the
# header order being inconsistent dependent on mode.
merged = copy.deepcopy(headers.pop(0))
for hdr in headers:
for key in hdr:
if key not in merged:
merged[key] = hdr[key]

elif mode == "drop":
merged = copy.deepcopy(headers.pop(0))
drop = set()
for hdr in headers:
for key in hdr:
Expand All @@ -83,8 +138,6 @@ def merge_headers(headers, mode="overwrite"):
del merged[key]

elif mode == "append":
first = headers.pop(0)
merged = copy.deepcopy(first)
fill = set()
for hdr in headers:
for key in hdr:
Expand All @@ -100,9 +153,25 @@ def merge_headers(headers, mode="overwrite"):
# Fill the entries that have multiple differing values
for key in fill:
merged[key] = [h[key] if key in h else None
for h in itertools.chain([first], headers)]
for h in itertools.chain([first_hdr], headers)]

else:
raise ValueError(f"Unsupported value of '{mode}' for mode parameter.")

# Force the first and last values to be inserted
#
if mode != "append":
def retain_value(to_receive, to_retain, sources):
    """Copy selected keys from the first source that contains them.

    Parameters
    ----------
    to_receive : `dict`
        Mapping that is updated in place.
    to_retain : iterable of `str` or `None`
        Keys to copy. Nothing is done if this is `None` or empty.
    sources : sequence of `dict`-like
        Headers searched in order; for each key the value from the
        first header containing that key is used. Keys found in no
        header are silently skipped.
    """
    for wanted in to_retain or ():
        # Locate the earliest header holding this key, if there is one
        holder = next((src for src in sources if wanted in src), None)
        if holder is not None:
            to_receive[wanted] = holder[wanted]

all_headers = (first_hdr, *headers)
retain_value(merged, first, all_headers)
retain_value(merged, last, tuple(reversed(all_headers)))

return merged
191 changes: 191 additions & 0 deletions python/astro_metadata_translator/observationGroup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
# This file is part of astro_metadata_translator.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the LICENSE file at the top-level directory of this distribution
# for details of code ownership.
#
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.

"""Represent a collection of translated headers"""

__all__ = ("ObservationGroup",)

import logging
from collections.abc import MutableSequence

from .observationInfo import ObservationInfo

log = logging.getLogger(__name__)


class ObservationGroup(MutableSequence):
    """A collection of `ObservationInfo` headers.

    Parameters
    ----------
    members : iterable of `ObservationInfo` or `dict`-like
        `ObservationInfo` to seed the group membership. If `dict`-like
        values are used they will be passed to the `ObservationInfo`
        constructor.
    translator_class : `MetadataTranslator`-class, optional
        If any of the members is not an `ObservationInfo`, translator class
        to pass to the `ObservationInfo` constructor. If `None` the
        translation class will be determined automatically.
    pedantic : `bool`, optional
        If any of the members is not an `ObservationInfo`, passed to the
        `ObservationInfo` constructor to control whether
        a failed translation is fatal or not. `None` indicates that the
        `ObservationInfo` constructor default should be used.
    """

    def __init__(self, members, translator_class=None, pedantic=None):
        self._members = [self._coerce_value(m, translator_class=translator_class, pedantic=pedantic)
                         for m in members]

        # Cache of members in time order
        self._sorted = None

    def __len__(self):
        return len(self._members)

    def __delitem__(self, index):
        del self._members[index]
        # Membership changed so the time-ordered cache is stale
        self._sorted = None

    def __getitem__(self, index):
        # Delegates to the underlying list, so a slice yields a plain list
        return self._members[index]

    def __str__(self):
        results = [f"({obs_info.instrument}, {obs_info.datetime_begin})"
                   for obs_info in self._members]
        return "[" + ", ".join(results) + "]"

    def _coerce_value(self, value, translator_class=None, pedantic=None):
        """Given a value, ensure it is an `ObservationInfo`.

        Parameters
        ----------
        value : `ObservationInfo` or `dict`-like
            Either an `ObservationInfo` or something that can be passed to
            an `ObservationInfo` constructor.
        translator_class : `MetadataTranslator`-class, optional
            If value is not an `ObservationInfo`, translator class to pass to
            the `ObservationInfo` constructor. If `None` the
            translation class will be determined automatically.
        pedantic : `bool`, optional
            If value is not an `ObservationInfo`, passed to the
            `ObservationInfo` constructor to control whether
            a failed translation is fatal or not. `None` indicates that the
            `ObservationInfo` constructor default should be used.

        Returns
        -------
        value : `ObservationInfo`
            The supplied value if it already was an `ObservationInfo`,
            else a new `ObservationInfo` constructed from it.

        Raises
        ------
        ValueError
            Raised if supplied value is not an `ObservationInfo` and can
            not be turned into one.
        """
        if value is None:
            raise ValueError("An ObservationGroup cannot contain 'None'")

        if not isinstance(value, ObservationInfo):
            try:
                kwargs = {"translator_class": translator_class}
                # Only forward pedantic when given so that the
                # constructor default applies otherwise
                if pedantic is not None:
                    kwargs["pedantic"] = pedantic
                value = ObservationInfo(value, **kwargs)
            except Exception as e:
                raise ValueError("Could not convert value to ObservationInfo") from e

        return value

    def __iter__(self):
        return iter(self._members)

    def __eq__(self, other):
        """Compares equal if all the members are equal in the same order.

        Two collections of differing length never compare equal.
        """
        try:
            if len(self) != len(other):
                return False
        except TypeError:
            # Not a sized collection; let Python try the reflected
            # comparison
            return NotImplemented
        return not any(info1 != info2 for info1, info2 in zip(self, other))

    def __setitem__(self, index, value):
        """Store item in group.

        Item must be an `ObservationInfo` or something that can be passed
        to an `ObservationInfo` constructor.
        """
        value = self._coerce_value(value)
        self._members[index] = value
        self._sorted = None

    def insert(self, index, value):
        """Insert an item into the group before the given index.

        Item must be an `ObservationInfo` or something that can be passed
        to an `ObservationInfo` constructor.
        """
        value = self._coerce_value(value)
        self._members.insert(index, value)
        self._sorted = None

    def reverse(self):
        """Reverse the order of the members in place."""
        # The cache holds time order, which does not depend on the
        # storage order, so it is left untouched
        self._members.reverse()

    def sort(self, key=None, reverse=False):
        """Sort the members in place.

        Parameters
        ----------
        key : callable, optional
            Passed to `list.sort`. If `None` the members are compared
            directly.
        reverse : `bool`, optional
            Passed to `list.sort`.
        """
        self._members.sort(key=key, reverse=reverse)
        if key is None and not reverse and self._sorted is None:
            # Store sorted order in cache
            self._sorted = self._members.copy()

    def extremes(self):
        """Return the oldest observation in the group and the newest.

        If there is only one member of the group, the newest and oldest
        can be the same observation.

        Returns
        -------
        oldest : `ObservationInfo`
            Oldest observation.
        newest : `ObservationInfo`
            Newest observation.

        Raises
        ------
        IndexError
            Raised if the group is empty.
        """
        if self._sorted is None:
            self._sorted = sorted(self._members)
        return self._sorted[0], self._sorted[-1]

    def newest(self):
        """Return the newest observation in the group.

        Returns
        -------
        newest : `ObservationInfo`
            The newest `ObservationInfo` in the `ObservationGroup`.
        """
        return self.extremes()[1]

    def oldest(self):
        """Return the oldest observation in the group.

        Returns
        -------
        oldest : `ObservationInfo`
            The oldest `ObservationInfo` in the `ObservationGroup`.
        """
        return self.extremes()[0]

    def property_values(self, property):
        """Return a set of values associated with the specified property.

        Parameters
        ----------
        property : `str`
            Property of an `ObservationInfo`

        Returns
        -------
        values : `set`
            All the distinct values for that property within this group.
        """
        return {getattr(obs_info, property) for obs_info in self}
17 changes: 14 additions & 3 deletions python/astro_metadata_translator/observationInfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,15 @@ class ObservationInfo:
Raises
------
ValueError
The supplied header was not recognized by any of the registered
translators.
Raised if the supplied header was not recognized by any of the
registered translators.
TypeError
The supplied translator class was not a MetadataTranslator.
Raised if the supplied translator class was not a MetadataTranslator.
KeyError
Raised if a translation fails and pedantic mode is enabled.
NotImplementedError
Raised if the selected translator does not support a required
property.
"""

_PROPERTIES = PROPERTIES
Expand Down Expand Up @@ -167,6 +172,12 @@ def __eq__(self, other):

return True

def __lt__(self, other):
    """Order by ``datetime_begin``.

    Returns `NotImplemented` if ``other`` has no ``datetime_begin``
    attribute so that Python can try the reflected operation.
    """
    try:
        other_begin = other.datetime_begin
    except AttributeError:
        return NotImplemented
    return self.datetime_begin < other_begin

def __gt__(self, other):
    """Order by ``datetime_begin``.

    Returns `NotImplemented` if ``other`` has no ``datetime_begin``
    attribute so that Python can try the reflected operation.
    """
    try:
        other_begin = other.datetime_begin
    except AttributeError:
        return NotImplemented
    return self.datetime_begin > other_begin

def __getstate__(self):
"""Get pickleable state
Expand Down
12 changes: 12 additions & 0 deletions python/astro_metadata_translator/serialize/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# This file is part of astro_metadata_translator.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the LICENSE file at the top-level directory of this distribution
# for details of code ownership.
#
# Use of this source code is governed by a 3-clause BSD-style
# license that can be found in the LICENSE file.

from .fits import *

0 comments on commit 6e72ed3

Please sign in to comment.