Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-29266: Add translator methods to read all relevant HDUs #52

Merged
merged 15 commits into from
Mar 23, 2021
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
82 changes: 48 additions & 34 deletions python/astro_metadata_translator/bin/translateheader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import traceback
import importlib
import yaml
from astro_metadata_translator import ObservationInfo, fix_header
from astro_metadata_translator import ObservationInfo, fix_header, MetadataTranslator

from ..file_helpers import find_files, read_basic_metadata_from_file

Expand Down Expand Up @@ -53,6 +53,11 @@
"label": "Filter",
},
{
"format": ">8.8s",
"attr": "detector_unique_name",
"label": "Detector"
},
{
"format": "5.1f",
"attr": "exposure_time",
"label": "ExpTime",
Expand Down Expand Up @@ -138,7 +143,9 @@ def read_file(file, hdrnum, print_trace,
of the native object type representing the header (which can be
PropertyList or an Astropy header). Without this modify headers
will be dumped as simple `dict` form.
"auto" is not allowed by this point.
"auto" is used to indicate that a single file has been specified
but the output will depend on whether the file is a multi-extension
FITS file or not.
write_heading: `bool`, optional
If `True` and in table mode, write a table heading out before writing
the content.
Expand All @@ -151,8 +158,6 @@ def read_file(file, hdrnum, print_trace,
"""
if output_mode not in OUTPUT_MODES:
raise ValueError(f"Output mode of '{output_mode}' is not understood.")
if output_mode == "auto":
raise ValueError("Output mode can not be 'auto' here.")

# This gets in the way in tabular mode
if output_mode != "table":
Expand All @@ -178,33 +183,43 @@ def read_file(file, hdrnum, print_trace,
# The header should be written out in the insertion order
print(yaml.dump(md, sort_keys=False), file=outstream)
return True
obs_info = ObservationInfo(md, pedantic=True, filename=file)
if output_mode == "table":
columns = ["{:{fmt}}".format(getattr(obs_info, c["attr"]), fmt=c["format"])
for c in TABLE_COLUMNS]

if write_heading:
# Construct headings of the same width as the items
# we have calculated. Doing this means we don't have to
# work out for ourselves how many characters will be used
# for non-strings (especially Quantity)
headings = []
separators = []
for thiscol, defn in zip(columns, TABLE_COLUMNS):
width = len(thiscol)
headings.append("{:{w}.{w}}".format(defn["label"], w=width))
separators.append("-"*width)
print(" ".join(headings), file=outstream)
print(" ".join(separators), file=outstream)

row = " ".join(columns)
print(row, file=outstream)
elif output_mode == "verbose":
print(f"{obs_info}", file=outstream)
elif output_mode == "none":
pass
else:
raise RuntimeError(f"Output mode of '{output_mode}' not recognized but should be known.")

# Try to work out a translator class.
translator_class = MetadataTranslator.determine_translator(md, filename=file)
headers = list(translator_class.read_all_headers(file, md))
if output_mode == "auto":
output_mode = "table" if len(headers) > 1 else "verbose"

wrote_heading = False
for md in headers:
obs_info = ObservationInfo(md, pedantic=True, filename=file)
if output_mode == "table":
columns = ["{:{fmt}}".format(getattr(obs_info, c["attr"]), fmt=c["format"])
for c in TABLE_COLUMNS]

if write_heading and not wrote_heading:
# Construct headings of the same width as the items
# we have calculated. Doing this means we don't have to
# work out for ourselves how many characters will be used
# for non-strings (especially Quantity)
headings = []
separators = []
for thiscol, defn in zip(columns, TABLE_COLUMNS):
width = len(thiscol)
headings.append("{:{w}.{w}}".format(defn["label"], w=width))
separators.append("-"*width)
print(" ".join(headings), file=outstream)
print(" ".join(separators), file=outstream)
wrote_heading = True

row = " ".join(columns)
print(row, file=outstream)
elif output_mode == "verbose":
print(f"{obs_info}", file=outstream)
elif output_mode == "none":
pass
else:
raise RuntimeError(f"Output mode of '{output_mode}' not recognized but should be known.")
except Exception as e:
if print_trace:
traceback.print_exc(file=outstream)
Expand Down Expand Up @@ -251,12 +266,11 @@ def process_files(files, regex, hdrnum, print_trace,
"""
found_files = find_files(files, regex)

# Convert "auto" to correct mode
# Convert "auto" to correct mode but for a single file keep it
# auto in case that file has multiple headers
if output_mode == "auto":
if len(found_files) > 1:
output_mode = "table"
else:
output_mode = "verbose"

# Process each file
failed = []
Expand Down
5 changes: 3 additions & 2 deletions python/astro_metadata_translator/cli/astrometadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,11 @@
PACKAGES_VAR = "METADATA_TRANSLATORS"

hdrnum_option = click.option("-n", "--hdrnum",
default=1,
default=-1,
help="HDU number to read. If the HDU can not be found, a warning is issued but"
" reading is attempted using the primary header. The primary header is"
" always read and merged with this header.")
" always read and merged with this header. Negative number indicates that"
" the second header will be merged if the FITS file supports extended FITS.")
timj marked this conversation as resolved.
Show resolved Hide resolved
regex_option = click.option("-r", "--regex",
default=re_default,
help="When looking in a directory, regular expression to use to determine whether"
Expand Down
18 changes: 12 additions & 6 deletions python/astro_metadata_translator/file_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
import sys
import traceback

from astro_metadata_translator import merge_headers, ObservationInfo
from .headers import merge_headers
from .observationInfo import ObservationInfo
from .tests import read_test_file


Expand Down Expand Up @@ -111,7 +112,7 @@ def find_files(files, regex):
return found_files


def read_basic_metadata_from_file(file, hdrnum, errstream=sys.stderr, can_raise=False):
def read_basic_metadata_from_file(file, hdrnum, errstream=sys.stderr, can_raise=True):
timj marked this conversation as resolved.
Show resolved Hide resolved
"""Read a raw header from a file, merging if necessary

Parameters
Expand All @@ -121,12 +122,13 @@ def read_basic_metadata_from_file(file, hdrnum, errstream=sys.stderr, can_raise=
top-level dict.
hdrnum : `int`
Header number to read. Only relevant for FITS. If greater than 1
it will be merged with the primary header.
it will be merged with the primary header. If negative the second
header will be merged with the primary header if a second is present.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This wording is a bit confusing, especially in the context of the previous sentence. Maybe say "If any negative number, the second header need not exist but will be merged with the primary header if it does."?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've tried to rewrite.

errstream : `io.StringIO`, optional
Stream to send messages that would normally be sent to standard
error. Defaults to `sys.stderr`. Only used if exceptions are disabled.
can_raise : `bool`, optional
Indicate whether the function can raise and exception (default)
Indicate whether the function can raise an exception (default)
or should return `None` on error. Can still raise if an unexpected
error is encountered.

Expand All @@ -152,8 +154,12 @@ def read_basic_metadata_from_file(file, hdrnum, errstream=sys.stderr, can_raise=
if md is None:
print(f"Unable to open file {file}", file=errstream)
return None
if hdrnum != 0:
mdn = _read_fits_metadata(file, int(hdrnum), can_raise=can_raise)
if hdrnum < 0:
if "EXTEND" in md and md["EXTEND"]:
hdrnum = 1
if hdrnum > 0:
# Allow this to fail
mdn = _read_fits_metadata(file, int(hdrnum), can_raise=False)
# Astropy does not allow append mode since it does not
# convert lists to multiple cards. Overwrite for now
if mdn is not None:
Expand Down
2 changes: 1 addition & 1 deletion python/astro_metadata_translator/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def read_test_file(filename, dir=None):
header = yaml.load(fd, Loader=Loader)
# Cannot directly check for Mapping because PropertyList is not one
if not hasattr(header, "items"):
raise ValueError(f"Contents of YAML file are not a mapping, they are {type(header)}")
raise ValueError(f"Contents of YAML file {filename} are not a mapping, they are {type(header)}")
return header


Expand Down
41 changes: 39 additions & 2 deletions python/astro_metadata_translator/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,13 +462,16 @@ def determine_translator(cls, header, filename=None):
None of the registered translation classes understood the supplied
header.
"""
file_msg = ""
if filename is not None:
file_msg = f" from {filename}"
for name, trans in cls.translators.items():
if trans.can_translate(header, filename=filename):
log.debug(f"Using translation class {name}")
log.debug("Using translation class %s%s", name, file_msg)
return trans
else:
raise ValueError(f"None of the registered translation classes {list(cls.translators.keys())}"
" understood this header")
f" understood this header{file_msg}")

@classmethod
def translator_version(cls):
Expand Down Expand Up @@ -943,6 +946,40 @@ def to_observation_counter(self):
"""
return 0

@classmethod
def read_all_headers(cls, filename, primary=None):
"""Read all relevant headers from the given file.

This should simply return all headers, but for multi-extension FITS
files where each file includes data from multiple detectors,
all the headers for each detector should be returned.

Parameters
----------
filename : `str`
Path to a file in a format understood by this translator.
primary : `dict`-like, optional
The primary header obtained by the caller. This is sometimes
already known, for example if a system is trying to bootstrap
without already knowing what data is in the file. For many
instruments where the primary header is the only relevant
header, the primary header will be returned with no further
action.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 , this is much clearer than the original wording.


Yields
------
headers : iterator of `dict`-like
Each relevant header in turn.
"""
if primary is not None:
yield primary
else:
kfindeisen marked this conversation as resolved.
Show resolved Hide resolved
# Prevent circular import by deferring
from .file_helpers import read_basic_metadata_from_file
kfindeisen marked this conversation as resolved.
Show resolved Hide resolved

# Merge primary and secondary header if they exist.
yield read_basic_metadata_from_file(filename, -1)
kfindeisen marked this conversation as resolved.
Show resolved Hide resolved


def _make_abstract_translator_method(property, doc, return_typedoc, return_type):
"""Create a an abstract translation method for this property.
Expand Down
61 changes: 61 additions & 0 deletions python/astro_metadata_translator/translators/decam.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import posixpath
import logging

from astropy.io import fits
from astropy.coordinates import EarthLocation, Angle
import astropy.units as u

Expand Down Expand Up @@ -297,3 +298,63 @@ def fix_header(cls, header, instrument, obsid, filename=None):
log_label, header["FILTER"], obstype)

return modified

@classmethod
def read_all_headers(cls, filename, primary=None):
"""Read all relevant headers from the given file.

Returns all non-guide headers and does not include the primary
header.

Parameters
----------
filename : `str`
Path to a file in a format understood by this translator.
primary : `dict`-like, optional
The primary header obtained by the caller. This is sometimes
already known, for example if a system is trying to bootstrap
without already knowing what data is in the file. For many
instruments where the primary header is the only relevant
header, the primary header will be returned with no further
action.
timj marked this conversation as resolved.
Show resolved Hide resolved

Yields
------
headers : iterator of `dict`-like
Each relevant header in turn.

Notes
-----
DECam data use INHERIT=T so all returned headers will be merged
with the primary header.
"""
# Circular dependency so must defer import
from ..headers import merge_headers

# If this is not a FITS file return the primary if defined.
# It is likely that the metadata came from a test YAML file.
if not re.search(r"\.fit[s]?\b", filename) and primary is not None:
yield primary
return

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit concerned by how many things read_all_headers appears to be doing other than reading all headers. Can you mention this special case in the base class documentation? It wouldn't even occur to me that it would be valid to call this method on a non-FITS file.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've updated the documentation but now that I think about it it might be better to have astrometadata translate skip over this call completely if a test YAML file is encountered.

# Since we want to scan many HDUs we use astropy directly to keep
# the file open rather than continually opening and closing it
# as we go to each HDU.
with fits.open(filename) as fits_file:
# Astropy does not automatically handle the INHERIT=T in
# DECam headers so the primary header must be merged.
first_pass = True

for hdu in fits_file:
if first_pass:
if not primary:
primary = hdu.header
first_pass = False
continue

header = hdu.header
if "CCDNUM" not in header:
kfindeisen marked this conversation as resolved.
Show resolved Hide resolved
continue
if header["CCDNUM"] > 62: # ignore guide CCDs
continue
yield merge_headers([primary, header], mode="overwrite")