Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-15857 Use correct aliases for old schemas #397

Merged
merged 3 commits into from
Oct 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
86 changes: 42 additions & 44 deletions src/table/io/FitsSchemaInputMapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include <cstdint>
#include <cstdio>
#include <string>
#include <algorithm>
#include <cctype>

#include "boost/regex.hpp"
#include "boost/multi_index_container.hpp"
Expand Down Expand Up @@ -81,6 +83,7 @@ class FitsSchemaInputMapper::Impl {
Impl() : version(0), flagColumn(0), archiveHdu(-1) {}

int version;
std::string type;
int flagColumn;
int archiveHdu;
Schema schema;
Expand All @@ -99,6 +102,7 @@ FitsSchemaInputMapper::FitsSchemaInputMapper(daf::base::PropertyList &metadata,
_impl->version = lsst::afw::table::Schema::VERSION;
}
_impl->version = metadata.get("AFW_TABLE_VERSION", _impl->version);
_impl->type = metadata.get("AFW_TYPE", _impl->type);
Copy link
Contributor Author

@parejkoj parejkoj Oct 1, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although it's no longer used directly, we might as well keep it around, I think?

if (stripMetadata) {
metadata.remove("AFW_TABLE_VERSION");
}
Expand Down Expand Up @@ -708,9 +712,21 @@ bool endswith(std::string const &s, std::string const &suffix) {
return s.size() >= suffix.size() && s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}

// Replace the last n characters of a string with a new suffix string, returning the result as a new string
std::string replaceSuffix(std::string const &s, std::size_t n, std::string const &suffix) {
return s.substr(0, s.size() - n) + suffix;
bool isInstFlux(FitsSchemaItem const & item) {
// helper lambda to make reading the real logic easier
auto includes = [](std::string const & s, char const * target) {
return s.find(target) != std::string::npos;
};
if (!includes(item.ttype, "flux")) return false;
// transform units to lowercase.
std::string units(item.tunit);
std::transform(units.begin(), units.end(), units.begin(), [](char c) { return std::tolower(c); } );
return includes(units, "count") || includes(units, "dn") || includes (units, "adu");
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh joy: we use all of these units? Have we standardized this yet, or do we need an RFC for it?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We certainly don't anymore. I'm pretty sure that at one point we used dn, but that may predate afw::table, and I'm not certain we ever used adu. I was just trying to be thorough.

}

// Replace 'from' with 'to' in 'full', returning the result.
std::string replace(std::string full, std::string const & from, std::string const & to) {
return full.replace(full.find(from), from.size(), to);
}

} // namespace
Expand All @@ -736,10 +752,10 @@ Schema FitsSchemaInputMapper::finalize() {
// Of course, we'll try to recreate that alias every time we handle another flag field with
// the same prefix, but AliasMap know hows to handle that no-op set.
aliases.set(prefix + "flags", prefix + "flag");
} else if (iter->ttype.find("flux") != std::string::npos) {
} else if (isInstFlux(*iter)) {
// Create an alias that resolves "X_instFlux" to "X" or "X_instFluxErr" to "X_err".
if (endswith(iter->ttype, "_err")) {
aliases.set(replaceSuffix(iter->ttype, 4, "_instFluxErr"), iter->ttype);
aliases.set(replace(iter->ttype, "_err", "_instFluxErr"), iter->ttype);
} else {
aliases.set(iter->ttype + "_instFlux", iter->ttype);
}
Expand All @@ -750,52 +766,34 @@ Schema FitsSchemaInputMapper::finalize() {
// centroid and shape values themselves, as those will automatically be correct
// after the PointConversionReader and MomentsConversionReader do their work.
if (iter->tccls == "Covariance(Point)") {
aliases.set(replaceSuffix(iter->ttype, 4, "_xErr"), iter->ttype + "_xErr");
aliases.set(replaceSuffix(iter->ttype, 4, "_yErr"), iter->ttype + "_yErr");
aliases.set(replaceSuffix(iter->ttype, 4, "_x_y_Cov"), iter->ttype + "_x_y_Cov");
aliases.set(replace(iter->ttype, "_err", "_yErr"), iter->ttype + "_yErr");
aliases.set(replace(iter->ttype, "_err", "_xErr"), iter->ttype + "_xErr");
aliases.set(replace(iter->ttype, "_err", "_x_y_Cov"), iter->ttype + "_x_y_Cov");
} else if (iter->tccls == "Covariance(Moments)") {
aliases.set(replaceSuffix(iter->ttype, 4, "_xxErr"), iter->ttype + "_xxErr");
aliases.set(replaceSuffix(iter->ttype, 4, "_yyErr"), iter->ttype + "_yyErr");
aliases.set(replaceSuffix(iter->ttype, 4, "_xyErr"), iter->ttype + "_xyErr");
aliases.set(replaceSuffix(iter->ttype, 4, "_xx_yy_Cov"), iter->ttype + "_xx_yy_Cov");
aliases.set(replaceSuffix(iter->ttype, 4, "_xx_xy_Cov"), iter->ttype + "_xx_xy_Cov");
aliases.set(replaceSuffix(iter->ttype, 4, "_yy_xy_Cov"), iter->ttype + "_yy_xy_Cov");
aliases.set(replace(iter->ttype, "_err", "_xxErr"), iter->ttype + "_xxErr");
aliases.set(replace(iter->ttype, "_err", "_yyErr"), iter->ttype + "_yyErr");
aliases.set(replace(iter->ttype, "_err", "_xyErr"), iter->ttype + "_xyErr");
aliases.set(replace(iter->ttype, "_err", "_xx_yy_Cov"), iter->ttype + "_xx_yy_Cov");
aliases.set(replace(iter->ttype, "_err", "_xx_xy_Cov"), iter->ttype + "_xx_xy_Cov");
aliases.set(replace(iter->ttype, "_err", "_yy_xy_Cov"), iter->ttype + "_yy_xy_Cov");
}
}
}
}
if (_impl->version == 1) {
// Version 1 tables use Sigma when we should use Err (see RFC-333) and had no fields
// that should have been named Sigma. So provide aliases xErr -> xSigma
} else if (_impl->version < 3) {
// Version == 1 tables use Sigma when we should use Err (see RFC-333) and had no fields
// that should have been named Sigma. So provide aliases xErr -> xSigma.
// Version <= 2 tables used _flux when we should use _instFlux (see RFC-322).
AliasMap &aliases = *_impl->schema.getAliasMap();
for (auto iter = _impl->asList().begin(); iter != _impl->asList().end(); ++iter) {
if (iter->ttype.find("flux") != std::string::npos) {
// Create an alias that resolves "X_instFlux" to "X_flux" or "X_instFluxErr" to "X_fluxSigma".
if (endswith(iter->ttype, "fluxSigma")) {
// replace "fluxSigma"->"instFluxErr"
aliases.set(replaceSuffix(iter->ttype, 9, "instFluxErr"), iter->ttype);
} else {
// replace "flux"->"instFlux"
aliases.set(replaceSuffix(iter->ttype, 4, "instFlux"), iter->ttype);
}
} else if (endswith(iter->ttype, "Sigma")) {
aliases.set(replaceSuffix(iter->ttype, 5, "Err"), iter->ttype);
std::string name = iter->ttype;
if (_impl->version < 2 && endswith(name, "Sigma")) {
name = replace(std::move(name), "Sigma", "Err");
}
}
}
if (_impl->version == 2) {
// Version 2 tables used _flux when we should use _instFlux (see RFC-322).
AliasMap &aliases = *_impl->schema.getAliasMap();
for (auto iter = _impl->asList().begin(); iter != _impl->asList().end(); ++iter) {
if (iter->ttype.find("flux") != std::string::npos) {
// Create an alias that resolves "X_instFlux" to "X_flux" or "X_instFluxErr" to "X_fluxErr".
if (endswith(iter->ttype, "fluxErr")) {
// replace "fluxErr"->"instFluxErr"
aliases.set(replaceSuffix(iter->ttype, 9, "instFluxErr"), iter->ttype);
} else {
// replace "flux"->"instFlux"
aliases.set(replaceSuffix(iter->ttype, 4, "instFlux"), iter->ttype);
}
if (_impl->version < 3 && isInstFlux(*iter)) {
name = replace(std::move(name), "flux", "instFlux");
}
if (name != iter->ttype) {
aliases.set(name, iter->ttype);
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions tests/data/ps1-refcat-v1.fits

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions tests/data/sourceCatalog-hsc-v2.fits

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/data/sourceTable-v1.fits

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/data/sourceTable-v2.fits

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions tests/test_simpleTable.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@
display = False


# Testing files live under this, in `data/`.
testPath = os.path.abspath(os.path.dirname(__file__))


def makeArray(size, dtype):
return np.array(np.random.randn(size), dtype=dtype)

Expand Down Expand Up @@ -757,6 +761,16 @@ def testCompoundFieldFitsConversion(self):
self.assertFloatsAlmostEqual(record2.get(covMKey),
covValues["cov_m"], rtol=1E-6)

def testFitsReadVersion1Compatibility(self):
"""Test that v1 SimpleCatalogs read from FITS get correct aliases."""
filename = os.path.join(testPath, "data", "ps1-refcat-v1.fits")
catalog = lsst.afw.table.SimpleCatalog.readFits(filename)
self.assertIn('g_flux', catalog.schema)
self.assertNotIn('g_instFlux', catalog.schema)

self.assertIn('g_fluxErr', catalog.schema)
self.assertNotIn('g_instFluxErr', catalog.schema)

def testDelete(self):
schema = lsst.afw.table.Schema()
key = schema.addField("a", type=np.float64, doc="doc for 'a'")
Expand Down
8 changes: 8 additions & 0 deletions tests/test_sourceTable.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,14 @@ def testFitsReadVersion1Compatibility(self):
self.assertEqual(cat.schema["a_flux"].asKey(), cat.schema["a_instFlux"].asKey())
self.assertEqual(cat.schema["a_fluxSigma"].asKey(), cat.schema["a_instFluxErr"].asKey())

def testFitsReadVersion2CompatibilityRealSourceCatalog(self):
"""DM-15891: some fields were getting aliases they shouldn't have."""
cat = lsst.afw.table.SourceCatalog.readFits(
os.path.join(testPath, "data", "sourceCatalog-hsc-v2.fits"))
self.assertNotIn('base_SdssShape_flux_xxinstFlux', cat.schema)
self.assertIn('base_SdssShape_instFlux_xx_Cov', cat.schema)
self.assertNotIn('base_Blendedness_abs_flux_cinstFlux', cat.schema)

def testFitsVersion2Compatibility(self):
"""Test reading of catalogs with version 2 schema

Expand Down