Merge pull request #368 from lsst/tickets/DM-14998

DM-14998: Fix documentation on Schema field name conventions.
lsst · Jul 3, 2018 · d1b3a1c · d1b3a1c
2 parents df7162d + ed2d468
commit d1b3a1c
Show file tree

Hide file tree

Showing 3 changed files with 75 additions and 32 deletions.
diff --git a/doc/table.dox b/doc/table.dox
@@ -158,17 +158,64 @@ namespace lsst { namespace afw { namespace table {
  *  discussed below).
  *
  *  @section afwTableFieldNames Field Names
- *  By convention, field names are all lowercase and have '.'-separated elements.  Only letters, numbers
- *  and periods should be used.  These rules are not enforced, but names that do not meet these requirements
- *  may not round-trip correctly in FITS (periods are converted to underscores in the FITS persistence
- *  layer, so we cannot distinguish between the two when we read tables from FITS).
- *  Schema provides extra functionality for names with period-separated elements; these elements can
- *  be accessed separately individually with the bracket operators.  More information on schema namespaces
- *  can be found in the Schema and SubSchema class documentation, and the testSchema.py unit test may
- *  also be a useful example.
- *
- *  Other field strings (documentation and units) are essentially arbitrary, but should not contain
- *  single quotes, as these may also confuse FITS parsers (even when escaped).
+ *
+ *  By convention, field names are camel case and have '_'-separated elements.
+ *  Only letters, numbers, and underscores should be used.
+ *  These rules are not enforced, but names that do not meet these requirements
+ *  may not round-trip correctly in FITS.
+ *
+ *  Underscores should be used as a sort of namespace separator (much like '.'
+ *  in Python; we use underscore instead so we don't need to translate field
+ *  names when using them in SQL).  These namespace elements typically indicate
+ *  things like the module or Task containing the algorithm that produced the
+ *  field, the name of the algorithm, and (finally) the name of the particular
+ *  output produced by that algorithm.
+ *
+ *  For example:
+ *
+ *    - `base_SdssShape_xx` is produced by the SdssShape algorithm located in
+ *      the meas_base package (we drop "meas_" since so many algorithms are in
+ *      meas_* packages; see `lsst.meas.base.generateAlgorithmName`), and this
+ *      particular field represents the x-x second-moment of the source.
+ *
+ *    - `deblend_nChild` is produced by `SourceDeblendTask`, where `deblend`
+ *      is an abbreviation of the Task, and `nChild` is a value produced by
+ *      it.  There are no hard rules for how to abbreviate the name of a Task
+ *      when generating its field names; we trust developer judgement in
+ *      selecting a prefix that is both concise and unambiguous.
+ *
+ *  Note that some underscore-separated elements are themselves multiple words,
+ *  such as `SdssShape` or `nChild`, and we use CamelCase, not more underscores,
+ *  to separate words.
+ *  Two rules of thumb are:
+ *
+ *    -  if two words are not individually meaningful (or mean something
+ *       different when separated), join them with CamelCase;
+ *
+ *    -  if the prefix of a field name represents a conceptual group of
+ *       multiple fields, use underscores to join the group name to the group
+ *       elements.
+ *
+ *  We also do not have hard rules about whether words begin with uppercase or
+ *  lowercase, with three exceptions:
+ *
+ *    - The first namespace element (typically a module or Task abbreviation)
+ *      should start with a lowercase letter.
+ *
+ *    - The last namespace element (the name of a particular value) should
+ *      begin with a lowercase letter.
+ *
+ *    - Names that correspond to a Python or C++ class that produces the field
+ *      should start with uppercase (to match the name of the class).
+ *
+ *  Schema provides extra functionality for names with underscore-separated
+ *  elements; these elements can be accessed individually with the
+ *  bracket operators.  More information on this behavior can be found in the
+ *  Schema and SubSchema class documentation.
+ *
+ *  Other field strings (documentation and units) are essentially arbitrary,
+ *  but should not contain single quotes, as these may also confuse FITS
+ *  parsers (even when escaped).
  *
  *  @section afwTableVariableLengthArrays Variable-Length Arrays
  *

diff --git a/include/lsst/afw/table/Field.h b/include/lsst/afw/table/Field.h
@@ -29,11 +29,8 @@ struct Field : public FieldBase<T> {
      *  Construct a new field.
      *
      *  @param[in]  name         Name of the field.  Schemas provide extra functionality for names
-     *                           whose components are separated by periods.  It may also be practical
-     *                           to limit field names to lowercase letters, numbers, and periods,
-     *                           as only those names can be round-tripped with FITS I/O (periods are
-     *                           converted to underscores in FITS, but hence cannot be distinguished
-     *                           from underscores in field names).
+     *                           whose components are separated by underscores.  Field names should
+     *                           be limited to letters, numbers, and underscores.
      *  @param[in]  doc          Documentation for the field.  Should not contain single-quotes
      *                           to avoid FITS round-trip problems.
      *  @param[in]  units        Units for the field.  Should not contain single-quotes
@@ -52,11 +49,8 @@ struct Field : public FieldBase<T> {
      *  Construct a new field.
      *
      *  @param[in]  name         Name of the field.  Schemas provide extra functionality for names
-     *                           whose components are separated by periods.  It may also be practical
-     *                           to limit field names to lowercase letters, numbers, and periods,
-     *                           as only those names can be round-tripped with FITS I/O (periods are
-     *                           converted to underscores in FITS, but hence cannot be distinguished
-     *                           from underscores in field names).
+     *                           whose components are separated by underscores.  Field names should
+     *                           be limited to letters, numbers, and underscores.
      *  @param[in]  doc          Documentation for the field.
      *  @param[in]  size         Size of the field as an integer, if appropriate.  Field types that
      *                           accept a size have a FieldBase that is implicitly constructable from

diff --git a/include/lsst/afw/table/Schema.h b/include/lsst/afw/table/Schema.h
@@ -32,11 +32,13 @@ class BaseRecord;
  *  Because offsets for fields are assigned when the field is added to the Schema,
  *  Schemas do not support removing fields, though they do allow renaming.
  *
- *  Field names in Schemas are expected to be dot-separated names (e.g. 'a.b.c').  The SubSchema
+ *  Field names in Schemas are expected to be underscore-separated names (e.g. 'a_b_c',
+ *  but see @ref afwTableFieldNames for the full conventions, including when to use
+ *  underscores vs. CamelCase).  The SubSchema
 *  class and Schema::operator[] provide a hierarchical interface to these names, but are
  *  implemented entirely as string splitting/joining operations that ultimately forward to
  *  member functions that operate on the fully-qualified field name, so there is no requirement
- *  that names be separated by periods, and no performance advantage to using a SubSchema.
+ *  that names be separated by underscores, and no performance advantage to using a SubSchema.
  *
  *  A SchemaMapper object can be used to define a relationship between two Schemas to be used
  *  when copying values from one table to another or loading/saving selected fields to disk.
@@ -132,8 +134,8 @@ class Schema {
      *  Return a set of field names in the schema.
      *
      *  If topOnly==true, return a unique list of only the part
-     *  of the names before the first period.  For example,
-     *  if the full list of field names is ['a.b.c', 'a.d', 'e.f'],
+     *  of the names before the first underscore.  For example,
+     *  if the full list of field names is ['a_b_c', 'a_d', 'e_f'],
      *  topOnly==true will return ['a', 'e'].
      *
      *  Returns an instance of Python's builtin set in Python.
@@ -323,7 +325,7 @@ class Schema {
 /**
  *  A proxy type for name lookups in a Schema.
  *
- *  Elements of schema names are assumed to be separated by periods ("a.b.c.d");
+ *  Elements of schema names are assumed to be separated by underscores ("a_b_c");
 *  an incomplete lookup is one that does not resolve to a field.  Note that even
  *  complete lookups can have nested names; a Point field, for instance, has "x"
  *  and "y" nested names.
@@ -341,16 +343,16 @@ class Schema {
  *  Some examples:
  *
  *      Schema schema(false);
- *      Key<int> a_i = schema.addField<int>("a.i", "integer field");
- *      Key< Point<double> > a_p = schema.addField< Point<double> >("a.p", "point field");
+ *      Key<int> a_i = schema.addField<int>("a_i", "integer field");
+ *      Key< Point<double> > a_p = schema.addField< Point<double> >("a_p", "point field");
  *
- *      assert(schema["a.i"] == a_i);
+ *      assert(schema["a_i"] == a_i);
  *      SubSchema a = schema["a"];
  *      assert(a["i"] == a_i);
- *      Field<int> f_a_i = schema["a.i"];
+ *      Field<int> f_a_i = schema["a_i"];
  *      assert(f_a_i.getDoc() == "integer field");
- *      assert(schema["a.i"] == "a.i");
- *      assert(schema.find("a.p.x") == a_p.getX());
+ *      assert(schema["a_i"] == "a_i");
+ *      assert(schema.find("a_p_x") == a_p.getX());
  */
 class SubSchema {
     typedef detail::SchemaImpl Impl;