ICU-21184 rephrase docs/comments using the term grandfathered

srl295 · Aug 21, 2020 · 39da689 · 39da689
1 parent cde54fc
commit 39da689
Show file tree

Hide file tree

Showing 18 changed files with 138 additions and 287 deletions.
diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp
@@ -1025,13 +1025,14 @@ Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
         return result;
     }
 
-    // If a BCP-47 language tag is passed as the language parameter to the
+    // If a BCP 47 language tag is passed as the language parameter to the
     // normal Locale constructor, it will actually fall back to invoking
     // uloc_forLanguageTag() to parse it if it somehow is able to detect that
-    // the string actually is BCP-47. This works well for things like strings
-    // using BCP-47 extensions, but it does not at all work for things like
-    // BCP-47 grandfathered tags (eg. "en-GB-oed") which are possible to also
-    // interpret as ICU locale IDs and because of that won't trigger the BCP-47
+    // the string actually is BCP 47. This works well for things like strings
+    // using BCP 47 extensions, but it does not at all work for things like
+    // legacy language tags (marked as “Type: grandfathered” in BCP 47,
+    // e.g., "en-GB-oed") which are possible to also
+    // interpret as ICU locale IDs and because of that won't trigger the BCP 47
     // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
     // and then Locale::init(), instead of just calling the normal constructor.
 

diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp
@@ -53,7 +53,7 @@ typedef struct ULanguageTag {
     VariantListEntry    *variants;
     ExtensionListEntry  *extensions;
     const char          *privateuse;
-    const char          *grandfathered;
+    const char          *legacy;
 } ULanguageTag;
 
 #define MINLEN 2
@@ -85,8 +85,9 @@ static const char LOCALE_TYPE_YES[] = "yes";
  Updated on 2018-09-12 from
  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
 
- This table has 2 parts. The parts for Grandfathered tags is generated by the
- following scripts from the IANA language tag registry.
+ This table has 2 parts. The part for
+ legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ is generated by the following scripts from the IANA language tag registry.
 
  curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
  egrep -A 7 'Type: grandfathered' | \
@@ -100,8 +101,8 @@ static const char LOCALE_TYPE_YES[] = "yes";
  values. They may have to be removed for the strict BCP 47 compliance.
 
 */
-static const char* const GRANDFATHERED[] = {
-/*  grandfathered   preferred */
+static const char* const LEGACY[] = {
+/*  legacy          preferred */
     "art-lojban",   "jbo",
     "en-gb-oed",    "en-gb-oxendict",
     "i-ami",        "ami",
@@ -124,7 +125,7 @@ static const char* const GRANDFATHERED[] = {
     "zh-min-nan",   "nan",
     "zh-xiang",     "hsn",
 
-    // Grandfathered tags with no preferred value in the IANA
+    // Legacy tags with no preferred value in the IANA
     // registry. Kept for now for the backward compatibility
     // because ICU has mapped them this way.
     "cel-gaulish",  "xtg-x-cel-gaulish",
@@ -346,7 +347,7 @@ ultag_getPrivateUse(const ULanguageTag* langtag);
 
 #if 0
 static const char*
-ultag_getGrandfathered(const ULanguageTag* langtag);
+ultag_getLegacy(const ULanguageTag* langtag);
 #endif
 
 U_NAMESPACE_BEGIN
@@ -986,7 +987,7 @@ _initializeULanguageTag(ULanguageTag* langtag) {
     langtag->variants = NULL;
     langtag->extensions = NULL;
 
-    langtag->grandfathered = EMPTY;
+    langtag->legacy = EMPTY;
     langtag->privateuse = EMPTY;
 }
 
@@ -2042,7 +2043,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
     char *pExtValueSubtag, *pExtValueSubtagEnd;
     int32_t i;
     UBool privateuseVar = FALSE;
-    int32_t grandfatheredLen = 0;
+    int32_t legacyLen = 0;
 
     if (parsedLen != NULL) {
         *parsedLen = 0;
@@ -2082,25 +2083,25 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
     }
 
     size_t parsedLenDelta = 0;
-    // Grandfathered tag will be consider together. Grandfathered tag with intervening
+    // Legacy tag will be considered together. Legacy tag with intervening
     // script and region such as art-DE-lojban or art-Latn-lojban won't be
     // matched.
-    /* check if the tag is grandfathered */
-    for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
-        int32_t checkGrandfatheredLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i]));
-        if (tagLen < checkGrandfatheredLen) {
+    /* check if the tag is legacy */
+    for (i = 0; i < UPRV_LENGTHOF(LEGACY); i += 2) {
+        int32_t checkLegacyLen = static_cast<int32_t>(uprv_strlen(LEGACY[i]));
+        if (tagLen < checkLegacyLen) {
             continue;
         }
-        if (tagLen > checkGrandfatheredLen && tagBuf[checkGrandfatheredLen] != '-') {
+        if (tagLen > checkLegacyLen && tagBuf[checkLegacyLen] != '-') {
             // make sure next char is '-'.
             continue;
         }
-        if (uprv_strnicmp(GRANDFATHERED[i], tagBuf, checkGrandfatheredLen) == 0) {
+        if (uprv_strnicmp(LEGACY[i], tagBuf, checkLegacyLen) == 0) {
             int32_t newTagLength;
 
-            grandfatheredLen = checkGrandfatheredLen;  /* back up for output parsedLen */
-            int32_t replacementLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
-            newTagLength = replacementLen + tagLen - checkGrandfatheredLen;
+            legacyLen = checkLegacyLen;  /* back up for output parsedLen */
+            int32_t replacementLen = static_cast<int32_t>(uprv_strlen(LEGACY[i+1]));
+            newTagLength = replacementLen + tagLen - checkLegacyLen;
             if (tagLen < newTagLength) {
                 uprv_free(tagBuf);
                 tagBuf = (char*)uprv_malloc(newTagLength + 1);
@@ -2111,16 +2112,16 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
                 t->buf = tagBuf;
                 tagLen = newTagLength;
             }
-            parsedLenDelta = checkGrandfatheredLen - replacementLen;
-            uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
-            if (checkGrandfatheredLen != tagLen) {
-                uprv_strcpy(t->buf + replacementLen, tag + checkGrandfatheredLen);
+            parsedLenDelta = checkLegacyLen - replacementLen;
+            uprv_strcpy(t->buf, LEGACY[i + 1]);
+            if (checkLegacyLen != tagLen) {
+                uprv_strcpy(t->buf + replacementLen, tag + checkLegacyLen);
             }
             break;
         }
     }
 
-    if (grandfatheredLen == 0) {
+    if (legacyLen == 0) {
         for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
             const char* redundantTag = REDUNDANT[i];
             size_t redundantTagLen = uprv_strlen(redundantTag);
@@ -2608,8 +2609,8 @@ ultag_getPrivateUse(const ULanguageTag* langtag) {
 
 #if 0
 static const char*
-ultag_getGrandfathered(const ULanguageTag* langtag) {
-    return langtag->grandfathered;
+ultag_getLegacy(const ULanguageTag* langtag) {
+    return langtag->legacy;
 }
 #endif
 

diff --git a/icu4c/source/common/ulocimp.h b/icu4c/source/common/ulocimp.h
@@ -109,13 +109,17 @@ ulocimp_toLanguageTag(const char* localeID,
  * If the specified language tag contains any ill-formed subtags,
  * the first such subtag and all following subtags are ignored.
  * <p>
- * This implements the 'Language-Tag' production of BCP47, and so
- * supports grandfathered (regular and irregular) as well as private
- * use language tags.  Private use tags are represented as 'x-whatever',
- * and grandfathered tags are converted to their canonical replacements
- * where they exist.  Note that a few grandfathered tags have no modern
- * replacement, these will be converted using the fallback described in
+ * This implements the 'Language-Tag' production of BCP 47, and so
+ * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * (regular and irregular) as well as private use language tags.
+ *
+ * Private use tags are represented as 'x-whatever',
+ * and legacy tags are converted to their canonical replacements where they exist.
+ *
+ * Note that a few legacy tags have no modern replacement;
+ * these will be converted using the fallback described in
  * the first paragraph, so some information might be lost.
+ *
  * @param langtag   the input BCP47 language tag.
  * @param tagLen    the length of langtag, or -1 to call uprv_strlen().
  * @param sink      the output sink receiving a locale ID for the

diff --git a/icu4c/source/common/unicode/localebuilder.h b/icu4c/source/common/unicode/localebuilder.h
@@ -92,11 +92,12 @@ class U_COMMON_API LocaleBuilder : public UObject {
     /**
      * Resets the LocaleBuilder to match the provided
      * [Unicode Locale Identifier](http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_id) .
-     * Discards the existing state. the empty string cause the builder to be
-     * reset, like {@link #clear}.  Grandfathered tags are converted to their
-     * canonical form before being processed.  Otherwise, the <code>language
-     * tag</code> must be well-formed, or else the build() method will later
-     * report an U_ILLEGAL_ARGUMENT_ERROR.
+     * Discards the existing state.
+     * The empty string causes the builder to be reset, like {@link #clear}.
+     * Legacy language tags (marked as “Type: grandfathered” in BCP 47)
+     * are converted to their canonical form before being processed.
+     * Otherwise, the <code>language tag</code> must be well-formed,
+     * or else the build() method will later report a U_ILLEGAL_ARGUMENT_ERROR.
      *
      * <p>This method clears the internal UErrorCode.
      *

diff --git a/icu4c/source/common/unicode/locid.h b/icu4c/source/common/unicode/locid.h
@@ -393,13 +393,17 @@ class U_COMMON_API Locale : public UObject {
      * If the specified language tag contains any ill-formed subtags,
      * the first such subtag and all following subtags are ignored.
      * <p>
-     * This implements the 'Language-Tag' production of BCP47, and so
-     * supports grandfathered (regular and irregular) as well as private
-     * use language tags.  Private use tags are represented as 'x-whatever',
-     * and grandfathered tags are converted to their canonical replacements
-     * where they exist.  Note that a few grandfathered tags have no modern
-     * replacement, these will be converted using the fallback described in
+     * This implements the 'Language-Tag' production of BCP 47, and so
+     * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+     * (regular and irregular) as well as private use language tags.
+     *
+     * Private use tags are represented as 'x-whatever',
+     * and legacy tags are converted to their canonical replacements where they exist.
+     *
+     * Note that a few legacy tags have no modern replacement;
+     * these will be converted using the fallback described in
      * the first paragraph, so some information might be lost.
+     *
      * @param tag     the input BCP47 language tag.
      * @param status  error information if creating the Locale failed.
      * @return        the Locale for the specified BCP47 language tag.

diff --git a/icu4c/source/common/unicode/uloc.h b/icu4c/source/common/unicode/uloc.h
@@ -1237,14 +1237,18 @@ uloc_minimizeSubtags(const char*    localeID,
  * Returns a locale ID for the specified BCP47 language tag string.
  * If the specified language tag contains any ill-formed subtags,
  * the first such subtag and all following subtags are ignored.
- * <p> 
- * This implements the 'Language-Tag' production of BCP47, and so
- * supports grandfathered (regular and irregular) as well as private
- * use language tags.  Private use tags are represented as 'x-whatever',
- * and grandfathered tags are converted to their canonical replacements
- * where they exist.  Note that a few grandfathered tags have no modern
- * replacement, these will be converted using the fallback described in
+ * <p>
+ * This implements the 'Language-Tag' production of BCP 47, and so
+ * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * (regular and irregular) as well as private use language tags.
+ *
+ * Private use tags are represented as 'x-whatever',
+ * and legacy tags are converted to their canonical replacements where they exist.
+ *
+ * Note that a few legacy tags have no modern replacement;
+ * these will be converted using the fallback described in
  * the first paragraph, so some information might be lost.
+ *
  * @param langtag   the input BCP47 language tag.
  * @param localeID  the output buffer receiving a locale ID for the
  *                  specified BCP47 language tag.

diff --git a/icu4c/source/i18n/calendar.cpp b/icu4c/source/i18n/calendar.cpp
@@ -266,7 +266,7 @@ static ECalType getCalendarTypeForLocale(const char *locid) {
     //TODO: ULOC_FULL_NAME is out of date and too small..
     char canonicalName[256];
 
-    // canonicalize, so grandfathered variant will be transformed to keywords
+    // Canonicalize, so that an old-style variant will be transformed to keywords.
     // e.g ja_JP_TRADITIONAL -> ja_JP@calendar=japanese
     // NOTE: Since ICU-20187, ja_JP_TRADITIONAL no longer canonicalizes, and
     // the Gregorian calendar is returned instead.

diff --git a/icu4c/source/i18n/fmtable_cnv.cpp b/icu4c/source/i18n/fmtable_cnv.cpp
@@ -30,8 +30,6 @@ U_NAMESPACE_BEGIN
 // -------------------------------------
 // Creates a formattable object with a char* string.
 // This API is useless. The API that takes a UnicodeString is actually just as good.
-// This is just a grandfathered API.
-
 Formattable::Formattable(const char* stringToCopy)
 {
     init();

diff --git a/icu4c/source/test/testdata/localeMatcherTest.txt b/icu4c/source/test/testdata/localeMatcherTest.txt
@@ -279,7 +279,7 @@ und-TW >> zh-Hant
 zh-Hant >> und-TW
 zh >> und-TW
 
-** test: testMatchGrandfatheredCode
+** test: testMatchLegacyCode
 
 @supported=fr, i-klingon, en-Latn-US
 en-GB-oed >> en-Latn-US
@@ -984,7 +984,7 @@ x-bork >> x-bork
 x-piglatin >> x-bork
 x-bork >> x-bork
 
-** test: MatchGrandfatheredCode
+** test: MatchLegacyCode
 @supported=fr, i-klingon, en-Latn-US
 en-GB-oed >> en-Latn-US
 i-klingon >> tlh
@@ -1525,7 +1525,7 @@ en >> null
 x-piglatin >> fr
 x-bork >> x-bork
 
-** test: grandfathered codes
+** test: legacy codes
 @supported=fr, i-klingon, en-Latn-US
 en-GB-oed >> en-Latn-US
 i-klingon >> tlh

diff --git a/...ipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LanguageTag.java b/...ipse-build/plugins.template/com.ibm.icu.base/src/com/ibm/icu/impl/locale/LanguageTag.java
@@ -38,13 +38,13 @@ public class LanguageTag {
     private List<String> _variants = Collections.emptyList();   // variant subtags
     private List<String> _extensions = Collections.emptyList(); // extensions
 
-    // Map contains grandfathered tags and its preferred mappings from
-    // http://www.ietf.org/rfc/rfc5646.txt
-    private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> GRANDFATHERED =
+    // The Map contains legacy language tags (marked as “Type: grandfathered” in BCP 47)
+    // and their preferred mappings from BCP 47.
+    private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> LEGACY =
         new HashMap<AsciiUtil.CaseInsensitiveKey, String[]>();
 
     static {
-        // grandfathered = irregular           ; non-redundant tags registered
+        // legacy        = irregular           ; non-redundant tags registered
         //               / regular             ; during the RFC 3066 era
         //
         // irregular     = "en-GB-oed"         ; irregular tags do not match
@@ -105,57 +105,17 @@ public class LanguageTag {
             {"zh-xiang",    "hsn"},
         };
         for (String[] e : entries) {
-            GRANDFATHERED.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e);
+            LEGACY.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e);
         }
     }
 
     private LanguageTag() {
     }
 
-    /*
-     * BNF in RFC5464
-     *  
-     * Language-Tag  = langtag             ; normal language tags
-     *               / privateuse          ; private use tag
-     *               / grandfathered       ; grandfathered tags
-     *
-     * 
-     * langtag       = language
-     *                 ["-" script]
-     *                 ["-" region]
-     *                 *("-" variant)
-     *                 *("-" extension)
-     *                 ["-" privateuse]
-     * 
-     * language      = 2*3ALPHA            ; shortest ISO 639 code
-     *                 ["-" extlang]       ; sometimes followed by
-     *                                     ; extended language subtags
-     *               / 4ALPHA              ; or reserved for future use
-     *               / 5*8ALPHA            ; or registered language subtag
-     * 
-     * extlang       = 3ALPHA              ; selected ISO 639 codes
-     *                 *2("-" 3ALPHA)      ; permanently reserved
-     * 
-     * script        = 4ALPHA              ; ISO 15924 code
-     * 
-     * region        = 2ALPHA              ; ISO 3166-1 code
-     *               / 3DIGIT              ; UN M.49 code
-     * 
-     * variant       = 5*8alphanum         ; registered variants
-     *               / (DIGIT 3alphanum)
-     * 
-     * extension     = singleton 1*("-" (2*8alphanum))
-     * 
-     *                                     ; Single alphanumerics
-     *                                     ; "x" reserved for private use
-     * singleton     = DIGIT               ; 0 - 9
-     *               / %x41-57             ; A - W
-     *               / %x59-5A             ; Y - Z
-     *               / %x61-77             ; a - w
-     *               / %x79-7A             ; y - z
-     * 
-     * privateuse    = "x" 1*("-" (1*8alphanum))
-     * 
+    /**
+     * See BCP 47 “Tags for Identifying Languages”:
+     * https://www.rfc-editor.org/info/bcp47 and, for the tag syntax,
+     * https://www.rfc-editor.org/rfc/rfc5646.html#section-2.1
      */
     public static LanguageTag parse(String languageTag, ParseStatus sts) {
         if (sts == null) {
@@ -166,8 +126,7 @@ public static LanguageTag parse(String languageTag, ParseStatus sts) {
 
         StringTokenIterator itr;
 
-        // Check if the tag is grandfathered
-        String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag));
+        String[] gfmap = LEGACY.get(new AsciiUtil.CaseInsensitiveKey(languageTag));
         if (gfmap != null) {
             // use preferred mapping
             itr = new StringTokenIterator(gfmap[1], SEP);