Support TZ and OF format codes in to_timestamp().

Formerly, these were only supported in to_char(), but there seems little reason for that restriction. We should at least have enough support to permit round-tripping the output of to_char(). In that spirit, TZ accepts either zone abbreviations or numeric (HH or HH:MM) offsets, which are the cases that to_char() can output. In an ideal world we'd make it take full zone names too, but that seems like it'd introduce an unreasonable amount of ambiguity, since the rules for POSIX-spec zone names are so lax. OF is a subset of this, accepting only HH or HH:MM. One small benefit of this improvement is that we can simplify jsonpath's executeDateTimeMethod function, which no longer needs to consider the HH and HH:MM cases separately. Moreover, letting it accept zone abbreviations means it will accept "Z" to mean UTC, which is emitted by JSON.stringify() for example. Patch by me, reviewed by Aleksander Alekseev and Daniel Gustafsson. Discussion: https://postgr.es/m/1681086.1686673242@sss.pgh.pa.us
postgres · Jan 25, 2024 · 8ba6fdf · 8ba6fdf
1 parent 06a66d8
commit 8ba6fdf
Show file tree

Hide file tree

Showing 9 changed files with 318 additions and 74 deletions.
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
@@ -8131,13 +8131,11 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
        </row>
        <row>
         <entry><literal>TZ</literal></entry>
-        <entry>upper case time-zone abbreviation
-         (only supported in <function>to_char</function>)</entry>
+        <entry>upper case time-zone abbreviation</entry>
        </row>
        <row>
         <entry><literal>tz</literal></entry>
-        <entry>lower case time-zone abbreviation
-         (only supported in <function>to_char</function>)</entry>
+        <entry>lower case time-zone abbreviation</entry>
        </row>
        <row>
        <entry><literal>TZH</literal></entry>
@@ -8149,8 +8147,8 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
        </row>
        <row>
         <entry><literal>OF</literal></entry>
-        <entry>time-zone offset from UTC
-         (only supported in <function>to_char</function>)</entry>
+        <entry>time-zone offset from UTC (<replaceable>HH</replaceable>
+         or <replaceable>HH</replaceable><literal>:</literal><replaceable>MM</replaceable>)</entry>
        </row>
       </tbody>
      </tgroup>

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
@@ -3246,6 +3246,82 @@ DecodeTimezoneNameToTz(const char *tzname)
 	return result;
 }
 
+/* DecodeTimezoneAbbrevPrefix()
+ * Interpret prefix of string as a timezone abbreviation, if possible.
+ *
+ * This has roughly the same functionality as DecodeTimezoneAbbrev(), but
+ * the API is adapted to the needs of formatting.c.  Notably, we will match
+ * the longest possible prefix of the given string rather than insisting on
+ * a complete match, and downcasing is applied here rather than in the
+ * caller.  Only leading isalpha() characters, at most TOKMAXLEN, are used.
+ *
+ * Returns the length of the timezone abbreviation, or -1 if not recognized.
+ * On success, sets *offset to the GMT offset for the abbreviation if it is
+ * a fixed-offset abbreviation, or sets *tz to the pg_tz struct for a dynamic
+ * abbreviation.  Whichever output is not set that way is left as 0 / NULL.
+ */
+int
+DecodeTimezoneAbbrevPrefix(const char *str, int *offset, pg_tz **tz)
+{
+	char		lowtoken[TOKMAXLEN + 1];	/* downcased candidate prefix */
+	int			len;			/* current length of lowtoken */
+
+	*offset = 0;				/* avoid uninitialized vars on failure */
+	*tz = NULL;
+
+	if (!zoneabbrevtbl)
+		return -1;				/* no abbrevs known, so fail immediately */
+
+	/* Downcase as much of the string as we could need */
+	for (len = 0; len < TOKMAXLEN; len++)
+	{
+		if (*str == '\0' || !isalpha((unsigned char) *str))
+			break;				/* end of string, or not alphabetic */
+		lowtoken[len] = pg_tolower((unsigned char) *str++);
+	}
+	lowtoken[len] = '\0';
+
+	/*
+	 * We could avoid doing repeated binary searches if we cared to duplicate
+	 * datebsearch here, but it's not clear that such an optimization would be
+	 * worth the trouble.  In common cases there's probably not anything after
+	 * the zone abbrev anyway.  So just search with successively truncated
+	 * strings.
+	 */
+	while (len > 0)				/* longest candidate is tried first */
+	{
+		const datetkn *tp = datebsearch(lowtoken, zoneabbrevtbl->abbrevs,
+										zoneabbrevtbl->numabbrevs);
+
+		if (tp != NULL)
+		{
+			if (tp->type == DYNTZ)
+			{
+				DateTimeErrorExtra extra;	/* not examined here */
+				pg_tz	   *tzp = FetchDynamicTimeZone(zoneabbrevtbl, tp,
+													   &extra);
+
+				if (tzp != NULL)
+				{
+					/* Caller must resolve the abbrev's current meaning */
+					*tz = tzp;
+					return len;
+				}
+			}					/* if tzp is NULL, go on to a shorter prefix */
+			else
+			{
+				/* Fixed-offset zone abbrev, so it's easy */
+				*offset = tp->value;
+				return len;
+			}
+		}
+		lowtoken[--len] = '\0';	/* drop the last character and retry */
+	}
+
+	/* Did not find a match */
+	return -1;
+}
+
 
 /* ClearPgItmIn
  *