From 106a1bf824ce80c434dbd4cee94e8b66e07e1231 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 21 Apr 2023 08:19:41 -0700 Subject: [PATCH] Avoid character classification in regex escape parsing. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For regex escape sequences, just test directly for the relevant ASCII characters rather than using locale-sensitive character classification. This fixes an assertion failure when a locale considers a non-ASCII character, such as "൧", to be a digit. Reported-by: Richard Guo Discussion: https://postgr.es/m/CAMbWs49Q6UoKGeT8pBkMtJGJd+16CBFZaaWUk9Du+2ERE5g_YA@mail.gmail.com Backpatch-through: 11 --- src/backend/regex/regc_lex.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c index d573032db67a6..4e6fa5c61325d 100644 --- a/src/backend/regex/regc_lex.c +++ b/src/backend/regex/regc_lex.c @@ -750,7 +750,11 @@ lexescape(struct vars *v) assert(!ATEOS()); c = *v->now++; - if (!iscalnum(c)) + + /* if it's not alphanumeric ASCII, treat it as a plain character */ + if (!('a' <= c && c <= 'z') && + !('A' <= c && c <= 'Z') && + !('0' <= c && c <= '9')) RETV(PLAIN, c); NOTE(REG_UNONPOSIX); @@ -892,8 +896,11 @@ lexescape(struct vars *v) RETV(PLAIN, c); break; default: - assert(iscalpha(c)); - FAILW(REG_EESCAPE); /* unknown alphabetic escape */ + /* + * Throw an error for unrecognized ASCII alpha escape sequences, + * which reserves them for future use if needed. + */ + FAILW(REG_EESCAPE); break; } assert(NOTREACHED);