Avoid character classification in regex escape parsing.

For regex escape sequences, just test directly for the relevant ASCII characters rather than using locale-sensitive character classification.

This fixes an assertion failure when a locale considers a non-ASCII character, such as "൧", to be a digit.

Reported-by: Richard Guo
Discussion: https://postgr.es/m/CAMbWs49Q6UoKGeT8pBkMtJGJd+16CBFZaaWUk9Du+2ERE5g_YA@mail.gmail.com
Backpatch-through: 11
postgres · Apr 21, 2023 · 106a1bf · 106a1bf
1 parent c976ccc
commit 106a1bf
Showing 1 changed file with 10 additions and 3 deletions.
diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c
@@ -750,7 +750,11 @@ lexescape(struct vars *v)
 
 	assert(!ATEOS());
 	c = *v->now++;
-	if (!iscalnum(c))
+
+	/* if it's not alphanumeric ASCII, treat it as a plain character */
+	if (!('a' <= c && c <= 'z') &&
+		!('A' <= c && c <= 'Z') &&
+		!('0' <= c && c <= '9'))
 		RETV(PLAIN, c);
 
 	NOTE(REG_UNONPOSIX);
@@ -892,8 +896,11 @@ lexescape(struct vars *v)
 			RETV(PLAIN, c);
 			break;
 		default:
-			assert(iscalpha(c));
-			FAILW(REG_EESCAPE); /* unknown alphabetic escape */
+			/*
+			 * Throw an error for unrecognized ASCII alpha escape sequences,
+			 * which reserves them for future use if needed.
+			 */
+			FAILW(REG_EESCAPE);
 			break;
 	}
 	assert(NOTREACHED);