diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c index 7a95465111ad1..9a00499510929 100644 --- a/src/bin/psql/common.c +++ b/src/bin/psql/common.c @@ -1846,7 +1846,7 @@ skip_white_space(const char *query) while (*query) { - int mblen = PQmblen(query, pset.encoding); + int mblen = PQmblenBounded(query, pset.encoding); /* * Note: we assume the encoding is a superset of ASCII, so that for @@ -1883,7 +1883,7 @@ skip_white_space(const char *query) query++; break; } - query += PQmblen(query, pset.encoding); + query += PQmblenBounded(query, pset.encoding); } } else if (cnestlevel > 0) @@ -1918,7 +1918,7 @@ command_no_begin(const char *query) */ wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); /* * Transaction control commands. These should include every keyword that @@ -1949,7 +1949,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 11 && pg_strncasecmp(query, "transaction", 11) == 0) return true; @@ -1983,7 +1983,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 8 && pg_strncasecmp(query, "database", 8) == 0) return true; @@ -1999,7 +1999,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); } if (wordlen == 5 && pg_strncasecmp(query, "index", 5) == 0) @@ -2010,7 +2010,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0) return true; @@ -2027,7 +2027,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); /* ALTER SYSTEM isn't allowed in xacts */ if (wordlen == 6 && pg_strncasecmp(query, "system", 6) == 0) @@ -2050,7 +2050,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 8 && pg_strncasecmp(query, "database", 8) == 0) return true; @@ -2065,7 +2065,7 @@ command_no_begin(const char *query) query = skip_white_space(query); wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); /* * REINDEX [ TABLE | INDEX ] CONCURRENTLY are not allowed in @@ -2084,7 +2084,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0) return true; @@ -2104,7 +2104,7 @@ command_no_begin(const char *query) wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 3 && pg_strncasecmp(query, "all", 3) == 0) return true; @@ -2140,7 +2140,7 @@ is_select_command(const char *query) */ wordlen = 0; while (isalpha((unsigned char) query[wordlen])) - wordlen += PQmblen(&query[wordlen], pset.encoding); + wordlen += PQmblenBounded(&query[wordlen], pset.encoding); if (wordlen == 6 && pg_strncasecmp(query, "select", 6) == 0) return true; diff --git a/src/bin/psql/psqlscanslash.l b/src/bin/psql/psqlscanslash.l index 4bb18f132f4f4..51aa33e1611ef 100644 --- a/src/bin/psql/psqlscanslash.l +++ b/src/bin/psql/psqlscanslash.l @@ -753,7 +753,7 @@ dequote_downcase_identifier(char *str, bool downcase, int encoding) { if (downcase && !inquotes) *cp = pg_tolower((unsigned char) *cp); - cp += PQmblen(cp, encoding); + cp += PQmblenBounded(cp, encoding); } } } diff --git a/src/bin/psql/stringutils.c b/src/bin/psql/stringutils.c index 0acc53801cb4a..3a141cdc2b393 100644 --- a/src/bin/psql/stringutils.c +++ b/src/bin/psql/stringutils.c @@ -143,7 +143,7 @@ strtokx(const char *s, /* okay, we have a quoted token, now scan for the closer */ char thisquote = *p++; - for (; *p; p += PQmblen(p, encoding)) + for (; *p; p += PQmblenBounded(p, encoding)) { if (*p == escape && p[1] != '\0') p++; /* process escaped anything */ @@ -262,7 +262,7 @@ strip_quotes(char *source, char quote, char escape, int encoding) else if (c == escape && src[1] != '\0') src++; /* process escaped character */ - i = PQmblen(src, encoding); + i = PQmblenBounded(src, encoding); while (i--) *dst++ = *src++; } @@ -324,7 +324,7 @@ quote_if_needed(const char *source, const char *entails_quote, else if (strchr(entails_quote, c)) need_quotes = true; - i = PQmblen(src, encoding); + i = PQmblenBounded(src, encoding); while (i--) *dst++ = *src++; } diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 109b22acb6ba5..32c1bdfdca743 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -4397,7 +4397,7 @@ _complete_from_query(const char *simple_query, while (*pstr) { char_length++; - pstr += PQmblen(pstr, pset.encoding); + pstr += PQmblenBounded(pstr, pset.encoding); } /* Free any prior result */ diff --git a/src/bin/scripts/common.c b/src/bin/scripts/common.c index c86c19eae28b3..79cdc6cf33076 100644 --- a/src/bin/scripts/common.c +++ b/src/bin/scripts/common.c @@ -52,7 +52,7 @@ splitTableColumnsSpec(const char *spec, int encoding, cp++; } else - cp += PQmblen(cp, encoding); + cp += PQmblenBounded(cp, encoding); } *table = pnstrdup(spec, cp - spec); *columns = cp; diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c index 1bf38d7b4295e..d376ab152d48d 100644 --- a/src/common/jsonapi.c +++ b/src/common/jsonapi.c @@ -740,7 +740,7 @@ json_lex_string(JsonLexContext *lex) ch = (ch * 16) + (*s - 'A') + 10; else { - lex->token_terminator = s + pg_encoding_mblen(lex->input_encoding, s); + lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s); return JSON_UNICODE_ESCAPE_FORMAT; } } @@ -846,7 +846,7 @@ json_lex_string(JsonLexContext *lex) default: /* Not a valid string escape, so signal error. */ lex->token_start = s; - lex->token_terminator = s + pg_encoding_mblen(lex->input_encoding, s); + lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s); return JSON_ESCAPING_INVALID; } } @@ -860,7 +860,7 @@ json_lex_string(JsonLexContext *lex) * shown it's not a performance win. */ lex->token_start = s; - lex->token_terminator = s + pg_encoding_mblen(lex->input_encoding, s); + lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s); return JSON_ESCAPING_INVALID; } diff --git a/src/common/wchar.c b/src/common/wchar.c index 6e7d731e020fe..0636b8765ba35 100644 --- a/src/common/wchar.c +++ b/src/common/wchar.c @@ -1911,6 +1911,11 @@ const pg_wchar_tbl pg_wchar_table[] = { /* * Returns the byte length of a multibyte character. + * + * Caution: when dealing with text that is not certainly valid in the + * specified encoding, the result may exceed the actual remaining + * string length. Callers that are not prepared to deal with that + * should use pg_encoding_mblen_bounded() instead. */ int pg_encoding_mblen(int encoding, const char *mbstr) @@ -1920,6 +1925,16 @@ pg_encoding_mblen(int encoding, const char *mbstr) pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr)); } +/* + * Returns the byte length of a multibyte character; but not more than + * the distance to end of string. + */ +int +pg_encoding_mblen_bounded(int encoding, const char *mbstr) +{ + return strnlen(mbstr, pg_encoding_mblen(encoding, mbstr)); +} + /* * Returns the display length of a multibyte character. */ diff --git a/src/fe_utils/print.c b/src/fe_utils/print.c index 273b1bfe4a49f..d48fcc4a0328d 100644 --- a/src/fe_utils/print.c +++ b/src/fe_utils/print.c @@ -3636,6 +3636,9 @@ strlen_max_width(unsigned char *str, int *target_width, int encoding) curr_width += char_width; str += PQmblen((char *) str, encoding); + + if (str > end) /* Don't overrun invalid string */ + str = end; } *target_width = curr_width; diff --git a/src/fe_utils/string_utils.c b/src/fe_utils/string_utils.c index 5b206c7481d79..3efee4e7eed7f 100644 --- a/src/fe_utils/string_utils.c +++ b/src/fe_utils/string_utils.c @@ -1072,12 +1072,9 @@ patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf, PQExpBuffer schemabuf, appendPQExpBufferChar(curbuf, '\\'); else if (ch == '[' && cp[1] == ']') appendPQExpBufferChar(curbuf, '\\'); - i = PQmblen(cp, encoding); - while (i-- && *cp) - { - appendPQExpBufferChar(curbuf, *cp); - cp++; - } + i = PQmblenBounded(cp, encoding); + while (i--) + appendPQExpBufferChar(curbuf, *cp++); } } appendPQExpBufferStr(curbuf, ")$"); diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index 0f31e683189d7..d93ccac263338 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -574,6 +574,7 @@ extern int pg_valid_server_encoding_id(int encoding); * earlier in this file are also available from libpgcommon. */ extern int pg_encoding_mblen(int encoding, const char *mbstr); +extern int pg_encoding_mblen_bounded(int encoding, const char *mbstr); extern int pg_encoding_dsplen(int encoding, const char *mbstr); extern int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len); extern int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len); diff --git a/src/interfaces/libpq/exports.txt b/src/interfaces/libpq/exports.txt index a00701f2c5fe6..9ef99f6de127b 100644 --- a/src/interfaces/libpq/exports.txt +++ b/src/interfaces/libpq/exports.txt @@ -184,3 +184,4 @@ PQexitPipelineMode 181 PQpipelineSync 182 PQpipelineStatus 183 PQtraceSetFlags 184 +PQmblenBounded 185 diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c index b347d7f847937..9a2a97029340f 100644 --- a/src/interfaces/libpq/fe-misc.c +++ b/src/interfaces/libpq/fe-misc.c @@ -1180,8 +1180,13 @@ pqSocketPoll(int sock, int forRead, int forWrite, time_t end_time) */ /* - * returns the byte length of the character beginning at s, using the + * Returns the byte length of the character beginning at s, using the * specified encoding. + * + * Caution: when dealing with text that is not certainly valid in the + * specified encoding, the result may exceed the actual remaining + * string length. Callers that are not prepared to deal with that + * should use PQmblenBounded() instead. */ int PQmblen(const char *s, int encoding) @@ -1190,7 +1195,17 @@ PQmblen(const char *s, int encoding) } /* - * returns the display length of the character beginning at s, using the + * Returns the byte length of the character beginning at s, using the + * specified encoding; but not more than the distance to end of string. + */ +int +PQmblenBounded(const char *s, int encoding) +{ + return strnlen(s, pg_encoding_mblen(encoding, s)); +} + +/* + * Returns the display length of the character beginning at s, using the * specified encoding. */ int diff --git a/src/interfaces/libpq/fe-print.c b/src/interfaces/libpq/fe-print.c index 94219b1825bcb..fc7d84844e104 100644 --- a/src/interfaces/libpq/fe-print.c +++ b/src/interfaces/libpq/fe-print.c @@ -365,7 +365,7 @@ do_field(const PQprintOpt *po, const PGresult *res, /* Detect whether field contains non-numeric data */ char ch = '0'; - for (p = pval; *p; p += PQmblen(p, res->client_encoding)) + for (p = pval; *p; p += PQmblenBounded(p, res->client_encoding)) { ch = *p; if (!((ch >= '0' && ch <= '9') || diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index b45fb7e70593a..2e8330534873a 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -1296,7 +1296,7 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding) if (w <= 0) w = 1; scroffset += w; - qoffset += pg_encoding_mblen(encoding, &wquery[qoffset]); + qoffset += PQmblenBounded(&wquery[qoffset], encoding); } else { @@ -1364,7 +1364,7 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding) * width. */ scroffset = 0; - for (; i < msg->len; i += pg_encoding_mblen(encoding, &msg->data[i])) + for (; i < msg->len; i += PQmblenBounded(&msg->data[i], encoding)) { int w = pg_encoding_dsplen(encoding, &msg->data[i]); diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h index 227adde5a5e42..845b4c04c9c2f 100644 --- a/src/interfaces/libpq/libpq-fe.h +++ b/src/interfaces/libpq/libpq-fe.h @@ -625,6 +625,9 @@ extern int PQlibVersion(void); /* Determine length of multibyte encoded char at *s */ extern int PQmblen(const char *s, int encoding); +/* Same, but not more than the distance to the end of string s */ +extern int PQmblenBounded(const char *s, int encoding); + /* Determine display length of multibyte encoded char at *s */ extern int PQdsplen(const char *s, int encoding);