Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fix encode(...bytea..., 'escape') so that it converts all high-bit-set byte
values into \nnn octal escape sequences.  When the database encoding is
multibyte this is *necessary* to avoid generating invalidly encoded text.
Even in a single-byte encoding, the old behavior seems very hazardous ---
consider for example what happens if the text is transferred to another
database with a different encoding.  Decoding would then yield some other
bytea value than what was encoded, which is surely undesirable.  Per gripe
from Hernan Gonzalez.

Backpatch to 8.3, but not further.  This is a bit of a judgment call, but I
make it on these grounds: pre-8.3 we don't really have much encoding safety
anyway because of the convert() function family, and we would also have much
higher risk of breaking existing apps that may not be expecting this behavior.
8.3 is still new enough that we can probably get away with making this change
in the function's behavior.
  • Loading branch information...
commit fd15dba543247eb1ce879d22632b9fdb4c230831 1 parent bc93919
Tom Lane authored

Showing 1 changed file with 21 additions and 16 deletions. Show diff stats Hide diff stats

  1. +21 -16 src/backend/utils/adt/encode.c
37 src/backend/utils/adt/encode.c
@@ -7,7 +7,7 @@
7 7 *
8 8 *
9 9 * IDENTIFICATION
10   - * $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.20 2008/01/01 19:45:52 momjian Exp $
  10 + * $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.21 2008/02/26 02:54:08 tgl Exp $
11 11 *
12 12 *-------------------------------------------------------------------------
13 13 */
@@ -26,7 +26,7 @@ struct pg_encoding
26 26 unsigned (*decode) (const char *data, unsigned dlen, char *res);
27 27 };
28 28
29   -static struct pg_encoding *pg_find_encoding(const char *name);
  29 +static const struct pg_encoding *pg_find_encoding(const char *name);
30 30
31 31 /*
32 32 * SQL functions.
@@ -42,7 +42,7 @@ binary_encode(PG_FUNCTION_ARGS)
42 42 int datalen,
43 43 resultlen,
44 44 res;
45   - struct pg_encoding *enc;
  45 + const struct pg_encoding *enc;
46 46
47 47 datalen = VARSIZE(data) - VARHDRSZ;
48 48
@@ -78,7 +78,7 @@ binary_decode(PG_FUNCTION_ARGS)
78 78 int datalen,
79 79 resultlen,
80 80 res;
81   - struct pg_encoding *enc;
  81 + const struct pg_encoding *enc;
82 82
83 83 datalen = VARSIZE(data) - VARHDRSZ;
84 84
@@ -348,10 +348,13 @@ b64_dec_len(const char *src, unsigned srclen)
348 348 * Minimally escape bytea to text.
349 349 * De-escape text to bytea.
350 350 *
351   - * Only two characters are escaped:
352   - * \0 (null) and \\ (backslash)
  351 + * We must escape zero bytes and high-bit-set bytes to avoid generating
  352 + * text that might be invalid in the current encoding, or that might
  353 + * change to something else if passed through an encoding conversion
  354 + * (leading to failing to de-escape to the original bytea value).
  355 + * Also of course backslash itself has to be escaped.
353 356 *
354   - * De-escapes \\ and any \### octal
  357 + * De-escaping processes \\ and any \### octal
355 358 */
356 359
357 360 #define VAL(CH) ((CH) - '0')
@@ -366,16 +369,18 @@ esc_encode(const char *src, unsigned srclen, char *dst)
366 369
367 370 while (src < end)
368 371 {
369   - if (*src == '\0')
  372 + unsigned char c = (unsigned char) *src;
  373 +
  374 + if (c == '\0' || IS_HIGHBIT_SET(c))
370 375 {
371 376 rp[0] = '\\';
372   - rp[1] = '0';
373   - rp[2] = '0';
374   - rp[3] = '0';
  377 + rp[1] = DIG(c >> 6);
  378 + rp[2] = DIG((c >> 3) & 7);
  379 + rp[3] = DIG(c & 7);
375 380 rp += 4;
376 381 len += 4;
377 382 }
378   - else if (*src == '\\')
  383 + else if (c == '\\')
379 384 {
380 385 rp[0] = '\\';
381 386 rp[1] = '\\';
@@ -384,7 +389,7 @@ esc_encode(const char *src, unsigned srclen, char *dst)
384 389 }
385 390 else
386 391 {
387   - *rp++ = *src;
  392 + *rp++ = c;
388 393 len++;
389 394 }
390 395
@@ -450,7 +455,7 @@ esc_enc_len(const char *src, unsigned srclen)
450 455
451 456 while (src < end)
452 457 {
453   - if (*src == '\0')
  458 + if (*src == '\0' || IS_HIGHBIT_SET(*src))
454 459 len += 4;
455 460 else if (*src == '\\')
456 461 len += 2;
@@ -510,7 +515,7 @@ esc_dec_len(const char *src, unsigned srclen)
510 515 * Common
511 516 */
512 517
513   -static struct
  518 +static const struct
514 519 {
515 520 const char *name;
516 521 struct pg_encoding enc;
@@ -543,7 +548,7 @@ static struct
543 548 }
544 549 };
545 550
546   -static struct pg_encoding *
  551 +static const struct pg_encoding *
547 552 pg_find_encoding(const char *name)
548 553 {
549 554 int i;

0 comments on commit fd15dba

Please sign in to comment.
Something went wrong with that request. Please try again.