Skip to content

Commit 33a2e5a

Browse files
author
Sreeharsha Ramanavarapu
committed
Bug #20238729: ILLEGALLY CRAFTED UTF8 SELECT PROVIDES NO
WARNINGS Backporting to 5.1 and 5.5
1 parent 8f87d6c commit 33a2e5a

File tree

9 files changed

+164
-46
lines changed

9 files changed

+164
-46
lines changed
Binary file not shown.

mysql-test/suite/sys_vars/r/character_set_connection_func.result

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,17 @@ SET @@session.character_set_connection = latin1;
2323
SELECT 'ЁЂЃЄ' AS utf_text;
2424
utf_text
2525
????
26+
Warnings:
27+
Warning 1105 Can't convert the character string from utf8 to latin1: '\xD0\x81\xD0\x82\xD0\x83...'
2628
SET @@session.character_set_connection = utf8;
2729
SELECT 'ЁЂЃЄ' AS utf_text;
2830
utf_text
2931
ЁЂЃЄ
3032
'---now inserting utf8 string with different character_set_connection--'
3133
SET @@session.character_set_connection = ascii;
3234
INSERT INTO t1 VALUES('ЁЂЃЄ');
35+
Warnings:
36+
Warning 1105 Can't convert the character string from utf8 to ascii: '\xD0\x81\xD0\x82\xD0\x83...'
3337
SELECT * FROM t1;
3438
b
3539
????
@@ -39,6 +43,8 @@ SET @@session.character_set_connection = ascii;
3943
SET @@session.character_set_client = latin1;
4044
SET @@session.character_set_results = latin1;
4145
INSERT INTO t1 VALUES('ЁЂЃЄ');
46+
Warnings:
47+
Warning 1105 Can't convert the character string from latin1 to ascii: '\xD0\x81\xD0\x82\xD0\x83...'
4248
SELECT * FROM t1;
4349
b
4450
????????

sql/item.cc

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
2+
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License as published by
@@ -4807,39 +4807,60 @@ bool Item::is_datetime()
48074807
}
48084808

48094809

4810-
String *Item::check_well_formed_result(String *str, bool send_error)
4810+
/**
4811+
Verifies that the input string is well-formed according to its character set.
4812+
@param send_error If true, call my_error if string is not well-formed.
4813+
@param truncate If true, set to null/truncate if not well-formed.
4814+
4815+
@return
4816+
If well-formed: input string.
4817+
If not well-formed:
4818+
if truncate is true and strict mode: NULL pointer and we set this
4819+
Item's value to NULL.
4820+
if truncate is true and not strict mode: input string truncated up to
4821+
last good character.
4822+
if truncate is false: input string is returned.
4823+
*/
4824+
String *Item::check_well_formed_result(String *str,
4825+
bool send_error,
4826+
bool truncate)
48114827
{
48124828
/* Check whether we got a well-formed string */
48134829
CHARSET_INFO *cs= str->charset();
4814-
int well_formed_error;
4815-
uint wlen= cs->cset->well_formed_len(cs,
4816-
str->ptr(), str->ptr() + str->length(),
4817-
str->length(), &well_formed_error);
4818-
if (wlen < str->length())
4830+
4831+
size_t valid_length;
4832+
bool length_error;
4833+
4834+
if (validate_string(cs, str->ptr(), str->length(),
4835+
&valid_length, &length_error))
48194836
{
4837+
const char *str_end= str->ptr() + str->length();
4838+
const char *print_byte= str->ptr() + valid_length;
48204839
THD *thd= current_thd;
48214840
char hexbuf[7];
4822-
enum MYSQL_ERROR::enum_warning_level level;
4823-
uint diff= str->length() - wlen;
4841+
enum MYSQL_ERROR::enum_warning_level level= MYSQL_ERROR::WARN_LEVEL_WARN;
4842+
uint diff= str_end - print_byte;
48244843
set_if_smaller(diff, 3);
4825-
octet2hex(hexbuf, str->ptr() + wlen, diff);
4826-
if (send_error)
4844+
octet2hex(hexbuf, print_byte, diff);
4845+
if (send_error && length_error)
48274846
{
48284847
my_error(ER_INVALID_CHARACTER_STRING, MYF(0),
48294848
cs->csname, hexbuf);
48304849
return 0;
48314850
}
4832-
if ((thd->variables.sql_mode &
4833-
(MODE_STRICT_TRANS_TABLES | MODE_STRICT_ALL_TABLES)))
4851+
if (truncate && length_error)
48344852
{
4835-
level= MYSQL_ERROR::WARN_LEVEL_ERROR;
4836-
null_value= 1;
4837-
str= 0;
4838-
}
4839-
else
4840-
{
4841-
level= MYSQL_ERROR::WARN_LEVEL_WARN;
4842-
str->length(wlen);
4853+
if ((thd->variables.sql_mode &
4854+
(MODE_STRICT_TRANS_TABLES | MODE_STRICT_ALL_TABLES)))
4855+
{
4856+
level= MYSQL_ERROR::WARN_LEVEL_ERROR;
4857+
null_value= 1;
4858+
str= 0;
4859+
}
4860+
else
4861+
{
4862+
str->length(valid_length);
4863+
}
48434864
}
48444865
push_warning_printf(thd, level, ER_INVALID_CHARACTER_STRING,
48454866
ER(ER_INVALID_CHARACTER_STRING), cs->csname, hexbuf);

sql/item.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1052,7 +1052,9 @@ class Item {
10521052
bool is_datetime();
10531053
virtual Field::geometry_type get_geometry_type() const
10541054
{ return Field::GEOM_GEOMETRY; };
1055-
String *check_well_formed_result(String *str, bool send_error= 0);
1055+
String *check_well_formed_result(String *str,
1056+
bool send_error,
1057+
bool truncate);
10561058
bool eq_by_collation(Item *item, bool binary_cmp, CHARSET_INFO *cs);
10571059

10581060
/**
@@ -1929,6 +1931,11 @@ class Item_string :public Item_basic_constant
19291931
decimals=NOT_FIXED_DEC;
19301932
// it is constant => can be used without fix_fields (and frequently used)
19311933
fixed= 1;
1934+
/*
1935+
Check if the string has any character that can't be
1936+
interpreted using the relevant charset.
1937+
*/
1938+
check_well_formed_result(&str_value, false, false);
19321939
}
19331940
/* Just create an item and do not fill string representation */
19341941
Item_string(CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)

sql/item_strfunc.cc

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
2+
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License as published by
@@ -2373,7 +2373,9 @@ String *Item_func_char::val_str(String *str)
23732373
}
23742374
}
23752375
str->realloc(str->length()); // Add end 0 (for Purify)
2376-
return check_well_formed_result(str);
2376+
return check_well_formed_result(str,
2377+
false, // send warning
2378+
true); // truncate
23772379
}
23782380

23792381

@@ -2773,7 +2775,9 @@ String *Item_func_conv_charset::val_str(String *str)
27732775
}
27742776
null_value= tmp_value.copy(arg->ptr(), arg->length(), arg->charset(),
27752777
conv_charset, &dummy_errors);
2776-
return null_value ? 0 : check_well_formed_result(&tmp_value);
2778+
return null_value ? 0 : check_well_formed_result(&tmp_value,
2779+
false, // send warning
2780+
true); // truncate
27772781
}
27782782

27792783
void Item_func_conv_charset::fix_length_and_dec()

sql/sql_class.cc

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
2+
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License as published by
@@ -1313,21 +1313,17 @@ LEX_STRING *THD::make_lex_string(LEX_STRING *lex_str,
13131313
/*
13141314
Convert a string to another character set
13151315
1316-
SYNOPSIS
1317-
convert_string()
1318-
to Store new allocated string here
1319-
to_cs New character set for allocated string
1320-
from String to convert
1321-
from_length Length of string to convert
1322-
from_cs Original character set
1316+
@param to Store new allocated string here
1317+
@param to_cs New character set for allocated string
1318+
@param from String to convert
1319+
@param from_length Length of string to convert
1320+
@param from_cs Original character set
13231321
1324-
NOTES
1325-
to will be 0-terminated to make it easy to pass to system funcs
1322+
@note to will be 0-terminated to make it easy to pass to system funcs
13261323
1327-
RETURN
1328-
0 ok
1329-
1 End of memory.
1330-
In this case to->str will point to 0 and to->length will be 0.
1324+
@retval false ok
1325+
@retval true End of memory.
1326+
In this case to->str will point to 0 and to->length will be 0.
13311327
*/
13321328

13331329
bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
@@ -1336,15 +1332,25 @@ bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
13361332
{
13371333
DBUG_ENTER("convert_string");
13381334
size_t new_length= to_cs->mbmaxlen * from_length;
1339-
uint dummy_errors;
1335+
uint errors= 0;
13401336
if (!(to->str= (char*) alloc(new_length+1)))
13411337
{
13421338
to->length= 0; // Safety fix
13431339
DBUG_RETURN(1); // EOM
13441340
}
13451341
to->length= copy_and_convert((char*) to->str, new_length, to_cs,
1346-
from, from_length, from_cs, &dummy_errors);
1342+
from, from_length, from_cs, &errors);
13471343
to->str[to->length]=0; // Safety
1344+
if (errors != 0)
1345+
{
1346+
char printable_buff[32];
1347+
convert_to_printable(printable_buff, sizeof(printable_buff),
1348+
from, from_length, from_cs, 6);
1349+
push_warning_printf(this, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
1350+
"Can't convert the character string from %s to %s: '%.64s'",
1351+
from_cs->csname, to_cs->csname, printable_buff);
1352+
}
1353+
13481354
DBUG_RETURN(0);
13491355
}
13501356

sql/sql_string.cc

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
2+
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License as published by
@@ -1231,3 +1231,69 @@ uint convert_to_printable(char *to, size_t to_len,
12311231
*t= '\0';
12321232
return t - to;
12331233
}
1234+
1235+
/**
1236+
Check if an input byte sequence is a valid character string of a given charset
1237+
1238+
@param cs The input character set.
1239+
@param str The input byte sequence to validate.
1240+
@param length A byte length of the str.
1241+
@param [out] valid_length A byte length of a valid prefix of the str.
1242+
@param [out] length_error True in the case of a character length error:
1243+
some byte[s] in the input is not a valid
1244+
prefix for a character, i.e. the byte length
1245+
of that invalid character is undefined.
1246+
1247+
@retval false if the whole input byte sequence is a valid character string.
1248+
The length_error output parameter is undefined.
1249+
1250+
@return
1251+
if the whole input byte sequence is a valid character string
1252+
then
1253+
return false
1254+
else
1255+
if the length of some character in the input is undefined (MY_CS_ILSEQ)
1256+
or the last character is truncated (MY_CS_TOOSMALL)
1257+
then
1258+
*length_error= true; // fatal error!
1259+
else
1260+
*length_error= false; // non-fatal error: there is no wide character
1261+
// encoding for some input character
1262+
return true
1263+
*/
1264+
bool validate_string(CHARSET_INFO *cs, const char *str, uint32 length,
1265+
size_t *valid_length, bool *length_error)
1266+
{
1267+
if (cs->mbmaxlen > 1)
1268+
{
1269+
int well_formed_error;
1270+
*valid_length= cs->cset->well_formed_len(cs, str, str + length,
1271+
length, &well_formed_error);
1272+
*length_error= well_formed_error;
1273+
return well_formed_error;
1274+
}
1275+
1276+
/*
1277+
well_formed_len() is not functional on single-byte character sets,
1278+
so use mb_wc() instead:
1279+
*/
1280+
*length_error= false;
1281+
1282+
const uchar *from= reinterpret_cast<const uchar *>(str);
1283+
const uchar *from_end= from + length;
1284+
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
1285+
1286+
while (from < from_end)
1287+
{
1288+
my_wc_t wc;
1289+
int cnvres= (*mb_wc)(cs, &wc, (uchar*) from, from_end);
1290+
if (cnvres <= 0)
1291+
{
1292+
*valid_length= from - reinterpret_cast<const uchar *>(str);
1293+
return true;
1294+
}
1295+
from+= cnvres;
1296+
}
1297+
*valid_length= length;
1298+
return false;
1299+
}

sql/sql_string.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
2+
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License as published by
@@ -413,3 +413,7 @@ static inline bool check_if_only_end_space(CHARSET_INFO *cs, char *str,
413413
{
414414
return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
415415
}
416+
417+
bool
418+
validate_string(CHARSET_INFO *cs, const char *str, uint32 length,
419+
size_t *valid_length, bool *length_error);

sql/sql_yacc.yy

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
2+
Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
33
44
This program is free software; you can redistribute it and/or modify
55
it under the terms of the GNU General Public License as published by
@@ -11070,7 +11070,9 @@ literal:
1107011070
str ? str->length() : 0,
1107111071
$1);
1107211072
if (!item_str ||
11073-
!item_str->check_well_formed_result(&item_str->str_value, TRUE))
11073+
!item_str->check_well_formed_result(&item_str->str_value,
11074+
true, //send error
11075+
true)) //truncate
1107411076
{
1107511077
MYSQL_YYABORT;
1107611078
}
@@ -11099,7 +11101,9 @@ literal:
1109911101
str ? str->length() : 0,
1110011102
$1);
1110111103
if (!item_str ||
11102-
!item_str->check_well_formed_result(&item_str->str_value, TRUE))
11104+
!item_str->check_well_formed_result(&item_str->str_value,
11105+
true, //send error
11106+
true)) //truncate
1110311107
{
1110411108
MYSQL_YYABORT;
1110511109
}

0 commit comments

Comments
 (0)