From dc8dcb6c4b1ab72963efc1817106d61693efd0fa Mon Sep 17 00:00:00 2001 From: animalize Date: Wed, 5 Apr 2017 11:50:44 +0800 Subject: [PATCH 1/2] fix range checking in GB18030 decoder --- Lib/test/test_codecencodings_cn.py | 6 ++++++ Misc/NEWS | 1 + Modules/cjkcodecs/_codecs_cn.c | 6 +++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_codecencodings_cn.py b/Lib/test/test_codecencodings_cn.py index 3bdf7d0e14b3b7..39e249ed32730f 100644 --- a/Lib/test/test_codecencodings_cn.py +++ b/Lib/test/test_codecencodings_cn.py @@ -48,6 +48,12 @@ class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase): (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'), (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'), (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'), + # issue29990 + (b"\xFF\x30\x81\x30", "strict", None), + (b"\x81\x30\xFF\x30", "strict", None), + (b"abc\x81\x39\xFF\x39\xc1\xc4", "replace", "abc\ufffd\x39\ufffd\x39\u804a"), + (b"abc\xAB\x36\xFF\x30def", "replace", 'abc\ufffd\x36\ufffd\x30def'), + (b"abc\xBF\x38\xFF\x32\xc1\xc4", "ignore", "abc\x38\x32\u804a"), ) has_iso10646 = True diff --git a/Misc/NEWS b/Misc/NEWS index 95092afa0b38f2..ec6b38fd411522 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -9,6 +9,7 @@ What's New in Python 3.7.0 alpha 1? Core and Builtins ----------------- +- bpo-29990: Fix range checking in GB18030 decoder. - bpo-29949: Fix memory usage regression of set and frozenset object. diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index 1a070f2f393219..6883944dc44a87 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -279,8 +279,12 @@ DECODER(gb18030) REQUIRE_INBUF(4); c3 = INBYTE3; c4 = INBYTE4; - if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) + /* if c == 0xFF, lseq will > 0x10FFFF */ + if (c < 0x81 || + c3 < 0x81 || c3 > 0xFE || + c4 < 0x30 || c4 > 0x39) return 1; + c -= 0x81; c2 -= 0x30; c3 -= 0x81; c4 -= 0x30; From 56974ad114c0a04d980143ed22cc502ed4a26c25 Mon Sep 17 00:00:00 2001 From: Xiang Zhang Date: Mon, 8 May 2017 11:24:03 +0800 Subject: [PATCH 2/2] small modification --- Lib/test/test_codecencodings_cn.py | 10 +++++----- Misc/NEWS | 3 ++- Modules/cjkcodecs/_codecs_cn.c | 4 +--- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_codecencodings_cn.py b/Lib/test/test_codecencodings_cn.py index 39e249ed32730f..c8a410c2e0398c 100644 --- a/Lib/test/test_codecencodings_cn.py +++ b/Lib/test/test_codecencodings_cn.py @@ -49,11 +49,11 @@ class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase): (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'), (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'), # issue29990 - (b"\xFF\x30\x81\x30", "strict", None), - (b"\x81\x30\xFF\x30", "strict", None), - (b"abc\x81\x39\xFF\x39\xc1\xc4", "replace", "abc\ufffd\x39\ufffd\x39\u804a"), - (b"abc\xAB\x36\xFF\x30def", "replace", 'abc\ufffd\x36\ufffd\x30def'), - (b"abc\xBF\x38\xFF\x32\xc1\xc4", "ignore", "abc\x38\x32\u804a"), + (b"\xff\x30\x81\x30", "strict", None), + (b"\x81\x30\xff\x30", "strict", None), + (b"abc\x81\x39\xff\x39\xc1\xc4", "replace", "abc\ufffd\x39\ufffd\x39\u804a"), + (b"abc\xab\x36\xff\x30def", "replace", 'abc\ufffd\x36\ufffd\x30def'), + (b"abc\xbf\x38\xff\x32\xc1\xc4", "ignore", "abc\x38\x32\u804a"), ) has_iso10646 = True diff --git a/Misc/NEWS b/Misc/NEWS index c9354f388561b1..f5b8279c1ee9af 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -9,7 +9,6 @@ What's New in Python 3.7.0 alpha 1? Core and Builtins ----------------- -- bpo-29990: Fix range checking in GB18030 decoder. - bpo-12414: sys.getsizeof() on a code object now returns the sizes which includes the code struct and sizes of objects which it references. @@ -318,6 +317,8 @@ Extension Modules Library ------- +- bpo-29990: Fix range checking in GB18030 decoder. Original patch by Ma Lin. + - bpo-30243: Removed the __init__ methods of _json's scanner and encoder. Misusing them could cause memory leaks or crashes. Now scanner and encoder objects are completely initialized in the __new__ methods. diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index 6883944dc44a87..bda175c55d1323 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -279,12 +279,10 @@ DECODER(gb18030) REQUIRE_INBUF(4); c3 = INBYTE3; c4 = INBYTE4; - /* if c == 0xFF, lseq will > 0x10FFFF */ - if (c < 0x81 || + if (c < 0x81 || c > 0xFE || c3 < 0x81 || c3 > 0xFE || c4 < 0x30 || c4 > 0x39) return 1; - c -= 0x81; c2 -= 0x30; c3 -= 0x81; c4 -= 0x30;