From 4215f0a532a08e8551bcf54d9bc057005ac703a9 Mon Sep 17 00:00:00 2001 From: Sergey Fedoseev Date: Mon, 27 Nov 2017 11:39:25 +0500 Subject: [PATCH 1/6] bpo-32147: Improved performance of binascii.unhexlify(). --- Lib/test/test_binascii.py | 1 + Modules/binascii.c | 21 +++------------------ 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 8fa57cdf1b0be3..00d05d2a0d30ce 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -198,6 +198,7 @@ def test_hex(self): self.assertEqual(s, u) self.assertRaises(binascii.Error, binascii.a2b_hex, t[:-1]) self.assertRaises(binascii.Error, binascii.a2b_hex, t[:-1] + b'q') + self.assertRaises(binascii.Error, binascii.a2b_hex, bytes([255, 255])) # Confirm that b2a_hex == hexlify and a2b_hex == unhexlify self.assertEqual(binascii.hexlify(self.type2test(s)), t) diff --git a/Modules/binascii.c b/Modules/binascii.c index 1af6b7f98f255f..3b617a3c1d1566 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1130,21 +1130,6 @@ binascii_hexlify_impl(PyObject *module, Py_buffer *data) return _Py_strhex_bytes((const char *)data->buf, data->len); } -static int -to_int(int c) -{ - if (Py_ISDIGIT(c)) - return c - '0'; - else { - if (Py_ISUPPER(c)) - c = Py_TOLOWER(c); - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - } - return -1; -} - - /*[clinic input] binascii.a2b_hex @@ -1187,9 +1172,9 @@ binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr) retbuf = PyBytes_AS_STRING(retval); for (i=j=0; i < arglen; i += 2) { - int top = to_int(Py_CHARMASK(argbuf[i])); - int bot = to_int(Py_CHARMASK(argbuf[i+1])); - if (top == -1 || bot == -1) { + unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])]; + unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])]; + if (top > 16 || bot > 16) { PyErr_SetString(Error, "Non-hexadecimal digit found"); goto finally; From 4f5e429efe7509cdf6b061e61d9c6ddca17a845b Mon Sep 17 00:00:00 2001 From: 
Sergey Fedoseev Date: Mon, 27 Nov 2017 21:21:04 +0500 Subject: [PATCH 2/6] Added more tests for binascii.a2b_hex(). --- Lib/test/test_binascii.py | 4 ++++ Modules/binascii.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 00d05d2a0d30ce..0997d9432bf684 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -199,6 +199,10 @@ def test_hex(self): self.assertRaises(binascii.Error, binascii.a2b_hex, t[:-1]) self.assertRaises(binascii.Error, binascii.a2b_hex, t[:-1] + b'q') self.assertRaises(binascii.Error, binascii.a2b_hex, bytes([255, 255])) + self.assertRaises(binascii.Error, binascii.a2b_hex, b'0G') + self.assertRaises(binascii.Error, binascii.a2b_hex, b'0g') + self.assertRaises(binascii.Error, binascii.a2b_hex, b'G0') + self.assertRaises(binascii.Error, binascii.a2b_hex, b'g0') # Confirm that b2a_hex == hexlify and a2b_hex == unhexlify self.assertEqual(binascii.hexlify(self.type2test(s)), t) diff --git a/Modules/binascii.c b/Modules/binascii.c index 3b617a3c1d1566..3a96567fd2b266 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1174,7 +1174,7 @@ binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr) for (i=j=0; i < arglen; i += 2) { unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])]; unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])]; - if (top > 16 || bot > 16) { + if (top >= 16 || bot >= 16) { PyErr_SetString(Error, "Non-hexadecimal digit found"); goto finally; From bf57e77ac4ddae79fa52a028eacbc533a0e4e622 Mon Sep 17 00:00:00 2001 From: Sergey Fedoseev Date: Mon, 27 Nov 2017 22:31:28 +0500 Subject: [PATCH 3/6] Improved performance of binascii.a2b_qp(). 
--- Modules/binascii.c | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/Modules/binascii.c b/Modules/binascii.c index 3a96567fd2b266..2ca3a813aedcf3 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1203,19 +1203,6 @@ binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr) return binascii_a2b_hex_impl(module, hexstr); } -static const int table_hex[128] = { - -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1, - -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, - -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, - -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, - -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1 -}; - -#define hexval(c) table_hex[(unsigned int)(c)] - #define MAXLINESIZE 76 @@ -1233,7 +1220,7 @@ binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header) /*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/ { Py_ssize_t in, out; - char ch; + unsigned int top, bot; const unsigned char *ascii_data; unsigned char *odata; Py_ssize_t datalen = 0; @@ -1271,18 +1258,11 @@ binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header) in++; } else if ((in + 1 < datalen) && - ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') || - (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') || - (ascii_data[in] >= '0' && ascii_data[in] <= '9')) && - ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') || - (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') || - (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) { - /* hexval */ - ch = hexval(ascii_data[in]) << 4; - in++; - ch |= hexval(ascii_data[in]); - in++; - odata[out++] = ch; + ((top = _PyLong_DigitValue[ascii_data[in]]) < 16) && + ((bot = _PyLong_DigitValue[ascii_data[in + 1]]) < 16)) + { + odata[out++] = (top << 4) + bot; + in += 2; } else 
{ odata[out++] = '='; From 6e3585466bcacebbf505549f6c758338050832f3 Mon Sep 17 00:00:00 2001 From: Sergey Fedoseev Date: Tue, 28 Nov 2017 09:10:52 +0500 Subject: [PATCH 4/6] Partially reverted previous commit as it doesn't provide expected performance increase. --- Modules/binascii.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/Modules/binascii.c b/Modules/binascii.c index 2ca3a813aedcf3..59e99282ae3571 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1220,7 +1220,7 @@ binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header) /*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/ { Py_ssize_t in, out; - unsigned int top, bot; + char ch; const unsigned char *ascii_data; unsigned char *odata; Py_ssize_t datalen = 0; @@ -1258,11 +1258,18 @@ binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header) in++; } else if ((in + 1 < datalen) && - ((top = _PyLong_DigitValue[ascii_data[in]]) < 16) && - ((bot = _PyLong_DigitValue[ascii_data[in + 1]]) < 16)) - { - odata[out++] = (top << 4) + bot; - in += 2; + ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') || + (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') || + (ascii_data[in] >= '0' && ascii_data[in] <= '9')) && + ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') || + (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') || + (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) { + /* hexval */ + ch = _PyLong_DigitValue[ascii_data[in]] << 4; + in++; + ch |= _PyLong_DigitValue[ascii_data[in]]; + in++; + odata[out++] = ch; } else { odata[out++] = '='; From adc9d23c586cb93aea8ea3e27018283bd0840a91 Mon Sep 17 00:00:00 2001 From: Sergey Fedoseev Date: Tue, 28 Nov 2017 10:23:17 +0500 Subject: [PATCH 5/6] Added NEWS entry. 
--- .../NEWS.d/next/Library/2017-11-28-10-23-13.bpo-32147.PI2k1Y.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2017-11-28-10-23-13.bpo-32147.PI2k1Y.rst diff --git a/Misc/NEWS.d/next/Library/2017-11-28-10-23-13.bpo-32147.PI2k1Y.rst b/Misc/NEWS.d/next/Library/2017-11-28-10-23-13.bpo-32147.PI2k1Y.rst new file mode 100644 index 00000000000000..33ce793cadba90 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-11-28-10-23-13.bpo-32147.PI2k1Y.rst @@ -0,0 +1 @@ +:func:`binascii.unhexlify` is now up to 2 times faster. From d42cb6d0e26a596b9e46ff0ce6c25e19cbbdfedf Mon Sep 17 00:00:00 2001 From: Sergey Fedoseev Date: Sat, 13 Jan 2018 21:36:10 +0500 Subject: [PATCH 6/6] Added my name to NEWS entry and to ACKS. --- Misc/ACKS | 1 + .../NEWS.d/next/Library/2017-11-28-10-23-13.bpo-32147.PI2k1Y.rst | 1 + 2 files changed, 2 insertions(+) diff --git a/Misc/ACKS b/Misc/ACKS index 54d8d62b633f70..8e2a1017ddcb0e 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -455,6 +455,7 @@ Michael Farrell Troy J. Farrell Jim Fasarakis-Hilliard Mark Favas +Sergey Fedoseev Boris Feld Thomas Fenzl Niels Ferguson diff --git a/Misc/NEWS.d/next/Library/2017-11-28-10-23-13.bpo-32147.PI2k1Y.rst b/Misc/NEWS.d/next/Library/2017-11-28-10-23-13.bpo-32147.PI2k1Y.rst index 33ce793cadba90..e02a97c5e9e6b2 100644 --- a/Misc/NEWS.d/next/Library/2017-11-28-10-23-13.bpo-32147.PI2k1Y.rst +++ b/Misc/NEWS.d/next/Library/2017-11-28-10-23-13.bpo-32147.PI2k1Y.rst @@ -1 +1,2 @@ :func:`binascii.unhexlify` is now up to 2 times faster. +Patch by Sergey Fedoseev.