From 87d3daaed4f559db8ac080ceef518c2d1dd8c932 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Mon, 5 Mar 2018 10:02:58 -0500 Subject: [PATCH 1/3] bpo-32997: Fix REDOS in fpformat The regex to decode a number in fpformat is susceptible to catastrophic backtracking. This is a potential DOS vector if a server is using fpformat on untrusted number strings. Replace it with an equivalent non-vulnerable regex. The match behavior of the new regex is slightly different. This difference is addressed with a follow-up check. --- Lib/fpformat.py | 4 +++- Lib/test/test_fpformat.py | 10 ++++++++++ .../Security/2018-03-05-10-14-42.bpo-32997.hp2s8n.rst | 4 ++++ 3 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Security/2018-03-05-10-14-42.bpo-32997.hp2s8n.rst diff --git a/Lib/fpformat.py b/Lib/fpformat.py index 71cbb25f3c8b9e..24e95a72b0f7a5 100644 --- a/Lib/fpformat.py +++ b/Lib/fpformat.py @@ -19,7 +19,7 @@ __all__ = ["fix","sci","NotANumber"] # Compiled regular expression to "decode" a number -decoder = re.compile(r'^([-+]?)0*(\d*)((?:\.\d*)?)(([eE][-+]?\d+)?)$') +decoder = re.compile(r'^([-+]?)0*([1-9]\d*)?((?:\.\d*)?)(([eE][-+]?\d+)?)$') # \0 the whole thing # \1 leading sign or empty # \2 digits left of decimal point @@ -41,6 +41,8 @@ def extract(s): res = decoder.match(s) if res is None: raise NotANumber, s sign, intpart, fraction, exppart = res.group(1,2,3,4) + if intpart is None: # ([1-9]\d*)? may match nothing + intpart = '' if sign == '+': sign = '' if fraction: fraction = fraction[1:] if exppart: expo = int(exppart[1:]) diff --git a/Lib/test/test_fpformat.py b/Lib/test/test_fpformat.py index e6de3b0c11beb0..428623ebb35fa1 100644 --- a/Lib/test/test_fpformat.py +++ b/Lib/test/test_fpformat.py @@ -67,6 +67,16 @@ def test_failing_values(self): else: self.fail("No exception on non-numeric sci") + def test_REDOS(self): + # This attack string will hang on the old decoder pattern. + attack = '+0' + ('0' * 1000000) + '++' + digs = 5 # irrelevant + + # fix returns input if it does not decode + self.assertEqual(fpformat.fix(attack, digs), attack) + # sci raises NotANumber + with self.assertRaises(NotANumber): + fpformat.sci(attack, digs) def test_main(): run_unittest(FpformatTest) diff --git a/Misc/NEWS.d/next/Security/2018-03-05-10-14-42.bpo-32997.hp2s8n.rst b/Misc/NEWS.d/next/Security/2018-03-05-10-14-42.bpo-32997.hp2s8n.rst new file mode 100644 index 00000000000000..3c78ba61ae3438 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2018-03-05-10-14-42.bpo-32997.hp2s8n.rst @@ -0,0 +1,4 @@ +A regex in fpformat was vulnerable to catastrophic backtracking. This regex +was a potential DOS vector (REDOS). Based on typical uses of fpformat the +risk seems low. The regex has been refactored and is now safe. Patch by +Jamie Davis. From fc083d02a1fb86d365d589a47a0ea3d9501e70d6 Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Mon, 5 Mar 2018 15:06:35 -0500 Subject: [PATCH 2/3] Address review comments Use a simplified regex. Capture the integer part of the number in one group, then strip off leading 0's. --- Lib/fpformat.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/fpformat.py b/Lib/fpformat.py index 24e95a72b0f7a5..bbfef36c198591 100644 --- a/Lib/fpformat.py +++ b/Lib/fpformat.py @@ -19,7 +19,7 @@ __all__ = ["fix","sci","NotANumber"] # Compiled regular expression to "decode" a number -decoder = re.compile(r'^([-+]?)0*([1-9]\d*)?((?:\.\d*)?)(([eE][-+]?\d+)?)$') +decoder = re.compile(r'^([-+]?)(\d*)?((?:\.\d*)?)(([eE][-+]?\d+)?)$') # \0 the whole thing # \1 leading sign or empty # \2 digits left of decimal point @@ -41,8 +41,7 @@ def extract(s): res = decoder.match(s) if res is None: raise NotANumber, s sign, intpart, fraction, exppart = res.group(1,2,3,4) - if intpart is None: # ([1-9]\d*)? may match nothing - intpart = '' + intpart = intpart.lstrip('0'); if sign == '+': sign = '' if fraction: fraction = fraction[1:] if exppart: expo = int(exppart[1:]) From c8ab0bd24786afc71d680b144147848668ede7ca Mon Sep 17 00:00:00 2001 From: Jamie Davis Date: Mon, 5 Mar 2018 15:15:26 -0500 Subject: [PATCH 3/3] Fix typo --- Lib/fpformat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/fpformat.py b/Lib/fpformat.py index bbfef36c198591..0537a27b88207c 100644 --- a/Lib/fpformat.py +++ b/Lib/fpformat.py @@ -19,7 +19,7 @@ __all__ = ["fix","sci","NotANumber"] # Compiled regular expression to "decode" a number -decoder = re.compile(r'^([-+]?)(\d*)?((?:\.\d*)?)(([eE][-+]?\d+)?)$') +decoder = re.compile(r'^([-+]?)(\d*)((?:\.\d*)?)(([eE][-+]?\d+)?)$') # \0 the whole thing # \1 leading sign or empty # \2 digits left of decimal point