From a02a2e16d4ac69fbaae7cbd1121acbb79b98508b Mon Sep 17 00:00:00 2001 From: Richard Otis Date: Fri, 10 Nov 2017 08:39:47 -0800 Subject: [PATCH 1/3] FIX: tdb: Update float parsing regex to detect floats with no trailing digits. Fixes gh-143. --- pycalphad/io/tdb.py | 4 ++-- pycalphad/tests/test_database.py | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pycalphad/io/tdb.py b/pycalphad/io/tdb.py index f1007e1c9..c9c923207 100644 --- a/pycalphad/io/tdb.py +++ b/pycalphad/io/tdb.py @@ -179,8 +179,8 @@ def _tdb_grammar(): #pylint: disable=R0914 """ int_number = Word(nums).setParseAction(lambda t: [int(t[0])]) pos_neg_int_number = Word('+-'+nums).setParseAction(lambda t: [int(t[0])]) # '+3' or '-2' are examples - # matching float w/ regex is ugly but is recommended by pyparsing - float_number = Regex(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?') \ + # https://stackoverflow.com/questions/2293780/how-to-detect-a-floating-point-number-using-a-regular-expression#comment28701224_2293793 + float_number = Regex(r'-?(0(\.\d*)?|([1-9]\d*\.?\d*)|(\.\d+))([Ee][+-]?\d+)?') \ .setParseAction(lambda t: [float(t[0])]) # symbol name, e.g., phase name, function name symbol_name = Word(alphanums+'_:', min=1) diff --git a/pycalphad/tests/test_database.py b/pycalphad/tests/test_database.py index 3a861e436..228cb684c 100644 --- a/pycalphad/tests/test_database.py +++ b/pycalphad/tests/test_database.py @@ -406,3 +406,10 @@ def test_tdb_missing_terminator_element(): FUNCTION EMBCCTI 298.15 -39.72; 6000 N !""" Database(tdb_str) + +def test_database_parsing_of_floats_with_no_values_after_decimal(): + """Floats with no values after the decimal should be properly parsed (gh-143)""" + tdb_string = """$ The element has no values after the decimal in 5004. causing a failure + ELEMENT CU FCC_A1 63.546 5004. 
33.15 !""" + dbf = Database.from_string(tdb_string, fmt='tdb') + assert "CU" in dbf.elements From 851f5c1d0d5fb012ad89d0914261739f8fdb3e5b Mon Sep 17 00:00:00 2001 From: Richard Otis Date: Sun, 12 Nov 2017 10:00:04 -0800 Subject: [PATCH 2/3] FIX/WIP: tdb: Float parsing regex with lookaround --- pycalphad/io/tdb.py | 5 +++-- pycalphad/tests/test_database.py | 10 +++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/pycalphad/io/tdb.py b/pycalphad/io/tdb.py index c9c923207..09e445f21 100644 --- a/pycalphad/io/tdb.py +++ b/pycalphad/io/tdb.py @@ -179,8 +179,9 @@ def _tdb_grammar(): #pylint: disable=R0914 """ int_number = Word(nums).setParseAction(lambda t: [int(t[0])]) pos_neg_int_number = Word('+-'+nums).setParseAction(lambda t: [int(t[0])]) # '+3' or '-2' are examples - # https://stackoverflow.com/questions/2293780/how-to-detect-a-floating-point-number-using-a-regular-expression#comment28701224_2293793 - float_number = Regex(r'-?(0(\.\d*)?|([1-9]\d*\.?\d*)|(\.\d+))([Ee][+-]?\d+)?') \ + # matching float w/ regex is ugly but is recommended by pyparsing + regex_after_decimal = r'([0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)' + float_number = Regex(r'[-+]?([0-9]+\.(?!{0}))|{0}'.format(regex_after_decimal)) \ .setParseAction(lambda t: [float(t[0])]) # symbol name, e.g., phase name, function name symbol_name = Word(alphanums+'_:', min=1) diff --git a/pycalphad/tests/test_database.py b/pycalphad/tests/test_database.py index 228cb684c..030346a76 100644 --- a/pycalphad/tests/test_database.py +++ b/pycalphad/tests/test_database.py @@ -409,7 +409,15 @@ def test_tdb_missing_terminator_element(): def test_database_parsing_of_floats_with_no_values_after_decimal(): """Floats with no values after the decimal should be properly parsed (gh-143)""" - tdb_string = """$ The element has no values after the decimal in 5004. causing a failure + tdb_string = """$ The element has no values after the decimal in '5004.' ELEMENT CU FCC_A1 63.546 5004. 
33.15 !""" dbf = Database.from_string(tdb_string, fmt='tdb') assert "CU" in dbf.elements + + +def test_database_parsing_of_floats_with_multiple_leading_zeros(): + """Floats with multiple leading zeros should be properly parsed (gh-143)""" + tdb_string = """$ The element has multiple leading zeros in '00.546' + ELEMENT CU FCC_A1 00.546 5004.0 33.15 !""" + dbf = Database.from_string(tdb_string, fmt='tdb') + assert "CU" in dbf.elements From 39b5963d671dde39f66ee069003b8114946100d4 Mon Sep 17 00:00:00 2001 From: Richard Otis Date: Sun, 12 Nov 2017 10:05:21 -0800 Subject: [PATCH 3/3] WIP: tdb: Simplify negative lookahead for float parsing regex --- pycalphad/io/tdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycalphad/io/tdb.py b/pycalphad/io/tdb.py index 09e445f21..0c052c14f 100644 --- a/pycalphad/io/tdb.py +++ b/pycalphad/io/tdb.py @@ -181,7 +181,7 @@ def _tdb_grammar(): #pylint: disable=R0914 pos_neg_int_number = Word('+-'+nums).setParseAction(lambda t: [int(t[0])]) # '+3' or '-2' are examples # matching float w/ regex is ugly but is recommended by pyparsing regex_after_decimal = r'([0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)' - float_number = Regex(r'[-+]?([0-9]+\.(?!{0}))|{0}'.format(regex_after_decimal)) \ + float_number = Regex(r'[-+]?([0-9]+\.(?!([0-9]|[eE])))|{0}'.format(regex_after_decimal)) \ .setParseAction(lambda t: [float(t[0])]) # symbol name, e.g., phase name, function name symbol_name = Word(alphanums+'_:', min=1)