Merge c291ea5 into 144b2be

pytroll · Nov 16, 2020 · 4d2ca76 · 4d2ca76
2 parents 144b2be + c291ea5
commit 4d2ca76
Show file tree

Hide file tree

Showing 4 changed files with 141 additions and 22 deletions.
diff --git a/AUTHORS.md b/AUTHORS.md
@@ -9,3 +9,4 @@ The following people have made contributions to this project:
 - [Panu Lahtinen (pnuu)](https://github.com/pnuu)
 - [Martin Raspaud (mraspaud)](https://github.com/mraspaud)
 - [Hrobjartur Thorsteinsson (thorsteinssonh)](https://github.com/thorsteinssonh)
+- [Stephan Finkensieper (sfinkens)](https://github.com/sfinkens)
diff --git a/setup.py b/setup.py
@@ -47,4 +47,5 @@
       keywords=["string parsing", "string formatting", "pytroll"],
       zip_safe=False,
       install_requires=[],
+      tests_require=['pytest']
       )
diff --git a/trollsift/parser.py b/trollsift/parser.py
@@ -133,7 +133,16 @@ def convert_field(self, value, conversion):
 spec_regexes = {
     'c': r'.',
     'd': r'[-+]?\d',
-    'f': r'[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?',
+    'f': {
+        # Naive fixed point format specifier (e.g. {foo:f})
+        'naive': r'[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?',
+        # Fixed point format specifier including width and precision
+        # (e.g. {foo:4.2f}). The lookahead (?=.{width}) makes sure that the
+        # subsequent pattern is only matched if the string has the required
+        # (minimum) width.
+        'precision': r'(?=.{{{width}}})([-+]?([\d ]+(\.\d{{{decimals}}})+|\.\d{{{decimals}}})([eE][-+]?\d+)?)'
+
+    },
     'i': r'[-+]?(0[xX][\dA-Fa-f]+|0[0-7]*|\d+)',
     'o': r'[-+]?[0-7]',
     's': r'\S',
@@ -144,13 +153,34 @@ def convert_field(self, value, conversion):
 spec_regexes['g'] = spec_regexes['f']
 spec_regexes['X'] = spec_regexes['x']
 allow_multiple = ['c', 'd', 'o', 's', 'x', 'X']
+fixed_point_types = ['f', 'e', 'E', 'g']
 # format_spec ::=  [[fill]align][sign][#][0][width][,][.precision][type]
 # https://docs.python.org/3.4/library/string.html#format-specification-mini-language
 fmt_spec_regex = re.compile(
     r'(?P<align>(?P<fill>.)?[<>=^])?(?P<sign>[\+\-\s])?(?P<pound>#)?(?P<zero>0)?(?P<width>\d+)?'
     r'(?P<comma>,)?(?P<precision>.\d+)?(?P<type>[bcdeEfFgGnosxX%])')
 
 
+def _get_fixed_point_regex(regex_dict, width, precision):  # regex_dict must provide the 'naive' and 'precision' patterns
+    """Get regular expression for fixed point numbers.
+
+    Args:
+        width: Total width of the string representation.
+        precision: Number of decimals.
+    """
+    if width or precision:  # a width and/or precision was given: use the 'precision' template
+        if precision is None:
+            precision = '0,'  # open-ended quantifier text for the template's {decimals} slot
+        else:
+            precision = precision.strip('.')  # remove leading/trailing '.' so only the digits remain
+        if width is None:
+            width = '1,'  # open-ended quantifier text for the template's {width} slot
+        return regex_dict['precision'].format(
+            width=width, decimals=precision)
+    else:
+        return regex_dict['naive']  # neither width nor precision: return the 'naive' pattern unchanged
+
+
 class RegexFormatter(string.Formatter):
     """String formatter that converts a format string to a regular expression.
     
@@ -248,6 +278,7 @@ def format_spec_to_regex(field_name, format_spec):
         ftype = regex_dict['type']
         width = regex_dict['width']
         align = regex_dict['align']
+        precision = regex_dict['precision']
         # NOTE: does not properly handle `=` alignment
         if fill is None:
             if width is not None and width[0] == '0':
@@ -256,13 +287,19 @@ def format_spec_to_regex(field_name, format_spec):
                 fill = ' '
 
         char_type = spec_regexes[ftype]
+        if ftype in fixed_point_types:
+            char_type = _get_fixed_point_regex(
+                char_type,
+                width=width,
+                precision=precision
+            )
         if ftype == 's' and align and align.endswith('='):
             raise ValueError("Invalid format specification: '{}'".format(format_spec))
         final_regex = char_type
         if ftype in allow_multiple and (not width or width == '0'):
             final_regex += r'*?'
         elif width and width != '0':
-            if not fill:
+            if not fill and ftype not in fixed_point_types:
                 # we know we have exactly this many characters
                 final_regex += r'{{{}}}'.format(int(width))
             elif fill:
@@ -339,35 +376,46 @@ def _get_number_from_fmt(fmt):
 
 def _convert(convdef, stri):
     """Convert the string *stri* to the given conversion definition *convdef*."""
+    is_fixed_point = any([ftype in convdef for ftype in fixed_point_types])  # NOTE(review): substring test on convdef, so a fill char such as 'f' or 'e' also matches - confirm intended
     if '%' in convdef:
         result = dt.datetime.strptime(stri, convdef)
-    elif 'd' in convdef or 's' in convdef:
-        regex_match = fmt_spec_regex.match(convdef)
-        match_dict = regex_match.groupdict() if regex_match else {}
-        align = match_dict.get('align')
-        pad = match_dict.get('fill')
-        if align:
-            # align character is the last one
-            align = align[-1]
-        if align and align in '<>^' and not pad:
-            pad = ' '
-
-        if align == '>':
-            stri = stri.lstrip(pad)
-        elif align == '<':
-            stri = stri.rstrip(pad)
-        elif align == '^':
-            stri = stri.strip(pad)
-
+    elif 'd' in convdef or 's' in convdef or is_fixed_point:  # these conversions get their padding stripped first
+        stri = _strip_padding(convdef, stri)  # padding removal now lives in a separate helper
         if 'd' in convdef:
             result = int(stri)
+        elif is_fixed_point:  # checked after 'd', so 'd' takes precedence when both match
+            result = float(stri)  # fixed point conversions are returned as float
         else:
             result = stri
     else:
         result = stri
     return result
 
 
+def _strip_padding(convdef, stri):
+    """Strip padding from the given string.
+
+    Args:
+        stri: String to be modified
+        convdef: Corresponding conversion definition (indicates the padding)
+    """
+    regex_match = fmt_spec_regex.match(convdef)  # split the conversion definition into its format-spec parts
+    match_dict = regex_match.groupdict() if regex_match else {}  # no match: empty dict, so align and pad stay None
+    align = match_dict.get('align')
+    pad = match_dict.get('fill')
+    if align:
+        # align character is the last one
+        align = align[-1]
+    if align and align in '<>^' and not pad:
+        pad = ' '  # alignment given without a fill character: strip spaces
+    if align == '>':
+        stri = stri.lstrip(pad)  # '>' alignment: remove pad characters from the left
+    elif align == '<':
+        stri = stri.rstrip(pad)  # '<' alignment: remove pad characters from the right
+    elif align == '^':
+        stri = stri.strip(pad)  # '^' alignment: remove pad characters from both ends
+    return stri  # unchanged when align is missing or is '='
+
 @lru_cache()
 def get_convert_dict(fmt):
     """Retrieve parse definition from the format string `fmt`."""

diff --git a/trollsift/tests/unittests/test_parser.py b/trollsift/tests/unittests/test_parser.py
@@ -1,9 +1,10 @@
 import unittest
 import datetime as dt
+import pytest
 
 from trollsift.parser import get_convert_dict, regex_formatter
 from trollsift.parser import _convert
-from trollsift.parser import parse, globify, validate, is_one2one
+from trollsift.parser import parse, globify, validate, is_one2one, compose
 
 
 class TestParser(unittest.TestCase):
@@ -273,7 +274,6 @@ def test_is_one2one(self):
 
     def test_compose(self):
         """Test the compose method's custom conversion options."""
-        from trollsift import compose
         key_vals = {'a': 'this Is A-Test b_test c test'}
 
         new_str = compose("{a!c}", key_vals)
@@ -320,3 +320,72 @@ def test_greediness(self):
         template = '{band_type:s}_{polarization_extracted}_{unit}_{s1_fname}'
         res_dict = parse(template, fname)
         self.assertEqual(exp, res_dict)
+
+
+class TestParserFixedPoint:
+    """Test parsing of fixed point numbers."""
+
+    @pytest.mark.parametrize(
+        'test_case',  # each case is a dict with 'fmt', 'string' and 'expected'
+        [
+            # Naive
+            {'fmt': '{foo:f}', 'string': '12.34', 'expected': 12.34},
+            # Including width and precision
+            {'fmt': '{foo:5.2f}', 'string': '12.34', 'expected': 12.34},
+            {'fmt': '{foo:5.2f}', 'string': '-1.23', 'expected': -1.23},
+            {'fmt': '{foo:5.2f}', 'string': '12.34', 'expected': 12.34},  # NOTE(review): identical to the case two lines above
+            {'fmt': '{foo:5.2f}', 'string': '123.45', 'expected': 123.45},  # six characters: longer than the width of 5 is expected to parse
+            # Whitespace padded
+            {'fmt': '{foo:5.2f}', 'string': ' 1.23', 'expected': 1.23},
+            {'fmt': '{foo:5.2f}', 'string': ' 12.34', 'expected': 12.34},
+            # Zero padded
+            {'fmt': '{foo:05.2f}', 'string': '01.23', 'expected': 1.23},
+            {'fmt': '{foo:05.2f}', 'string': '012.34', 'expected': 12.34},
+            # Only precision, no width
+            {'fmt': '{foo:.2f}', 'string': '12.34', 'expected': 12.34},
+            # Only width, no precision
+            {'fmt': '{foo:16f}', 'string': '            1.12', 'expected': 1.12},
+            # No digits before decimal point
+            {'fmt': '{foo:3.2f}', 'string': '.12', 'expected': 0.12},
+            {'fmt': '{foo:4.2f}', 'string': '-.12', 'expected': -0.12},
+            {'fmt': '{foo:4.2f}', 'string': ' .12', 'expected': 0.12},
+            {'fmt': '{foo:4.2f}', 'string': '  .12', 'expected': 0.12},
+            {'fmt': '{foo:16f}', 'string': '             .12', 'expected': 0.12},
+            # Exponential format
+            {'fmt': '{foo:7.2e}', 'string': '-1.23e4', 'expected': -1.23e4},
+        ]
+    )
+    def test_match(self, test_case):
+        """Test cases expected to be matched."""
+
+        # Test parsed value
+        parsed = parse(test_case['fmt'], test_case['string'])
+        assert parsed['foo'] == test_case['expected']
+
+        # Test round trip
+        composed = compose(test_case['fmt'], {'foo': test_case['expected']})  # format the expected value with the same spec
+        parsed = parse(test_case['fmt'], composed)  # parsing the composed string must give the value back
+        assert parsed['foo'] == test_case['expected']
+
+    @pytest.mark.parametrize(
+        'test_case',  # each case is a dict with 'fmt' and 'string' only
+        [
+            # Decimals incorrect
+            {'fmt': '{foo:5.2f}', 'string': '12345'},
+            {'fmt': '{foo:5.2f}', 'string': '1234.'},
+            {'fmt': '{foo:5.2f}', 'string': '1.234'},
+            {'fmt': '{foo:5.2f}', 'string': '123.4'},
+            {'fmt': '{foo:.2f}', 'string': '12.345'},
+            # Decimals correct, but width too short
+            {'fmt': '{foo:5.2f}', 'string': '1.23'},
+            {'fmt': '{foo:5.2f}', 'string': '.23'},
+            {'fmt': '{foo:10.2e}', 'string': '1.23e4'},
+            # Invalid
+            {'fmt': '{foo:5.2f}', 'string': '12_34'},
+            {'fmt': '{foo:5.2f}', 'string': 'aBcD'},
+        ]
+    )
+    def test_no_match(self, test_case):
+        """Test cases expected to not be matched."""
+        with pytest.raises(ValueError):  # the test passes only if parse raises ValueError
+            parse(test_case['fmt'], test_case['string'])