Skip to content

Commit

Permalink
Merge c291ea5 into 144b2be
Browse files Browse the repository at this point in the history
  • Loading branch information
sfinkens committed Nov 16, 2020
2 parents 144b2be + c291ea5 commit 4d2ca76
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 22 deletions.
1 change: 1 addition & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ The following people have made contributions to this project:
- [Panu Lahtinen (pnuu)](https://github.com/pnuu)
- [Martin Raspaud (mraspaud)](https://github.com/mraspaud)
- [Hrobjartur Thorsteinsson (thorsteinssonh)](https://github.com/thorsteinssonh)
- [Stephan Finkensieper (sfinkens)](https://github.com/sfinkens)
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,5 @@
keywords=["string parsing", "string formatting", "pytroll"],
zip_safe=False,
install_requires=[],
tests_require=['pytest']
)
88 changes: 68 additions & 20 deletions trollsift/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,16 @@ def convert_field(self, value, conversion):
spec_regexes = {
'c': r'.',
'd': r'[-+]?\d',
'f': r'[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?',
'f': {
# Naive fixed point format specifier (e.g. {foo:f})
'naive': r'[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?',
# Fixed point format specifier including width and precision
# (e.g. {foo:4.2f}). The lookahead (?=.{width}) makes sure that the
# subsequent pattern is only matched if the string has the required
# (minimum) width.
'precision': r'(?=.{{{width}}})([-+]?([\d ]+(\.\d{{{decimals}}})+|\.\d{{{decimals}}})([eE][-+]?\d+)?)'

},
'i': r'[-+]?(0[xX][\dA-Fa-f]+|0[0-7]*|\d+)',
'o': r'[-+]?[0-7]',
's': r'\S',
Expand All @@ -144,13 +153,34 @@ def convert_field(self, value, conversion):
spec_regexes['g'] = spec_regexes['f']
spec_regexes['X'] = spec_regexes['x']
allow_multiple = ['c', 'd', 'o', 's', 'x', 'X']
fixed_point_types = ['f', 'e', 'E', 'g']
# format_spec ::= [[fill]align][sign][#][0][width][,][.precision][type]
# https://docs.python.org/3.4/library/string.html#format-specification-mini-language
# NOTE: the dot that introduces the precision is escaped on purpose. An
# unescaped '.' would let any character start a "precision" (e.g. ',2' in
# '5,2f'), which would then be passed on as if it were a number of decimals.
fmt_spec_regex = re.compile(
    r'(?P<align>(?P<fill>.)?[<>=^])?(?P<sign>[\+\-\s])?(?P<pound>#)?(?P<zero>0)?(?P<width>\d+)?'
    r'(?P<comma>,)?(?P<precision>\.\d+)?(?P<type>[bcdeEfFgGnosxX%])')


def _get_fixed_point_regex(regex_dict, width, precision):
    """Get regular expression for fixed point numbers.

    Args:
        regex_dict: Mapping with the keys ``'naive'`` (pattern returned when
            neither width nor precision is given) and ``'precision'``
            (``str.format`` template containing the ``{width}`` and
            ``{decimals}`` placeholders).
        width: Total width of the string representation (string or int), or
            None if no width was specified.
        precision: Number of decimals (string or int), optionally prefixed
            with a dot as in a format specification (e.g. ``'.2'``), or None
            if no precision was specified.

    Returns:
        The regular expression (as a string) matching the number.
    """
    if not width and not precision:
        return regex_dict['naive']

    # Missing values become open-ended quantifiers: "any number of decimals"
    # and "at least one character", respectively.
    if precision is None:
        decimals = '0,'
    else:
        # Only the leading dot of the format specification is dropped.
        decimals = str(precision).lstrip('.')
    min_width = '1,' if width is None else width
    return regex_dict['precision'].format(width=min_width, decimals=decimals)


class RegexFormatter(string.Formatter):
"""String formatter that converts a format string to a regular expression.
Expand Down Expand Up @@ -248,6 +278,7 @@ def format_spec_to_regex(field_name, format_spec):
ftype = regex_dict['type']
width = regex_dict['width']
align = regex_dict['align']
precision = regex_dict['precision']
# NOTE: does not properly handle `=` alignment
if fill is None:
if width is not None and width[0] == '0':
Expand All @@ -256,13 +287,19 @@ def format_spec_to_regex(field_name, format_spec):
fill = ' '

char_type = spec_regexes[ftype]
if ftype in fixed_point_types:
char_type = _get_fixed_point_regex(
char_type,
width=width,
precision=precision
)
if ftype == 's' and align and align.endswith('='):
raise ValueError("Invalid format specification: '{}'".format(format_spec))
final_regex = char_type
if ftype in allow_multiple and (not width or width == '0'):
final_regex += r'*?'
elif width and width != '0':
if not fill:
if not fill and ftype not in fixed_point_types:
# we know we have exactly this many characters
final_regex += r'{{{}}}'.format(int(width))
elif fill:
Expand Down Expand Up @@ -339,35 +376,46 @@ def _get_number_from_fmt(fmt):

def _convert(convdef, stri):
    """Convert the string *stri* to the given conversion definition *convdef*.

    Args:
        convdef: Conversion definition (format specification) of the field,
            e.g. ``'%Y%m%d'``, ``'4d'``, ``'5.2f'`` or ``'s'``.
        stri: String to be converted.

    Returns:
        The result of ``dt.datetime.strptime`` if *convdef* contains ``%``,
        an ``int`` for ``d`` definitions, a ``float`` for fixed point
        definitions, and a string (with padding stripped for ``s``
        definitions, unchanged otherwise) in all other cases.

    Raises:
        ValueError: If *stri* cannot be converted by ``strptime``, ``int``
            or ``float``.
    """
    if '%' in convdef:
        return dt.datetime.strptime(stri, convdef)

    # NOTE(review): the type is detected by substring membership, so a fill
    # character that equals a type letter (e.g. 'f>5s') is also treated as
    # that type -- confirm whether such specifications need to be supported.
    is_integer = 'd' in convdef
    is_fixed_point = any(ftype in convdef for ftype in fixed_point_types)
    if not (is_integer or is_fixed_point or 's' in convdef):
        return stri

    # Padding is removed in one place only, before the numeric conversion.
    stri = _strip_padding(convdef, stri)
    if is_integer:
        return int(stri)
    if is_fixed_point:
        return float(stri)
    return stri


def _strip_padding(convdef, stri):
    """Remove the padding described by a conversion definition from a string.

    Args:
        convdef: Conversion definition (format specification) that indicates
            the alignment and the fill character.
        stri: String to be modified.

    Returns:
        *stri* without the fill characters on the padded side(s). The string
        is returned unchanged if *convdef* has no alignment or the alignment
        is not one of ``<``, ``>`` and ``^``.
    """
    parsed = fmt_spec_regex.match(convdef)
    if parsed is None:
        return stri

    spec = parsed.groupdict()
    alignment = spec.get('align')
    if not alignment:
        return stri

    # The align group may also contain the fill character; the alignment
    # character itself is always the last one.
    direction = alignment[-1]
    strippers = {'>': str.lstrip, '<': str.rstrip, '^': str.strip}
    if direction not in strippers:
        return stri

    # Without an explicit fill character the padding consists of spaces.
    fill_char = spec.get('fill') or ' '
    return strippers[direction](stri, fill_char)

@lru_cache()
def get_convert_dict(fmt):
"""Retrieve parse definition from the format string `fmt`."""
Expand Down
73 changes: 71 additions & 2 deletions trollsift/tests/unittests/test_parser.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import unittest
import datetime as dt
import pytest

from trollsift.parser import get_convert_dict, regex_formatter
from trollsift.parser import _convert
from trollsift.parser import parse, globify, validate, is_one2one
from trollsift.parser import parse, globify, validate, is_one2one, compose


class TestParser(unittest.TestCase):
Expand Down Expand Up @@ -273,7 +274,6 @@ def test_is_one2one(self):

def test_compose(self):
"""Test the compose method's custom conversion options."""
from trollsift import compose
key_vals = {'a': 'this Is A-Test b_test c test'}

new_str = compose("{a!c}", key_vals)
Expand Down Expand Up @@ -320,3 +320,72 @@ def test_greediness(self):
template = '{band_type:s}_{polarization_extracted}_{unit}_{s1_fname}'
res_dict = parse(template, fname)
self.assertEqual(exp, res_dict)


class TestParserFixedPoint:
    """Test parsing of fixed point numbers."""

    @pytest.mark.parametrize(
        'test_case',
        [
            # Naive
            {'fmt': '{foo:f}', 'string': '12.34', 'expected': 12.34},
            # Including width and precision
            {'fmt': '{foo:5.2f}', 'string': '12.34', 'expected': 12.34},
            {'fmt': '{foo:5.2f}', 'string': '-1.23', 'expected': -1.23},
            {'fmt': '{foo:5.2f}', 'string': '123.45', 'expected': 123.45},
            # Whitespace padded
            {'fmt': '{foo:5.2f}', 'string': ' 1.23', 'expected': 1.23},
            {'fmt': '{foo:5.2f}', 'string': ' 12.34', 'expected': 12.34},
            # Zero padded
            {'fmt': '{foo:05.2f}', 'string': '01.23', 'expected': 1.23},
            {'fmt': '{foo:05.2f}', 'string': '012.34', 'expected': 12.34},
            # Only precision, no width
            {'fmt': '{foo:.2f}', 'string': '12.34', 'expected': 12.34},
            # Only width, no precision. The input is padded explicitly so
            # that it really has the required minimum width of 16 characters.
            {'fmt': '{foo:16f}', 'string': '1.12'.rjust(16), 'expected': 1.12},
            # No digits before decimal point
            {'fmt': '{foo:3.2f}', 'string': '.12', 'expected': 0.12},
            {'fmt': '{foo:4.2f}', 'string': '-.12', 'expected': -0.12},
            {'fmt': '{foo:4.2f}', 'string': ' .12', 'expected': 0.12},
            {'fmt': '{foo:16f}', 'string': '.12'.rjust(16), 'expected': 0.12},
            # Exponential format
            {'fmt': '{foo:7.2e}', 'string': '-1.23e4', 'expected': -1.23e4},
        ]
    )
    def test_match(self, test_case):
        """Test cases expected to be matched."""
        fmt = test_case['fmt']
        expected = test_case['expected']

        # Test parsed value
        parsed = parse(fmt, test_case['string'])
        assert parsed['foo'] == expected

        # Test round trip: a composed value must parse back to itself
        composed = compose(fmt, {'foo': expected})
        parsed = parse(fmt, composed)
        assert parsed['foo'] == expected

    @pytest.mark.parametrize(
        'test_case',
        [
            # Decimals incorrect
            {'fmt': '{foo:5.2f}', 'string': '12345'},
            {'fmt': '{foo:5.2f}', 'string': '1234.'},
            {'fmt': '{foo:5.2f}', 'string': '1.234'},
            {'fmt': '{foo:5.2f}', 'string': '123.4'},
            {'fmt': '{foo:.2f}', 'string': '12.345'},
            # Decimals correct, but width too short
            {'fmt': '{foo:5.2f}', 'string': '1.23'},
            {'fmt': '{foo:5.2f}', 'string': '.23'},
            {'fmt': '{foo:10.2e}', 'string': '1.23e4'},
            # Invalid
            {'fmt': '{foo:5.2f}', 'string': '12_34'},
            {'fmt': '{foo:5.2f}', 'string': 'aBcD'},
        ]
    )
    def test_no_match(self, test_case):
        """Test cases expected to not be matched."""
        with pytest.raises(ValueError):
            parse(test_case['fmt'], test_case['string'])

0 comments on commit 4d2ca76

Please sign in to comment.