diff --git a/doc/source/index.rst b/doc/source/index.rst
index 21d53c2..d05aadb 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -4,8 +4,8 @@
contain the root `toctree` directive.
.. meta::
- description: Trollsift project, modules for formatting, parsing and filtering satellite granule file names
- keywords: Python, pytroll, format, parse, filter, string
+ :description: Trollsift project, modules for formatting, parsing and filtering satellite granule file names
+ :keywords: Python, pytroll, format, parse, filter, string
Welcome to the trollsift documentation!
=========================================
@@ -26,7 +26,6 @@ Contents
installation
usage
- examples
api
Indices and tables
diff --git a/doc/source/usage.rst b/doc/source/usage.rst
index acfc244..ad426af 100644
--- a/doc/source/usage.rst
+++ b/doc/source/usage.rst
@@ -1,9 +1,3 @@
-
-.. .. sectnum::
-.. :depth: 4
-.. :start: 2
-.. :suffix: .
-
.. _string-format: https://docs.python.org/2/library/string.html#format-string-syntax
Usage
@@ -44,6 +38,14 @@ a new file name,
>>> p.compose(data)
'/somedir/otherdir/hrpt_noaa16_20120101_0101_69022.l1b'
+In addition to Python's built-in string formatting functionality, trollsift also
+provides extra conversion options such as making all characters lowercase:
+
+ >>> my_parser = Parser("{platform_name!l}")
+ >>> my_parser.compose({'platform_name': 'NPP'})
+ 'npp'
+
+For all of the options see :class:`~trollsift.parser.StringFormatter`.
standalone parse and compose
+++++++++++++++++++++++++++++++++++++++++
diff --git a/setup.cfg b/setup.cfg
index dd67fc4..accf151 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -6,4 +6,7 @@ requires=python python-six
release=1
[bdist_wheel]
-universal=1
\ No newline at end of file
+universal=1
+
+[flake8]
+max-line-length = 120
diff --git a/trollsift/parser.py b/trollsift/parser.py
index 7d04fdf..00ac585 100644
--- a/trollsift/parser.py
+++ b/trollsift/parser.py
@@ -22,8 +22,8 @@
# along with this program. If not, see .
-'''Parser class
-'''
+"""Parser class
+"""
import re
import datetime as dt
@@ -33,9 +33,8 @@
class Parser(object):
-
- '''Parser class
- '''
+ """Parser class
+ """
def __init__(self, fmt):
self.fmt = fmt
@@ -55,6 +54,8 @@ def compose(self, keyvals):
'''
return compose(self.fmt, keyvals)
+ format = compose
+
def globify(self, keyvals=None):
'''Generate a string useable with glob.glob() from format string
*fmt* and *keyvals* dictionary.
@@ -88,98 +89,171 @@ def is_one2one(self):
return is_one2one(self.fmt)
-def _extract_parsedef(fmt):
- '''Retrieve parse definition from the format string *fmt*.
- '''
+class StringFormatter(string.Formatter):
+    """String formatter with extra ``!`` conversions for plain strings.
+
+    On top of the conversions handled by `string.Formatter`, this class
+    understands a few one-letter conversions for common trollsift situations
+    such as lowercasing a parameter or stripping its separators. Put the
+    letter after an `!` in the replacement field:
+
+        >>> fstr = "{!u}_{!l}"
+        >>> formatter = StringFormatter()
+        >>> formatter.format(fstr, "to_upper", "To_LowerCase")
+        'TO_UPPER_to_lowercase'
+
+    - c: Make capitalized version of string (first character upper case, all lowercase after that) by executing the
+      parameter's `.capitalize()` method.
+    - h: A combination of 'R' and 'l'.
+    - H: A combination of 'R' and 'u'.
+    - l: Make all characters lowercase by executing the parameter's `.lower()` method.
+    - R: Remove all separators from the parameter including '-', '_', ' ', and ':'.
+    - t: Title case the string by executing the parameter's `.title()` method.
+    - u: Make all characters uppercase by executing the parameter's `.upper()` method.
+    """
+    # conversion letter -> name of the method called on the value
+    CONV_FUNCS = {
+        'c': 'capitalize',
+        'h': 'lower',
+        'H': 'upper',
+        'l': 'lower',
+        't': 'title',
+        'u': 'upper'
+    }
+
+    def convert_field(self, value, conversion):
+        """Apply the conversion letter `conversion` to `value`.
+
+        Letters listed in `CONV_FUNCS` call the named method on `value`;
+        'h', 'H' and 'R' additionally strip '-', '_', ':' and ' '. Anything
+        else is handed to `string.Formatter.convert_field`.
+        """
+        method_name = self.CONV_FUNCS.get(conversion)
+        if method_name is None and conversion != 'R':
+            # default conversion ('r', 's')
+            return super(StringFormatter, self).convert_field(value, conversion)
+
+        if method_name is not None:
+            value = getattr(value, method_name)()
+        if conversion in ('h', 'H', 'R'):
+            for separator in ('-', '_', ':', ' '):
+                value = value.replace(separator, '')
+        return value
+
+
+formatter = StringFormatter()
+
+
+def _extract_parsedef(fmt):
+ """Retrieve parse definition from the format string `fmt`.
+
+ Returns a ``(parsedef, convdef)`` pair. ``parsedef`` is a list that holds,
+ in order, each non-empty piece of literal text as a string and each
+ replacement field as a one-item dict ``{field_name: format_spec}`` (with
+ ``None`` in place of an empty spec). ``convdef`` maps every field name to
+ its format spec exactly as parsed, so an absent spec is stored as ``''``.
+ """
parsedef = []
convdef = {}
-
- for part1 in fmt.split('}'):
- part2 = part1.split('{', 1)
- if part2[0] is not '':
- parsedef.append(part2[0])
- if len(part2) > 1 and part2[1] is not '':
- if ':' in part2[1]:
- part2 = part2[1].split(':', 1)
- parsedef.append({part2[0]: part2[1]})
- convdef[part2[0]] = part2[1]
- else:
- reg = re.search('(\{' + part2[1] + '\})', fmt)
- if reg:
- parsedef.append({part2[1]: None})
- else:
- parsedef.append(part2[1])
+ # `conversion` (the part after '!') is unpacked but not used here
+ for literal_text, field_name, format_spec, conversion in formatter.parse(fmt):
+ if literal_text:
+ parsedef.append(literal_text)
+ if field_name is None:
+ # literal text that is not followed by a replacement field
+ continue
+ parsedef.append({field_name: format_spec or None})
+ convdef[field_name] = format_spec
return parsedef, convdef
-def _extract_values(parsedef, stri):
- """
- Given a parse definition *parsedef* match and extract key value
- pairs from input string *stri*.
- """
- if len(parsedef) == 0:
- return {}
-
- match = parsedef.pop(0)
- # we allow ourselves typechecking
- # in case of this subroutine
- if isinstance(match, (str, six.text_type)):
- # match
- if stri.find(match) == 0:
- stri_next = stri[len(match):]
- return _extract_values(parsedef, stri_next)
- else:
- raise ValueError
- else:
- key = list(match)[0]
- fmt = match[key]
- fmt_list = ["%f", "%a", "%A", "%b", "%B", "%z", "%Z",
- "%p", "%c", "%x", "%X"]
- if fmt is None or fmt.isalpha() or any([x in fmt for x in fmt_list]):
- if len(parsedef) != 0:
- next_match = parsedef[0]
- # next match is string ...
- if isinstance(next_match, (str, six.text_type)):
- try:
- count = fmt.count(next_match)
- except AttributeError:
- count = 0
- pos = -1
- for dummy in range(count + 1):
- pos = stri.find(next_match, pos + 1)
- value = stri[0:pos]
- # next match is string key ...
- else:
- # pick out segment until string match,
- # and parse in reverse,
- rev_parsedef = []
- x = ''
- for x in parsedef:
- if isinstance(x, (str, six.text_type)):
- break
- rev_parsedef.insert(0, x)
- rev_parsedef = rev_parsedef + [match]
- if isinstance(x, (str, six.text_type)):
- rev_stri = stri[:stri.find(x)][::-1]
- else:
- rev_stri = stri[::-1]
- # parse reversely and pick out value
- value = _extract_values(rev_parsedef, rev_stri)[key][::-1]
- else:
- value = stri
- stri_next = stri[len(value):]
- keyvals = _extract_values(parsedef, stri_next)
- keyvals[key] = value
- return keyvals
+# Regular expression for each format type character (the last character of a
+# format spec), as looked up by RegexFormatter.regex_field.
+# taken from https://docs.python.org/3/library/re.html#simulating-scanf
+spec_regexes = {
+ 'c': r'.',
+ 'd': r'[-+]?\d',
+ 'f': r'[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?',
+ 'i': r'[-+]?(0[xX][\dA-Fa-f]+|0[0-7]*|\d+)',
+ 'o': r'[-+]?[0-7]',
+ 's': r'\S',
+ 'x': r'[-+]?(0[xX])?[\dA-Fa-f]',
+}
+# the remaining float and hex type characters reuse the patterns above
+spec_regexes['e'] = spec_regexes['f']
+spec_regexes['E'] = spec_regexes['f']
+spec_regexes['g'] = spec_regexes['f']
+spec_regexes['X'] = spec_regexes['x']
+# types whose pattern above describes a single character; regex_field appends
+# a '*' or '{n}' quantifier to these
+allow_multiple = ['c', 'd', 'o', 's', 'x', 'X']
+
+
+class RegexFormatter(string.Formatter):
+    """Formatter that renders a format string as a regular expression.
+
+    Replacement fields for which no value is supplied are turned into named
+    groups, ``(?P<field_name>...)``, whose pattern is derived from the
+    field's format spec. Literal text is escaped so that it matches itself.
+    `extract_values` matches the resulting expression against a string and
+    returns the captured groups.
+    """
+
+    # special string to mark a parameter not being specified
+    UNPROVIDED_VALUE = ''
+    ESCAPE_CHARACTERS = [x for x in string.punctuation if x not in '\\%']
+    # '\\{}' spells the backslash out explicitly; '\{' is not a valid string
+    # escape and newer Python versions warn about it.
+    ESCAPE_SETS = [(c, '\\{}'.format(c)) for c in ESCAPE_CHARACTERS]
+
+    def _escape(self, s):
+        """Escape bad characters for regular expressions.
+
+        Similar to `re.escape` but allows '%' to pass through.
+
+        """
+        for ch, r_ch in self.ESCAPE_SETS:
+            s = s.replace(ch, r_ch)
+        return s
+
+    def parse(self, format_string):
+        """Parse `format_string` like the parent class, escaping literal text."""
+        parse_ret = super(RegexFormatter, self).parse(format_string)
+        for literal_text, field_name, format_spec, conversion in parse_ret:
+            # the parent class will call parse multiple times moving
+            # 'format_spec' to 'literal_text'. We only escape 'literal_text'
+            # so we don't escape things twice.
+            literal_text = self._escape(literal_text)
+            yield literal_text, field_name, format_spec, conversion
+
+    def get_value(self, key, args, kwargs):
+        """Return the value for `key`, or ``(key, UNPROVIDED_VALUE)`` if missing.
+
+        The key is kept in the returned tuple so that `format_field` can name
+        the regular expression group after the field.
+        """
+        try:
+            return super(RegexFormatter, self).get_value(key, args, kwargs)
+        except (IndexError, KeyError):
+            return key, self.UNPROVIDED_VALUE
+
+    def _regex_datetime(self, format_spec):
+        """Translate the strftime directives in `format_spec` into a regex.
+
+        Directives found in `DT_FMT` become ``\\d{n}`` when their glob
+        pattern is a run of ``n`` question marks, otherwise a run of
+        non-separator characters. ``%%`` becomes a literal ``%`` and any
+        other text is returned unchanged.
+        """
+        def _directive_regex(match):
+            fmt_key = match.group(0)
+            if fmt_key == '%%':
+                # special case: an escaped percent sign matches a literal '%'
+                return '%'
+            if fmt_key not in DT_FMT:
+                # not a known directive: leave the text untouched
+                return fmt_key
+            count = DT_FMT[fmt_key].count('?')
+            # either a series of numbers or letters/numbers
+            return r'\d{{{:d}}}'.format(count) if count else r'[^ \t\n\r\f\v\-_:]+'
+
+        # One left-to-right pass, so the result does not depend on the
+        # iteration order of DT_FMT and '%%' can never fuse with the character
+        # that follows it.
+        # NOTE(review): assumes every DT_FMT key is a two-character '%x'
+        # directive - confirm against the DT_FMT table.
+        return re.sub(r'%.', _directive_regex, format_spec)
+
+    def regex_field(self, value, format_spec):
+        """Return the regular expression matching a field with `format_spec`.
+
+        A `value` other than `UNPROVIDED_VALUE` is formatted by the parent
+        class instead. Raises `KeyError` if the last character of a
+        non-datetime spec is not a key of `spec_regexes`.
+        """
+        if value != self.UNPROVIDED_VALUE:
+            return super(RegexFormatter, self).format_field(value, format_spec)
+
+        # Replace format spec with the regular expression that matches it
+        if not format_spec:
+            return r'.*'
+        if '%' in format_spec:
+            return self._regex_datetime(format_spec)
+        char_type = spec_regexes[format_spec[-1]]
+        # the first run of digits in the spec is taken as the field width
+        num_match = re.search('[0-9]+', format_spec)
+        num = 0 if num_match is None else int(num_match.group(0))
+        has_multiple = format_spec[-1] in allow_multiple
+        if num == 0 and has_multiple:
+            # don't know the count
+            return r'{}*'.format(char_type)
+        elif num == 0:
+            # floats and other types can't have multiple
+            return char_type
+        elif has_multiple:
+            return r'{}{{{:d}}}'.format(char_type, num)
         else:
-            # find number of chars
-            num = _get_number_from_fmt(fmt)
-            value = stri[0:num]
-            stri_next = stri[len(value):]
-            keyvals = _extract_values(parsedef, stri_next)
-            keyvals[key] = value
-            return keyvals
+            # a width on a type that is matched as a whole is ignored
+            return char_type
+
+    def format_field(self, value, format_spec):
+        """Render an unprovided field as a named regular expression group.
+
+        Values that are not the ``(field_name, UNPROVIDED_VALUE)`` marker
+        produced by `get_value` are formatted by the parent class.
+        """
+        if not isinstance(value, tuple) or value[1] != self.UNPROVIDED_VALUE:
+            return super(RegexFormatter, self).format_field(value, format_spec)
+        field_name, value = value
+        new_value = self.regex_field(value, format_spec)
+        return '(?P<{}>{})'.format(field_name, new_value)
+
+    def extract_values(self, fmt, stri):
+        """Return a dict of the field values of `fmt` found in `stri`.
+
+        The values are the matched substrings. Matching is done with
+        `re.match`, i.e. anchored at the start of `stri` only.
+
+        Raises `ValueError` if `stri` does not match the pattern.
+        """
+        regex = self.format(fmt)
+        match = re.match(regex, stri)
+        if match is None:
+            raise ValueError("String does not match pattern.")
+        return match.groupdict()
+
+
+regex_formatter = RegexFormatter()
def _get_number_from_fmt(fmt):
@@ -197,10 +271,7 @@ def _get_number_from_fmt(fmt):
def _convert(convdef, stri):
- '''Convert the string *stri* to the given conversion definition
- *convdef*.
- '''
-
+ """Convert the string *stri* to the given conversion definition *convdef*."""
if '%' in convdef:
result = dt.datetime.strptime(stri, convdef)
elif 'd' in convdef or 's' in convdef:
@@ -234,27 +305,13 @@ def _convert(convdef, stri):
return result
-def _collect_keyvals_from_parsedef(parsedef):
- '''Collect dict keys and values from parsedef.
- '''
-
- keys, vals = [], []
-
- for itm in parsedef:
- if isinstance(itm, dict):
- keys.append(list(itm.keys())[0])
- vals.append(list(itm.values())[0])
-
- return keys, vals
-
-
def parse(fmt, stri):
'''Parse keys and corresponding values from *stri* using format
described in *fmt* string.
'''
parsedef, convdef = _extract_parsedef(fmt)
- keyvals = _extract_values(parsedef, stri)
+ keyvals = regex_formatter.extract_values(fmt, stri)
for key in convdef.keys():
keyvals[key] = _convert(convdef[key], keyvals[key])
@@ -262,11 +319,8 @@ def parse(fmt, stri):
def compose(fmt, keyvals):
- '''Return string composed according to *fmt* string and filled
- with values with the corresponding keys in *keyvals* dictionary.
- '''
-
- return fmt.format(**keyvals)
+ """Convert parameters in `keyvals` to a string based on `fmt` string."""
+ return formatter.format(fmt, **keyvals)
DT_FMT = {
@@ -297,70 +351,54 @@ def compose(fmt, keyvals):
}
+class GlobifyFormatter(string.Formatter):
+    """Formatter that renders a format string as a `glob` pattern.
+
+    Fields with a supplied value are formatted normally. Fields without one
+    are replaced by wildcards derived from their format spec. A datetime
+    field may also be given as a 2-element tuple/list
+    ``(datetime, letters)``: only the strftime directives whose letter is in
+    ``letters`` are filled in, the others become wildcards.
+    """
+
+    # special string to mark a parameter not being specified
+    UNPROVIDED_VALUE = ''
+
+    def get_value(self, key, args, kwargs):
+        """Return the value for `key`, or `UNPROVIDED_VALUE` if it is missing."""
+        try:
+            return super(GlobifyFormatter, self).get_value(key, args, kwargs)
+        except (IndexError, KeyError):
+            # a missing field is not an error: format_field turns it into
+            # a wildcard
+            return self.UNPROVIDED_VALUE
+
+    def format_field(self, value, format_spec):
+        """Format `value`, or return the glob pattern standing in for it."""
+        if not isinstance(value, (list, tuple)) and value != self.UNPROVIDED_VALUE:
+            return super(GlobifyFormatter, self).format_field(value, format_spec)
+
+        if value != self.UNPROVIDED_VALUE:
+            # partial provided date/time fields
+            # specified with a tuple/list of 2 elements
+            # (value, partial format string)
+            had_directives = '%' in format_spec
+            value, dt_fmt = value
+            for fmt_letter in dt_fmt:
+                fmt = '%' + fmt_letter
+                format_spec = format_spec.replace(fmt, value.strftime(fmt))
+            if had_directives and '%' not in format_spec:
+                # Every directive was supplied, so the spec is now the fully
+                # formatted date. Return it as is: falling through would
+                # mistake its digits for a field width.
+                return format_spec
+
+        # Replace format spec with glob patterns (*, ?, etc)
+        if not format_spec:
+            return '*'
+        if '%' in format_spec:
+            replace_str = format_spec
+            for fmt_key, fmt_val in DT_FMT.items():
+                replace_str = replace_str.replace(fmt_key, fmt_val)
+            return replace_str
+        if not re.search('[0-9]+', format_spec):
+            # no width in the spec, so the length is unknown
+            return '*'
+        return '?' * _get_number_from_fmt(format_spec)
+
+
+globify_formatter = GlobifyFormatter()
+
+
def globify(fmt, keyvals=None):
- '''Generate a string useable with glob.glob() from format string
+ """Generate a string usable with glob.glob() from format string
*fmt* and *keyvals* dictionary.
- '''
-
+ """
if keyvals is None:
keyvals = {}
- else:
- keyvals = keyvals.copy()
- parsedef, _ = _extract_parsedef(fmt)
- all_keys, all_vals = _collect_keyvals_from_parsedef(parsedef)
- replace_str = ''
- for key, val in zip(all_keys, all_vals):
- if key not in list(keyvals.keys()):
- # replace depending on the format defined in all_vals[key]
- if val is None:
- replace_str = '*'
- elif '%' in val:
- # calculate the length of datetime
- replace_str = val
- for fmt_key, fmt_val in DT_FMT.items():
- replace_str = replace_str.replace(fmt_key, fmt_val)
- fmt = fmt.replace(key + ':' + val, key)
- elif not re.search('[0-9]+', val):
- if 'd' in val:
- val2 = val.replace('d', 's')
- fmt = fmt.replace(key + ':' + val, key + ':' + val2)
- replace_str = '*'
- else:
- if 'd' in val:
- val2 = val.lstrip('0').replace('d', 's')
- fmt = fmt.replace(key + ':' + val, key + ':' + val2)
- num = _get_number_from_fmt(val)
- replace_str = num * '?'
- keyvals[key] = replace_str
- else:
- # Check partial datetime usage
- if isinstance(keyvals[key], list) or \
- isinstance(keyvals[key], tuple):
- conv_chars = keyvals[key][1]
- else:
- continue
-
- val2 = list(val)
- prev = 0
- datet = keyvals[key][0] # assume datetime
- while True:
- idx = val.find('%', prev)
- # Stop if no finds
- if idx == -1:
- break
- if val[idx + 1] not in conv_chars:
- tmp = '{0:%' + val[idx + 1] + '}'
- # calculate how many '?' are needed
- num = len(tmp.format(datet))
- val2[idx:idx + num] = num * '?'
- prev = idx + 1
- val2 = ''.join(val2)
- fmt = fmt.replace(key + ':' + val, key + ':' + val2)
- keyvals[key] = keyvals[key][0]
-
- result = compose(fmt, keyvals)
-
- return result
+ return globify_formatter.format(fmt, **keyvals)
def validate(fmt, stri):
@@ -389,7 +427,7 @@ def is_one2one(fmt):
Note: This test only applies to sensible usage of the format string.
If string or numeric data is causes overflow, e.g.
if composing "abcd" into {3s}, one to one correspondence will always
- be broken in such cases. This off course also applies to precision
+ be broken in such cases. This of course also applies to precision
losses when using datetime data.
"""
# look for some bad patterns
diff --git a/trollsift/tests/integrationtests/test_parser.py b/trollsift/tests/integrationtests/test_parser.py
index ded15d1..6475a0e 100644
--- a/trollsift/tests/integrationtests/test_parser.py
+++ b/trollsift/tests/integrationtests/test_parser.py
@@ -51,10 +51,30 @@ def assertItemsEqual(self, a, b):
self.assertEqual(len(a), len(b))
+class TestParserVIIRSSDR(unittest.TestCase):
+    """Parse a VIIRS SDR style file name with several datetime fields."""
+
+    def setUp(self):
+        # split so the line stays within the 120 characters allowed by the
+        # flake8 configuration; the resulting string is unchanged
+        self.fmt = ('SVI01_{platform_shortname}_d{start_time:%Y%m%d_t%H%M%S%f}_e{end_time:%H%M%S%f}'
+                    '_b{orbit:5d}_c{creation_time:%Y%m%d%H%M%S%f}_{source}.h5')
+        self.string = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5'
+        self.data = {'platform_shortname': 'npp',
+                     'start_time': dt.datetime(2012, 2, 25, 18, 1, 24, 500000),
+                     'orbit': 1708,
+                     # the end_time spec has no date directives, so the
+                     # date defaults to 1900-01-01
+                     'end_time': dt.datetime(1900, 1, 1, 18, 2, 48, 700000),
+                     'source': 'noaa_ops',
+                     'creation_time': dt.datetime(2012, 2, 26, 0, 21, 30, 255476)}
+        self.p = Parser(self.fmt)
+
+    def test_parse(self):
+        """The parsed values must equal the expected dictionary."""
+        # Run
+        result = self.p.parse(self.string)
+        # Assert
+        self.assertDictEqual(result, self.data)
+
+
def suite():
"""The suite for test_parser
"""
loader = unittest.TestLoader()
mysuite = unittest.TestSuite()
mysuite.addTest(loader.loadTestsFromTestCase(TestParser))
+ mysuite.addTest(loader.loadTestsFromTestCase(TestParserVIIRSSDR))
return mysuite
diff --git a/trollsift/tests/unittests/test_parser.py b/trollsift/tests/unittests/test_parser.py
index 04cbdc8..d0bab35 100644
--- a/trollsift/tests/unittests/test_parser.py
+++ b/trollsift/tests/unittests/test_parser.py
@@ -1,8 +1,8 @@
import unittest
import datetime as dt
-from trollsift.parser import _extract_parsedef, _extract_values
-from trollsift.parser import _convert, _collect_keyvals_from_parsedef
+from trollsift.parser import _extract_parsedef, regex_formatter
+from trollsift.parser import _convert
from trollsift.parser import parse, globify, validate, is_one2one
@@ -28,122 +28,73 @@ def test_extract_parsedef(self):
'_', {'orbit': '05d'}, '.l1b'])
def test_extract_values(self):
- # Run
- parsedef = ['/somedir/', {'directory': None}, '/hrpt_',
- {'platform': '4s'}, {'platnum': '2s'},
- '_', {'time': '%Y%m%d_%H%M'}, '_',
- {'orbit': 'd'}, '.l1b']
- result = _extract_values(parsedef, self.string)
- # Assert
+ fmt = "/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:d}.l1b"
+ result = regex_formatter.extract_values(fmt, self.string)
self.assertDictEqual(result, {'directory': 'otherdir',
'platform': 'noaa', 'platnum': '16',
'time': '20140210_1004', 'orbit': '69022'})
def test_extract_values_end(self):
- # Run
- parsedef = ['/somedir/', {'directory': None}, '/hrpt_',
- {'platform': '4s'}, {'platnum': '2s'},
- '_', {'time': '%Y%m%d_%H%M'}, '_',
- {'orbit': 'd'}]
- result = _extract_values(parsedef, self.string3)
- # Assert
+ fmt = "/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:d}"
+ result = regex_formatter.extract_values(fmt, self.string3)
self.assertDictEqual(result, {'directory': 'otherdir',
'platform': 'noaa', 'platnum': '16',
'time': '20140210_1004', 'orbit': '69022'})
def test_extract_values_beginning(self):
- # Run
- parsedef = [{'directory': None}, '/hrpt_',
- {'platform': '4s'}, {'platnum': '2s'},
- '_', {'time': '%Y%m%d_%H%M'}, '_',
- {'orbit': 'd'}]
- result = _extract_values(parsedef, self.string4)
- # Assert
- self.assertDictEqual(result, {'directory': '/somedir/otherdir',
- 'platform': 'noaa', 'platnum': '16',
- 'time': '20140210_1004', 'orbit': '69022'})
-
- def test_extract_values_beginning(self):
- # Run
- parsedef = [{'directory': None}, '/hrpt_',
- {'platform': '4s'}, {'platnum': '2s'},
- '_', {'time': '%Y%m%d_%H%M'}, '_',
- {'orbit': 'd'}]
- result = _extract_values(parsedef, self.string4)
- # Assert
+ fmt = "{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:d}"
+ result = regex_formatter.extract_values(fmt, self.string4)
self.assertDictEqual(result, {'directory': '/somedir/otherdir',
'platform': 'noaa', 'platnum': '16',
'time': '20140210_1004', 'orbit': '69022'})
def test_extract_values_s4spair(self):
- # Run
- parsedef = [{'directory': None}, '/hrpt_',
- {'platform': '4s'}, {'platnum': 's'},
- '_', {'time': '%Y%m%d_%H%M'}, '_',
- {'orbit': 'd'}]
- result = _extract_values(parsedef, self.string4)
- # Assert
+ fmt = "{directory}/hrpt_{platform:4s}{platnum:s}_{time:%Y%m%d_%H%M}_{orbit:d}"
+ result = regex_formatter.extract_values(fmt, self.string4)
self.assertDictEqual(result, {'directory': '/somedir/otherdir',
'platform': 'noaa', 'platnum': '16',
'time': '20140210_1004', 'orbit': '69022'})
def test_extract_values_ss2pair(self):
- # Run
- parsedef = [{'directory': None}, '/hrpt_',
- {'platform': 's'}, {'platnum': 's2'},
- '_', {'time': '%Y%m%d_%H%M'}, '_',
- {'orbit': 'd'}]
- result = _extract_values(parsedef, self.string4)
- # Assert
+ fmt = "{directory}/hrpt_{platform:s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:d}"
+ result = regex_formatter.extract_values(fmt, self.string4)
self.assertDictEqual(result, {'directory': '/somedir/otherdir',
'platform': 'noaa', 'platnum': '16',
'time': '20140210_1004', 'orbit': '69022'})
def test_extract_values_ss2pair_end(self):
- # Run
- parsedef = [{'directory': None}, '/hrpt_',
- {'platform': 's'}, {'platnum': 's2'}]
- result = _extract_values(parsedef, "/somedir/otherdir/hrpt_noaa16")
- # Assert
+ fmt = "{directory}/hrpt_{platform:s}{platnum:2s}"
+ result = regex_formatter.extract_values(fmt, "/somedir/otherdir/hrpt_noaa16")
self.assertDictEqual(result, {'directory': '/somedir/otherdir',
'platform': 'noaa', 'platnum': '16'})
def test_extract_values_sdatetimepair_end(self):
- # Run
- parsedef = [{'directory': None}, '/hrpt_',
- {'platform': 's'}, {'date': '%Y%m%d'}]
- result = _extract_values(
- parsedef, "/somedir/otherdir/hrpt_noaa20140212")
- # Assert
+ fmt = "{directory}/hrpt_{platform:s}{date:%Y%m%d}"
+ result = regex_formatter.extract_values(fmt, "/somedir/otherdir/hrpt_noaa20140212")
self.assertDictEqual(result, {'directory': '/somedir/otherdir',
'platform': 'noaa', 'date': '20140212'})
def test_extract_values_everything(self):
- # Run
- parsedef = [{'everything': None}]
- result = _extract_values(parsedef, self.string)
- # Assert
+ fmt = "{everything}"
+ result = regex_formatter.extract_values(fmt, self.string)
self.assertDictEqual(
result, {'everything': '/somedir/otherdir/hrpt_noaa16_20140210_1004_69022.l1b'})
def test_extract_values_padding2(self):
- # Run
- parsedef = ['/somedir/', {'directory': None}, '/hrpt_',
- {'platform': '4s'}, {'platnum': '2s'},
- '_', {'time': '%Y%m%d_%H%M'}, '_',
- {'orbit': '0>5d'}, '.l1b']
- result = _extract_values(parsedef, self.string2)
+ fmt = "/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:0>5d}.l1b"
+ # parsedef = ['/somedir/', {'directory': None}, '/hrpt_',
+ # {'platform': '4s'}, {'platnum': '2s'},
+ # '_', {'time': '%Y%m%d_%H%M'}, '_',
+ # {'orbit': '0>5d'}, '.l1b']
+ result = regex_formatter.extract_values(fmt, self.string2)
# Assert
self.assertDictEqual(result, {'directory': 'otherdir',
'platform': 'noaa', 'platnum': '16',
'time': '20140210_1004', 'orbit': '00022'})
def test_extract_values_fails(self):
- # Run
- parsedef = ['/somedir/', {'directory': None}, '/hrpt_',
- {'platform': '4s'}, {'platnum': '2s'},
- '_', {'time': '%Y%m%d_%H%M'}, '_', {'orbit': '4d'}, '.l1b']
- self.assertRaises(ValueError, _extract_values, parsedef, self.string)
+ fmt = '/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:4d}.l1b'
+ self.assertRaises(ValueError, regex_formatter.extract_values, fmt, self.string)
def test_convert_digits(self):
self.assertEqual(_convert('d', '69022'), 69022)
@@ -164,6 +115,17 @@ def test_parse(self):
'time': dt.datetime(2014, 2, 12, 14, 12),
'orbit': 12345})
+ def test_parse_wildcards(self):
+ # Run
+ result = parse(
+ "hrpt_{platform}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:05d}{ext}",
+ "hrpt_noaa19_20140212_1412_12345.l1b")
+ # Assert
+ self.assertDictEqual(result, {'platform': 'noaa', 'platnum': '19',
+ 'time': dt.datetime(2014, 2, 12, 14, 12),
+ 'orbit': 12345,
+ 'ext': '.l1b'})
+
def test_parse_align(self):
filepattern="H-000-{hrit_format:4s}__-{platform_name:4s}________-{channel_name:_<9s}-{segment:_<9s}-{start_time:%Y%m%d%H%M}-__"
result = parse(filepattern, "H-000-MSG3__-MSG3________-IR_039___-000007___-201506051700-__")
@@ -173,7 +135,6 @@ def test_parse_align(self):
'segment': '000007',
'start_time': dt.datetime(2015, 6, 5, 17, 0)})
-
def test_globify_simple(self):
# Run
result = globify('{a}_{b}.end', {'a': 'a', 'b': 'b'})
@@ -236,21 +197,6 @@ def test_globify_datetime_nosub(self):
# Assert
self.assertEqual(result, 'hrpt_noaa??_????????_????_*.l1b')
- def test_collect_keyvals_from_parsedef(self):
- # Run
- keys, vals = _collect_keyvals_from_parsedef(['/somedir/',
- {'directory': None},
- '/hrpt_',
- {'platform': '4s'},
- {'platnum': '2s'}, '_',
- {'time': '%Y%m%d_%H%M'},
- '_', {'orbit': '05d'},
- '.l1b'])
- # Assert
- self.assertEqual(keys, ['directory', 'platform',
- 'platnum', 'time', 'orbit'])
- self.assertEqual(vals, [None, '4s', '2s', '%Y%m%d_%H%M', '05d'])
-
def test_validate(self):
# These cases are True
self.assertTrue(
@@ -285,6 +231,35 @@ def test_is_one2one(self):
self.assertFalse(is_one2one(
"/somedir/{directory}/somedata_{platform:4s}_{time:%Y%d%m-%H%M}_{orbit:d}.l1b"))
+    def test_compose(self):
+        """Test the compose method's custom conversion options."""
+        from trollsift import compose
+        key_vals = {'a': 'this Is A-Test b_test c test'}
+
+        new_str = compose("{a!c}", key_vals)
+        self.assertEqual(new_str, 'This is a-test b_test c test')
+        new_str = compose("{a!h}", key_vals)
+        self.assertEqual(new_str, 'thisisatestbtestctest')
+        new_str = compose("{a!H}", key_vals)
+        self.assertEqual(new_str, 'THISISATESTBTESTCTEST')
+        new_str = compose("{a!l}", key_vals)
+        self.assertEqual(new_str, 'this is a-test b_test c test')
+        new_str = compose("{a!R}", key_vals)
+        self.assertEqual(new_str, 'thisIsATestbtestctest')
+        new_str = compose("{a!t}", key_vals)
+        self.assertEqual(new_str, 'This Is A-Test B_Test C Test')
+        new_str = compose("{a!u}", key_vals)
+        self.assertEqual(new_str, 'THIS IS A-TEST B_TEST C TEST')
+        # builtin repr
+        new_str = compose("{a!r}", key_vals)
+        self.assertEqual(new_str, '\'this Is A-Test b_test c test\'')
+        # no formatting
+        new_str = compose("{a}", key_vals)
+        self.assertEqual(new_str, 'this Is A-Test b_test c test')
+        # bad formatter: an unknown conversion letter must raise
+        self.assertRaises(ValueError, compose, "{a!X}", key_vals)
+
def assertDictEqual(self, a, b):
for key in a:
self.assertTrue(key in b)