diff --git a/doc/source/index.rst b/doc/source/index.rst index 21d53c2..d05aadb 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -4,8 +4,8 @@ contain the root `toctree` directive. .. meta:: - description: Trollsift project, modules for formatting, parsing and filtering satellite granule file names - keywords: Python, pytroll, format, parse, filter, string + :description: Trollsift project, modules for formatting, parsing and filtering satellite granule file names + :keywords: Python, pytroll, format, parse, filter, string Welcome to the trollsift documentation! ========================================= @@ -26,7 +26,6 @@ Contents installation usage - examples api Indices and tables diff --git a/doc/source/usage.rst b/doc/source/usage.rst index acfc244..ad426af 100644 --- a/doc/source/usage.rst +++ b/doc/source/usage.rst @@ -1,9 +1,3 @@ - -.. .. sectnum:: -.. :depth: 4 -.. :start: 2 -.. :suffix: . - .. _string-format: https://docs.python.org/2/library/string.html#format-string-syntax Usage @@ -44,6 +38,14 @@ a new file name, >>> p.compose(data) '/somedir/otherdir/hrpt_noaa16_20120101_0101_69022.l1b' +In addition to python's builtin string formatting functionality trollsift also +provides extra conversion options such as making all characters lowercase: + + >>> my_parser = Parser("{platform_name:l}") + >>> my_parser.compose({'platform_name': 'NPP'}) + 'npp' + +For all of the options see :class:`~trollsift.parser.StringFormatter`. standalone parse and compose +++++++++++++++++++++++++++++++++++++++++ diff --git a/setup.cfg b/setup.cfg index dd67fc4..accf151 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,4 +6,7 @@ requires=python python-six release=1 [bdist_wheel] -universal=1 \ No newline at end of file +universal=1 + +[flake8] +max-line-length = 120 diff --git a/trollsift/parser.py b/trollsift/parser.py index 7d04fdf..00ac585 100644 --- a/trollsift/parser.py +++ b/trollsift/parser.py @@ -22,8 +22,8 @@ # along with this program. If not, see . -'''Parser class -''' +"""Parser class +""" import re import datetime as dt @@ -33,9 +33,8 @@ class Parser(object): - - '''Parser class - ''' + """Parser class + """ def __init__(self, fmt): self.fmt = fmt @@ -55,6 +54,8 @@ def compose(self, keyvals): ''' return compose(self.fmt, keyvals) + format = compose + def globify(self, keyvals=None): '''Generate a string useable with glob.glob() from format string *fmt* and *keyvals* dictionary. @@ -88,98 +89,171 @@ def is_one2one(self): return is_one2one(self.fmt) -def _extract_parsedef(fmt): - '''Retrieve parse definition from the format string *fmt*. - ''' +class StringFormatter(string.Formatter): + """Custom string formatter class for basic strings. + + This formatter adds a few special conversions for assisting with common + trollsift situations like making a parameter lowercase or removing + hyphens. The added conversions are listed below and can be used in a + format string by prefixing them with an `!` like so: + + >>> fstr = "{!u}_{!l}" + >>> formatter = StringFormatter() + >>> formatter.format(fstr, "to_upper", "To_LowerCase") + "TO_UPPER_to_lowercase" + + - c: Make capitalized version of string (first character upper case, all lowercase after that) by executing the + parameter's `.capitalize()` method. + - h: A combination of 'R' and 'l'. + - H: A combination of 'R' and 'u'. + - l: Make all characters lowercase by executing the parameter's `.lower()` method. + - R: Remove all separators from the parameter including '-', '_', ' ', and ':'. + - t: Title case the string by executing the parameter's `.title()` method. + - u: Make all characters uppercase by executing the parameter's `.upper()` method. + """ + CONV_FUNCS = { + 'c': 'capitalize', + 'h': 'lower', + 'H': 'upper', + 'l': 'lower', + 't': 'title', + 'u': 'upper' + } + + def convert_field(self, value, conversion): + """Apply conversions mentioned above.""" + func = self.CONV_FUNCS.get(conversion) + if func is not None: + value = getattr(value, func)() + elif conversion not in ['R']: + # default conversion ('r', 's') + return super(StringFormatter, self).convert_field(value, conversion) + + if conversion in ['h', 'H', 'R']: + value = value.replace('-', '').replace('_', '').replace(':', '').replace(' ', '') + return value + + +formatter = StringFormatter() + + +def _extract_parsedef(fmt): + """Retrieve parse definition from the format string `fmt`.""" parsedef = [] convdef = {} - - for part1 in fmt.split('}'): - part2 = part1.split('{', 1) - if part2[0] is not '': - parsedef.append(part2[0]) - if len(part2) > 1 and part2[1] is not '': - if ':' in part2[1]: - part2 = part2[1].split(':', 1) - parsedef.append({part2[0]: part2[1]}) - convdef[part2[0]] = part2[1] - else: - reg = re.search('(\{' + part2[1] + '\})', fmt) - if reg: - parsedef.append({part2[1]: None}) - else: - parsedef.append(part2[1]) + for literal_text, field_name, format_spec, conversion in formatter.parse(fmt): + if literal_text: + parsedef.append(literal_text) + if field_name is None: + continue + parsedef.append({field_name: format_spec or None}) + convdef[field_name] = format_spec return parsedef, convdef -def _extract_values(parsedef, stri): - """ - Given a parse definition *parsedef* match and extract key value - pairs from input string *stri*. - """ - if len(parsedef) == 0: - return {} - - match = parsedef.pop(0) - # we allow ourselves typechecking - # in case of this subroutine - if isinstance(match, (str, six.text_type)): - # match - if stri.find(match) == 0: - stri_next = stri[len(match):] - return _extract_values(parsedef, stri_next) - else: - raise ValueError - else: - key = list(match)[0] - fmt = match[key] - fmt_list = ["%f", "%a", "%A", "%b", "%B", "%z", "%Z", - "%p", "%c", "%x", "%X"] - if fmt is None or fmt.isalpha() or any([x in fmt for x in fmt_list]): - if len(parsedef) != 0: - next_match = parsedef[0] - # next match is string ... - if isinstance(next_match, (str, six.text_type)): - try: - count = fmt.count(next_match) - except AttributeError: - count = 0 - pos = -1 - for dummy in range(count + 1): - pos = stri.find(next_match, pos + 1) - value = stri[0:pos] - # next match is string key ... - else: - # pick out segment until string match, - # and parse in reverse, - rev_parsedef = [] - x = '' - for x in parsedef: - if isinstance(x, (str, six.text_type)): - break - rev_parsedef.insert(0, x) - rev_parsedef = rev_parsedef + [match] - if isinstance(x, (str, six.text_type)): - rev_stri = stri[:stri.find(x)][::-1] - else: - rev_stri = stri[::-1] - # parse reversely and pick out value - value = _extract_values(rev_parsedef, rev_stri)[key][::-1] - else: - value = stri - stri_next = stri[len(value):] - keyvals = _extract_values(parsedef, stri_next) - keyvals[key] = value - return keyvals +# taken from https://docs.python.org/3/library/re.html#simulating-scanf +spec_regexes = { + 'c': r'.', + 'd': r'[-+]?\d', + 'f': r'[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?', + 'i': r'[-+]?(0[xX][\dA-Fa-f]+|0[0-7]*|\d+)', + 'o': r'[-+]?[0-7]', + 's': r'\S', + 'x': r'[-+]?(0[xX])?[\dA-Fa-f]', +} +spec_regexes['e'] = spec_regexes['f'] +spec_regexes['E'] = spec_regexes['f'] +spec_regexes['g'] = spec_regexes['f'] +spec_regexes['X'] = spec_regexes['x'] +allow_multiple = ['c', 'd', 'o', 's', 'x', 'X'] + + +class RegexFormatter(string.Formatter): + + # special string to mark a parameter not being specified + UNPROVIDED_VALUE = '' + ESCAPE_CHARACTERS = [x for x in string.punctuation if x not in '\\%'] + ESCAPE_SETS = [(c, '\{}'.format(c)) for c in ESCAPE_CHARACTERS] + + def _escape(self, s): + """Escape bad characters for regular expressions. + + Similar to `re.escape` but allows '%' to pass through. + + """ + for ch, r_ch in self.ESCAPE_SETS: + s = s.replace(ch, r_ch) + return s + + def parse(self, format_string): + parse_ret = super(RegexFormatter, self).parse(format_string) + for literal_text, field_name, format_spec, conversion in parse_ret: + # the parent class will call parse multiple times moving + # 'format_spec' to 'literal_text'. We only escape 'literal_text' + # so we don't escape things twice. + literal_text = self._escape(literal_text) + yield literal_text, field_name, format_spec, conversion + + def get_value(self, key, args, kwargs): + try: + return super(RegexFormatter, self).get_value(key, args, kwargs) + except (IndexError, KeyError): + return key, self.UNPROVIDED_VALUE + + def _regex_datetime(self, format_spec): + replace_str = format_spec + for fmt_key, fmt_val in DT_FMT.items(): + if fmt_key == '%%': + # special case + replace_str.replace('%%', '%') + continue + count = fmt_val.count('?') + # either a series of numbers or letters/numbers + regex = r'\d{{{:d}}}'.format(count) if count else r'[^ \t\n\r\f\v\-_:]+' + replace_str = replace_str.replace(fmt_key, regex) + return replace_str + + def regex_field(self, value, format_spec): + if value != self.UNPROVIDED_VALUE: + return super(RegexFormatter, self).format_field(value, format_spec) + + # Replace format spec with glob patterns (*, ?, etc) + if not format_spec: + return r'.*' + if '%' in format_spec: + return self._regex_datetime(format_spec) + char_type = spec_regexes[format_spec[-1]] + num_match = re.search('[0-9]+', format_spec) + num = 0 if num_match is None else int(num_match.group(0)) + has_multiple = format_spec[-1] in allow_multiple + if num == 0 and has_multiple: + # don't know the count + return r'{}*'.format(char_type) + elif num == 0: + # floats and other types can't have multiple + return char_type + elif format_spec[-1] in allow_multiple: + return r'{}{{{:d}}}'.format(char_type, num) else: - # find number of chars - num = _get_number_from_fmt(fmt) - value = stri[0:num] - stri_next = stri[len(value):] - keyvals = _extract_values(parsedef, stri_next) - keyvals[key] = value - return keyvals + return r'{}'.format(char_type) + + def format_field(self, value, format_spec): + if not isinstance(value, tuple) or value[1] != self.UNPROVIDED_VALUE: + return super(RegexFormatter, self).format_field(value, format_spec) + field_name, value = value + new_value = self.regex_field(value, format_spec) + return '(?P<{}>{})'.format(field_name, new_value) + + def extract_values(self, fmt, stri): + regex = self.format(fmt) + match = re.match(regex, stri) + if match is None: + raise ValueError("String does not match pattern.") + return match.groupdict() + + +regex_formatter = RegexFormatter() def _get_number_from_fmt(fmt): @@ -197,10 +271,7 @@ def _get_number_from_fmt(fmt): def _convert(convdef, stri): - '''Convert the string *stri* to the given conversion definition - *convdef*. - ''' - + """Convert the string *stri* to the given conversion definition *convdef*.""" if '%' in convdef: result = dt.datetime.strptime(stri, convdef) elif 'd' in convdef or 's' in convdef: @@ -234,27 +305,13 @@ def _convert(convdef, stri): return result -def _collect_keyvals_from_parsedef(parsedef): - '''Collect dict keys and values from parsedef. - ''' - - keys, vals = [], [] - - for itm in parsedef: - if isinstance(itm, dict): - keys.append(list(itm.keys())[0]) - vals.append(list(itm.values())[0]) - - return keys, vals - - def parse(fmt, stri): '''Parse keys and corresponding values from *stri* using format described in *fmt* string. ''' parsedef, convdef = _extract_parsedef(fmt) - keyvals = _extract_values(parsedef, stri) + keyvals = regex_formatter.extract_values(fmt, stri) for key in convdef.keys(): keyvals[key] = _convert(convdef[key], keyvals[key]) @@ -262,11 +319,8 @@ def parse(fmt, stri): def compose(fmt, keyvals): - '''Return string composed according to *fmt* string and filled - with values with the corresponding keys in *keyvals* dictionary. - ''' - - return fmt.format(**keyvals) + """Convert parameters in `keyvals` to a string based on `fmt` string.""" + return formatter.format(fmt, **keyvals) DT_FMT = { @@ -297,70 +351,54 @@ def compose(fmt, keyvals): } +class GlobifyFormatter(string.Formatter): + + # special string to mark a parameter not being specified + UNPROVIDED_VALUE = '' + + def get_value(self, key, args, kwargs): + try: + return super(GlobifyFormatter, self).get_value(key, args, kwargs) + except (IndexError, KeyError): + # assumes that + return self.UNPROVIDED_VALUE + + def format_field(self, value, format_spec): + if not isinstance(value, (list, tuple)) and value != self.UNPROVIDED_VALUE: + return super(GlobifyFormatter, self).format_field(value, format_spec) + elif value != self.UNPROVIDED_VALUE: + # partial provided date/time fields + # specified with a tuple/list of 2 elements + # (value, partial format string) + value, dt_fmt = value + for fmt_letter in dt_fmt: + fmt = '%' + fmt_letter + format_spec = format_spec.replace(fmt, value.strftime(fmt)) + + # Replace format spec with glob patterns (*, ?, etc) + if not format_spec: + return '*' + if '%' in format_spec: + replace_str = format_spec + for fmt_key, fmt_val in DT_FMT.items(): + replace_str = replace_str.replace(fmt_key, fmt_val) + return replace_str + if not re.search('[0-9]+', format_spec): + # non-integer type + return '*' + return '?' * _get_number_from_fmt(format_spec) + + +globify_formatter = GlobifyFormatter() + + def globify(fmt, keyvals=None): - '''Generate a string useable with glob.glob() from format string + """Generate a string usable with glob.glob() from format string *fmt* and *keyvals* dictionary. - ''' - + """ if keyvals is None: keyvals = {} - else: - keyvals = keyvals.copy() - parsedef, _ = _extract_parsedef(fmt) - all_keys, all_vals = _collect_keyvals_from_parsedef(parsedef) - replace_str = '' - for key, val in zip(all_keys, all_vals): - if key not in list(keyvals.keys()): - # replace depending on the format defined in all_vals[key] - if val is None: - replace_str = '*' - elif '%' in val: - # calculate the length of datetime - replace_str = val - for fmt_key, fmt_val in DT_FMT.items(): - replace_str = replace_str.replace(fmt_key, fmt_val) - fmt = fmt.replace(key + ':' + val, key) - elif not re.search('[0-9]+', val): - if 'd' in val: - val2 = val.replace('d', 's') - fmt = fmt.replace(key + ':' + val, key + ':' + val2) - replace_str = '*' - else: - if 'd' in val: - val2 = val.lstrip('0').replace('d', 's') - fmt = fmt.replace(key + ':' + val, key + ':' + val2) - num = _get_number_from_fmt(val) - replace_str = num * '?' - keyvals[key] = replace_str - else: - # Check partial datetime usage - if isinstance(keyvals[key], list) or \ - isinstance(keyvals[key], tuple): - conv_chars = keyvals[key][1] - else: - continue - - val2 = list(val) - prev = 0 - datet = keyvals[key][0] # assume datetime - while True: - idx = val.find('%', prev) - # Stop if no finds - if idx == -1: - break - if val[idx + 1] not in conv_chars: - tmp = '{0:%' + val[idx + 1] + '}' - # calculate how many '?' are needed - num = len(tmp.format(datet)) - val2[idx:idx + num] = num * '?' - prev = idx + 1 - val2 = ''.join(val2) - fmt = fmt.replace(key + ':' + val, key + ':' + val2) - keyvals[key] = keyvals[key][0] - - result = compose(fmt, keyvals) - - return result + return globify_formatter.format(fmt, **keyvals) def validate(fmt, stri): @@ -389,7 +427,7 @@ def is_one2one(fmt): Note: This test only applies to sensible usage of the format string. If string or numeric data is causes overflow, e.g. if composing "abcd" into {3s}, one to one correspondence will always - be broken in such cases. This off course also applies to precision + be broken in such cases. This of course also applies to precision losses when using datetime data. """ # look for some bad patterns diff --git a/trollsift/tests/integrationtests/test_parser.py b/trollsift/tests/integrationtests/test_parser.py index ded15d1..6475a0e 100644 --- a/trollsift/tests/integrationtests/test_parser.py +++ b/trollsift/tests/integrationtests/test_parser.py @@ -51,10 +51,30 @@ def assertItemsEqual(self, a, b): self.assertEqual(len(a), len(b)) +class TestParserVIIRSSDR(unittest.TestCase): + + def setUp(self): + self.fmt = 'SVI01_{platform_shortname}_d{start_time:%Y%m%d_t%H%M%S%f}_e{end_time:%H%M%S%f}_b{orbit:5d}_c{creation_time:%Y%m%d%H%M%S%f}_{source}.h5' + self.string = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5' + self.data = {'platform_shortname': 'npp', + 'start_time': dt.datetime(2012, 2, 25, 18, 1, 24, 500000), 'orbit': 1708, + 'end_time': dt.datetime(1900, 1, 1, 18, 2, 48, 700000), + 'source': 'noaa_ops', + 'creation_time': dt.datetime(2012, 2, 26, 0, 21, 30, 255476)} + self.p = Parser(self.fmt) + + def test_parse(self): + # Run + result = self.p.parse(self.string) + # Assert + self.assertDictEqual(result, self.data) + + def suite(): """The suite for test_parser """ loader = unittest.TestLoader() mysuite = unittest.TestSuite() mysuite.addTest(loader.loadTestsFromTestCase(TestParser)) + mysuite.addTest(loader.loadTestsFromTestCase(TestParserVIIRSSDR)) return mysuite diff --git a/trollsift/tests/unittests/test_parser.py b/trollsift/tests/unittests/test_parser.py index 04cbdc8..d0bab35 100644 --- a/trollsift/tests/unittests/test_parser.py +++ b/trollsift/tests/unittests/test_parser.py @@ -1,8 +1,8 @@ import unittest import datetime as dt -from trollsift.parser import _extract_parsedef, _extract_values -from trollsift.parser import _convert, _collect_keyvals_from_parsedef +from trollsift.parser import _extract_parsedef, regex_formatter +from trollsift.parser import _convert from trollsift.parser import parse, globify, validate, is_one2one @@ -28,122 +28,73 @@ def test_extract_parsedef(self): '_', {'orbit': '05d'}, '.l1b']) def test_extract_values(self): - # Run - parsedef = ['/somedir/', {'directory': None}, '/hrpt_', - {'platform': '4s'}, {'platnum': '2s'}, - '_', {'time': '%Y%m%d_%H%M'}, '_', - {'orbit': 'd'}, '.l1b'] - result = _extract_values(parsedef, self.string) - # Assert + fmt = "/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:d}.l1b" + result = regex_formatter.extract_values(fmt, self.string) self.assertDictEqual(result, {'directory': 'otherdir', 'platform': 'noaa', 'platnum': '16', 'time': '20140210_1004', 'orbit': '69022'}) def test_extract_values_end(self): - # Run - parsedef = ['/somedir/', {'directory': None}, '/hrpt_', - {'platform': '4s'}, {'platnum': '2s'}, - '_', {'time': '%Y%m%d_%H%M'}, '_', - {'orbit': 'd'}] - result = _extract_values(parsedef, self.string3) - # Assert + fmt = "/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:d}" + result = regex_formatter.extract_values(fmt, self.string3) self.assertDictEqual(result, {'directory': 'otherdir', 'platform': 'noaa', 'platnum': '16', 'time': '20140210_1004', 'orbit': '69022'}) def test_extract_values_beginning(self): - # Run - parsedef = [{'directory': None}, '/hrpt_', - {'platform': '4s'}, {'platnum': '2s'}, - '_', {'time': '%Y%m%d_%H%M'}, '_', - {'orbit': 'd'}] - result = _extract_values(parsedef, self.string4) - # Assert - self.assertDictEqual(result, {'directory': '/somedir/otherdir', - 'platform': 'noaa', 'platnum': '16', - 'time': '20140210_1004', 'orbit': '69022'}) - - def test_extract_values_beginning(self): - # Run - parsedef = [{'directory': None}, '/hrpt_', - {'platform': '4s'}, {'platnum': '2s'}, - '_', {'time': '%Y%m%d_%H%M'}, '_', - {'orbit': 'd'}] - result = _extract_values(parsedef, self.string4) - # Assert + fmt = "{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:d}" + result = regex_formatter.extract_values(fmt, self.string4) self.assertDictEqual(result, {'directory': '/somedir/otherdir', 'platform': 'noaa', 'platnum': '16', 'time': '20140210_1004', 'orbit': '69022'}) def test_extract_values_s4spair(self): - # Run - parsedef = [{'directory': None}, '/hrpt_', - {'platform': '4s'}, {'platnum': 's'}, - '_', {'time': '%Y%m%d_%H%M'}, '_', - {'orbit': 'd'}] - result = _extract_values(parsedef, self.string4) - # Assert + fmt = "{directory}/hrpt_{platform:4s}{platnum:s}_{time:%Y%m%d_%H%M}_{orbit:d}" + result = regex_formatter.extract_values(fmt, self.string4) self.assertDictEqual(result, {'directory': '/somedir/otherdir', 'platform': 'noaa', 'platnum': '16', 'time': '20140210_1004', 'orbit': '69022'}) def test_extract_values_ss2pair(self): - # Run - parsedef = [{'directory': None}, '/hrpt_', - {'platform': 's'}, {'platnum': 's2'}, - '_', {'time': '%Y%m%d_%H%M'}, '_', - {'orbit': 'd'}] - result = _extract_values(parsedef, self.string4) - # Assert + fmt = "{directory}/hrpt_{platform:s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:d}" + result = regex_formatter.extract_values(fmt, self.string4) self.assertDictEqual(result, {'directory': '/somedir/otherdir', 'platform': 'noaa', 'platnum': '16', 'time': '20140210_1004', 'orbit': '69022'}) def test_extract_values_ss2pair_end(self): - # Run - parsedef = [{'directory': None}, '/hrpt_', - {'platform': 's'}, {'platnum': 's2'}] - result = _extract_values(parsedef, "/somedir/otherdir/hrpt_noaa16") - # Assert + fmt = "{directory}/hrpt_{platform:s}{platnum:2s}" + result = regex_formatter.extract_values(fmt, "/somedir/otherdir/hrpt_noaa16") self.assertDictEqual(result, {'directory': '/somedir/otherdir', 'platform': 'noaa', 'platnum': '16'}) def test_extract_values_sdatetimepair_end(self): - # Run - parsedef = [{'directory': None}, '/hrpt_', - {'platform': 's'}, {'date': '%Y%m%d'}] - result = _extract_values( - parsedef, "/somedir/otherdir/hrpt_noaa20140212") - # Assert + fmt = "{directory}/hrpt_{platform:s}{date:%Y%m%d}" + result = regex_formatter.extract_values(fmt, "/somedir/otherdir/hrpt_noaa20140212") self.assertDictEqual(result, {'directory': '/somedir/otherdir', 'platform': 'noaa', 'date': '20140212'}) def test_extract_values_everything(self): - # Run - parsedef = [{'everything': None}] - result = _extract_values(parsedef, self.string) - # Assert + fmt = "{everything}" + result = regex_formatter.extract_values(fmt, self.string) self.assertDictEqual( result, {'everything': '/somedir/otherdir/hrpt_noaa16_20140210_1004_69022.l1b'}) def test_extract_values_padding2(self): - # Run - parsedef = ['/somedir/', {'directory': None}, '/hrpt_', - {'platform': '4s'}, {'platnum': '2s'}, - '_', {'time': '%Y%m%d_%H%M'}, '_', - {'orbit': '0>5d'}, '.l1b'] - result = _extract_values(parsedef, self.string2) + fmt = "/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:0>5d}.l1b" + # parsedef = ['/somedir/', {'directory': None}, '/hrpt_', + # {'platform': '4s'}, {'platnum': '2s'}, + # '_', {'time': '%Y%m%d_%H%M'}, '_', + # {'orbit': '0>5d'}, '.l1b'] + result = regex_formatter.extract_values(fmt, self.string2) # Assert self.assertDictEqual(result, {'directory': 'otherdir', 'platform': 'noaa', 'platnum': '16', 'time': '20140210_1004', 'orbit': '00022'}) def test_extract_values_fails(self): - # Run - parsedef = ['/somedir/', {'directory': None}, '/hrpt_', - {'platform': '4s'}, {'platnum': '2s'}, - '_', {'time': '%Y%m%d_%H%M'}, '_', {'orbit': '4d'}, '.l1b'] - self.assertRaises(ValueError, _extract_values, parsedef, self.string) + fmt = '/somedir/{directory}/hrpt_{platform:4s}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:4d}.l1b' + self.assertRaises(ValueError, regex_formatter.extract_values, fmt, self.string) def test_convert_digits(self): self.assertEqual(_convert('d', '69022'), 69022) @@ -164,6 +115,17 @@ def test_parse(self): 'time': dt.datetime(2014, 2, 12, 14, 12), 'orbit': 12345}) + def test_parse_wildcards(self): + # Run + result = parse( + "hrpt_{platform}{platnum:2s}_{time:%Y%m%d_%H%M}_{orbit:05d}{ext}", + "hrpt_noaa19_20140212_1412_12345.l1b") + # Assert + self.assertDictEqual(result, {'platform': 'noaa', 'platnum': '19', + 'time': dt.datetime(2014, 2, 12, 14, 12), + 'orbit': 12345, + 'ext': '.l1b'}) + def test_parse_align(self): filepattern="H-000-{hrit_format:4s}__-{platform_name:4s}________-{channel_name:_<9s}-{segment:_<9s}-{start_time:%Y%m%d%H%M}-__" result = parse(filepattern, "H-000-MSG3__-MSG3________-IR_039___-000007___-201506051700-__") @@ -173,7 +135,6 @@ def test_parse_align(self): 'segment': '000007', 'start_time': dt.datetime(2015, 6, 5, 17, 0)}) - def test_globify_simple(self): # Run result = globify('{a}_{b}.end', {'a': 'a', 'b': 'b'}) @@ -236,21 +197,6 @@ def test_globify_datetime_nosub(self): # Assert self.assertEqual(result, 'hrpt_noaa??_????????_????_*.l1b') - def test_collect_keyvals_from_parsedef(self): - # Run - keys, vals = _collect_keyvals_from_parsedef(['/somedir/', - {'directory': None}, - '/hrpt_', - {'platform': '4s'}, - {'platnum': '2s'}, '_', - {'time': '%Y%m%d_%H%M'}, - '_', {'orbit': '05d'}, - '.l1b']) - # Assert - self.assertEqual(keys, ['directory', 'platform', - 'platnum', 'time', 'orbit']) - self.assertEqual(vals, [None, '4s', '2s', '%Y%m%d_%H%M', '05d']) - def test_validate(self): # These cases are True self.assertTrue( @@ -285,6 +231,35 @@ def test_is_one2one(self): self.assertFalse(is_one2one( "/somedir/{directory}/somedata_{platform:4s}_{time:%Y%d%m-%H%M}_{orbit:d}.l1b")) + def test_compose(self): + """Test the compose method's custom conversion options.""" + from trollsift import compose + key_vals = {'a': 'this Is A-Test b_test c test'} + + new_str = compose("{a!c}", key_vals) + self.assertEqual(new_str, 'This is a-test b_test c test') + new_str = compose("{a!h}", key_vals) + self.assertEqual(new_str, 'thisisatestbtestctest') + new_str = compose("{a!H}", key_vals) + self.assertEqual(new_str, 'THISISATESTBTESTCTEST') + new_str = compose("{a!l}", key_vals) + self.assertEqual(new_str, 'this is a-test b_test c test') + new_str = compose("{a!R}", key_vals) + self.assertEqual(new_str, 'thisIsATestbtestctest') + new_str = compose("{a!t}", key_vals) + self.assertEqual(new_str, 'This Is A-Test B_Test C Test') + new_str = compose("{a!u}", key_vals) + self.assertEqual(new_str, 'THIS IS A-TEST B_TEST C TEST') + # builtin repr + new_str = compose("{a!r}", key_vals) + self.assertEqual(new_str, '\'this Is A-Test b_test c test\'') + # no formatting + new_str = compose("{a}", key_vals) + self.assertEqual(new_str, 'this Is A-Test b_test c test') + # bad formatter + self.assertRaises(ValueError, compose, "{a!X}", key_vals) + self.assertEqual(new_str, 'this Is A-Test b_test c test') + def assertDictEqual(self, a, b): for key in a: self.assertTrue(key in b)