Merge 34bb659 into e79a91e

pytroll · Sep 18, 2018 · 2a02a3d · 2a02a3d
2 parents e79a91e + 34bb659
commit 2a02a3d
Show file tree

Hide file tree

Showing 6 changed files with 174 additions and 105 deletions.
diff --git a/doc/source/index.rst b/doc/source/index.rst
@@ -4,8 +4,8 @@
    contain the root `toctree` directive.
 
 .. meta::
-   description: Trollsift project, modules for formatting, parsing and filtering satellite granule file names
-   keywords: Python, pytroll, format, parse, filter, string
+   :description: Trollsift project, modules for formatting, parsing and filtering satellite granule file names
+   :keywords: Python, pytroll, format, parse, filter, string
 
 Welcome to the trollsift documentation!
 =========================================
@@ -26,7 +26,6 @@ Contents
 
    installation
    usage
-   examples
    api
 
 Indices and tables

diff --git a/doc/source/usage.rst b/doc/source/usage.rst
@@ -1,9 +1,3 @@
-
-.. .. sectnum::
-..   :depth: 4
-..   :start: 2
-..   :suffix: .
-
 .. _string-format: https://docs.python.org/2/library/string.html#format-string-syntax
 
 Usage
@@ -44,6 +38,14 @@ a new file name,
   >>> p.compose(data)
   '/somedir/otherdir/hrpt_noaa16_20120101_0101_69022.l1b'
 
+In addition to Python's built-in string formatting functionality, trollsift also
+provides extra conversion options, such as making all characters lowercase:
+
+  >>> my_parser = Parser("{platform_name!l}")
+  >>> my_parser.compose({'platform_name': 'NPP'})
+  'npp'
+
+For all of the options see :class:`~trollsift.parser.StringFormatter`.
 
 standalone parse and compose
 +++++++++++++++++++++++++++++++++++++++++

diff --git a/setup.cfg b/setup.cfg
@@ -6,4 +6,7 @@ requires=python python-six
 release=1
 
 [bdist_wheel]
-universal=1
+universal=1
+
+[flake8]
+max-line-length = 120
diff --git a/trollsift/parser.py b/trollsift/parser.py
@@ -22,8 +22,8 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 
-'''Parser class
-'''
+"""Parser class
+"""
 
 import re
 import datetime as dt
@@ -33,9 +33,8 @@
 
 
 class Parser(object):
-
-    '''Parser class
-    '''
+    """Parser class
+    """
 
     def __init__(self, fmt):
         self.fmt = fmt
@@ -55,6 +54,8 @@ def compose(self, keyvals):
         '''
         return compose(self.fmt, keyvals)
 
+    format = compose
+
     def globify(self, keyvals=None):
         '''Generate a  string useable with glob.glob()  from format string
         *fmt* and *keyvals* dictionary.
@@ -88,28 +89,66 @@ def is_one2one(self):
         return is_one2one(self.fmt)
 
 
-def _extract_parsedef(fmt):
-    '''Retrieve parse definition from the format string *fmt*.
-    '''
+class StringFormatter(string.Formatter):
+    """Custom string formatter class for basic strings.
+
+    This formatter adds a few special conversions for assisting with common
+    trollsift situations like making a parameter lowercase or removing
+    hyphens. The added conversions are listed below and can be used in a
+    format string by prefixing them with an `!` like so:
+
+    >>> fstr = "{0!u}_{1!l}"
+    >>> formatter = StringFormatter()
+    >>> formatter.format(fstr, "to_upper", "To_LowerCase")
+    'TO_UPPER_to_lowercase'
+
+    - c: Make capitalized version of string (first character upper case, all lowercase after that) by executing the
+      parameter's `.capitalize()` method.
+    - h: A combination of 'R' and 'l'.
+    - H: A combination of 'R' and 'u'.
+    - l: Make all characters lowercase by executing the parameter's `.lower()` method.
+    - R: Remove all separators from the parameter including '-', '_', ' ', and ':'.
+    - t: Title case the string by executing the parameter's `.title()` method.
+    - u: Make all characters uppercase by executing the parameter's `.upper()` method.
+
+    """
+    CONV_FUNCS = {  # conversion code -> name of the string method called on the value
+        'c': 'capitalize',
+        'h': 'lower',
+        'H': 'upper',
+        'l': 'lower',
+        't': 'title',
+        'u': 'upper'
+    }
+
+    def convert_field(self, value, conversion):
+        """Apply a trollsift conversion; any other code is left to `string.Formatter`."""
+        func = self.CONV_FUNCS.get(conversion)
+        if func is not None:
+            value = getattr(value, func)()
+        elif conversion not in ['R']:
+            # not a trollsift conversion: the base class handles it (e.g. 'r', 's')
+            return super(StringFormatter, self).convert_field(value, conversion)
+
+        if conversion in ['h', 'H', 'R']:  # these three also strip separators
+            value = value.replace('-', '').replace('_', '').replace(':', '').replace(' ', '')
+        return value
+
+
+formatter = StringFormatter()
 
+
+def _extract_parsedef(fmt):
+    """Retrieve parse definition from the format string `fmt`."""
     parsedef = []
     convdef = {}
-
-    for part1 in fmt.split('}'):
-        part2 = part1.split('{', 1)
-        if part2[0] is not '':
-            parsedef.append(part2[0])
-        if len(part2) > 1 and part2[1] is not '':
-            if ':' in part2[1]:
-                part2 = part2[1].split(':', 1)
-                parsedef.append({part2[0]: part2[1]})
-                convdef[part2[0]] = part2[1]
-            else:
-                reg = re.search('(\{' + part2[1] + '\})', fmt)
-                if reg:
-                    parsedef.append({part2[1]: None})
-                else:
-                    parsedef.append(part2[1])
+    for literal_text, field_name, format_spec, conversion in formatter.parse(fmt):
+        if literal_text:
+            parsedef.append(literal_text)
+        if field_name is None:
+            continue
+        parsedef.append({field_name: format_spec or None})
+        convdef[field_name] = format_spec
     return parsedef, convdef
 
 
@@ -197,10 +236,7 @@ def _get_number_from_fmt(fmt):
 
 
 def _convert(convdef, stri):
-    '''Convert the string *stri* to the given conversion definition
-    *convdef*.
-    '''
-
+    """Convert the string *stri* to the given conversion definition *convdef*."""
     if '%' in convdef:
         result = dt.datetime.strptime(stri, convdef)
     elif 'd' in convdef or 's' in convdef:
@@ -262,11 +298,8 @@ def parse(fmt, stri):
 
 
 def compose(fmt, keyvals):
-    '''Return string composed according to *fmt* string and filled
-    with values with the corresponding keys in *keyvals* dictionary.
-    '''
-
-    return fmt.format(**keyvals)
+    """Convert parameters in `keyvals` to a string based on `fmt` string."""
+    return formatter.format(fmt, **keyvals)
 
 
 DT_FMT = {
@@ -297,70 +330,54 @@ def compose(fmt, keyvals):
 }
 
 
+class GlobifyFormatter(string.Formatter):
+    """Formatter that renders replacement fields lacking a value as glob patterns."""
+    # special string to mark a parameter not being specified
+    UNPROVIDED_VALUE = '<trollsift unprovided value>'
+
+    def get_value(self, key, args, kwargs):
+        try:
+            return super(GlobifyFormatter, self).get_value(key, args, kwargs)
+        except (IndexError, KeyError):
+            # no value was supplied for this field; hand back the marker so format_field emits a glob pattern
+            return self.UNPROVIDED_VALUE
+
+    def format_field(self, value, format_spec):
+        if not isinstance(value, (list, tuple)) and value != self.UNPROVIDED_VALUE:
+            return super(GlobifyFormatter, self).format_field(value, format_spec)
+        elif value != self.UNPROVIDED_VALUE:
+            # partially provided date/time field, given as a 2-element
+            # tuple/list (value, strftime letters to fill in); those directives
+            # are substituted here, the remaining ones are globbed below
+            value, dt_fmt = value
+            for fmt_letter in dt_fmt:
+                fmt = '%' + fmt_letter
+                format_spec = format_spec.replace(fmt, value.strftime(fmt))
+
+        # Replace format spec with glob patterns (*, ?, etc)
+        if not format_spec:
+            return '*'
+        if '%' in format_spec:
+            replace_str = format_spec
+            for fmt_key, fmt_val in DT_FMT.items():
+                replace_str = replace_str.replace(fmt_key, fmt_val)
+            return replace_str
+        if not re.search('[0-9]+', format_spec):
+            # no digits in the spec, so the field length is unknown
+            return '*'
+        return '?' * _get_number_from_fmt(format_spec)
+
+
+globify_formatter = GlobifyFormatter()
+
+
 def globify(fmt, keyvals=None):
-    '''Generate a string useable with glob.glob() from format string
+    """Generate a string usable with glob.glob() from format string
     *fmt* and *keyvals* dictionary.
-    '''
-
+    """
     if keyvals is None:
         keyvals = {}
-    else:
-        keyvals = keyvals.copy()
-    parsedef, _ = _extract_parsedef(fmt)
-    all_keys, all_vals = _collect_keyvals_from_parsedef(parsedef)
-    replace_str = ''
-    for key, val in zip(all_keys, all_vals):
-        if key not in list(keyvals.keys()):
-            # replace depending on the format defined in all_vals[key]
-            if val is None:
-                replace_str = '*'
-            elif '%' in val:
-                # calculate the length of datetime
-                replace_str = val
-                for fmt_key, fmt_val in DT_FMT.items():
-                    replace_str = replace_str.replace(fmt_key, fmt_val)
-                fmt = fmt.replace(key + ':' + val, key)
-            elif not re.search('[0-9]+', val):
-                if 'd' in val:
-                    val2 = val.replace('d', 's')
-                    fmt = fmt.replace(key + ':' + val, key + ':' + val2)
-                replace_str = '*'
-            else:
-                if 'd' in val:
-                    val2 = val.lstrip('0').replace('d', 's')
-                    fmt = fmt.replace(key + ':' + val, key + ':' + val2)
-                num = _get_number_from_fmt(val)
-                replace_str = num * '?'
-            keyvals[key] = replace_str
-        else:
-            # Check partial datetime usage
-            if isinstance(keyvals[key], list) or \
-                    isinstance(keyvals[key], tuple):
-                conv_chars = keyvals[key][1]
-            else:
-                continue
-
-            val2 = list(val)
-            prev = 0
-            datet = keyvals[key][0]  # assume datetime
-            while True:
-                idx = val.find('%', prev)
-                # Stop if no finds
-                if idx == -1:
-                    break
-                if val[idx + 1] not in conv_chars:
-                    tmp = '{0:%' + val[idx + 1] + '}'
-                    # calculate how many '?' are needed
-                    num = len(tmp.format(datet))
-                    val2[idx:idx + num] = num * '?'
-                prev = idx + 1
-            val2 = ''.join(val2)
-            fmt = fmt.replace(key + ':' + val, key + ':' + val2)
-            keyvals[key] = keyvals[key][0]
-
-    result = compose(fmt, keyvals)
-
-    return result
+    return globify_formatter.format(fmt, **keyvals)
 
 
 def validate(fmt, stri):
@@ -389,7 +406,7 @@ def is_one2one(fmt):
     Note: This test only applies to sensible usage of the format string.
     If string or numeric data is causes overflow, e.g. 
     if composing "abcd" into {3s}, one to one correspondence will always 
-    be broken in such cases. This off course also applies to precision 
+    be broken in such cases. This of course also applies to precision
     losses when using  datetime data.
     """
     # look for some bad patterns

diff --git a/trollsift/tests/integrationtests/test_parser.py b/trollsift/tests/integrationtests/test_parser.py
@@ -51,10 +51,30 @@ def assertItemsEqual(self, a, b):
         self.assertEqual(len(a), len(b))
 
 
+class TestParserVIIRSSDR(unittest.TestCase):
+    """Parse a VIIRS SDR file name whose time fields end in a single ``%f`` digit."""
+    def setUp(self):
+        self.fmt = 'SVI01_{platform_shortname}_d{start_time:%Y%m%d_t%H%M%S%f}_e{end_time:%H%M%S%f}_b{orbit:5d}_c{creation_time:%Y%m%d%H%M%S%f}_{source}.h5'
+        self.string = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5'
+        self.data = {'platform_shortname': 'npp',
+                     'start_time': dt.datetime(2012, 2, 25, 18, 1, 24, 500000), 'orbit': 1708,
+                     'end_time': dt.datetime(1900, 1, 1, 18, 2, 48, 700000),
+                     'source': 'noaa_ops',
+                     'creation_time': dt.datetime(2012, 2, 26, 0, 21, 30, 255476)}
+        self.p = Parser(self.fmt)
+
+    def test_parse(self):
+        # Run (end_time's spec has no date part, so strptime defaults it to 1900-01-01)
+        result = self.p.parse(self.string)
+        # Assert (a lone %f digit is tenths of a second: '5' -> 500000 us, '7' -> 700000 us)
+        self.assertDictEqual(result, self.data)
+
+
 def suite():
     """The suite for test_parser
     """
     loader = unittest.TestLoader()
     mysuite = unittest.TestSuite()
     mysuite.addTest(loader.loadTestsFromTestCase(TestParser))
+    mysuite.addTest(loader.loadTestsFromTestCase(TestParserVIIRSSDR))
     return mysuite
diff --git a/trollsift/tests/unittests/test_parser.py b/trollsift/tests/unittests/test_parser.py
@@ -173,7 +173,6 @@ def test_parse_align(self):
                                       'segment': '000007',
                                       'start_time': dt.datetime(2015, 6, 5, 17, 0)})
 
-
     def test_globify_simple(self):
         # Run
         result = globify('{a}_{b}.end', {'a': 'a', 'b': 'b'})
@@ -285,6 +284,35 @@ def test_is_one2one(self):
         self.assertFalse(is_one2one(
             "/somedir/{directory}/somedata_{platform:4s}_{time:%Y%d%m-%H%M}_{orbit:d}.l1b"))
 
+    def test_compose(self):
+        """Check each custom conversion code of compose, plus a built-in and an invalid one."""
+        from trollsift import compose
+        key_vals = {'a': 'this Is A-Test b_test c test'}
+
+        new_str = compose("{a!c}", key_vals)
+        self.assertEqual(new_str, 'This is a-test b_test c test')
+        new_str = compose("{a!h}", key_vals)
+        self.assertEqual(new_str, 'thisisatestbtestctest')
+        new_str = compose("{a!H}", key_vals)
+        self.assertEqual(new_str, 'THISISATESTBTESTCTEST')
+        new_str = compose("{a!l}", key_vals)
+        self.assertEqual(new_str, 'this is a-test b_test c test')
+        new_str = compose("{a!R}", key_vals)
+        self.assertEqual(new_str, 'thisIsATestbtestctest')
+        new_str = compose("{a!t}", key_vals)
+        self.assertEqual(new_str, 'This Is A-Test B_Test C Test')
+        new_str = compose("{a!u}", key_vals)
+        self.assertEqual(new_str, 'THIS IS A-TEST B_TEST C TEST')
+        # built-in repr conversion is still available
+        new_str = compose("{a!r}", key_vals)
+        self.assertEqual(new_str, '\'this Is A-Test b_test c test\'')
+        # no conversion at all
+        new_str = compose("{a}", key_vals)
+        self.assertEqual(new_str, 'this Is A-Test b_test c test')
+        # unknown conversion code is rejected; NOTE(review): the re-check of new_str below looks redundant
+        self.assertRaises(ValueError, compose, "{a!X}", key_vals)
+        self.assertEqual(new_str, 'this Is A-Test b_test c test')
+
     def assertDictEqual(self, a, b):
         for key in a:
             self.assertTrue(key in b)