Skip to content

Commit

Permalink
Merge 34bb659 into e79a91e
Browse files Browse the repository at this point in the history
  • Loading branch information
djhoese committed Sep 18, 2018
2 parents e79a91e + 34bb659 commit 2a02a3d
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 105 deletions.
5 changes: 2 additions & 3 deletions doc/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
contain the root `toctree` directive.
.. meta::
description: Trollsift project, modules for formatting, parsing and filtering satellite granule file names
keywords: Python, pytroll, format, parse, filter, string
:description: Trollsift project, modules for formatting, parsing and filtering satellite granule file names
:keywords: Python, pytroll, format, parse, filter, string

Welcome to the trollsift documentation!
=========================================
Expand All @@ -26,7 +26,6 @@ Contents

installation
usage
examples
api

Indices and tables
Expand Down
14 changes: 8 additions & 6 deletions doc/source/usage.rst
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@

.. .. sectnum::
.. :depth: 4
.. :start: 2
.. :suffix: .
.. _string-format: https://docs.python.org/2/library/string.html#format-string-syntax

Usage
Expand Down Expand Up @@ -44,6 +38,14 @@ a new file name,
>>> p.compose(data)
'/somedir/otherdir/hrpt_noaa16_20120101_0101_69022.l1b'

In addition to Python's built-in string formatting functionality, trollsift also
provides extra conversion options, such as making all characters lowercase:

>>> my_parser = Parser("{platform_name!l}")
>>> my_parser.compose({'platform_name': 'NPP'})
'npp'

For all of the options see :class:`~trollsift.parser.StringFormatter`.

standalone parse and compose
+++++++++++++++++++++++++++++++++++++++++
Expand Down
5 changes: 4 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@ requires=python python-six
release=1

[bdist_wheel]
universal=1
universal=1

[flake8]
max-line-length = 120
205 changes: 111 additions & 94 deletions trollsift/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.


'''Parser class
'''
"""Parser class
"""

import re
import datetime as dt
Expand All @@ -33,9 +33,8 @@


class Parser(object):

'''Parser class
'''
"""Parser class
"""

def __init__(self, fmt):
self.fmt = fmt
Expand All @@ -55,6 +54,8 @@ def compose(self, keyvals):
'''
return compose(self.fmt, keyvals)

format = compose

def globify(self, keyvals=None):
'''Generate a string useable with glob.glob() from format string
*fmt* and *keyvals* dictionary.
Expand Down Expand Up @@ -88,28 +89,66 @@ def is_one2one(self):
return is_one2one(self.fmt)


def _extract_parsedef(fmt):
'''Retrieve parse definition from the format string *fmt*.
'''
class StringFormatter(string.Formatter):
    """Custom string formatter class for basic strings.

    This formatter adds a few special conversions for assisting with common
    trollsift situations like making a parameter lowercase or removing
    hyphens. The added conversions are listed below and can be used in a
    format string by prefixing them with an `!` like so:

    >>> fstr = "{0!u}_{1!l}"
    >>> formatter = StringFormatter()
    >>> formatter.format(fstr, "to_upper", "To_LowerCase")
    'TO_UPPER_to_lowercase'

    - c: Make capitalized version of string (first character upper case, all lowercase after that) by executing the
      parameter's `.capitalize()` method.
    - h: A combination of 'R' and 'l'.
    - H: A combination of 'R' and 'u'.
    - l: Make all characters lowercase by executing the parameter's `.lower()` method.
    - R: Remove all separators from the parameter including '-', '_', ' ', and ':'.
    - t: Title case the string by executing the parameter's `.title()` method.
    - u: Make all characters uppercase by executing the parameter's `.upper()` method.

    Any other conversion (including the builtin 'r' and 's') is delegated
    unchanged to :class:`string.Formatter`.
    """

    # Conversion character -> name of the method called on the field value.
    CONV_FUNCS = {
        'c': 'capitalize',
        'h': 'lower',
        'H': 'upper',
        'l': 'lower',
        't': 'title',
        'u': 'upper'
    }

    # Conversions that strip separators (after any case change has been applied).
    _STRIP_CONVERSIONS = ('h', 'H', 'R')
    # Characters removed by the conversions listed in ``_STRIP_CONVERSIONS``.
    _SEPARATORS = ('-', '_', ':', ' ')

    def convert_field(self, value, conversion):
        """Apply the conversion named by `conversion` to `value`.

        Args:
            value: Field value looked up by the formatter. The custom
                conversions call string methods on it (``lower``,
                ``replace``, ...).
            conversion: Single conversion character following ``!`` in the
                replacement field, or ``None`` when no conversion was given.

        Returns:
            The converted value.
        """
        method_name = self.CONV_FUNCS.get(conversion)
        if method_name is None and conversion != 'R':
            # Not one of the trollsift conversions: let the base class deal
            # with the default ones ('r', 's', None) and reject unknown ones.
            return super(StringFormatter, self).convert_field(value, conversion)

        if method_name is not None:
            value = getattr(value, method_name)()
        if conversion in self._STRIP_CONVERSIONS:
            for separator in self._SEPARATORS:
                value = value.replace(separator, '')
        return value


formatter = StringFormatter()


def _extract_parsedef(fmt):
"""Retrieve parse definition from the format string `fmt`."""
parsedef = []
convdef = {}

for part1 in fmt.split('}'):
part2 = part1.split('{', 1)
if part2[0] is not '':
parsedef.append(part2[0])
if len(part2) > 1 and part2[1] is not '':
if ':' in part2[1]:
part2 = part2[1].split(':', 1)
parsedef.append({part2[0]: part2[1]})
convdef[part2[0]] = part2[1]
else:
reg = re.search('(\{' + part2[1] + '\})', fmt)
if reg:
parsedef.append({part2[1]: None})
else:
parsedef.append(part2[1])
for literal_text, field_name, format_spec, conversion in formatter.parse(fmt):
if literal_text:
parsedef.append(literal_text)
if field_name is None:
continue
parsedef.append({field_name: format_spec or None})
convdef[field_name] = format_spec
return parsedef, convdef


Expand Down Expand Up @@ -197,10 +236,7 @@ def _get_number_from_fmt(fmt):


def _convert(convdef, stri):
'''Convert the string *stri* to the given conversion definition
*convdef*.
'''

"""Convert the string *stri* to the given conversion definition *convdef*."""
if '%' in convdef:
result = dt.datetime.strptime(stri, convdef)
elif 'd' in convdef or 's' in convdef:
Expand Down Expand Up @@ -262,11 +298,8 @@ def parse(fmt, stri):


def compose(fmt, keyvals):
'''Return string composed according to *fmt* string and filled
with values with the corresponding keys in *keyvals* dictionary.
'''

return fmt.format(**keyvals)
"""Convert parameters in `keyvals` to a string based on `fmt` string."""
return formatter.format(fmt, **keyvals)


DT_FMT = {
Expand Down Expand Up @@ -297,70 +330,54 @@ def compose(fmt, keyvals):
}


class GlobifyFormatter(string.Formatter):
    """Formatter that renders unprovided fields as glob patterns.

    Fields whose value is supplied are formatted normally; fields that are
    missing (or only partially specified date/time fields) are replaced by
    a glob expression derived from their format specification.
    """

    # special string to mark a parameter not being specified
    UNPROVIDED_VALUE = '<trollsift unprovided value>'

    def get_value(self, key, args, kwargs):
        """Return the value for `key`, or the unprovided marker if it is missing."""
        try:
            found = super(GlobifyFormatter, self).get_value(key, args, kwargs)
        except (IndexError, KeyError):
            # the key was not supplied: flag it so format_field globs it
            found = self.UNPROVIDED_VALUE
        return found

    def format_field(self, value, format_spec):
        """Format a provided `value`, or build a glob pattern from `format_spec`."""
        if isinstance(value, (list, tuple)):
            if value != self.UNPROVIDED_VALUE:
                # partial provided date/time fields
                # specified with a tuple/list of 2 elements
                # (value, partial format string)
                value, known_letters = value
                for letter in known_letters:
                    directive = '%' + letter
                    format_spec = format_spec.replace(directive, value.strftime(directive))
        elif value != self.UNPROVIDED_VALUE:
            # fully provided value: regular formatting applies
            return super(GlobifyFormatter, self).format_field(value, format_spec)

        # Replace format spec with glob patterns (*, ?, etc)
        if not format_spec:
            return '*'
        if '%' not in format_spec:
            if re.search('[0-9]+', format_spec):
                return '?' * _get_number_from_fmt(format_spec)
            # non-integer type
            return '*'

        pattern = format_spec
        for directive, glob_part in DT_FMT.items():
            pattern = pattern.replace(directive, glob_part)
        return pattern


globify_formatter = GlobifyFormatter()


def globify(fmt, keyvals=None):
'''Generate a string useable with glob.glob() from format string
"""Generate a string usable with glob.glob() from format string
*fmt* and *keyvals* dictionary.
'''

"""
if keyvals is None:
keyvals = {}
else:
keyvals = keyvals.copy()
parsedef, _ = _extract_parsedef(fmt)
all_keys, all_vals = _collect_keyvals_from_parsedef(parsedef)
replace_str = ''
for key, val in zip(all_keys, all_vals):
if key not in list(keyvals.keys()):
# replace depending on the format defined in all_vals[key]
if val is None:
replace_str = '*'
elif '%' in val:
# calculate the length of datetime
replace_str = val
for fmt_key, fmt_val in DT_FMT.items():
replace_str = replace_str.replace(fmt_key, fmt_val)
fmt = fmt.replace(key + ':' + val, key)
elif not re.search('[0-9]+', val):
if 'd' in val:
val2 = val.replace('d', 's')
fmt = fmt.replace(key + ':' + val, key + ':' + val2)
replace_str = '*'
else:
if 'd' in val:
val2 = val.lstrip('0').replace('d', 's')
fmt = fmt.replace(key + ':' + val, key + ':' + val2)
num = _get_number_from_fmt(val)
replace_str = num * '?'
keyvals[key] = replace_str
else:
# Check partial datetime usage
if isinstance(keyvals[key], list) or \
isinstance(keyvals[key], tuple):
conv_chars = keyvals[key][1]
else:
continue

val2 = list(val)
prev = 0
datet = keyvals[key][0] # assume datetime
while True:
idx = val.find('%', prev)
# Stop if no finds
if idx == -1:
break
if val[idx + 1] not in conv_chars:
tmp = '{0:%' + val[idx + 1] + '}'
# calculate how many '?' are needed
num = len(tmp.format(datet))
val2[idx:idx + num] = num * '?'
prev = idx + 1
val2 = ''.join(val2)
fmt = fmt.replace(key + ':' + val, key + ':' + val2)
keyvals[key] = keyvals[key][0]

result = compose(fmt, keyvals)

return result
return globify_formatter.format(fmt, **keyvals)


def validate(fmt, stri):
Expand Down Expand Up @@ -389,7 +406,7 @@ def is_one2one(fmt):
Note: This test only applies to sensible usage of the format string.
If string or numeric data is causes overflow, e.g.
if composing "abcd" into {3s}, one to one correspondence will always
be broken in such cases. This off course also applies to precision
be broken in such cases. This of course also applies to precision
losses when using datetime data.
"""
# look for some bad patterns
Expand Down
20 changes: 20 additions & 0 deletions trollsift/tests/integrationtests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,30 @@ def assertItemsEqual(self, a, b):
self.assertEqual(len(a), len(b))


class TestParserVIIRSSDR(unittest.TestCase):
    """Parse a VIIRS SDR file name with several date/time fields."""

    def setUp(self):
        """Create the format string, a sample file name and the values expected from it."""
        self.fmt = ('SVI01_{platform_shortname}'
                    '_d{start_time:%Y%m%d_t%H%M%S%f}'
                    '_e{end_time:%H%M%S%f}'
                    '_b{orbit:5d}'
                    '_c{creation_time:%Y%m%d%H%M%S%f}'
                    '_{source}.h5')
        self.string = 'SVI01_npp_d20120225_t1801245_e1802487_b01708_c20120226002130255476_noaa_ops.h5'
        self.data = {
            'platform_shortname': 'npp',
            'start_time': dt.datetime(2012, 2, 25, 18, 1, 24, 500000),
            'end_time': dt.datetime(1900, 1, 1, 18, 2, 48, 700000),
            'orbit': 1708,
            'creation_time': dt.datetime(2012, 2, 26, 0, 21, 30, 255476),
            'source': 'noaa_ops',
        }
        self.p = Parser(self.fmt)

    def test_parse(self):
        """The sample file name parses into the expected dictionary."""
        parsed = self.p.parse(self.string)
        self.assertDictEqual(parsed, self.data)


def suite():
    """Build the test suite for test_parser."""
    loader = unittest.TestLoader()
    mysuite = unittest.TestSuite()
    for test_case in (TestParser, TestParserVIIRSSDR):
        mysuite.addTest(loader.loadTestsFromTestCase(test_case))
    return mysuite
30 changes: 29 additions & 1 deletion trollsift/tests/unittests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ def test_parse_align(self):
'segment': '000007',
'start_time': dt.datetime(2015, 6, 5, 17, 0)})


def test_globify_simple(self):
# Run
result = globify('{a}_{b}.end', {'a': 'a', 'b': 'b'})
Expand Down Expand Up @@ -285,6 +284,35 @@ def test_is_one2one(self):
self.assertFalse(is_one2one(
"/somedir/{directory}/somedata_{platform:4s}_{time:%Y%d%m-%H%M}_{orbit:d}.l1b"))

def test_compose(self):
    """Test the compose method's custom conversion options."""
    from trollsift import compose
    key_vals = {'a': 'this Is A-Test b_test c test'}

    # (format string, expected composed result)
    cases = [
        ("{a!c}", 'This is a-test b_test c test'),
        ("{a!h}", 'thisisatestbtestctest'),
        ("{a!H}", 'THISISATESTBTESTCTEST'),
        ("{a!l}", 'this is a-test b_test c test'),
        ("{a!R}", 'thisIsATestbtestctest'),
        ("{a!t}", 'This Is A-Test B_Test C Test'),
        ("{a!u}", 'THIS IS A-TEST B_TEST C TEST'),
        # builtin repr
        ("{a!r}", "'this Is A-Test b_test c test'"),
        # no formatting
        ("{a}", 'this Is A-Test b_test c test'),
    ]
    for fmt, expected in cases:
        # pass the format string as the message so a failure names the case
        self.assertEqual(compose(fmt, key_vals), expected, fmt)

    # bad formatter
    self.assertRaises(ValueError, compose, "{a!X}", key_vals)

def assertDictEqual(self, a, b):
for key in a:
self.assertTrue(key in b)
Expand Down

0 comments on commit 2a02a3d

Please sign in to comment.