Permalink
Browse files

be more flexible regarding matched ISO datetimes and timezones in general, fix bug in timezones without ":" and improve docs
  • Loading branch information...
1 parent 84187ac commit 01c140472d0d15104a14d0b5ab0321d6a82af14b @r1chardj0n3s committed Dec 4, 2012
Showing with 61 additions and 25 deletions.
  1. +16 −10 README.rst
  2. +20 −12 parse.py
  3. +25 −3 test_parse.py
View
@@ -29,7 +29,7 @@ compile it once:
>>> from parse import compile
>>> p = compile("It's {}, I love it!")
->>> print p
+>>> print(p)
<Parser "It's {}, I love it!">
>>> p.parse("It's spam, I love it!")
<Result ('spam',) {}>
@@ -61,27 +61,27 @@ Some simple parse() format string examples:
>>> parse("Bring me a {}", "Bring me a shrubbery")
<Result ('shrubbery',) {}>
>>> r = parse("The {} who say {}", "The knights who say Ni!")
->>> print r
+>>> print(r)
<Result ('knights', 'Ni!') {}>
->>> print r.fixed
+>>> print(r.fixed)
('knights', 'Ni!')
>>> r = parse("Bring out the holy {item}", "Bring out the holy hand grenade")
->>> print r
+>>> print(r)
<Result () {'item': 'hand grenade'}>
->>> print r.named
+>>> print(r.named)
{'item': 'hand grenade'}
->>> print r['item']
+>>> print(r['item'])
hand grenade
Dotted names are possible though the application must make additional sense of
the result:
>>> r = parse("Mmm, {food.type}, I love it!", "Mmm, spam, I love it!")
->>> print r
+>>> print(r)
<Result () {'food.type': 'spam'}>
->>> print r.named
+>>> print(r.named)
{'food.type': 'spam'}
->>> print r['food.type']
+>>> print(r['food.type'])
spam
@@ -184,7 +184,11 @@ Some notes for the date and time types:
- the AM/PM are optional, and if PM is found then 12 hours will be added
to the datetime object's hours amount - even if the hour is greater
than 12 (for consistency.)
-- except in ISO 8601 and e-mail format the timezone is optional.
+- in ISO 8601 the timezone part may be "Z" (UTC) or a numeric offset
+- timezones are specified as "+HH:MM" or "-HH:MM". The hour may be one or two
+ digits (0-padded is OK.) Also, the ":" is optional.
+- the timezone is optional in all formats except the e-mail format (it
+ defaults to UTC.)
- named timezones are not handled yet.
Note: attempting to match too many datetime fields in a single parse() will
@@ -270,6 +274,8 @@ A more complete example of a custom type might be:
**Version history (in brief)**:
+- 1.6.1 be more flexible regarding matched ISO datetimes and timezones in
+ general, fix bug in timezones without ":" and improve docs
- 1.6.0 add support for optional ``pattern`` attribute in user-defined types
(thanks Jens Engel)
- 1.5.3 fix handling of question marks
View
@@ -29,7 +29,7 @@
>>> from parse import compile
>>> p = compile("It's {}, I love it!")
->>> print p
+>>> print(p)
<Parser "It's {}, I love it!">
>>> p.parse("It's spam, I love it!")
<Result ('spam',) {}>
@@ -61,27 +61,27 @@
>>> parse("Bring me a {}", "Bring me a shrubbery")
<Result ('shrubbery',) {}>
>>> r = parse("The {} who say {}", "The knights who say Ni!")
->>> print r
+>>> print(r)
<Result ('knights', 'Ni!') {}>
->>> print r.fixed
+>>> print(r.fixed)
('knights', 'Ni!')
>>> r = parse("Bring out the holy {item}", "Bring out the holy hand grenade")
->>> print r
+>>> print(r)
<Result () {'item': 'hand grenade'}>
->>> print r.named
+>>> print(r.named)
{'item': 'hand grenade'}
->>> print r['item']
+>>> print(r['item'])
hand grenade
Dotted names are possible though the application must make additional sense of
the result:
>>> r = parse("Mmm, {food.type}, I love it!", "Mmm, spam, I love it!")
->>> print r
+>>> print(r)
<Result () {'food.type': 'spam'}>
->>> print r.named
+>>> print(r.named)
{'food.type': 'spam'}
->>> print r['food.type']
+>>> print(r['food.type'])
spam
@@ -184,7 +184,11 @@
- the AM/PM are optional, and if PM is found then 12 hours will be added
to the datetime object's hours amount - even if the hour is greater
than 12 (for consistency.)
-- except in ISO 8601 and e-mail format the timezone is optional.
+- in ISO 8601 the timezone part may be "Z" (UTC) or a numeric offset
+- timezones are specified as "+HH:MM" or "-HH:MM". The hour may be one or two
+ digits (0-padded is OK.) Also, the ":" is optional.
+- the timezone is optional in all formats except the e-mail format (it
+ defaults to UTC.)
- named timezones are not handled yet.
Note: attempting to match too many datetime fields in a single parse() will
@@ -270,6 +274,8 @@
**Version history (in brief)**:
+- 1.6.1 be more flexible regarding matched ISO datetimes and timezones in
+ general, fix bug in timezones without ":" and improve docs
- 1.6.0 add support for optional ``pattern`` attribute in user-defined types
(thanks Jens Engel)
- 1.5.3 fix handling of question marks
@@ -308,7 +314,7 @@
This code is copyright 2012 Richard Jones <richard@python.org>
See the end of the source file for the license of use.
'''
-__version__ = '1.6.0'
+__version__ = '1.6.1'
# yes, I now have two problems
import re
@@ -485,6 +491,8 @@ def date_convert(string, match, ymd=None, mdy=None, dmy=None,
sign = tz[0]
if ':' in tz:
tzh, tzm = tz[1:].split(':')
+ elif len(tz) == 4: # 'snnn'
+ tzh, tzm = tz[1], tz[2:4]
else:
tzh, tzm = tz[1:3], tz[3:5]
offset = int(tzm) + int(tzh) * 60
@@ -786,7 +794,7 @@ def f(string, m):
s = r'\d+|0[xX][0-9a-fA-F]+|[0-9a-fA-F]+|0[bB][01]+|0[oO][0-7]+'
self._type_conversions[group] = int_convert(10)
elif type == 'ti':
- s = r'(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|[-+]\d\d:\d\d)?' % TIME_PAT
+ s = r'(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?' % TIME_PAT
n = self._group_index
self._type_conversions[group] = partial(date_convert, ymd=n+1,
hms=n+4, tz=n+7)
View
@@ -329,8 +329,12 @@ def y(fmt, s, e, tz=None):
if r is None:
self.fail('%r (%r) did not match %r' % (fmt, p._expression, s))
r = r.fixed[0]
- self.assertEqual(r, e,
- '%r found %r in %r, not %r' % (fmt, r, s, e))
+ try:
+ self.assertEqual(r, e,
+ '%r found %r in %r, not %r' % (fmt, r, s, e))
+ except ValueError:
+ self.fail('%r found %r in %r, not %r' % (fmt, r, s, e))
+
if tz is not None:
self.assertEqual(r.tzinfo, tz,
'%r found TZ %r in %r, not %r' % (fmt, r.tzinfo, s, e))
@@ -353,8 +357,12 @@ def n(fmt, s, e):
datetime(1997, 7, 16, 19, 20, 0))
y('a {:ti} b', 'a 1997-07-16T19:20Z b',
datetime(1997, 7, 16, 19, 20, tzinfo=utc))
+ y('a {:ti} b', 'a 1997-07-16T19:20+0100 b',
+ datetime(1997, 7, 16, 19, 20, tzinfo=tz60))
y('a {:ti} b', 'a 1997-07-16T19:20+01:00 b',
datetime(1997, 7, 16, 19, 20, tzinfo=tz60))
+ y('a {:ti} b', 'a 1997-07-16T19:20 +01:00 b',
+ datetime(1997, 7, 16, 19, 20, tzinfo=tz60))
# YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
y('a {:ti} b', 'a 1997-07-16 19:20:30 b',
@@ -365,6 +373,8 @@ def n(fmt, s, e):
datetime(1997, 7, 16, 19, 20, 30, tzinfo=utc))
y('a {:ti} b', 'a 1997-07-16T19:20:30+01:00 b',
datetime(1997, 7, 16, 19, 20, 30, tzinfo=tz60))
+ y('a {:ti} b', 'a 1997-07-16T19:20:30 +01:00 b',
+ datetime(1997, 7, 16, 19, 20, 30, tzinfo=tz60))
# YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
y('a {:ti} b', 'a 1997-07-16 19:20:30.500000 b',
@@ -383,10 +393,12 @@ def n(fmt, s, e):
# te RFC2822 e-mail format datetime
y('a {:te} b', 'a Mon, 21 Nov 2011 10:21:36 +1000 b', aest_d)
+ y('a {:te} b', 'a Mon, 21 Nov 2011 10:21:36 +10:00 b', aest_d)
y('a {:te} b', 'a 21 Nov 2011 10:21:36 +1000 b', aest_d)
# tg global (day/month) format datetime
y('a {:tg} b', 'a 21/11/2011 10:21:36 AM +1000 b', aest_d)
+ y('a {:tg} b', 'a 21/11/2011 10:21:36 AM +10:00 b', aest_d)
y('a {:tg} b', 'a 21-11-2011 10:21:36 AM +1000 b', aest_d)
y('a {:tg} b', 'a 21/11/2011 10:21:36 +1000 b', aest_d)
y('a {:tg} b', 'a 21/11/2011 10:21:36 b', dt)
@@ -397,6 +409,7 @@ def n(fmt, s, e):
# ta US (month/day) format datetime
y('a {:ta} b', 'a 11/21/2011 10:21:36 AM +1000 b', aest_d)
+ y('a {:ta} b', 'a 11/21/2011 10:21:36 AM +10:00 b', aest_d)
y('a {:ta} b', 'a 11-21-2011 10:21:36 AM +1000 b', aest_d)
y('a {:ta} b', 'a 11/21/2011 10:21:36 +1000 b', aest_d)
y('a {:ta} b', 'a 11/21/2011 10:21:36 b', dt)
@@ -408,21 +421,29 @@ def n(fmt, s, e):
# th HTTP log format date/time datetime
y('a {:th} b', 'a 21/Nov/2011:10:21:36 +1000 b', aest_d)
+ y('a {:th} b', 'a 21/Nov/2011:10:21:36 +10:00 b', aest_d)
d = datetime(2011, 11, 21, 10, 21, 36)
# tc ctime() format datetime
y('a {:tc} b', 'a Mon Nov 21 10:21:36 2011 b', d)
t530 = parse.FixedTzOffset(-5*60 - 30, '-5:30')
+ t830 = parse.FixedTzOffset(-8*60 - 30, '-8:30')
# tt Time time
y('a {:tt} b', 'a 10:21:36 AM +1000 b', time(10, 21, 36, tzinfo=aest))
+ y('a {:tt} b', 'a 10:21:36 AM +10:00 b', time(10, 21, 36, tzinfo=aest))
y('a {:tt} b', 'a 10:21:36 AM b', time(10, 21, 36))
y('a {:tt} b', 'a 10:21:36 PM b', time(22, 21, 36))
y('a {:tt} b', 'a 10:21:36 b', time(10, 21, 36))
y('a {:tt} b', 'a 10:21 b', time(10, 21))
y('a {:tt} b', 'a 10:21:36 PM -5:30 b', time(22, 21, 36, tzinfo=t530))
+ y('a {:tt} b', 'a 10:21:36 PM -530 b', time(22, 21, 36, tzinfo=t530))
+ y('a {:tt} b', 'a 10:21:36 PM -05:30 b', time(22, 21, 36, tzinfo=t530))
+ y('a {:tt} b', 'a 10:21:36 PM -0530 b', time(22, 21, 36, tzinfo=t530))
+ y('a {:tt} b', 'a 10:21:36 PM -08:30 b', time(22, 21, 36, tzinfo=t830))
+ y('a {:tt} b', 'a 10:21:36 PM -0830 b', time(22, 21, 36, tzinfo=t830))
def test_datetime_group_count(self):
# test we increment the group count correctly for datetimes
@@ -491,7 +512,8 @@ def test_mixed_types(self):
''')
self.assertNotEqual(r, None)
self.assertEqual(r.fixed[22], 'spam')
- # variants
+
+ def test_mixed_type_variant(self):
r = parse.parse('''
letters: {:w}
non-letters: {:W}

0 comments on commit 01c1404

Please sign in to comment.