Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

be more flexible regarding matched ISO datetimes and timezones in general, fix bug in timezones without ":" and improve docs
  • Loading branch information...
commit 01c140472d0d15104a14d0b5ab0321d6a82af14b 1 parent 84187ac
Richard Jones authored

Showing 3 changed files with 61 additions and 25 deletions. Show diff stats Hide diff stats

  1. +16 10 README.rst
  2. +20 12 parse.py
  3. +25 3 test_parse.py
26 README.rst
Source Rendered
@@ -29,7 +29,7 @@ compile it once:
29 29
30 30 >>> from parse import compile
31 31 >>> p = compile("It's {}, I love it!")
32   ->>> print p
  32 +>>> print(p)
33 33 <Parser "It's {}, I love it!">
34 34 >>> p.parse("It's spam, I love it!")
35 35 <Result ('spam',) {}>
@@ -61,27 +61,27 @@ Some simple parse() format string examples:
61 61 >>> parse("Bring me a {}", "Bring me a shrubbery")
62 62 <Result ('shrubbery',) {}>
63 63 >>> r = parse("The {} who say {}", "The knights who say Ni!")
64   ->>> print r
  64 +>>> print(r)
65 65 <Result ('knights', 'Ni!') {}>
66   ->>> print r.fixed
  66 +>>> print(r.fixed)
67 67 ('knights', 'Ni!')
68 68 >>> r = parse("Bring out the holy {item}", "Bring out the holy hand grenade")
69   ->>> print r
  69 +>>> print(r)
70 70 <Result () {'item': 'hand grenade'}>
71   ->>> print r.named
  71 +>>> print(r.named)
72 72 {'item': 'hand grenade'}
73   ->>> print r['item']
  73 +>>> print(r['item'])
74 74 hand grenade
75 75
76 76 Dotted names are possible though the application must make additional sense of
77 77 the result:
78 78
79 79 >>> r = parse("Mmm, {food.type}, I love it!", "Mmm, spam, I love it!")
80   ->>> print r
  80 +>>> print(r)
81 81 <Result () {'food.type': 'spam'}>
82   ->>> print r.named
  82 +>>> print(r.named)
83 83 {'food.type': 'spam'}
84   ->>> print r['food.type']
  84 +>>> print(r['food.type'])
85 85 spam
86 86
87 87
@@ -184,7 +184,11 @@ Some notes for the date and time types:
184 184 - the AM/PM are optional, and if PM is found then 12 hours will be added
185 185 to the datetime object's hours amount - even if the hour is greater
186 186 than 12 (for consistency.)
187   -- except in ISO 8601 and e-mail format the timezone is optional.
  187 +- in ISO 8601 the "Z" (UTC) timezone part may be a numeric offset
  188 +- timezones are specified as "+HH:MM" or "-HH:MM". The hour may be one or two
  189 + digits (0-padded is OK.) Also, the ":" is optional.
  190 +- the timezone is optional in all except the e-mail format (it defaults to
  191 + UTC.)
188 192 - named timezones are not handled yet.
189 193
190 194 Note: attempting to match too many datetime fields in a single parse() will
@@ -270,6 +274,8 @@ A more complete example of a custom type might be:
270 274
271 275 **Version history (in brief)**:
272 276
  277 +- 1.6.1 be more flexible regarding matched ISO datetimes and timezones in
  278 + general, fix bug in timezones without ":" and improve docs
273 279 - 1.6.0 add support for optional ``pattern`` attribute in user-defined types
274 280 (thanks Jens Engel)
275 281 - 1.5.3 fix handling of question marks
32 parse.py
@@ -29,7 +29,7 @@
29 29
30 30 >>> from parse import compile
31 31 >>> p = compile("It's {}, I love it!")
32   ->>> print p
  32 +>>> print(p)
33 33 <Parser "It's {}, I love it!">
34 34 >>> p.parse("It's spam, I love it!")
35 35 <Result ('spam',) {}>
@@ -61,27 +61,27 @@
61 61 >>> parse("Bring me a {}", "Bring me a shrubbery")
62 62 <Result ('shrubbery',) {}>
63 63 >>> r = parse("The {} who say {}", "The knights who say Ni!")
64   ->>> print r
  64 +>>> print(r)
65 65 <Result ('knights', 'Ni!') {}>
66   ->>> print r.fixed
  66 +>>> print(r.fixed)
67 67 ('knights', 'Ni!')
68 68 >>> r = parse("Bring out the holy {item}", "Bring out the holy hand grenade")
69   ->>> print r
  69 +>>> print(r)
70 70 <Result () {'item': 'hand grenade'}>
71   ->>> print r.named
  71 +>>> print(r.named)
72 72 {'item': 'hand grenade'}
73   ->>> print r['item']
  73 +>>> print(r['item'])
74 74 hand grenade
75 75
76 76 Dotted names are possible though the application must make additional sense of
77 77 the result:
78 78
79 79 >>> r = parse("Mmm, {food.type}, I love it!", "Mmm, spam, I love it!")
80   ->>> print r
  80 +>>> print(r)
81 81 <Result () {'food.type': 'spam'}>
82   ->>> print r.named
  82 +>>> print(r.named)
83 83 {'food.type': 'spam'}
84   ->>> print r['food.type']
  84 +>>> print(r['food.type'])
85 85 spam
86 86
87 87
@@ -184,7 +184,11 @@
184 184 - the AM/PM are optional, and if PM is found then 12 hours will be added
185 185 to the datetime object's hours amount - even if the hour is greater
186 186 than 12 (for consistency.)
187   -- except in ISO 8601 and e-mail format the timezone is optional.
  187 +- in ISO 8601 the "Z" (UTC) timezone part may be a numeric offset
  188 +- timezones are specified as "+HH:MM" or "-HH:MM". The hour may be one or two
  189 + digits (0-padded is OK.) Also, the ":" is optional.
  190 +- the timezone is optional in all except the e-mail format (it defaults to
  191 + UTC.)
188 192 - named timezones are not handled yet.
189 193
190 194 Note: attempting to match too many datetime fields in a single parse() will
@@ -270,6 +274,8 @@
270 274
271 275 **Version history (in brief)**:
272 276
  277 +- 1.6.1 be more flexible regarding matched ISO datetimes and timezones in
  278 + general, fix bug in timezones without ":" and improve docs
273 279 - 1.6.0 add support for optional ``pattern`` attribute in user-defined types
274 280 (thanks Jens Engel)
275 281 - 1.5.3 fix handling of question marks
@@ -308,7 +314,7 @@
308 314 This code is copyright 2012 Richard Jones <richard@python.org>
309 315 See the end of the source file for the license of use.
310 316 '''
311   -__version__ = '1.6.0'
  317 +__version__ = '1.6.1'
312 318
313 319 # yes, I now have two problems
314 320 import re
@@ -485,6 +491,8 @@ def date_convert(string, match, ymd=None, mdy=None, dmy=None,
485 491 sign = tz[0]
486 492 if ':' in tz:
487 493 tzh, tzm = tz[1:].split(':')
  494 + elif len(tz) == 4: # 'snnn'
  495 + tzh, tzm = tz[1], tz[2:4]
488 496 else:
489 497 tzh, tzm = tz[1:3], tz[3:5]
490 498 offset = int(tzm) + int(tzh) * 60
@@ -786,7 +794,7 @@ def f(string, m):
786 794 s = r'\d+|0[xX][0-9a-fA-F]+|[0-9a-fA-F]+|0[bB][01]+|0[oO][0-7]+'
787 795 self._type_conversions[group] = int_convert(10)
788 796 elif type == 'ti':
789   - s = r'(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|[-+]\d\d:\d\d)?' % TIME_PAT
  797 + s = r'(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?' % TIME_PAT
790 798 n = self._group_index
791 799 self._type_conversions[group] = partial(date_convert, ymd=n+1,
792 800 hms=n+4, tz=n+7)
28 test_parse.py
@@ -329,8 +329,12 @@ def y(fmt, s, e, tz=None):
329 329 if r is None:
330 330 self.fail('%r (%r) did not match %r' % (fmt, p._expression, s))
331 331 r = r.fixed[0]
332   - self.assertEqual(r, e,
333   - '%r found %r in %r, not %r' % (fmt, r, s, e))
  332 + try:
  333 + self.assertEqual(r, e,
  334 + '%r found %r in %r, not %r' % (fmt, r, s, e))
  335 + except ValueError:
  336 + self.fail('%r found %r in %r, not %r' % (fmt, r, s, e))
  337 +
334 338 if tz is not None:
335 339 self.assertEqual(r.tzinfo, tz,
336 340 '%r found TZ %r in %r, not %r' % (fmt, r.tzinfo, s, e))
@@ -353,8 +357,12 @@ def n(fmt, s, e):
353 357 datetime(1997, 7, 16, 19, 20, 0))
354 358 y('a {:ti} b', 'a 1997-07-16T19:20Z b',
355 359 datetime(1997, 7, 16, 19, 20, tzinfo=utc))
  360 + y('a {:ti} b', 'a 1997-07-16T19:20+0100 b',
  361 + datetime(1997, 7, 16, 19, 20, tzinfo=tz60))
356 362 y('a {:ti} b', 'a 1997-07-16T19:20+01:00 b',
357 363 datetime(1997, 7, 16, 19, 20, tzinfo=tz60))
  364 + y('a {:ti} b', 'a 1997-07-16T19:20 +01:00 b',
  365 + datetime(1997, 7, 16, 19, 20, tzinfo=tz60))
358 366
359 367 # YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
360 368 y('a {:ti} b', 'a 1997-07-16 19:20:30 b',
@@ -365,6 +373,8 @@ def n(fmt, s, e):
365 373 datetime(1997, 7, 16, 19, 20, 30, tzinfo=utc))
366 374 y('a {:ti} b', 'a 1997-07-16T19:20:30+01:00 b',
367 375 datetime(1997, 7, 16, 19, 20, 30, tzinfo=tz60))
  376 + y('a {:ti} b', 'a 1997-07-16T19:20:30 +01:00 b',
  377 + datetime(1997, 7, 16, 19, 20, 30, tzinfo=tz60))
368 378
369 379 # YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
370 380 y('a {:ti} b', 'a 1997-07-16 19:20:30.500000 b',
@@ -383,10 +393,12 @@ def n(fmt, s, e):
383 393
384 394 # te RFC2822 e-mail format datetime
385 395 y('a {:te} b', 'a Mon, 21 Nov 2011 10:21:36 +1000 b', aest_d)
  396 + y('a {:te} b', 'a Mon, 21 Nov 2011 10:21:36 +10:00 b', aest_d)
386 397 y('a {:te} b', 'a 21 Nov 2011 10:21:36 +1000 b', aest_d)
387 398
388 399 # tg global (day/month) format datetime
389 400 y('a {:tg} b', 'a 21/11/2011 10:21:36 AM +1000 b', aest_d)
  401 + y('a {:tg} b', 'a 21/11/2011 10:21:36 AM +10:00 b', aest_d)
390 402 y('a {:tg} b', 'a 21-11-2011 10:21:36 AM +1000 b', aest_d)
391 403 y('a {:tg} b', 'a 21/11/2011 10:21:36 +1000 b', aest_d)
392 404 y('a {:tg} b', 'a 21/11/2011 10:21:36 b', dt)
@@ -397,6 +409,7 @@ def n(fmt, s, e):
397 409
398 410 # ta US (month/day) format datetime
399 411 y('a {:ta} b', 'a 11/21/2011 10:21:36 AM +1000 b', aest_d)
  412 + y('a {:ta} b', 'a 11/21/2011 10:21:36 AM +10:00 b', aest_d)
400 413 y('a {:ta} b', 'a 11-21-2011 10:21:36 AM +1000 b', aest_d)
401 414 y('a {:ta} b', 'a 11/21/2011 10:21:36 +1000 b', aest_d)
402 415 y('a {:ta} b', 'a 11/21/2011 10:21:36 b', dt)
@@ -408,6 +421,7 @@ def n(fmt, s, e):
408 421
409 422 # th HTTP log format date/time datetime
410 423 y('a {:th} b', 'a 21/Nov/2011:10:21:36 +1000 b', aest_d)
  424 + y('a {:th} b', 'a 21/Nov/2011:10:21:36 +10:00 b', aest_d)
411 425
412 426 d = datetime(2011, 11, 21, 10, 21, 36)
413 427
@@ -415,14 +429,21 @@ def n(fmt, s, e):
415 429 y('a {:tc} b', 'a Mon Nov 21 10:21:36 2011 b', d)
416 430
417 431 t530 = parse.FixedTzOffset(-5*60 - 30, '-5:30')
  432 + t830 = parse.FixedTzOffset(-8*60 - 30, '-8:30')
418 433
419 434 # tt Time time
420 435 y('a {:tt} b', 'a 10:21:36 AM +1000 b', time(10, 21, 36, tzinfo=aest))
  436 + y('a {:tt} b', 'a 10:21:36 AM +10:00 b', time(10, 21, 36, tzinfo=aest))
421 437 y('a {:tt} b', 'a 10:21:36 AM b', time(10, 21, 36))
422 438 y('a {:tt} b', 'a 10:21:36 PM b', time(22, 21, 36))
423 439 y('a {:tt} b', 'a 10:21:36 b', time(10, 21, 36))
424 440 y('a {:tt} b', 'a 10:21 b', time(10, 21))
425 441 y('a {:tt} b', 'a 10:21:36 PM -5:30 b', time(22, 21, 36, tzinfo=t530))
  442 + y('a {:tt} b', 'a 10:21:36 PM -530 b', time(22, 21, 36, tzinfo=t530))
  443 + y('a {:tt} b', 'a 10:21:36 PM -05:30 b', time(22, 21, 36, tzinfo=t530))
  444 + y('a {:tt} b', 'a 10:21:36 PM -0530 b', time(22, 21, 36, tzinfo=t530))
  445 + y('a {:tt} b', 'a 10:21:36 PM -08:30 b', time(22, 21, 36, tzinfo=t830))
  446 + y('a {:tt} b', 'a 10:21:36 PM -0830 b', time(22, 21, 36, tzinfo=t830))
426 447
427 448 def test_datetime_group_count(self):
428 449 # test we increment the group count correctly for datetimes
@@ -491,7 +512,8 @@ def test_mixed_types(self):
491 512 ''')
492 513 self.assertNotEqual(r, None)
493 514 self.assertEqual(r.fixed[22], 'spam')
494   - # variants
  515 +
  516 + def test_mixed_type_variant(self):
495 517 r = parse.parse('''
496 518 letters: {:w}
497 519 non-letters: {:W}

0 comments on commit 01c1404

Please sign in to comment.
Something went wrong with that request. Please try again.