Skip to content

Commit

Permalink
Merge branch 'bug-2314-parse_rfc_2822-tests-failing-in-some-timezones'
Browse files Browse the repository at this point in the history
  • Loading branch information
Ian Murray committed May 2, 2012
2 parents bb4da15 + 37101a3 commit 12a42c6
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 23 deletions.
63 changes: 53 additions & 10 deletions ckan/lib/helpers.py
Expand Up @@ -605,26 +605,69 @@ def date_str_to_datetime(date_str):
# a strptime. Also avoids problem with Python 2.5 not having %f.
return datetime.datetime(*map(int, re.split('[^\d]', date_str)))

def parse_rfc_2822_date(date_str, tz_aware=True):
def parse_rfc_2822_date(date_str, assume_utc=True):
"""
Parse a date string of the form specified in RFC 2822, and return a datetime.
RFC 2822 is the date format used in HTTP headers.
If the date string contains a timezone indication, and tz_aware is True,
then the associated tzinfo is attached to the returned datetime object.
Returns None if the string cannot be parse as a valid datetime.
RFC 2822 is the date format used in HTTP headers. It should contain timezone
information, but that cannot be relied upon.
If date_str doesn't contain timezone information, then the 'assume_utc' flag
determines whether we assume this string is local (with respect to the
server running this code), or UTC. In practice, what this means is that if
assume_utc is True, then the returned datetime is 'aware', with an associated
tzinfo of offset zero. Otherwise, the returned datetime is 'naive'.
If timezone information is available in date_str, then the returned datetime
is 'aware', i.e. it has an associated tzinfo object.
Returns None if the string cannot be parsed as a valid datetime.
"""
time_tuple = email.utils.parsedate_tz(date_str)

# Not parsable
if not time_tuple:
return None

if not tz_aware:
time_tuple = time_tuple[:-1] + (None,)
# No timezone information available in the string
if time_tuple[-1] is None and not assume_utc:
return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple))
else:
offset = 0 if time_tuple[-1] is None else time_tuple[-1]
tz_info = _RFC2282TzInfo(offset)
return datetime.datetime(*time_tuple[:6], microsecond=0, tzinfo=tz_info)

class _RFC2282TzInfo(datetime.tzinfo):
    """
    A datetime.tzinfo implementation used by parse_rfc_2822_date() function.

    In order to return timezone information, a concrete implementation of
    datetime.tzinfo is required.  This class represents tzinfo that knows
    about its offset from UTC, has no knowledge of daylight savings time, and
    no knowledge of the timezone name.
    """

    def __init__(self, offset=0):
        """
        offset from UTC in seconds.

        The default of zero (i.e. UTC) exists so that the class can be
        called with no arguments: datetime.tzinfo's pickle/copy support
        re-creates the instance that way before restoring its attributes,
        and would otherwise fail when unpickling.
        """
        self.offset = datetime.timedelta(seconds=offset)

    def utcoffset(self, dt):
        """Return the fixed offset from UTC as a timedelta; dt is ignored."""
        return self.offset

    def dst(self, dt):
        """
        Dates parsed from an RFC 2822 string conflate timezone and dst, and so
        it's not possible to determine whether we're in DST or not, hence
        returning None.
        """
        return None

    def tzname(self, dt):
        """Return None: the timezone name is not known."""
        return None

return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple))

def time_ago_in_words_from_str(date_str, granularity='month'):
if date_str:
Expand Down
28 changes: 15 additions & 13 deletions ckan/tests/lib/test_helpers.py
Expand Up @@ -92,36 +92,38 @@ def test_gravatar_encodes_url_correctly(self):
for e in expected:
assert e in res, (e,res)

def test_parse_rfc_2822_simple_case(self):
def test_parse_rfc_2822_no_timezone_specified(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26" successfully.
No zone info.
Assuming it's UTC.
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26')
assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00')

def test_parse_rfc_2822_no_timezone_specified_assuming_local(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26" successfully.
Assuming it's local.
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26', assume_utc=False)
assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
assert_equal(dt.tzinfo, None)

def test_parse_rfc_2822_gmt_case(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26 GMT" successfully.
GMT obs-zone specified
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 GMT')
assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00')

def test_parse_rfc_2822_with_offset(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully.
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700')
assert_equal(dt.isoformat(), '1994-11-15T05:45:26')

def test_parse_rfc_2822_ignoring_offset(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully.
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700', tz_aware=False)
assert_equal(dt.isoformat(), '1994-11-15T12:45:26')

assert_equal(dt.isoformat(), '1994-11-15T12:45:26+07:00')

0 comments on commit 12a42c6

Please sign in to comment.