Skip to content

Commit

Permalink
[2314] Fix broken parse_rfc_2822 helper function.
Browse files Browse the repository at this point in the history
  • Loading branch information
Ian Murray committed May 2, 2012
1 parent 5638aed commit 37101a3
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 23 deletions.
63 changes: 53 additions & 10 deletions ckan/lib/helpers.py
Expand Up @@ -605,26 +605,69 @@ def date_str_to_datetime(date_str):
# a strptime. Also avoids problem with Python 2.5 not having %f.
return datetime.datetime(*map(int, re.split('[^\d]', date_str)))

def parse_rfc_2822_date(date_str, tz_aware=True):
def parse_rfc_2822_date(date_str, assume_utc=True):

This comment has been minimized.

Copy link
@tobes

tobes May 2, 2012

Contributor

Ian, we don't want to break any existing code that may use tz_aware.

Can we keep it as a parameter and, if we are killing it, then just create a log.critical('tz_aware is no longer available') if it has been set, or else retain the functionality? Not sure what users we have for this.

Also, maybe changing assume_utc to timezone='UTC' would give us more future flexibility; for now we could just raise an exception for all timezones but 'UTC'.

This comment has been minimized.

Copy link
@icmurray

icmurray May 2, 2012

Contributor

Hi tobes,

I'd considered this, but I'm hoping amercader would allow this changeset into 1.7 as it's a bug fix. And since this function was introduced in 1.7, there's no need to maintain backward compatibility with anything yet. (Hence me committing to its own branch rather than master for Adria to review: if he doesn't accept it, I can add the tz_aware argument back in).

As for assume_utc: I think the options are "assume utc" or "assume local". I don't want to introduce more complexity by allowing a user to set a timezone as part of the function call. They can achieve this by setting assume_utc=False, and then providing their own tzinfo object on the returned object.

This comment has been minimized.

Copy link
@icmurray

icmurray May 2, 2012

Contributor

.... assume_local would be a better name though.

This comment has been minimized.

Copy link
@tobes

tobes May 2, 2012

Contributor

If that's the case, that's all great, thanks.

"""
Parse a date string of the form specified in RFC 2822, and return a datetime.
RFC 2822 is the date format used in HTTP headers.
If the date string contains a timezone indication, and tz_aware is True,
then the associated tzinfo is attached to the returned datetime object.
Returns None if the string cannot be parse as a valid datetime.
RFC 2822 is the date format used in HTTP headers. It should contain timezone
information, but that cannot be relied upon.
If date_str doesn't contain timezone information, then the 'assume_utc' flag
determines whether we assume this string is local (with respect to the
server running this code), or UTC. In practice, what this means is that if
assume_utc is True, then the returned datetime is 'aware', with an associated
tzinfo of offset zero. Otherwise, the returned datetime is 'naive'.
If timezone information is available in date_str, then the returned datetime
is 'aware', ie - it has an associated tz_info object.
Returns None if the string cannot be parsed as a valid datetime.
"""
time_tuple = email.utils.parsedate_tz(date_str)

# Not parsable
if not time_tuple:
return None

if not tz_aware:
time_tuple = time_tuple[:-1] + (None,)
# No timezone information available in the string
if time_tuple[-1] is None and not assume_utc:
return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple))
else:
offset = 0 if time_tuple[-1] is None else time_tuple[-1]
tz_info = _RFC2282TzInfo(offset)
return datetime.datetime(*time_tuple[:6], microsecond=0, tzinfo=tz_info)

class _RFC2282TzInfo(datetime.tzinfo):
    """
    A datetime.tzinfo implementation used by parse_rfc_2822_date() function.

    In order to return timezone information, a concrete implementation of
    datetime.tzinfo is required.  This class represents tzinfo that knows
    about its offset from UTC, has no knowledge of daylight savings time, and
    no knowledge of the timezone name.
    """

    def __init__(self, offset=0):
        """
        offset from UTC in seconds.

        Defaults to zero (UTC) so that the class can be called with no
        arguments: datetime.tzinfo's pickle/copy support re-creates an
        instance that way before restoring its attributes, and fails with a
        TypeError if the constructor demands an argument.
        """
        self.offset = datetime.timedelta(seconds=offset)

    def utcoffset(self, dt):
        """
        Return the fixed offset from UTC as a timedelta.  The offset does not
        depend on dt.
        """
        return self.offset

    def dst(self, dt):
        """
        Dates parsed from an RFC 2822 string conflate timezone and dst, and so
        it's not possible to determine whether we're in DST or not, hence
        returning None.
        """
        return None

    def tzname(self, dt):
        """
        The timezone name is not known to this class, hence returning None.
        """
        return None

return datetime.datetime.fromtimestamp(email.utils.mktime_tz(time_tuple))

def time_ago_in_words_from_str(date_str, granularity='month'):
if date_str:
Expand Down
28 changes: 15 additions & 13 deletions ckan/tests/lib/test_helpers.py
Expand Up @@ -92,36 +92,38 @@ def test_gravatar_encodes_url_correctly(self):
for e in expected:
assert e in res, (e,res)

def test_parse_rfc_2822_simple_case(self):
def test_parse_rfc_2822_no_timezone_specified(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26" successfully.
No zone info.
Assuming it's UTC.
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26')
assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00')

def test_parse_rfc_2822_no_timezone_specified_assuming_local(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26" successfully.
Assuming it's local.
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26', assume_utc=False)
assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
assert_equal(dt.tzinfo, None)

def test_parse_rfc_2822_gmt_case(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26 GMT" successfully.
GMT obs-zone specified
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 GMT')
assert_equal(dt.isoformat(), '1994-11-15T12:45:26')
assert_equal(dt.isoformat(), '1994-11-15T12:45:26+00:00')

def test_parse_rfc_2822_with_offset(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully.
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700')
assert_equal(dt.isoformat(), '1994-11-15T05:45:26')

def test_parse_rfc_2822_ignoring_offset(self):
"""
Parse "Tue, 15 Nov 1994 12:45:26 +0700" successfully.
"""
dt = h.parse_rfc_2822_date('Tue, 15 Nov 1994 12:45:26 +0700', tz_aware=False)
assert_equal(dt.isoformat(), '1994-11-15T12:45:26')

assert_equal(dt.isoformat(), '1994-11-15T12:45:26+07:00')

0 comments on commit 37101a3

Please sign in to comment.