Skip to content

Commit

Permalink
Address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke committed Mar 15, 2018
1 parent 0f1b1ab commit 9f38bda
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 110 deletions.
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -682,7 +682,7 @@ cdef _parse_timezone_directive(object z):
if len(z) > 5:
if z[5] != ':':
msg = "Inconsistent use of : in {0}"
raise ValueError(msg.format(found_dict['z']))
raise ValueError(msg.format(z))
z = z[:5] + z[6:]
hours = int(z[1:3])
minutes = int(z[3:5])
Expand Down
127 changes: 68 additions & 59 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,70 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None):
return result.values


def _return_parsed_timezone_results(result, tznames, tzoffsets, parsing_tzname,
                                    parsing_tzoffset, box):
    """
    Return results from array_strptime if a %z or %Z directive was passed.

    How each Timestamp gets its timezone depends on which directives
    were parsed:

    - only %Z: the Timestamp is constructed with ``tz`` set to the parsed
      timezone name.
    - only %z: the Timestamp is localized to a ``pytz.FixedOffset`` built
      from the parsed offset (converted to minutes).
    - both %Z and %z: the Timestamp is constructed with a
      ``datetime.timezone(offset, tzname)`` as its tzinfo (Python 3 only).

    Parameters
    ----------
    result : ndarray
        int64 date representations of the dates
    tznames : ndarray
        strings of timezone names if %Z is parsed
    tzoffsets : ndarray
        timedelta objects of the timezone offset if %z is parsed
    parsing_tzname : boolean
        True if %Z is parsed
    parsing_tzoffset : boolean
        True if %z is parsed
    box : boolean
        True boxes result as an Index-like, False returns an ndarray

    Returns
    -------
    tz_result : ndarray of parsed dates with timezone
        Returns:

        - Index-like if box=True
        - ndarray of Timestamps if box=False

    Raises
    ------
    ValueError
        If both %Z and %z were parsed on Python 2 (``datetime.timezone`` is
        imported only on the Python 3 path), or if neither directive was
        parsed.
    """
    if parsing_tzname and parsing_tzoffset:
        if not PY3:
            # This branch is only reached on Python 2, so the message must
            # name Python 2 as the unsupported version.
            raise ValueError("Parsing both %Z and %z directives is not "
                             "supported in Python 2")
        # Imported lazily: only reached after the PY3 check above.
        from datetime import timezone
        # Do we need to validate these timezones?
        # e.g. UTC / +0100
        stamps = [tslib.Timestamp(res, tzinfo=timezone(offset, tzname))
                  for res, offset, tzname in zip(result, tzoffsets, tznames)]
    elif parsing_tzname:
        stamps = [tslib.Timestamp(res, tz=tz)
                  for res, tz in zip(result, tznames)]
    elif parsing_tzoffset:
        stamps = [
            tslib.Timestamp(res).tz_localize(
                pytz.FixedOffset(offset.total_seconds() / 60))
            for res, offset in zip(result, tzoffsets)]
    else:
        # Previously this fell through to an unbound ``tz_results``.
        raise ValueError("At least one of parsing_tzname or parsing_tzoffset "
                         "must be True")

    # dtype=object keeps the array a container of Timestamps even when the
    # input is empty (np.array([]) would otherwise default to float64).
    tz_results = np.array(stamps, dtype=object)
    if box:
        from pandas import Index
        return Index(tz_results)
    return tz_results


def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
utc=None, box=True, format=None, exact=True,
unit=None, infer_datetime_format=False, origin='unix',
Expand Down Expand Up @@ -355,65 +419,10 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
"information.")
result, tznames, tzoffsets = array_strptime(
arg, format, exact=exact, errors=errors)
if parsing_tzname and not parsing_tzoffset:
if len(set(tznames)) == 1:
tz = tznames[0]
if box:
result = DatetimeIndex(result,
tz=tz,
name=name)
else:
stamps = [tslib.Timestamp(res, tz=tz)
for res in result]
result = np.array(stamps, dtype=object)
else:
stamps = [tslib.Timestamp(res, tz=tz)
for res, tz in zip(result, tznames)]
result = np.array(stamps, dtype=object)
return result
elif parsing_tzoffset and not parsing_tzname:
# Should we convert these to pytz.FixedOffsets
# or datetime.timezones?
if len(set(tzoffsets)) == 1:
offset_mins = tzoffsets[0].total_seconds() / 60
tzoffset = pytz.FixedOffset(offset_mins)
if box:
result = DatetimeIndex(result,
tz=tzoffset,
name=name)
else:
stamps = []
for res, offset in zip(result, tzoffsets):
ts = tslib.Timestamp(res)
ts = ts.tz_localize(tzoffset)
stamps.append(ts)
result = np.array(stamps, dtype=object)
else:
stamps = []
for res, offset in zip(result, tzoffsets):
offset_mins = offset.total_seconds() / 60
tzoffset = pytz.FixedOffset(offset_mins)
ts = tslib.Timestamp(res)
ts = ts.tz_localize(tzoffset)
stamps.append(ts)
result = np.array(stamps, dtype=object)
return result
elif parsing_tzoffset and parsing_tzname:
if not PY3:
raise ValueError("Parsing tzoffsets are not "
"not supported in Python 3")
from datetime import timezone
stamps = []
for res, offset, tzname in zip(result, tzoffsets,
tznames):
# Do we need to validate these timezones?
# e.g. UTC / +0100
tzinfo = timezone(offset, tzname)
ts = tslib.Timestamp(res, tzinfo=tzinfo)
stamps.append(ts)
result = np.array(stamps, dtype=object)
return result

if parsing_tzname or parsing_tzoffset:
return _return_parsed_timezone_results(
result, tznames, tzoffsets, parsing_tzname,
parsing_tzoffset, box)
except tslib.OutOfBoundsDatetime:
if errors == 'raise':
raise
Expand Down
102 changes: 52 additions & 50 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,63 +186,65 @@ def test_to_datetime_format_weeks(self, cache):
for s, format, dt in data:
assert to_datetime(s, format=format, cache=cache) == dt

@pytest.mark.skipif(not PY3,
reason="datetime.timezone not supported in PY2")
def test_to_datetime_parse_timezone(self):
# %Z parsing only
fmt = '%Y-%m-%d %H:%M:%S %Z'
dates = ['2010-01-01 12:00:00 UTC'] * 2
result = pd.to_datetime(dates, format=fmt)
expected_dates = [pd.Timestamp('2010-01-01 12:00:00', tz='UTC')] * 2
expected = pd.DatetimeIndex(expected_dates)
tm.assert_index_equal(result, expected)

result = pd.to_datetime(dates, format=fmt, box=False)
expected = np.array(expected_dates, dtype=object)
tm.assert_numpy_array_equal(result, expected)

dates = ['2010-01-01 12:00:00 UTC', '2010-01-01 12:00:00 GMT']
result = pd.to_datetime(dates, format=fmt)
expected_dates = [pd.Timestamp('2010-01-01 12:00:00', tz='UTC'),
pd.Timestamp('2010-01-01 12:00:00', tz='GMT')]
expected = np.array(expected_dates, dtype=object)
tm.assert_numpy_array_equal(result, expected)

# %z parsing only
dates = ['2010-01-01 12:00:00 +0100'] * 2
fmt = '%Y-%m-%d %H:%M:%S %z'
result = pd.to_datetime(dates, format=fmt)
expected_dates = [pd.Timestamp('2010-01-01 12:00:00',
tzinfo=pytz.FixedOffset(60))] * 2
expected = pd.DatetimeIndex(expected_dates)
tm.assert_index_equal(result, expected)

result = pd.to_datetime(dates, format=fmt, box=False)
expected = np.array(expected_dates, dtype=object)
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize("box,const,assert_equal", [
[True, pd.Index, 'assert_index_equal'],
[False, np.array, 'assert_numpy_array_equal']])
@pytest.mark.parametrize("fmt,dates,expected_dates", [
['%Y-%m-%d %H:%M:%S %Z',
['2010-01-01 12:00:00 UTC'] * 2,
[pd.Timestamp('2010-01-01 12:00:00', tz='UTC')] * 2],
['%Y-%m-%d %H:%M:%S %Z',
['2010-01-01 12:00:00 UTC', '2010-01-01 12:00:00 GMT'],
[pd.Timestamp('2010-01-01 12:00:00', tz='UTC'),
pd.Timestamp('2010-01-01 12:00:00', tz='GMT')]],
['%Y-%m-%d %H:%M:%S %z',
['2010-01-01 12:00:00 +0100'] * 2,
[pd.Timestamp('2010-01-01 12:00:00',
tzinfo=pytz.FixedOffset(60))] * 2],
['%Y-%m-%d %H:%M:%S %z',
['2010-01-01 12:00:00 +0100', '2010-01-01 12:00:00 -0100'],
[pd.Timestamp('2010-01-01 12:00:00',
tzinfo=pytz.FixedOffset(60)),
pd.Timestamp('2010-01-01 12:00:00',
tzinfo=pytz.FixedOffset(-60))]]])
def test_to_datetime_parse_tzname_or_tzoffset(self, box, const,
assert_equal, fmt,
dates, expected_dates):
# %z or %Z parsing
result = pd.to_datetime(dates, format=fmt, box=box)
expected = const(expected_dates)
getattr(tm, assert_equal)(result, expected)

dates = ['2010-01-01 12:00:00 +0100', '2010-01-01 12:00:00 -0100']
result = pd.to_datetime(dates, format=fmt)
expected_dates = [pd.Timestamp('2010-01-01 12:00:00',
tzinfo=pytz.FixedOffset(60)),
pd.Timestamp('2010-01-01 12:00:00',
tzinfo=pytz.FixedOffset(-60))]
expected = np.array(expected_dates, dtype=object)
tm.assert_numpy_array_equal(result, expected)
with pytest.raises(ValueError):
pd.to_datetime(dates, format=fmt, box=box, utc=True)

@pytest.mark.skipif(not PY3,
reason="datetime.timezone not supported in PY2")
@pytest.mark.parametrize("box,const,assert_equal", [
[True, pd.Index, 'assert_index_equal'],
[False, np.array, 'assert_numpy_array_equal']])
@pytest.mark.parametrize("dates,expected_dates", [
[['2010-01-01 12:00:00 UTC +0100'] * 2,
[pd.Timestamp('2010-01-01 13:00:00',
tzinfo=timezone(timedelta(minutes=60), 'UTC'))] * 2],
[['2010-01-01 12:00:00 UTC +0100', '2010-01-01 12:00:00 GMT -0200'],
[pd.Timestamp('2010-01-01 13:00:00',
tzinfo=timezone(timedelta(minutes=60), 'UTC')),
pd.Timestamp('2010-01-01 10:00:00',
tzinfo=timezone(timedelta(minutes=-120), 'GMT'))]]])
def test_to_datetime_parse_tzname_and_tzoffset(self, box, const,
assert_equal, dates,
expected_dates):
# %z and %Z parsing
dates = ['2010-01-01 12:00:00 UTC +0100'] * 2
fmt = '%Y-%m-%d %H:%M:%S %Z %z'
result = pd.to_datetime(dates, format=fmt)
tzinfo = timezone(timedelta(minutes=60), 'UTC')
expected_dates = [pd.Timestamp('2010-01-01 13:00:00', tzinfo=tzinfo)]
expected = np.array(expected_dates * 2, dtype=object)
tm.assert_numpy_array_equal(result, expected)
result = pd.to_datetime(dates, format=fmt, box=box)
expected = const(expected_dates)
getattr(tm, assert_equal)(result, expected)

with pytest.raises(ValueError):
pd.to_datetime(dates, format=fmt, utc=True)
pd.to_datetime(dates, format=fmt, box=box, utc=True)

@pytest.mark.parametrize('cache', ['+0', '-1foo', 'UTCbar', ':10'])
@pytest.mark.parametrize('offset', ['+0', '-1foo', 'UTCbar', ':10'])
def test_to_datetime_parse_timezone_malformed(self, offset):
fmt = '%Y-%m-%d %H:%M:%S %z'
date = '2010-01-01 12:00:00 ' + offset
Expand Down

0 comments on commit 9f38bda

Please sign in to comment.