From 9f38bda9ea14dc9e922ab2cfafcb66833e7a6d1d Mon Sep 17 00:00:00 2001
From: Matt Roeschke
Date: Thu, 15 Mar 2018 13:13:22 -0700
Subject: [PATCH] Address comments

---
Review notes (below the separator, so not part of the commit message):

* strptime.pyx: the "Inconsistent use of :" error now formats the `z`
  argument instead of `found_dict['z']`.
* datetimes.py: the %z / %Z handling that was inlined in
  _convert_listlike moves into _return_parsed_timezone_results().
  The removed code built DatetimeIndex(..., name=name) when every
  timezone matched; the helper returns Index(tz_results) and takes no
  `name`, so the name is no longer forwarded -- TODO confirm intended.
* The ValueError raised under `not PY3` in the new helper now reads
  "not supported in Python 2" (was "not not supported in Python 3").
  The blob ids on the datetimes.py `index` line predate this fix.
* test_tools.py: one long test becomes two parametrized tests, and the
  parametrize id 'cache' is renamed to 'offset' to match the signature
  of test_to_datetime_parse_timezone_malformed.
* Leading whitespace was lost in the copy this was rebuilt from. The
  indentation of context and removed lines follows the Python nesting
  and the 79-column limit; verify against the tree before applying.

 pandas/_libs/tslibs/strptime.pyx             |   2 +-
 pandas/core/tools/datetimes.py               | 127 ++++++++++---------
 pandas/tests/indexes/datetimes/test_tools.py | 102 +++++++--------
 3 files changed, 121 insertions(+), 110 deletions(-)

diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx
index ecf4f53242397c..1c5983808810fb 100644
--- a/pandas/_libs/tslibs/strptime.pyx
+++ b/pandas/_libs/tslibs/strptime.pyx
@@ -682,7 +682,7 @@ cdef _parse_timezone_directive(object z):
         if len(z) > 5:
             if z[5] != ':':
                 msg = "Inconsistent use of : in {0}"
-                raise ValueError(msg.format(found_dict['z']))
+                raise ValueError(msg.format(z))
             z = z[:5] + z[6:]
     hours = int(z[1:3])
     minutes = int(z[3:5])
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index b6d98b5645400b..376bca4f01bedd 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -106,6 +106,70 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None):
     return result.values
 
 
+def _return_parsed_timezone_results(result, tznames, tzoffsets, parsing_tzname,
+                                    parsing_tzoffset, box):
+    """
+    Return results from array_strptime if a %z or %Z directive was passed.
+
+    If %Z is only parsed, timezones will be a pytz.timezone object.
+    If %z is only parsed, timezones will be a pytz.FixedOffset object.
+    If both %Z and %z are parsed, timezones will be a datetime.timezone object.
+
+    Parameters
+    ----------
+    result : ndarray
+        int64 date representations of the dates
+    tznames : ndarray
+        strings of timezone names if %Z is parsed
+    tzoffsets : ndarray
+        timedelta objects of the timezone offset if %z is parsed
+    parsing_tzname : boolean
+        True if %Z is parsed
+    parsing_tzoffset : boolean
+        True if %z is parsed
+    box : boolean
+        True boxes result as an Index-like, False returns an ndarray
+
+    Returns
+    -------
+    tz_result : ndarray of parsed dates with timezone
+        Returns:
+
+        - Index-like if box=True
+        - ndarray of Timestamps if box=False
+
+    """
+    if parsing_tzname and not parsing_tzoffset:
+        tz_results = np.array([tslib.Timestamp(res, tz=tz)
+                               for res, tz in zip(result, tznames)])
+    elif parsing_tzoffset and not parsing_tzname:
+        tz_results = []
+        for res, offset in zip(result, tzoffsets):
+            offset_mins = offset.total_seconds() / 60
+            tzoffset = pytz.FixedOffset(offset_mins)
+            ts = tslib.Timestamp(res)
+            ts = ts.tz_localize(tzoffset)
+            tz_results.append(ts)
+        tz_results = np.array(tz_results)
+    elif parsing_tzoffset and parsing_tzname:
+        if not PY3:
+            raise ValueError("Parsing tzoffsets are not "
+                             "supported in Python 2")
+        from datetime import timezone
+        tz_results = []
+        for res, offset, tzname in zip(result, tzoffsets, tznames):
+            # Do we need to validate these timezones?
+            # e.g. UTC / +0100
+            tzinfo = timezone(offset, tzname)
+            ts = tslib.Timestamp(res, tzinfo=tzinfo)
+            tz_results.append(ts)
+        tz_results = np.array(tz_results)
+    if box:
+        from pandas import Index
+        return Index(tz_results)
+    return tz_results
+
+
 def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
                 utc=None, box=True, format=None, exact=True,
                 unit=None, infer_datetime_format=False, origin='unix',
@@ -355,65 +419,10 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
                                              "information.")
                         result, tznames, tzoffsets = array_strptime(
                             arg, format, exact=exact, errors=errors)
-                        if parsing_tzname and not parsing_tzoffset:
-                            if len(set(tznames)) == 1:
-                                tz = tznames[0]
-                                if box:
-                                    result = DatetimeIndex(result,
-                                                           tz=tz,
-                                                           name=name)
-                                else:
-                                    stamps = [tslib.Timestamp(res, tz=tz)
-                                              for res in result]
-                                    result = np.array(stamps, dtype=object)
-                            else:
-                                stamps = [tslib.Timestamp(res, tz=tz)
-                                          for res, tz in zip(result, tznames)]
-                                result = np.array(stamps, dtype=object)
-                            return result
-                        elif parsing_tzoffset and not parsing_tzname:
-                            # Should we convert these to pytz.FixedOffsets
-                            # or datetime.timezones?
-                            if len(set(tzoffsets)) == 1:
-                                offset_mins = tzoffsets[0].total_seconds() / 60
-                                tzoffset = pytz.FixedOffset(offset_mins)
-                                if box:
-                                    result = DatetimeIndex(result,
-                                                           tz=tzoffset,
-                                                           name=name)
-                                else:
-                                    stamps = []
-                                    for res, offset in zip(result, tzoffsets):
-                                        ts = tslib.Timestamp(res)
-                                        ts = ts.tz_localize(tzoffset)
-                                        stamps.append(ts)
-                                    result = np.array(stamps, dtype=object)
-                            else:
-                                stamps = []
-                                for res, offset in zip(result, tzoffsets):
-                                    offset_mins = offset.total_seconds() / 60
-                                    tzoffset = pytz.FixedOffset(offset_mins)
-                                    ts = tslib.Timestamp(res)
-                                    ts = ts.tz_localize(tzoffset)
-                                    stamps.append(ts)
-                                result = np.array(stamps, dtype=object)
-                            return result
-                        elif parsing_tzoffset and parsing_tzname:
-                            if not PY3:
-                                raise ValueError("Parsing tzoffsets are not "
-                                                 "not supported in Python 3")
-                            from datetime import timezone
-                            stamps = []
-                            for res, offset, tzname in zip(result, tzoffsets,
-                                                           tznames):
-                                # Do we need to validate these timezones?
-                                # e.g. UTC / +0100
-                                tzinfo = timezone(offset, tzname)
-                                ts = tslib.Timestamp(res, tzinfo=tzinfo)
-                                stamps.append(ts)
-                            result = np.array(stamps, dtype=object)
-                            return result
-
+                        if parsing_tzname or parsing_tzoffset:
+                            return _return_parsed_timezone_results(
+                                result, tznames, tzoffsets, parsing_tzname,
+                                parsing_tzoffset, box)
                     except tslib.OutOfBoundsDatetime:
                         if errors == 'raise':
                             raise
diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py
index e95ec6a537f882..40e16f2df57b9e 100644
--- a/pandas/tests/indexes/datetimes/test_tools.py
+++ b/pandas/tests/indexes/datetimes/test_tools.py
@@ -186,63 +186,65 @@ def test_to_datetime_format_weeks(self, cache):
         for s, format, dt in data:
             assert to_datetime(s, format=format, cache=cache) == dt
 
-    @pytest.mark.skipif(not PY3,
-                        reason="datetime.timezone not supported in PY2")
-    def test_to_datetime_parse_timezone(self):
-        # %Z parsing only
-        fmt = '%Y-%m-%d %H:%M:%S %Z'
-        dates = ['2010-01-01 12:00:00 UTC'] * 2
-        result = pd.to_datetime(dates, format=fmt)
-        expected_dates = [pd.Timestamp('2010-01-01 12:00:00', tz='UTC')] * 2
-        expected = pd.DatetimeIndex(expected_dates)
-        tm.assert_index_equal(result, expected)
-
-        result = pd.to_datetime(dates, format=fmt, box=False)
-        expected = np.array(expected_dates, dtype=object)
-        tm.assert_numpy_array_equal(result, expected)
-
-        dates = ['2010-01-01 12:00:00 UTC', '2010-01-01 12:00:00 GMT']
-        result = pd.to_datetime(dates, format=fmt)
-        expected_dates = [pd.Timestamp('2010-01-01 12:00:00', tz='UTC'),
-                          pd.Timestamp('2010-01-01 12:00:00', tz='GMT')]
-        expected = np.array(expected_dates, dtype=object)
-        tm.assert_numpy_array_equal(result, expected)
-
-        # %z parsing only
-        dates = ['2010-01-01 12:00:00 +0100'] * 2
-        fmt = '%Y-%m-%d %H:%M:%S %z'
-        result = pd.to_datetime(dates, format=fmt)
-        expected_dates = [pd.Timestamp('2010-01-01 12:00:00',
-                                       tzinfo=pytz.FixedOffset(60))] * 2
-        expected = pd.DatetimeIndex(expected_dates)
-        tm.assert_index_equal(result, expected)
-
-        result = pd.to_datetime(dates, format=fmt, box=False)
-        expected = np.array(expected_dates, dtype=object)
-        tm.assert_numpy_array_equal(result, expected)
+    @pytest.mark.parametrize("box,const,assert_equal", [
+        [True, pd.Index, 'assert_index_equal'],
+        [False, np.array, 'assert_numpy_array_equal']])
+    @pytest.mark.parametrize("fmt,dates,expected_dates", [
+        ['%Y-%m-%d %H:%M:%S %Z',
+            ['2010-01-01 12:00:00 UTC'] * 2,
+            [pd.Timestamp('2010-01-01 12:00:00', tz='UTC')] * 2],
+        ['%Y-%m-%d %H:%M:%S %Z',
+            ['2010-01-01 12:00:00 UTC', '2010-01-01 12:00:00 GMT'],
+            [pd.Timestamp('2010-01-01 12:00:00', tz='UTC'),
+             pd.Timestamp('2010-01-01 12:00:00', tz='GMT')]],
+        ['%Y-%m-%d %H:%M:%S %z',
+            ['2010-01-01 12:00:00 +0100'] * 2,
+            [pd.Timestamp('2010-01-01 12:00:00',
+                          tzinfo=pytz.FixedOffset(60))] * 2],
+        ['%Y-%m-%d %H:%M:%S %z',
+            ['2010-01-01 12:00:00 +0100', '2010-01-01 12:00:00 -0100'],
+            [pd.Timestamp('2010-01-01 12:00:00',
+                          tzinfo=pytz.FixedOffset(60)),
+             pd.Timestamp('2010-01-01 12:00:00',
+                          tzinfo=pytz.FixedOffset(-60))]]])
+    def test_to_datetime_parse_tzname_or_tzoffset(self, box, const,
+                                                  assert_equal, fmt,
+                                                  dates, expected_dates):
+        # %z or %Z parsing
+        result = pd.to_datetime(dates, format=fmt, box=box)
+        expected = const(expected_dates)
+        getattr(tm, assert_equal)(result, expected)
 
-        dates = ['2010-01-01 12:00:00 +0100', '2010-01-01 12:00:00 -0100']
-        result = pd.to_datetime(dates, format=fmt)
-        expected_dates = [pd.Timestamp('2010-01-01 12:00:00',
-                                       tzinfo=pytz.FixedOffset(60)),
-                          pd.Timestamp('2010-01-01 12:00:00',
-                                       tzinfo=pytz.FixedOffset(-60))]
-        expected = np.array(expected_dates, dtype=object)
-        tm.assert_numpy_array_equal(result, expected)
+        with pytest.raises(ValueError):
+            pd.to_datetime(dates, format=fmt, box=box, utc=True)
 
+    @pytest.mark.skipif(not PY3,
+                        reason="datetime.timezone not supported in PY2")
+    @pytest.mark.parametrize("box,const,assert_equal", [
+        [True, pd.Index, 'assert_index_equal'],
+        [False, np.array, 'assert_numpy_array_equal']])
+    @pytest.mark.parametrize("dates,expected_dates", [
+        [['2010-01-01 12:00:00 UTC +0100'] * 2,
+         [pd.Timestamp('2010-01-01 13:00:00',
+                       tzinfo=timezone(timedelta(minutes=60), 'UTC'))] * 2],
+        [['2010-01-01 12:00:00 UTC +0100', '2010-01-01 12:00:00 GMT -0200'],
+         [pd.Timestamp('2010-01-01 13:00:00',
+                       tzinfo=timezone(timedelta(minutes=60), 'UTC')),
+          pd.Timestamp('2010-01-01 10:00:00',
+                       tzinfo=timezone(timedelta(minutes=-120), 'GMT'))]]])
+    def test_to_datetime_parse_tzname_and_tzoffset(self, box, const,
+                                                   assert_equal, dates,
+                                                   expected_dates):
         # %z and %Z parsing
-        dates = ['2010-01-01 12:00:00 UTC +0100'] * 2
         fmt = '%Y-%m-%d %H:%M:%S %Z %z'
-        result = pd.to_datetime(dates, format=fmt)
-        tzinfo = timezone(timedelta(minutes=60), 'UTC')
-        expected_dates = [pd.Timestamp('2010-01-01 13:00:00', tzinfo=tzinfo)]
-        expected = np.array(expected_dates * 2, dtype=object)
-        tm.assert_numpy_array_equal(result, expected)
+        result = pd.to_datetime(dates, format=fmt, box=box)
+        expected = const(expected_dates)
+        getattr(tm, assert_equal)(result, expected)
 
         with pytest.raises(ValueError):
-            pd.to_datetime(dates, format=fmt, utc=True)
+            pd.to_datetime(dates, format=fmt, box=box, utc=True)
 
-    @pytest.mark.parametrize('cache', ['+0', '-1foo', 'UTCbar', ':10'])
+    @pytest.mark.parametrize('offset', ['+0', '-1foo', 'UTCbar', ':10'])
     def test_to_datetime_parse_timezone_malformed(self, offset):
         fmt = '%Y-%m-%d %H:%M:%S %z'
         date = '2010-01-01 12:00:00 ' + offset