From 9f273c48cfbfc2aedbb9cee84f6c32c191ff55f0 Mon Sep 17 00:00:00 2001
From: Matt Roeschke
Date: Thu, 22 Feb 2018 11:27:53 -0800
Subject: [PATCH] ENH: Parse %z directive in format for to_datetime

return timedeltas as list

return timedeltas in a numpy array

some flake fixes
---
Reviewer notes (text between "---" and the diffstat is not part of the commit):
 * This patch text had been flattened onto three lines. Line breaks and
   indentation were restored; the indentation of context lines and the
   position of blank context lines are a best-effort reconstruction and
   should be verified against base blobs e7dabb94f89751 / 1de43116d0b49d
   before applying (or apply with --ignore-whitespace).
 * The nesting of the "%z" parsing branch (parse_code == 19) follows the
   CPython _strptime implementation it mirrors -- TODO confirm.
 * TimeRE: restored the named groups (?P<Y>...) and (?P<z>...). The parser
   reads found_dict['z'], so the group has to be named.
 * array_strptime: both arms of "if tzname:" passed tzname to
   datetime_timezone. The else arm now calls datetime_timezone(tzdelta) so
   a format containing %z but no %Z does not pass name=None.
 * "Unconsistent" -> "Inconsistent" in the ValueError message.
 * Hunk line counts and the diffstat are unchanged by these fixes.
 * NOTE(review): array_strptime now returns a (result, results_tz) tuple;
   the only caller updated in this patch is _convert_listlike -- check for
   other callers.

 pandas/_libs/tslibs/strptime.pyx | 50 ++++++++++++++++++++++++++++++--
 pandas/core/tools/datetimes.py   |  2 +-
 2 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx
index e7dabb94f89751..5d343a68a22b95 100644
--- a/pandas/_libs/tslibs/strptime.pyx
+++ b/pandas/_libs/tslibs/strptime.pyx
@@ -29,7 +29,8 @@ cimport cython
 import numpy as np
 from numpy cimport ndarray, int64_t

-from datetime import date as datetime_date
+from datetime import (date as datetime_date, timedelta as datetime_timedelta,
+                      timezone as datetime_timezone)
 from cpython.datetime cimport datetime

 from np_datetime cimport (check_dts_bounds,
@@ -58,6 +59,7 @@ def array_strptime(ndarray[object] values, object fmt,
         Py_ssize_t i, n = len(values)
         pandas_datetimestruct dts
         ndarray[int64_t] iresult
+        ndarray[object] results_tz
         int year, month, day, minute, hour, second, weekday, julian, tz
         int week_of_year, week_of_year_start
         int64_t us, ns
@@ -109,6 +111,8 @@ def array_strptime(ndarray[object] values, object fmt,
     result = np.empty(n, dtype='M8[ns]')
     iresult = result.view('i8')

+    results_tz = np.empty(n, dtype='object')
+
     dts.us = dts.ps = dts.as = 0

     cdef dict _parse_code_table = {
@@ -130,7 +134,8 @@ def array_strptime(ndarray[object] values, object fmt,
         'U': 15,
         'W': 16,
         'Z': 17,
-        'p': 18  # just an additional key, works only with I
+        'p': 18,  # just an additional key, works only with I
+        'z': 19,
     }

     cdef int parse_code
@@ -177,6 +182,8 @@ def array_strptime(ndarray[object] values, object fmt,
         month = day = 1
         hour = minute = second = ns = us = 0
         tz = -1
+        gmtoff = None
+        gmtoff_fraction = 0
         # Default to -1 to signify that values not known; not critical to have,
         # though
         week_of_year = -1
@@ -281,6 +288,32 @@ def array_strptime(ndarray[object] values, object fmt,
                         else:
                             tz = value
                             break
+            elif parse_code == 19:
+                z = found_dict['z']
+                if z == 'Z':
+                    gmtoff = 0
+                else:
+                    if z[3] == ':':
+                        z = z[:3] + z[4:]
+                        if len(z) > 5:
+                            if z[5] != ':':
+                                msg = "Inconsistent use of : in {0}"
+                                raise ValueError(msg.format(found_dict['z']))
+                            z = z[:5] + z[6:]
+                    hours = int(z[1:3])
+                    minutes = int(z[3:5])
+                    seconds = int(z[5:7] or 0)
+                    gmtoff = (hours * 60 * 60) + (minutes * 60) + seconds
+                    gmtoff_remainder = z[8:]
+                    # Pad to always return microseconds.
+                    pad_number = 6 - len(gmtoff_remainder)
+                    gmtoff_remainder_padding = "0" * pad_number
+                    gmtoff_fraction = int(gmtoff_remainder +
+                                          gmtoff_remainder_padding)
+                    if z.startswith("-"):
+                        gmtoff = -gmtoff
+                        gmtoff_fraction = -gmtoff_fraction
+
         # If we know the wk of the year and what day of that wk, we can figure
         # out the Julian day of the year.
         if julian == -1 and week_of_year != -1 and weekday != -1:
@@ -330,7 +363,17 @@ def array_strptime(ndarray[object] values, object fmt,
                 continue
             raise

-    return result
+        if gmtoff is not None:
+            tzdelta = datetime_timedelta(seconds=gmtoff,
+                                         microseconds=gmtoff_fraction)
+            tzname = found_dict.get('Z')
+            if tzname:
+                tzinfo = datetime_timezone(tzdelta, tzname)
+            else:
+                tzinfo = datetime_timezone(tzdelta)
+            results_tz[i] = tzinfo
+
+    return result, results_tz


 """_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored
@@ -538,6 +581,7 @@ class TimeRE(dict):
             # XXX: Does 'Y' need to worry about having less or more than
             #      4 digits?
             'Y': r"(?P<Y>\d\d\d\d)",
+            'z': r"(?P<z>[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|Z)",
             'A': self.__seqToRE(self.locale_time.f_weekday, 'A'),
             'a': self.__seqToRE(self.locale_time.a_weekday, 'a'),
             'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'),
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 1de43116d0b49d..d02313bb64d354 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -344,7 +344,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
                 if result is None:
                     try:
                         result = array_strptime(arg, format, exact=exact,
-                                                errors=errors)
+                                                errors=errors)[0]
                     except tslib.OutOfBoundsDatetime:
                         if errors == 'raise':
                             raise