Skip to content

Commit

Permalink
REF: collect get_dst_info-using functions in tslibs.vectorized (#35168)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Jul 8, 2020
1 parent 42fd7e7 commit 0cef605
Show file tree
Hide file tree
Showing 17 changed files with 504 additions and 467 deletions.
5 changes: 4 additions & 1 deletion asv_bench/benchmarks/tslibs/resolution.py
Expand Up @@ -23,7 +23,10 @@
import numpy as np
import pytz

from pandas._libs.tslibs.resolution import get_resolution
try:
from pandas._libs.tslibs import get_resolution
except ImportError:
from pandas._libs.tslibs.resolution import get_resolution


class TimeResolution:
Expand Down
5 changes: 4 additions & 1 deletion asv_bench/benchmarks/tslibs/tslib.py
Expand Up @@ -21,7 +21,10 @@
import numpy as np
import pytz

from pandas._libs.tslib import ints_to_pydatetime
try:
from pandas._libs.tslibs import ints_to_pydatetime
except ImportError:
from pandas._libs.tslib import ints_to_pydatetime

_tzs = [
None,
Expand Down
170 changes: 2 additions & 168 deletions pandas/_libs/tslib.pyx
Expand Up @@ -4,18 +4,14 @@ from cpython.datetime cimport (
PyDate_Check,
PyDateTime_Check,
PyDateTime_IMPORT,
date,
datetime,
time,
timedelta,
tzinfo,
)
# import datetime C API
PyDateTime_IMPORT


cimport numpy as cnp
from numpy cimport float64_t, int64_t, ndarray, uint8_t, intp_t
from numpy cimport float64_t, int64_t, ndarray
import numpy as np
cnp.import_array()

Expand All @@ -42,11 +38,6 @@ from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime

from pandas._libs.tslibs.parsing import parse_datetime_string

from pandas._libs.tslibs.timezones cimport (
get_dst_info,
is_utc,
is_tzlocal,
)
from pandas._libs.tslibs.conversion cimport (
_TSObject,
cast_from_unit,
Expand All @@ -60,174 +51,17 @@ from pandas._libs.tslibs.nattype cimport (
c_nat_strings as nat_strings,
)

from pandas._libs.tslibs.offsets cimport to_offset

from pandas._libs.tslibs.timestamps cimport create_timestamp_from_ts, _Timestamp
from pandas._libs.tslibs.timestamps cimport _Timestamp
from pandas._libs.tslibs.timestamps import Timestamp

from pandas._libs.tslibs.tzconversion cimport (
tz_convert_utc_to_tzlocal,
tz_localize_to_utc_single,
)

# Note: this is the only non-tslibs intra-pandas dependency here
from pandas._libs.missing cimport checknull_with_nat_and_na


cdef inline object create_datetime_from_ts(
int64_t value,
npy_datetimestruct dts,
tzinfo tz,
object freq,
bint fold,
):
"""
Convenience routine to construct a datetime.datetime from its parts.
"""
return datetime(
dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold
)


cdef inline object create_date_from_ts(
int64_t value,
npy_datetimestruct dts,
tzinfo tz,
object freq,
bint fold
):
"""
Convenience routine to construct a datetime.date from its parts.
"""
# GH 25057 add fold argument to match other func_create signatures
return date(dts.year, dts.month, dts.day)


cdef inline object create_time_from_ts(
int64_t value,
npy_datetimestruct dts,
tzinfo tz,
object freq,
bint fold
):
"""
Convenience routine to construct a datetime.time from its parts.
"""
return time(dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold)


@cython.wraparound(False)
@cython.boundscheck(False)
def ints_to_pydatetime(
const int64_t[:] arr,
tzinfo tz=None,
object freq=None,
bint fold=False,
str box="datetime"
):
"""
Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp.
Parameters
----------
arr : array of i8
tz : str, optional
convert to this timezone
freq : str/Offset, optional
freq to convert
fold : bint, default is 0
Due to daylight saving time, one wall clock time can occur twice
when shifting from summer to winter time; fold describes whether the
datetime-like corresponds to the first (0) or the second time (1)
the wall clock hits the ambiguous time
.. versionadded:: 1.1.0
box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime'
* If datetime, convert to datetime.datetime
* If date, convert to datetime.date
* If time, convert to datetime.time
* If Timestamp, convert to pandas.Timestamp
Returns
-------
ndarray of dtype specified by box
"""
cdef:
Py_ssize_t i, n = len(arr)
ndarray[int64_t] trans
int64_t[:] deltas
intp_t[:] pos
npy_datetimestruct dts
object dt, new_tz
str typ
int64_t value, local_value, delta = NPY_NAT # dummy for delta
ndarray[object] result = np.empty(n, dtype=object)
object (*func_create)(int64_t, npy_datetimestruct, tzinfo, object, bint)
bint use_utc = False, use_tzlocal = False, use_fixed = False
bint use_pytz = False

if box == "date":
assert (tz is None), "tz should be None when converting to date"

func_create = create_date_from_ts
elif box == "timestamp":
func_create = create_timestamp_from_ts

if isinstance(freq, str):
freq = to_offset(freq)
elif box == "time":
func_create = create_time_from_ts
elif box == "datetime":
func_create = create_datetime_from_ts
else:
raise ValueError(
"box must be one of 'datetime', 'date', 'time' or 'timestamp'"
)

if is_utc(tz) or tz is None:
use_utc = True
elif is_tzlocal(tz):
use_tzlocal = True
else:
trans, deltas, typ = get_dst_info(tz)
if typ not in ["pytz", "dateutil"]:
# static/fixed; in this case we know that len(delta) == 1
use_fixed = True
delta = deltas[0]
else:
pos = trans.searchsorted(arr, side="right") - 1
use_pytz = typ == "pytz"

for i in range(n):
new_tz = tz
value = arr[i]

if value == NPY_NAT:
result[i] = <object>NaT
else:
if use_utc:
local_value = value
elif use_tzlocal:
local_value = tz_convert_utc_to_tzlocal(value, tz)
elif use_fixed:
local_value = value + delta
elif not use_pytz:
# i.e. dateutil
# no zone-name change for dateutil tzs - dst etc
# represented in single object.
local_value = value + deltas[pos[i]]
else:
# pytz
# find right representation of dst etc in pytz timezone
new_tz = tz._tzinfos[tz._transition_info[pos[i]]]
local_value = value + deltas[pos[i]]

dt64_to_dtstruct(local_value, &dts)
result[i] = func_create(value, dts, new_tz, freq, fold)

return result


def _test_parse_iso8601(ts: str):
"""
TESTING ONLY: Parse string into Timestamp using iso8601 parser. Used
Expand Down
12 changes: 12 additions & 0 deletions pandas/_libs/tslibs/__init__.py
Expand Up @@ -11,8 +11,13 @@
"Period",
"Resolution",
"Timedelta",
"normalize_i8_timestamps",
"is_date_array_normalized",
"dt64arr_to_periodarr",
"delta_to_nanoseconds",
"ints_to_pydatetime",
"ints_to_pytimedelta",
"get_resolution",
"Timestamp",
"tz_convert_single",
"to_offset",
Expand All @@ -30,3 +35,10 @@
from .timedeltas import Timedelta, delta_to_nanoseconds, ints_to_pytimedelta
from .timestamps import Timestamp
from .tzconversion import tz_convert_single
from .vectorized import (
dt64arr_to_periodarr,
get_resolution,
ints_to_pydatetime,
is_date_array_normalized,
normalize_i8_timestamps,
)
1 change: 0 additions & 1 deletion pandas/_libs/tslibs/conversion.pxd
Expand Up @@ -25,5 +25,4 @@ cdef int64_t get_datetime64_nanos(object val) except? -1
cpdef datetime localize_pydatetime(datetime dt, object tz)
cdef int64_t cast_from_unit(object ts, str unit) except? -1

cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo tz)
cdef int64_t normalize_i8_stamp(int64_t local_val) nogil

0 comments on commit 0cef605

Please sign in to comment.