Skip to content

Commit

Permalink
REF: simplify concat_datetime (#33526)
Browse files — browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Apr 15, 2020
1 parent 7a1d715 commit d106b81
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 76 deletions.
11 changes: 6 additions & 5 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@ def take(self, indices, allow_fill=False, fill_value=None):
return type(self)(new_values, dtype=self.dtype)

@classmethod
def _concat_same_type(cls, to_concat):
def _concat_same_type(cls, to_concat, axis: int = 0):

# do not pass tz to set because tzlocal cannot be hashed
dtypes = {str(x.dtype) for x in to_concat}
Expand All @@ -733,14 +733,15 @@ def _concat_same_type(cls, to_concat):
obj = to_concat[0]
dtype = obj.dtype

values = np.concatenate([x.asi8 for x in to_concat])
i8values = [x.asi8 for x in to_concat]
values = np.concatenate(i8values, axis=axis)

if is_period_dtype(to_concat[0].dtype):
new_freq = None
if is_period_dtype(dtype):
new_freq = obj.freq
else:
elif axis == 0:
# GH 3232: If the concat result is evenly spaced, we can retain the
# original frequency
new_freq = None
to_concat = [x for x in to_concat if len(x)]

if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):
Expand Down
93 changes: 26 additions & 67 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@

import numpy as np

from pandas._libs import tslib, tslibs

from pandas.core.dtypes.common import (
DT64NS_DTYPE,
TD64NS_DTYPE,
is_bool_dtype,
is_categorical_dtype,
is_datetime64_dtype,
Expand All @@ -19,13 +15,7 @@
is_sparse,
is_timedelta64_dtype,
)
from pandas.core.dtypes.generic import (
ABCCategoricalIndex,
ABCDatetimeArray,
ABCIndexClass,
ABCRangeIndex,
ABCSeries,
)
from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCRangeIndex, ABCSeries


def get_dtype_kinds(l):
Expand Down Expand Up @@ -390,70 +380,39 @@ def concat_datetime(to_concat, axis=0, typs=None):
if typs is None:
typs = get_dtype_kinds(to_concat)

# multiple types, need to coerce to object
if len(typs) != 1:
return _concatenate_2d(
[_convert_datetimelike_to_object(x) for x in to_concat], axis=axis
)

# must be single dtype
if any(typ.startswith("datetime") for typ in typs):

if "datetime" in typs:
to_concat = [x.astype(np.int64, copy=False) for x in to_concat]
return _concatenate_2d(to_concat, axis=axis).view(DT64NS_DTYPE)
else:
# when to_concat has different tz, len(typs) > 1.
# thus no need to care
return _concat_datetimetz(to_concat)

elif "timedelta" in typs:
return _concatenate_2d([x.view(np.int64) for x in to_concat], axis=axis).view(
TD64NS_DTYPE
)

elif any(typ.startswith("period") for typ in typs):
assert len(typs) == 1
cls = to_concat[0]
new_values = cls._concat_same_type(to_concat)
return new_values

to_concat = [_wrap_datetimelike(x) for x in to_concat]
single_dtype = len({x.dtype for x in to_concat}) == 1

def _convert_datetimelike_to_object(x):
# coerce datetimelike array to object dtype
# multiple types, need to coerce to object
if not single_dtype:
# wrap_datetimelike ensures that astype(object) wraps in Timestamp/Timedelta
return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis)

# if dtype is of datetimetz or timezone
if x.dtype.kind == DT64NS_DTYPE.kind:
if getattr(x, "tz", None) is not None:
x = np.asarray(x.astype(object))
else:
shape = x.shape
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel(), box="timestamp")
x = x.reshape(shape)
if axis == 1:
# TODO(EA2D): kludge not necessary with 2D EAs
to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat]

elif x.dtype == TD64NS_DTYPE:
shape = x.shape
x = tslibs.ints_to_pytimedelta(x.view(np.int64).ravel(), box=True)
x = x.reshape(shape)
result = type(to_concat[0])._concat_same_type(to_concat, axis=axis)

return x
if result.ndim == 2 and is_extension_array_dtype(result.dtype):
# TODO(EA2D): kludge not necessary with 2D EAs
assert result.shape[0] == 1
result = result[0]
return result


def _concat_datetimetz(to_concat, name=None):
def _wrap_datetimelike(arr):
"""
concat DatetimeIndex with the same tz
all inputs must be DatetimeIndex
it is used in DatetimeIndex.append also
Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray.
DTA/TDA handle .astype(object) correctly.
"""
# Right now, internals will pass a List[DatetimeArray] here
# for reductions like quantile. I would like to disentangle
# all this before we get here.
sample = to_concat[0]

if isinstance(sample, ABCIndexClass):
return sample._concat_same_dtype(to_concat, name=name)
elif isinstance(sample, ABCDatetimeArray):
return sample._concat_same_type(to_concat)
from pandas.core.construction import array as pd_array, extract_array

arr = extract_array(arr, extract_numpy=True)
if isinstance(arr, np.ndarray) and arr.dtype.kind in ["m", "M"]:
arr = pd_array(arr)
return arr


def _concat_sparse(to_concat, axis=0, typs=None):
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,8 +778,8 @@ def _fast_union(self, other, sort=None):
left, right = self, other
left_start = left[0]
loc = right.searchsorted(left_start, side="left")
right_chunk = right.values[:loc]
dates = concat_compat((left.values, right_chunk))
right_chunk = right._values[:loc]
dates = concat_compat([left._values, right_chunk])
result = self._shallow_copy(dates)
result._set_freq("infer")
# TODO: can we infer that it has self.freq?
Expand All @@ -793,8 +793,8 @@ def _fast_union(self, other, sort=None):
# concatenate
if left_end < right_end:
loc = right.searchsorted(left_end, side="right")
right_chunk = right.values[loc:]
dates = concat_compat((left.values, right_chunk))
right_chunk = right._values[loc:]
dates = concat_compat([left._values, right_chunk])
result = self._shallow_copy(dates)
result._set_freq("infer")
# TODO: can we infer that it has self.freq?
Expand Down

0 comments on commit d106b81

Please sign in to comment.