Skip to content

Commit

Permalink
BUG/TST: assure conversions of datetimelikes for object, numeric dtypes
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed Jan 13, 2018
1 parent 8347ff8 commit b953695
Show file tree
Hide file tree
Showing 9 changed files with 164 additions and 65 deletions.
5 changes: 5 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
Expand Up @@ -385,6 +385,11 @@ Conversion
- Bug in localization of a naive, datetime string in a ``Series`` constructor with a ``datetime64[ns, tz]`` dtype (:issue:`17415`)
- :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`)



- Bug in ``.astype()`` where converting to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`)


Indexing
^^^^^^^^

Expand Down
26 changes: 24 additions & 2 deletions pandas/_libs/tslibs/conversion.pyx
Expand Up @@ -29,7 +29,7 @@ from np_datetime cimport (check_dts_bounds,

from util cimport (is_string_object,
is_datetime64_object,
is_integer_object, is_float_object)
is_integer_object, is_float_object, is_array)

from timedeltas cimport cast_from_unit
from timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
Expand All @@ -45,6 +45,8 @@ from nattype cimport NPY_NAT, checknull_with_nat
# Constants

cdef int64_t DAY_NS = 86400000000000LL
NS_DTYPE = np.dtype('M8[ns]')
TD_DTYPE = np.dtype('m8[ns]')

UTC = pytz.UTC

Expand Down Expand Up @@ -73,13 +75,14 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1:
return ival


def ensure_datetime64ns(ndarray arr):
def ensure_datetime64ns(ndarray arr, copy=True):
"""
Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]'
Parameters
----------
arr : ndarray
copy : boolean, default True
Returns
-------
Expand All @@ -104,6 +107,8 @@ def ensure_datetime64ns(ndarray arr):

unit = get_datetime64_unit(arr.flat[0])
if unit == PANDAS_FR_ns:
if copy:
arr = arr.copy()
result = arr
else:
for i in range(n):
Expand All @@ -117,6 +122,23 @@ def ensure_datetime64ns(ndarray arr):
return result


def ensure_timedelta64ns(ndarray arr, copy=True):
    """
    Cast an array to the nanosecond timedelta dtype ('timedelta64[ns]').

    Parameters
    ----------
    arr : ndarray
    copy : boolean, default True
        Forwarded unchanged to ``ndarray.astype``.

    Returns
    -------
    result : ndarray with dtype timedelta64[ns]
    """
    # TD_DTYPE is the module-level np.dtype('m8[ns]') constant
    converted = arr.astype(TD_DTYPE, copy=copy)
    return converted


def datetime_to_datetime64(ndarray[object] values):
"""
Convert ndarray of datetime-like objects to int64 array representing
Expand Down
42 changes: 18 additions & 24 deletions pandas/core/dtypes/cast.py
Expand Up @@ -656,33 +656,39 @@ def astype_nansafe(arr, dtype, copy=True):
return tslib.ints_to_pydatetime(arr.view(np.int64))
elif dtype == np.int64:
return arr.view(dtype)
elif dtype != _NS_DTYPE:
raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
"to [{to_dtype}]".format(from_dtype=arr.dtype,
to_dtype=dtype))
return arr.astype(_NS_DTYPE)

# allow frequency conversions
if dtype.kind == 'M':
return arr.astype(dtype)

raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
"to [{to_dtype}]".format(from_dtype=arr.dtype,
to_dtype=dtype))

elif is_timedelta64_dtype(arr):
if dtype == np.int64:
return arr.view(dtype)
elif dtype == object:
return tslib.ints_to_pytimedelta(arr.view(np.int64))

# in py3, timedelta64[ns] are int64
elif ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
(not PY3 and dtype != _TD_DTYPE)):
if ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
(not PY3 and dtype != _TD_DTYPE)):

# allow frequency conversions
# we return a float here!
if dtype.kind == 'm':
mask = isna(arr)
result = arr.astype(dtype).astype(np.float64)
result[mask] = np.nan
return result
elif dtype == _TD_DTYPE:
return arr.astype(_TD_DTYPE, copy=copy)

raise TypeError("cannot astype a timedelta from [{from_dtype}] "
"to [{to_dtype}]".format(from_dtype=arr.dtype,
to_dtype=dtype))
raise TypeError("cannot astype a timedelta from [{from_dtype}] "
"to [{to_dtype}]".format(from_dtype=arr.dtype,
to_dtype=dtype))

return arr.astype(_TD_DTYPE)
elif (np.issubdtype(arr.dtype, np.floating) and
np.issubdtype(dtype, np.integer)):

Expand All @@ -704,19 +710,7 @@ def astype_nansafe(arr, dtype, copy=True):

if copy:

if arr.dtype == dtype:
return arr.copy()

# we handle datetimelikes with pandas machinery
# to be robust to the input type
elif is_datetime64_dtype(dtype):
from pandas import to_datetime
return to_datetime(arr).values
elif is_timedelta64_dtype(dtype):
from pandas import to_timedelta
return to_timedelta(arr).values

return arr.astype(dtype)
return arr.astype(dtype, copy=True)
return arr.view(dtype)


Expand Down
8 changes: 6 additions & 2 deletions pandas/core/dtypes/common.py
Expand Up @@ -4,6 +4,7 @@
from pandas.compat import (string_types, text_type, binary_type,
PY3, PY36)
from pandas._libs import algos, lib
from pandas._libs.tslibs import conversion
from .dtypes import (CategoricalDtype, CategoricalDtypeType,
DatetimeTZDtype, DatetimeTZDtypeType,
PeriodDtype, PeriodDtypeType,
Expand All @@ -21,8 +22,8 @@
for t in ['O', 'int8', 'uint8', 'int16', 'uint16',
'int32', 'uint32', 'int64', 'uint64']])

_NS_DTYPE = np.dtype('M8[ns]')
_TD_DTYPE = np.dtype('m8[ns]')
_NS_DTYPE = conversion.NS_DTYPE
_TD_DTYPE = conversion.TD_DTYPE
_INT64_DTYPE = np.dtype(np.int64)

# oh the troubles to reduce import time
Expand All @@ -31,6 +32,9 @@
_ensure_float64 = algos.ensure_float64
_ensure_float32 = algos.ensure_float32

_ensure_datetime64ns = conversion.ensure_datetime64ns
_ensure_timedelta64ns = conversion.ensure_timedelta64ns


def _ensure_float(arr):
"""
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/internals.py
Expand Up @@ -631,7 +631,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
values = astype_nansafe(values.ravel(), dtype, copy=True)
values = values.reshape(self.shape)

newb = make_block(values, placement=self.mgr_locs, dtype=dtype,
newb = make_block(values, placement=self.mgr_locs,
klass=klass)
except:
if errors == 'raise':
Expand Down Expand Up @@ -1954,6 +1954,13 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
_can_hold_na = True
is_numeric = False

def __init__(self, values, placement, fastpath=False, **kwargs):
    """
    Build the block, coercing ``values`` to timedelta64[ns] when needed.

    ``fastpath`` is accepted for signature compatibility; the parent
    constructor is always invoked with ``fastpath=True``.
    """
    needs_coercion = values.dtype != _TD_DTYPE
    if needs_coercion:
        # non-nanosecond units are converted before being stored
        values = conversion.ensure_timedelta64ns(values)

    parent = super(TimeDeltaBlock, self)
    parent.__init__(values, fastpath=True, placement=placement, **kwargs)

@property
def _box_func(self):
return lambda x: tslib.Timedelta(x, unit='ns')
Expand Down
65 changes: 65 additions & 0 deletions pandas/tests/frame/test_dtypes.py
Expand Up @@ -640,6 +640,71 @@ def test_astype_categoricaldtype_class_raises(self, cls):
with tm.assert_raises_regex(TypeError, xpr):
df['A'].astype(cls)

@pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
@pytest.mark.parametrize("dtype", ["M8", "m8"])
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit):
    # gh-19223 / gh-12425
    # numeric input cast to each datetime/timedelta unit must agree
    # with the equivalent cast done directly on the ndarray
    target = "{}[{}]".format(dtype, unit)
    values = np.array([[1, 2, 3]], dtype=arr_dtype)

    frame = DataFrame(values)
    result = frame.astype(target)
    expected = DataFrame(values.astype(target))

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
def test_astype_to_datetime_unit(self, unit):
    # gh-19223
    # datetime64 input of a given unit, cast to that same unit
    target = "M8[{}]".format(unit)
    values = np.array([[1, 2, 3]], dtype=target)

    frame = DataFrame(values)
    result = frame.astype(target)
    expected = DataFrame(values.astype(target))

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("unit", ['ns'])
def test_astype_to_timedelta_unit_ns(self, unit):
    # gh-19223
    # the nanosecond timedelta conversion is preserved as-is
    target = "m8[{}]".format(unit)
    values = np.array([[1, 2, 3]], dtype=target)

    frame = DataFrame(values)
    result = frame.astype(target)
    expected = DataFrame(values.astype(target))

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("unit", ['us', 'ms', 's', 'h', 'm', 'D'])
def test_astype_to_timedelta_unit(self, unit):
    # gh-19223
    # the expected frame is built from a float cast of the values
    target = "m8[{}]".format(unit)
    values = np.array([[1, 2, 3]], dtype=target)

    frame = DataFrame(values)
    result = frame.astype(target)

    as_float = frame.values.astype(target).astype(float)
    expected = DataFrame(as_float)

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
def test_astype_to_incorrect_datetimelike(self, unit):
    # gh-19224
    # casting between datetime64 (M) and timedelta64 (m), in either
    # direction, must raise
    dtype = "M8[{}]".format(unit)
    other = "m8[{}]".format(unit)

    # (source dtype, target dtype), checked in the original order
    for source, target in [(dtype, other), (other, dtype)]:
        df = DataFrame(np.array([[1, 2, 3]], dtype=source))
        with pytest.raises(TypeError):
            df.astype(target)

def test_timedeltas(self):
df = DataFrame(dict(A=Series(date_range('2012-1-1', periods=3,
freq='D')),
Expand Down
24 changes: 11 additions & 13 deletions pandas/tests/reshape/merge/test_merge.py
Expand Up @@ -523,25 +523,23 @@ def test_other_datetime_unit(self):
columns=['entity_id', 'days'])
tm.assert_frame_equal(result, exp)

def test_other_timedelta_unit(self):
@pytest.mark.parametrize("unit", ['D', 'h', 'm', 's', 'ms', 'us', 'ns'])
def test_other_timedelta_unit(self, unit):
# GH 13389
df1 = pd.DataFrame({'entity_id': [101, 102]})
s = pd.Series([None, None], index=[101, 102], name='days')

for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]',
'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]',
'timedelta64[ns]']:
dtype = "m8[{}]".format(unit)
df2 = s.astype(dtype).to_frame('days')
assert df2['days'].dtype == 'm8[ns]'

df2 = s.astype(dtype).to_frame('days')
assert df2['days'].dtype == dtype

result = df1.merge(df2, left_on='entity_id', right_index=True)
result = df1.merge(df2, left_on='entity_id', right_index=True)

exp = pd.DataFrame({'entity_id': [101, 102],
'days': np.array(['nat', 'nat'],
dtype=dtype)},
columns=['entity_id', 'days'])
tm.assert_frame_equal(result, exp)
exp = pd.DataFrame({'entity_id': [101, 102],
'days': np.array(['nat', 'nat'],
dtype=dtype)},
columns=['entity_id', 'days'])
tm.assert_frame_equal(result, exp)

def test_overlapping_columns_error_message(self):
df = DataFrame({'key': [1, 2, 3],
Expand Down
18 changes: 14 additions & 4 deletions pandas/tests/series/test_constructors.py
Expand Up @@ -552,10 +552,6 @@ def test_constructor_dtype_datetime64(self):
s.iloc[0] = np.nan
assert s.dtype == 'M8[ns]'

# invalid astypes
for t in ['s', 'D', 'us', 'ms']:
pytest.raises(TypeError, s.astype, 'M8[%s]' % t)

# GH3414 related
pytest.raises(TypeError, lambda x: Series(
Series(dates).astype('int') / 1000000, dtype='M8[ms]'))
Expand Down Expand Up @@ -707,6 +703,20 @@ def test_constructor_with_datetime_tz(self):
expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
assert_series_equal(s, expected)

@pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
@pytest.mark.parametrize("dtype", ["M8", "m8"])
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit):
    # gh-19223
    # numeric Series cast to each datetime/timedelta unit must agree
    # with the equivalent cast done directly on the ndarray
    target = "{}[{}]".format(dtype, unit)
    values = np.array([1, 2, 3], dtype=arr_dtype)

    series = Series(values)
    result = series.astype(target)
    expected = Series(values.astype(target))

    tm.assert_series_equal(result, expected)

@pytest.mark.parametrize('arg',
['2013-01-01 00:00:00', pd.NaT, np.nan, None])
def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg):
Expand Down
32 changes: 13 additions & 19 deletions pandas/tests/series/test_operators.py
Expand Up @@ -1649,32 +1649,26 @@ def test_invalid_ops(self):
pytest.raises(Exception, self.objSeries.__sub__,
np.array(1, dtype=np.int64))

def test_timedelta64_conversions(self):
@pytest.mark.parametrize("m", [1, 3, 10])
@pytest.mark.parametrize("unit", ['D', 'h', 'm', 's', 'ms', 'us', 'ns'])
def test_timedelta64_conversions(self, m, unit):

startdate = Series(date_range('2013-01-01', '2013-01-03'))
enddate = Series(date_range('2013-03-01', '2013-03-03'))

s1 = enddate - startdate
s1[2] = np.nan

for m in [1, 3, 10]:
for unit in ['D', 'h', 'm', 's', 'ms', 'us', 'ns']:

# op
expected = s1.apply(lambda x: x / np.timedelta64(m, unit))
result = s1 / np.timedelta64(m, unit)
assert_series_equal(result, expected)

if m == 1 and unit != 'ns':

# astype
result = s1.astype("timedelta64[{0}]".format(unit))
assert_series_equal(result, expected)
# op
expected = s1.apply(lambda x: x / np.timedelta64(m, unit))
result = s1 / np.timedelta64(m, unit)
assert_series_equal(result, expected)

# reverse op
expected = s1.apply(
lambda x: Timedelta(np.timedelta64(m, unit)) / x)
result = np.timedelta64(m, unit) / s1
assert_series_equal(result, expected)
# reverse op
expected = s1.apply(
lambda x: Timedelta(np.timedelta64(m, unit)) / x)
result = np.timedelta64(m, unit) / s1
assert_series_equal(result, expected)

# astype
s = Series(date_range('20130101', periods=3))
Expand Down

0 comments on commit b953695

Please sign in to comment.