Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix concatenating Variables with dtype=datetime64 #134

Merged
merged 6 commits into from
May 20, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 11 additions & 0 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,17 @@ def test(self):
self.assertEqual(expected.dtype, actual.dtype)


class TestArrayEquiv(TestCase):
    """Checks ``utils.array_equiv`` on 0-dimensional object arrays."""

    def test_0d(self):
        # verify our work around for pd.isnull not working for 0-dimensional
        # object arrays
        zero_obj = np.array(0, dtype=object)
        nan_obj = np.array(np.nan, dtype=object)
        one_obj = np.array(1, dtype=object)

        self.assertTrue(utils.array_equiv(0, zero_obj))
        # NaN must be treated as equivalent to NaN
        self.assertTrue(utils.array_equiv(np.nan, nan_obj))
        self.assertFalse(utils.array_equiv(0, one_obj))


class TestDictionaries(TestCase):
def setUp(self):
self.x = {'a': 'A', 'b': 'B'}
Expand Down
202 changes: 175 additions & 27 deletions test/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
import numpy as np
import pandas as pd

from xray import Variable, Dataset, DataArray
from xray import Variable, Dataset, DataArray, indexing
from xray.variable import (Coordinate, as_variable, NumpyArrayAdapter,
PandasIndexAdapter)
PandasIndexAdapter, _as_compatible_data)
from xray.pycompat import PY3

from . import TestCase, source_ndarray

Expand Down Expand Up @@ -36,32 +37,89 @@ def test_attrs(self):
v.attrs['foo'] = 'baz'
self.assertEqual(v.attrs['foo'], 'baz')

def test_0d_data(self):
d = datetime(2000, 1, 1)
for value, dtype in [(0, int),
(np.float32(0.5), np.float32),
('foo', np.str_),
(d, None),
(np.datetime64(d), np.datetime64)]:
def assertIndexedLikeNDArray(self, variable, expected_value0,
                             expected_dtype=None):
    """Assert that a 1-dimensional variable is indexed like a numpy.ndarray.

    ``expected_value0`` is the value expected at position 0. When
    ``expected_dtype`` is given, the dtype of the first element is compared
    against it; otherwise the Python type of the first element is compared
    with the type of ``expected_value0``.
    """
    # selecting one element must yield a 0-dimensional result
    for attr, expected in [('shape', ()), ('ndim', 0), ('size', 1)]:
        self.assertEqual(getattr(variable[0], attr), expected)

    # a copy must compare as both equal and identical
    for compare in (variable.equals, variable.identical):
        self.assertTrue(compare(variable.copy()))

    # the value must agree whether we index the ndarray or the Variable
    self.assertEqual(variable.values[0], expected_value0)
    self.assertEqual(variable[0].values, expected_value0)

    if expected_dtype is not None:
        # the dtype must agree for both access paths
        self.assertEqual(variable.values[0].dtype, expected_dtype)
        self.assertEqual(variable[0].values.dtype, expected_dtype)
        return

    # no dtype requested: check the output type instead of the array dtype
    expected_type = type(expected_value0)
    self.assertEqual(type(variable.values[0]), expected_type)
    self.assertEqual(type(variable[0].values), expected_type)

def test_index_0d_int(self):
    """Integer elements keep their value and dtype when indexed."""
    cases = [(0, np.int_),
             (np.int32(0), np.int32)]
    for value, dtype in cases:
        variable = self.cls(['x'], [value])
        self.assertIndexedLikeNDArray(variable, value, dtype)

def test_index_0d_float(self):
    """Float elements keep their value and dtype when indexed."""
    cases = [(0.5, np.float_),
             (np.float32(0.5), np.float32)]
    for value, dtype in cases:
        variable = self.cls(['x'], [value])
        self.assertIndexedLikeNDArray(variable, value, dtype)

def test_index_0d_string(self):
for value, dtype in [('foo', np.dtype('U3' if PY3 else 'S3')),
(u'foo', np.dtype('U3'))]:
x = self.cls(['x'], [value])
# check array properties
self.assertEqual(x[0].shape, ())
self.assertEqual(x[0].ndim, 0)
self.assertEqual(x[0].size, 1)
# test identity
self.assertTrue(x.equals(x.copy()))
self.assertTrue(x.identical(x.copy()))
# check value is equal for both ndarray and Variable
self.assertEqual(x.values[0], value)
self.assertEqual(x[0].values, value)
# check type or dtype is consistent for both ndarray and Variable
if dtype is None:
# check output type instead of array dtype
self.assertEqual(type(x.values[0]), type(value))
self.assertEqual(type(x[0].values), type(value))
else:
assert np.issubdtype(x.values[0].dtype, dtype), (x.values[0].dtype, dtype)
assert np.issubdtype(x[0].values.dtype, dtype), (x[0].values.dtype, dtype)
self.assertIndexedLikeNDArray(x, value, dtype)

def test_index_0d_datetime(self):
    """Datetime elements are indexed consistently for each input form."""
    stamp = datetime(2000, 1, 1)

    # a list of datetime objects: only the element type is checked
    from_datetime = self.cls(['x'], [stamp])
    self.assertIndexedLikeNDArray(from_datetime, stamp)

    # a list of np.datetime64: datetime64[ns] elements are expected
    from_datetime64 = self.cls(['x'], [np.datetime64(stamp)])
    self.assertIndexedLikeNDArray(
        from_datetime64, np.datetime64(stamp), 'datetime64[ns]')

    # a pandas DatetimeIndex: datetime64[ns] elements are expected
    from_index = self.cls(['x'], pd.DatetimeIndex([stamp]))
    self.assertIndexedLikeNDArray(
        from_index, np.datetime64(stamp), 'datetime64[ns]')

def test_index_0d_object(self):
    """Arbitrary hashable objects are indexed like ndarray elements."""

    class HashableItemWrapper(object):
        """Minimal hashable value object that compares by ``item``."""

        def __init__(self, item):
            self.item = item

        def __eq__(self, other):
            # defer to the other operand instead of raising AttributeError
            # when compared against something that is not a wrapper
            if not isinstance(other, HashableItemWrapper):
                return NotImplemented
            return self.item == other.item

        def __ne__(self, other):
            # Python 2 does not derive != from ==, so define it explicitly
            # to keep the two operators consistent
            result = self.__eq__(other)
            if result is NotImplemented:
                return result
            return not result

        def __hash__(self):
            return hash(self.item)

        def __repr__(self):
            return '%s(item=%r)' % (type(self).__name__, self.item)

    item = HashableItemWrapper((1, 2, 3))
    x = self.cls('x', [item])
    self.assertIndexedLikeNDArray(x, item)

def test_index_and_concat_datetime(self):
    """Indexing out every element and concatenating restores the input."""
    # regression test for #125
    date_range = pd.date_range('2011-09-01', periods=10)
    sources = [date_range, date_range.values, date_range.to_pydatetime()]
    for dates in sources:
        expected = self.cls('t', dates)
        # three ways of selecting position i: integer, slice, list
        by_integer = [expected[i] for i in range(10)]
        by_slice = [expected[i:(i + 1)] for i in range(10)]
        by_list = [expected[[i]] for i in range(10)]
        for times in (by_integer, by_slice, by_list):
            actual = Variable.concat(times, 't')
            self.assertEqual(expected.dtype, actual.dtype)
            self.assertArrayEqual(expected, actual)

def test_0d_time_data(self):
# regression test for #105
Expand Down Expand Up @@ -229,6 +287,39 @@ def test_item(self):
self.assertEqual(v.item(), 0)
self.assertIs(type(v.item()), float)

def test_datetime64_conversion(self):
    # verify that datetime64 is always converted to ns precision with
    # sources preserved
    ns = np.dtype('datetime64[ns]')

    # 0-dimensional scalar input
    scalar = np.datetime64('2000-01-01T00')
    v = Variable([], scalar)
    self.assertEqual(v.dtype, ns)
    self.assertEqual(v.values, scalar)
    self.assertEqual(v.values.dtype, ns)

    # second-precision input: the stored data is not the source array
    seconds = pd.date_range('2000-01-01', periods=3).values.astype(
        'datetime64[s]')
    v = Variable(['t'], seconds)
    self.assertEqual(v.dtype, ns)
    self.assertArrayEqual(v.values, seconds)
    self.assertEqual(v.values.dtype, ns)
    self.assertIsNot(source_ndarray(v.values), seconds)

    # ns-precision input: the stored data is the source array itself
    nanos = pd.date_range('2000-01-01', periods=3).values.copy()
    v = Variable(['t'], nanos)
    self.assertEqual(v.dtype, ns)
    self.assertArrayEqual(v.values, nanos)
    self.assertEqual(v.values.dtype, ns)
    self.assertIs(source_ndarray(v.values), nanos)

def test_0d_str(self):
    """0-dimensional string data keeps a fixed-width string dtype."""
    unicode_var = Variable([], u'foo')
    self.assertEqual(unicode_var.dtype, np.dtype('U3'))
    self.assertEqual(unicode_var.values, 'foo')

    bytes_var = Variable([], np.string_('foo'))
    self.assertEqual(bytes_var.dtype, np.dtype('S3'))
    # the expected byte string is spelled differently on Python 2 and 3
    if PY3:
        expected_bytes = bytes('foo', 'ascii')
    else:
        expected_bytes = 'foo'
    self.assertEqual(bytes_var.values, expected_bytes)

def test_equals_and_identical(self):
d = np.random.rand(10, 3)
d[0, 0] = np.nan
Expand Down Expand Up @@ -463,3 +554,60 @@ def test_data(self):
self.assertIsInstance(x._data, PandasIndexAdapter)
with self.assertRaisesRegexp(TypeError, 'cannot be modified'):
x[:] = 0

def test_avoid_index_dtype_inference(self):
    # verify our work-around for (pandas<0.14):
    # https://github.com/pydata/pandas/issues/6370
    data = pd.date_range('2000-01-01', periods=3).to_pydatetime()
    coord = Coordinate('t', data)
    head = slice(None, 2)
    # the same pair of value checks is run twice in a row
    for _ in range(2):
        self.assertArrayEqual(coord.values[head], data[head])
        self.assertArrayEqual(coord[head].values, data[head])
    # the dtype must stay object for the coordinate and for a slice of it
    self.assertEqual(coord.dtype, object)
    self.assertEqual(coord[head].dtype, object)


class TestAsCompatibleData(TestCase):
    """Tests for ``_as_compatible_data``."""

    def test_unchanged_types(self):
        """Already-wrapped data is returned as the very same object."""
        wrappers = (NumpyArrayAdapter, PandasIndexAdapter,
                    indexing.LazilyIndexedArray)
        for wrap in wrappers:
            candidates = [np.arange(3),
                          pd.date_range('2000-01-01', periods=3),
                          pd.date_range('2000-01-01', periods=3).values]
            for data in candidates:
                wrapped = wrap(data)
                self.assertIs(wrapped, _as_compatible_data(wrapped))

    def test_converted_types(self):
        """Nested lists and DataFrames come back as NumpyArrayAdapter."""
        inputs = [[[0, 1, 2]], pd.DataFrame([[0, 1, 2]])]
        for input_array in inputs:
            actual = _as_compatible_data(input_array)
            self.assertArrayEqual(np.asarray(input_array), actual)
            self.assertEqual(NumpyArrayAdapter, type(actual))
            self.assertEqual(np.dtype(int), actual.dtype)

    def test_datetime(self):
        """Datetime-like inputs get the expected type and dtype."""
        ns = np.dtype('datetime64[ns]')

        # a datetime64 scalar stays a datetime64 scalar
        scalar = np.datetime64('2000-01-01T00')
        actual = _as_compatible_data(scalar)
        self.assertEqual(scalar, actual)
        self.assertEqual(np.datetime64, type(actual))
        self.assertEqual(ns, actual.dtype)

        # an array built without an explicit unit ends up as datetime64[ns]
        coarse = np.array([np.datetime64('2000-01-01T00')])
        actual = _as_compatible_data(coarse)
        self.assertEqual(np.asarray(coarse), actual)
        self.assertEqual(NumpyArrayAdapter, type(actual))
        self.assertEqual(ns, actual.dtype)

        # an array that is already ns keeps its source array
        nanos = np.array([np.datetime64('2000-01-01T00', 'ns')])
        actual = _as_compatible_data(nanos)
        self.assertEqual(np.asarray(nanos), actual)
        self.assertEqual(NumpyArrayAdapter, type(actual))
        self.assertEqual(ns, actual.dtype)
        self.assertIs(nanos, source_ndarray(np.asarray(actual)))

        # a datetime object is stored with object dtype
        py_datetime = pd.Timestamp('2000-01-01T00').to_datetime()
        actual = _as_compatible_data(py_datetime)
        self.assertEqual(np.asarray(py_datetime), actual)
        self.assertEqual(NumpyArrayAdapter, type(actual))
        self.assertEqual(np.dtype('O'), actual.dtype)
4 changes: 2 additions & 2 deletions xray/backends/netCDF4_.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from .netcdf3 import encode_nc3_variable
import xray
from xray.conventions import encode_cf_variable
from xray.utils import FrozenOrderedDict, NDArrayMixin, as_array_or_item
from xray.utils import FrozenOrderedDict, NDArrayMixin
from xray import indexing
from xray.pycompat import iteritems, basestring

Expand All @@ -31,7 +31,7 @@ def __getitem__(self, key):
# work around for netCDF4-python's broken handling of 0-d
# arrays (slicing them always returns a 1-dimensional array):
# https://github.com/Unidata/netcdf4-python/pull/220
data = as_array_or_item(np.asscalar(self.array[key]))
data = np.asscalar(self.array[key])
else:
data = self.array[key]
return data
Expand Down
6 changes: 1 addition & 5 deletions xray/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,12 +212,8 @@ def shape(self):
shape.append(k.size)
return tuple(shape)

@property
def values(self):
    """Result of indexing the wrapped array with the stored key."""
    array, key = self.array, self.key
    return array[key]

def __array__(self, dtype=None):
    """Return the indexed data as a numpy array.

    Parameters
    ----------
    dtype : optional
        Requested dtype, forwarded to ``np.asarray``. ``None`` keeps the
        dtype of the indexed data.
    """
    # forward ``dtype`` instead of a hard-coded None so that a caller's
    # explicit dtype request is honoured
    return np.asarray(self.array[self.key], dtype=dtype)

def __getitem__(self, key):
return type(self)(self.array, self._updated_key(key))
Expand Down
51 changes: 16 additions & 35 deletions xray/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import operator
import warnings
from collections import OrderedDict, Mapping, MutableMapping
from datetime import datetime

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -36,35 +35,6 @@ def __new__(cls, *args, **kwargs):
return Wrapper


def as_safe_array(values, dtype=None):
    """Like np.asarray, but convert all datetime64 arrays to ns precision.

    Parameters
    ----------
    values : array-like
        Data to convert.
    dtype : optional
        Passed through to ``np.asarray``.

    Returns
    -------
    numpy.ndarray
        Datetime64 data is returned with dtype ``datetime64[ns]``; any other
        data is returned exactly as ``np.asarray`` produces it.
    """
    values = np.asarray(values, dtype=dtype)
    if values.dtype.kind == 'M':
        # np.datetime64: copy=False skips the copy when the data is already
        # datetime64[ns], so the source array is preserved in that case
        values = values.astype('datetime64[ns]', copy=False)
    return values


def as_array_or_item(values, dtype=None):
    """Convert ``values`` to a numpy array of the given dtype, or to a single
    item when the input is a datetime or a 0-dimensional object array.
    """
    if isinstance(values, datetime):
        # shortcut because if you try to make a datetime or Timestamp object
        # into an array with the proper dtype, it is liable to be silently
        # converted into an integer instead :(
        return values

    result = as_safe_array(values, dtype=dtype)
    is_0d_object = result.ndim == 0 and result.dtype.kind == 'O'
    if not is_0d_object:
        return result

    # unpack 0d object arrays to be consistent with numpy
    item = result.item()
    if isinstance(item, pd.Timestamp):
        # turn Timestamps back into datetime64 objects
        item = np.datetime64(item, 'ns')
    return item


def squeeze(xray_obj, dimensions, dimension=None):
"""Squeeze the dimensions of an xray object."""
if dimension is None:
Expand Down Expand Up @@ -93,11 +63,22 @@ def array_equiv(arr1, arr2):
arr1, arr2 = np.asarray(arr1), np.asarray(arr2)
if arr1.shape != arr2.shape:
return False
# we could make this faster by not-checking for null values if the dtype
# does not support them, but the logic would get more convoluted.
# using pd.isnull lets us defer the NaN handling to pandas (and unlike
# np.isnan it works on every dtype).
return ((arr1 == arr2) | (pd.isnull(arr1) & pd.isnull(arr2))).all()
if arr1.ndim == 0:
# work around for pd.isnull not working for 0-dimensional object
# arrays: https://github.com/pydata/pandas/pull/7176 (should be fixed
# in pandas 0.14)
# use .item() instead of keeping around 0-dimensional arrays to avoid
# the numpy quirk where object arrays are checked as equal by identity
# (hence NaN in an object array is equal to itself):
arr1 = arr1.item()
arr2 = arr2.item()
return arr1 == arr2 or (arr1 != arr1 and arr2 != arr2)
else:
# we could make this faster by not-checking for null values if the
# dtype does not support them, but the logic would get more convoluted.
# using pd.isnull lets us defer the NaN handling to pandas (and unlike
# np.isnan it works on every dtype).
return ((arr1 == arr2) | (pd.isnull(arr1) & pd.isnull(arr2))).all()


def safe_cast_to_index(array):
Expand Down