Skip to content

Commit

Permalink
Merge pull request #149 from shoyer/data-array-constructor
Browse files (browse the repository at this point in the history)
Data array constructor
  • Loading branch information
shoyer committed Jun 11, 2014
2 parents c899265 + 31cbb2f commit 606f388
Show file tree
Hide file tree
Showing 9 changed files with 339 additions and 66 deletions.
4 changes: 2 additions & 2 deletions test/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,10 @@ def assertItemsEqual(self, first, second, msg=None):
return self.assertCountEqual(first, second, msg)

def assertVariableEqual(self, v1, v2):
self.assertTrue(as_variable(v1).equals(v2))
assert as_variable(v1).equals(v2), (v1, v2)

def assertVariableIdentical(self, v1, v2):
self.assertTrue(as_variable(v1).identical(v2))
assert as_variable(v1).identical(v2), (v1, v2)

def assertVariableAllClose(self, v1, v2, rtol=1e-05, atol=1e-08):
self.assertEqual(v1.dimensions, v2.dimensions)
Expand Down
147 changes: 131 additions & 16 deletions test/test_data_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,28 @@
from collections import OrderedDict

from xray import Dataset, DataArray, Variable, align
from xray.data_array import Indexes
from xray.pycompat import iteritems
from . import TestCase, ReturnItem, source_ndarray


class TestIndexes(TestCase):
    """Checks lookup, slicing and repr of an ``Indexes`` object."""

    def test(self):
        """Look up by key and by position, slice, then compare the repr."""
        container = Indexes(['a', 'b', 'c'], [0, 1, 2])

        # Lookup by key and lookup by integer position both give 0.
        by_key = container['a']
        self.assertEqual(by_key, 0)
        by_position = container[0]
        self.assertEqual(by_position, 0)

        # A slice compares equal to an Indexes built from the same prefix,
        # and a full slice compares equal to the original object.
        head = container[:1]
        self.assertEqual(head, Indexes(['a'], [0]))
        everything = container[:]
        self.assertEqual(everything, container)

        shown = repr(container)
        self.assertEqual(shown, "Indexes(['a', 'b', 'c'], [0, 1, 2])")


class TestDataArray(TestCase):
def setUp(self):
self._attrs = {'attr1': 'value1', 'attr2': 2929}
self.attrs = {'attr1': 'value1', 'attr2': 2929}
self.x = np.random.random((10, 20))
self.v = Variable(['x', 'y'], self.x)
self.va = Variable(['x', 'y'], self.x, self._attrs)
self.va = Variable(['x', 'y'], self.x, self.attrs)
self.ds = Dataset({'foo': self.v})
self.dv = self.ds['foo']

Expand All @@ -37,7 +49,7 @@ def test_repr(self):
self.assertEqual(expected, repr(data_array))

def test_properties(self):
self.assertIs(self.dv.dataset, self.ds)
self.assertDatasetIdentical(self.dv.dataset, self.ds)
self.assertEqual(self.dv.name, 'foo')
self.assertVariableEqual(self.dv.variable, self.v)
self.assertArrayEqual(self.dv.values, self.v.values)
Expand All @@ -56,6 +68,107 @@ def test_properties(self):
with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'):
self.ds['foo'].as_index

def test_constructor(self):
# Exercise the DataArray constructor with different combinations of
# data, indexes, dimensions, name, attributes and encoding. Each result
# is compared against the equivalent array extracted from a Dataset.
data = np.random.random((2, 3))

# Data only: the expected array is unnamed (None) with dimensions
# 'dim_0' and 'dim_1'.
actual = DataArray(data)
expected = Dataset({None: (['dim_0', 'dim_1'], data)})[None]
self.assertDataArrayIdentical(expected, actual)

# Indexes given as plain lists, no dimensions: expected dimensions are
# still 'dim_0'/'dim_1', now with coordinate values attached.
actual = DataArray(data, [['a', 'b'], [-1, -2, -3]])
expected = Dataset({None: (['dim_0', 'dim_1'], data),
'dim_0': ('dim_0', ['a', 'b']),
'dim_1': ('dim_1', [-1, -2, -3])})[None]
self.assertDataArrayIdentical(expected, actual)

# Indexes given as named pandas Index objects: the expected dimensions
# take the index names 'x' and 'y'.
actual = DataArray(data, [pd.Index(['a', 'b'], name='x'),
pd.Index([-1, -2, -3], name='y')])
expected = Dataset({None: (['x', 'y'], data),
'x': ('x', ['a', 'b']),
'y': ('y', [-1, -2, -3])})[None]
self.assertDataArrayIdentical(expected, actual)

# The next four cases reuse the 'x'/'y' expectation built just above.
# Plain lists plus explicit dimensions.
indexes = [['a', 'b'], [-1, -2, -3]]
actual = DataArray(data, indexes, ['x', 'y'])
self.assertDataArrayIdentical(expected, actual)

# Index objects named 'A'/'B' plus explicit dimensions 'x'/'y': the
# result is still expected to use 'x' and 'y'.
indexes = [pd.Index(['a', 'b'], name='A'),
pd.Index([-1, -2, -3], name='B')]
actual = DataArray(data, indexes, ['x', 'y'])
self.assertDataArrayIdentical(expected, actual)

# Indexes as a dict keyed by dimension name, with explicit dimensions.
indexes = {'x': ['a', 'b'], 'y': [-1, -2, -3]}
actual = DataArray(data, indexes, ['x', 'y'])
self.assertDataArrayIdentical(expected, actual)

# Indexes as an OrderedDict and no dimensions argument.
# NOTE(review): presumably the dimension order comes from the key
# order -- confirm against the constructor.
indexes = OrderedDict([('x', ['a', 'b']), ('y', [-1, -2, -3])])
actual = DataArray(data, indexes)
self.assertDataArrayIdentical(expected, actual)

# Dict of indexes covering only 'x': the expected array has an 'x'
# coordinate and no explicit 'y' coordinate.
expected = Dataset({None: (['x', 'y'], data),
'x': ('x', ['a', 'b'])})[None]
actual = DataArray(data, {'x': ['a', 'b']}, ['x', 'y'])
self.assertDataArrayIdentical(expected, actual)

# Error cases: each call must raise with a message matching the regex.
# One index in the list but two dimensions / 2-d data.
with self.assertRaisesRegexp(ValueError, 'but data has ndim'):
DataArray(data, [[0, 1, 2]], ['x', 'y'])

# Index key 'x' is not one of the dimensions 'a', 'b'.
with self.assertRaisesRegexp(ValueError, 'not array dimensions'):
DataArray(data, {'x': [0, 1, 2]}, ['a', 'b'])

# Dict of indexes with one entry for 2-d data and no dimensions given.
with self.assertRaisesRegexp(ValueError, 'must have the same length'):
DataArray(data, {'x': [0, 1, 2]})

# Dimensions passed by keyword, without indexes.
actual = DataArray(data, dimensions=['x', 'y'])
expected = Dataset({None: (['x', 'y'], data)})[None]
self.assertDataArrayIdentical(expected, actual)

# Dimensions plus an explicit name.
actual = DataArray(data, dimensions=['x', 'y'], name='foo')
expected = Dataset({'foo': (['x', 'y'], data)})['foo']
self.assertDataArrayIdentical(expected, actual)

# A dimension name of None must raise TypeError.
with self.assertRaisesRegexp(TypeError, 'is not a string'):
DataArray(data, dimensions=['x', None])

# Name only: dimensions are expected to be 'dim_0'/'dim_1'.
actual = DataArray(data, name='foo')
expected = Dataset({'foo': (['dim_0', 'dim_1'], data)})['foo']
self.assertDataArrayIdentical(expected, actual)

# Attributes passed to the constructor.
# NOTE(review): the third tuple element in the Dataset literal appears
# to be the attributes dict -- confirm against Dataset/Variable.
actual = DataArray(data, dimensions=['x', 'y'], attributes={'bar': 2})
expected = Dataset({None: (['x', 'y'], data, {'bar': 2})})[None]
self.assertDataArrayIdentical(expected, actual)

# Encoding passed to the constructor.
# NOTE(review): the fourth tuple element appears to be the encoding
# dict -- confirm against Dataset/Variable.
actual = DataArray(data, dimensions=['x', 'y'], encoding={'bar': 2})
expected = Dataset({None: (['x', 'y'], data, {}, {'bar': 2})})[None]
self.assertDataArrayIdentical(expected, actual)

def test_constructor_from_self_described(self):
# Build DataArrays from objects that already carry their own labels
# (another DataArray and several pandas objects) and compare each
# result with an explicitly constructed DataArray.
data = [[-0.1, 21], [0, 2]]
expected = DataArray(data,
indexes={'x': ['a', 'b'], 'y': [-1, -2]},
dimensions=['x', 'y'], name='foobar',
attributes={'bar': 2}, encoding={'foo': 3})
# Passing a DataArray to the constructor must give an identical array.
actual = DataArray(expected)
self.assertDataArrayIdentical(expected, actual)

# From a DataFrame whose index and columns are named 'x' and 'y'.
# Only equality is asserted here, not identity.
frame = pd.DataFrame(data, index=pd.Index(['a', 'b'], name='x'),
columns=pd.Index([-1, -2], name='y'))
actual = DataArray(frame)
self.assertDataArrayEqual(expected, actual)

# From a Series holding the first row, indexed by 'y'; compared with
# the first row of the expected array.
series = pd.Series(data[0], index=pd.Index([-1, -2], name='y'))
actual = DataArray(series)
self.assertDataArrayEqual(expected[0], actual)

# From a Panel wrapping the frame: the expectation is rebuilt with an
# extra leading dimension 'dim_0', reusing the coordinates of the
# previous expected array.
panel = pd.Panel({0: frame})
actual = DataArray(panel)
expected = DataArray([data], expected.coordinates, ['dim_0', 'x', 'y'])
self.assertDataArrayIdentical(expected, actual)

# From a named pandas Index: the expected array is named 'foo'.
expected = DataArray(['a', 'b'], name='foo')
actual = DataArray(pd.Index(['a', 'b'], name='foo'))
self.assertDataArrayIdentical(expected, actual)

def test_equals_and_identical(self):
da2 = self.dv.copy()
self.assertTrue(self.dv.equals(da2))
Expand Down Expand Up @@ -119,19 +232,21 @@ def test_indexed(self):

def test_labeled(self):
self.ds['x'] = ('x', np.array(list('abcdefghij')))
self.assertDataArrayIdentical(self.dv, self.dv.labeled(x=slice(None)))
self.assertDataArrayIdentical(self.dv[1], self.dv.labeled(x='b'))
self.assertDataArrayIdentical(self.dv[:3], self.dv.labeled(x=slice('c')))
da = self.ds['foo']
self.assertDataArrayIdentical(da, da.labeled(x=slice(None)))
self.assertDataArrayIdentical(da[1], da.labeled(x='b'))
self.assertDataArrayIdentical(da[:3], da.labeled(x=slice('c')))

def test_loc(self):
self.ds['x'] = ('x', np.array(list('abcdefghij')))
self.assertDataArrayIdentical(self.dv[:3], self.dv.loc[:'c'])
self.assertDataArrayIdentical(self.dv[1], self.dv.loc['b'])
self.assertDataArrayIdentical(self.dv[:3], self.dv.loc[['a', 'b', 'c']])
self.assertDataArrayIdentical(self.dv[:3, :4],
self.dv.loc[['a', 'b', 'c'], np.arange(4)])
self.dv.loc['a':'j'] = 0
self.assertTrue(np.all(self.dv.values == 0))
da = self.ds['foo']
self.assertDataArrayIdentical(da[:3], da.loc[:'c'])
self.assertDataArrayIdentical(da[1], da.loc['b'])
self.assertDataArrayIdentical(da[:3], da.loc[['a', 'b', 'c']])
self.assertDataArrayIdentical(da[:3, :4],
da.loc[['a', 'b', 'c'], np.arange(4)])
da.loc['a':'j'] = 0
self.assertTrue(np.all(da.values == 0))

def test_reindex(self):
foo = self.dv
Expand Down Expand Up @@ -271,7 +386,7 @@ def test_inplace_math(self):
self.assertIs(b.variable, v)
self.assertArrayEqual(b.values, x)
self.assertIs(source_ndarray(b.values), x)
self.assertIs(b.dataset, self.ds)
self.assertDatasetIdentical(b.dataset, self.ds)

def test_transpose(self):
self.assertVariableEqual(self.dv.variable.transpose(),
Expand All @@ -294,8 +409,8 @@ def test_reduce_keep_attrs(self):

# Test kept attrs
vm = self.va.mean(keep_attrs=True)
self.assertEqual(len(vm.attrs), len(self._attrs))
self.assertEqual(vm.attrs, self._attrs)
self.assertEqual(len(vm.attrs), len(self.attrs))
self.assertEqual(vm.attrs, self.attrs)

def test_unselect(self):
with self.assertRaisesRegexp(ValueError, 'cannot unselect the name'):
Expand Down
2 changes: 1 addition & 1 deletion test/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ def test_getitem(self):
data = create_test_data()
self.assertIsInstance(data['var1'], DataArray)
self.assertVariableEqual(data['var1'], data.variables['var1'])
self.assertIs(data['var1'].dataset, data)
self.assertDatasetIdentical(data['var1'].dataset, data)

def test_virtual_variables(self):
# access virtual variables
Expand Down
3 changes: 2 additions & 1 deletion test/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@ def test_init(self):
def test_as_index(self):
data = 0.5 * np.arange(10)
v = Coordinate(['time'], data, {'foo': 'bar'})
self.assertTrue(pd.Index(data).equals(v.as_index))
self.assertTrue(pd.Index(data, name='time').identical(v.as_index))

def test_data(self):
x = Coordinate('x', np.arange(3.0))
Expand All @@ -582,6 +582,7 @@ def test_data(self):
self.assertEqual(float, x.dtype)
self.assertArrayEqual(np.arange(3), x)
self.assertEqual(float, x.values.dtype)
self.assertEqual('x', x.name)
# after inspecting x.values, the Coordinate will be saved as an Index
self.assertIsInstance(x._data, PandasIndexAdapter)
with self.assertRaisesRegexp(TypeError, 'cannot be modified'):
Expand Down
2 changes: 1 addition & 1 deletion xray/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def array_repr(arr):
summary.append(repr(arr.values))
else:
summary.append('[%s values with dtype=%s]' % (arr.size, arr.dtype))
if hasattr(arr, 'name'):
if hasattr(arr, 'coordinates'):
if arr.coordinates:
summary.append('Coordinates:')
for k, v in arr.coordinates.items():
Expand Down
Loading

0 comments on commit 606f388

Please sign in to comment.