Permalink
Browse files

Changed the behavior of la.align().

  • Loading branch information...
1 parent d397cce commit 8b4a51ba56e2176017a8a4241c63ee0a6ec89e32 @kwgoodman committed Jun 28, 2010
Showing with 158 additions and 142 deletions.
  1. +2 −2 README.rst
  2. +2 −2 doc/source/intro.rst
  3. +76 −68 la/flarry.py
  4. +78 −70 la/tests/flarry_test.py
View
4 README.rst
@@ -90,9 +90,9 @@ After you have installed ``la``, run the suite of unit tests:
>>> import la
>>> la.test()
<snip>
- Ran 2920 tests in 1.351s
+ Ran 2922 tests in 1.351s
OK
- <nose.result.TextTestResult run=2920 errors=0 failures=0>
+ <nose.result.TextTestResult run=2922 errors=0 failures=0>
The ``la`` package contains a C extension module. The functions in the module
speed up common alignment operations such as adding two unaligned larrys. If
View
4 doc/source/intro.rst
@@ -99,9 +99,9 @@ After you have installed ``la``, run the suite of unit tests:
>>> import la
>>> la.test()
<snip>
- Ran 2920 tests in 1.351s
+ Ran 2922 tests in 1.351s
OK
- <nose.result.TextTestResult run=2920 errors=0 failures=0>
+ <nose.result.TextTestResult run=2922 errors=0 failures=0>
The ``la`` package contains a C extension module. The functions in the module
speed up common alignment operations such as adding two unaligned larrys. If
View
144 la/flarry.py
@@ -10,7 +10,7 @@
# Alignment -----------------------------------------------------------------
-def align(lar1, lar2, join='inner', fill='default', cast=True):
+def align(lar1, lar2, join='inner', cast=True):
"""
Align two larrys using one of five join methods.
@@ -29,18 +29,14 @@ def align(lar1, lar2, join='inner', fill='default', cast=True):
same as the number of dimensions of the two larrys. The first element
in the list is the join method for axis=0, the second element is the
join method for axis=1, and so on.
- fill : fill value, optional
- Some join methods can introduce new rows, columns, etc. to the input
- larrys. The new rows, columns, etc. are filled with the `fill` value.
- By default ('default') the fill value is determined by the function
- la.missing.missing_marker().
cast : bool, optional
- Only float, str, and object dtypes have default fill values (la.nan,
+ Only float, str, and object dtypes have missing value markers (la.nan,
'', and None, respectively). Other dtypes, such as int and bool, do
not have a missing value marker. If `cast` is set to True (default),
then int and bool dtypes, for example, will be cast to float. If cast
is set to False, then a TypeError will be raised for int and bool
- input. If `fill` is not 'default' then `cast` is ignored.
+ input if the join introduces new rows, columns, etc. An inner join
+ will never introduce new rows, columns, etc.
Returns
-------
@@ -53,38 +49,43 @@ def align(lar1, lar2, join='inner', fill='default', cast=True):
--------
Create two larrys:
- >>> lar1 = larry([1, 2, nan], [['a', 'b', 'c']])
- >>> lar2 = larry([1, nan, nan], [['a', 'b', 'dd']])
-
- The default is an inner join:
-
- >>> binaryop(np.add, lar1, lar2)
- >>> label_0
- a
- b
- x
- array([2., NaN])
-
- If one data element is missing in one larry but not in the other, then you
- can replace the missing value with `one_missing` (here 0):
-
- >>> binaryop(np.add, lar1, lar2, one_missing=0)
- >>> label_0
- a
- b
- x
- array([2., 2.])
-
- An outer join with single and double missing values replaced by zero:
-
- >>> binaryop(np.add, lar1, lar2, join='outer', one_missing=0, two_missing=0)
- >>> label_0
- a
- b
- c
- dd
- x
- array([2., 2.0, 0.0, 0.0])
+ >>> y1 = larry([1, 2])
+ >>> y2 = larry([1, 2, 3])
+
+ The default join method is an inner join:
+
+ >>> lar3, lar4 = la.align(y1, y2)
+ >>> lar3
+ label_0
+ 0
+ 1
+ x
+ array([1, 2])
+ >>> lar4
+ label_0
+ 0
+ 1
+ x
+ array([1, 2])
+
+ An outer join adds a missing value (NaN) to lar1, therefore the dtype
+ of lar1 is changed from int to float:
+
+ >>> lar3, lar4 = la.align(y1, y2, join='outer')
+ >>> lar3
+ label_0
+ 0
+ 1
+ 2
+ x
+ array([ 1., 2., NaN])
+ >>> lar4
+ label_0
+ 0
+ 1
+ 2
+ x
+ array([1, 2, 3])
"""
@@ -106,18 +107,8 @@ def align(lar1, lar2, join='inner', fill='default', cast=True):
raise TypeError, "`join` must be a string or a list."
# Find missing markers
- if fill == 'default':
- miss1 = missing_marker(lar1)
- miss2 = missing_marker(lar2)
- if (miss1 == NotImplemented) and cast:
- lar1 = lar1.astype(float)
- miss1 = missing_marker(lar1)
- if (miss2 == NotImplemented) and cast:
- lar2 = lar2.astype(float)
- miss2 = missing_marker(lar2)
- else:
- miss1 = fill
- miss2 = fill
+ miss1 = missing_marker(lar1)
+ miss2 = missing_marker(lar2)
# For loop initialization
label = []
@@ -128,7 +119,8 @@ def align(lar1, lar2, join='inner', fill='default', cast=True):
x1isview = True
x2isview = True
- # Loop: align one axis at a time
+ # Loop: align one axis at a time
+ msg = "`fill` type not compatible with larry dtype"
for ax in range(ndim):
list1 = label1[ax]
list2 = label2[ax]
@@ -159,12 +151,22 @@ def align(lar1, lar2, join='inner', fill='default', cast=True):
index2 = [slice(None)] * ndim
index1[ax] = idx1_miss
index2[ax] = idx2_miss
- try:
- x1[index1] = miss1
+ if len(idx1_miss) > 0:
+ if miss1 == NotImplemented:
+ if cast:
+ x1 = x1.astype(float)
+ miss1 = missing_marker(x1)
+ else:
+ raise TypeError, msg
+ x1[index1] = miss1
+ if len(idx2_miss) > 0:
+ if miss2 == NotImplemented:
+ if cast:
+ x2 = x2.astype(float)
+ miss2 = missing_marker(x2)
+ else:
+ raise TypeError, msg
x2[index2] = miss2
- except TypeError:
- msg = "`fill` type not compatible with larry dtype"
- raise TypeError, msg
x1isview = False
x2isview = False
elif joinax == 'left':
@@ -174,11 +176,14 @@ def align(lar1, lar2, join='inner', fill='default', cast=True):
x2 = x2.take(idx2, ax)
index2 = [slice(None)] * ndim
index2[ax] = idx2_miss
- try:
- x2[index2] = miss2
- except TypeError:
- msg = "`fill` type not compatible with larry dtype"
- raise TypeError, msg
+ if len(idx2_miss) > 0:
+ if miss2 == NotImplemented:
+ if cast:
+ x2 = x2.astype(float)
+ miss2 = missing_marker(x2)
+ else:
+ raise TypeError, msg
+ x2[index2] = miss2
x2isview = False
elif joinax == 'right':
list3 = list(list2)
@@ -187,11 +192,14 @@ def align(lar1, lar2, join='inner', fill='default', cast=True):
x1 = x1.take(idx1, ax)
index1 = [slice(None)] * ndim
index1[ax] = idx1_miss
- try:
- x1[index1] = miss1
- except TypeError:
- msg = "`fill` type not compatible with larry dtype"
- raise TypeError, msg
+ if len(idx1_miss) > 0:
+ if miss1 == NotImplemented:
+ if cast:
+ x1 = x1.astype(float)
+ miss1 = missing_marker(x1)
+ else:
+ raise TypeError, msg
+ x1[index1] = miss1
x1isview = False
else:
raise ValueError, 'join type not recognized'
View
148 la/tests/flarry_test.py
@@ -1,6 +1,7 @@
"Unit tests of larry functions."
import unittest
+import datetime
import numpy as np
nan = np.nan
@@ -110,9 +111,9 @@ def test_1d1(self):
y1 = larry([1, 2])
y2 = larry([1, 2, 3])
a1, a2 = align(y1, y2)
- d1 = larry([1, 2], dtype=float)
- d2 = larry([1, 2], dtype=float)
- msg = "align 1d #1 fail on %s larry"
+ d1 = larry([1, 2])
+ d2 = larry([1, 2])
+ msg = "align 1d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
@@ -121,20 +122,20 @@ def test_1d2(self):
y1 = larry([1, 2])
y2 = larry([1, 2, 3])
a1, a2 = align(y1, y2, join='inner')
- d1 = larry([1, 2], dtype=float)
- d2 = larry([1, 2], dtype=float)
- msg = "align 1d #2 fail on %s larry"
+ d1 = larry([1, 2])
+ d2 = larry([1, 2])
+ msg = "align 1d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
def test_1d3(self):
"align 1d test #3"
y1 = larry([1, 2])
y2 = larry([1, 2, 3])
- a1, a2 = align(y1, y2, join='inner', fill=0)
+ a1, a2 = align(y1, y2, join='inner', cast=False)
d1 = larry([1, 2])
d2 = larry([1, 2])
- msg = "align 1d #3 fail on %s larry"
+ msg = "align 1d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
@@ -144,30 +145,25 @@ def test_1d4(self):
y2 = larry([1, 2, 3])
a1, a2 = align(y1, y2, join='outer')
d1 = larry([1, 2, nan], dtype=float)
- d2 = larry([1, 2, 3], dtype=float)
- msg = "align 1d #4 fail on %s larry"
+ d2 = larry([1, 2, 3])
+ msg = "align 1d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
def test_1d5(self):
"align 1d test #5"
y1 = larry([1, 2])
y2 = larry([1, 2, 3])
- a1, a2 = align(y1, y2, join='outer', fill=0)
- d1 = larry([1, 2, 0])
- d2 = larry([1, 2, 3])
- msg = "align 1d #5 fail on %s larry"
- ale(a1, d1, msg % 'left', original=y1)
- ale(a2, d2, msg % 'right', original=y2)
+ self.failUnlessRaises(TypeError, align, y1, y2, 'outer', False)
def test_1d6(self):
"align 1d test #6"
y1 = larry([1, 2])
y2 = larry([1, 2, 3])
a1, a2 = align(y1, y2, join='left')
- d1 = larry([1, 2], dtype=float)
- d2 = larry([1, 2], dtype=float)
- msg = "align 1d #6 fail on %s larry"
+ d1 = larry([1, 2])
+ d2 = larry([1, 2])
+ msg = "align 1d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
@@ -177,8 +173,8 @@ def test_1d7(self):
y2 = larry([1, 2, 3])
a1, a2 = align(y1, y2, join='right')
d1 = larry([1, 2, nan], dtype=float)
- d2 = larry([1, 2, 3], dtype=float)
- msg = "align 1d #7 fail on %s larry"
+ d2 = larry([1, 2, 3])
+ msg = "align 1d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
@@ -188,8 +184,8 @@ def test_1d8(self):
y2 = larry([1, 2, 3])
a1, a2 = align(y1, y2, join=['right'])
d1 = larry([1, 2, nan], dtype=float)
- d2 = larry([1, 2, 3], dtype=float)
- msg = "align 1d #8 fail on %s larry"
+ d2 = larry([1, 2, 3])
+ msg = "align 1d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
@@ -198,9 +194,9 @@ def test_1d9(self):
y1 = larry([1, 2])
y2 = larry([1, 2], [['a', 'b']])
a1, a2 = align(y1, y2)
- d1 = larry([])
- d2 = larry([])
- msg = "align 1d #9 fail on %s larry"
+ d1 = larry([], dtype=int)
+ d2 = larry([], dtype=int)
+ msg = "align 1d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
@@ -209,20 +205,20 @@ def test_1d10(self):
y1 = larry([1, 2])
y2 = larry([1, 2], [['a', 'b']])
a1, a2 = align(y1, y2, join='inner')
- d1 = larry([])
- d2 = larry([])
- msg = "align 1d #10 fail on %s larry"
+ d1 = larry([], dtype=int)
+ d2 = larry([], dtype=int)
+ msg = "align 1d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
def test_1d11(self):
"align 1d test #11"
y1 = larry([1, 2])
y2 = larry([1, 2], [['a', 'b']])
- a1, a2 = align(y1, y2, join='inner', fill=0)
+ a1, a2 = align(y1, y2, join='inner')
d1 = larry([], dtype=int)
d2 = larry([], dtype=int)
- msg = "align 1d #11 fail on %s larry"
+ msg = "align 1d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
@@ -233,29 +229,24 @@ def test_1d12(self):
a1, a2 = align(y1, y2, join='outer')
d1 = larry([1, 2, nan, nan], [[0, 1, 'a', 'b']], dtype=float)
d2 = larry([nan, nan, 1, 2], [[0, 1, 'a', 'b']], dtype=float)
- msg = "align 1d #12 fail on %s larry"
+ msg = "align 1d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
def test_1d13(self):
"align 1d test #13"
y1 = larry([1, 2])
y2 = larry([1, 2], [['a', 'b']])
- a1, a2 = align(y1, y2, join='outer', fill=0)
- d1 = larry([1, 2, 0, 0], [[0, 1, 'a', 'b']])
- d2 = larry([0, 0, 1, 2], [[0, 1, 'a', 'b']])
- msg = "align 1d #13 fail on %s larry"
- ale(a1, d1, msg % 'left', original=y1)
- ale(a2, d2, msg % 'right', original=y2)
+ self.failUnlessRaises(TypeError, align, y1, y2, 'outer', False)
def test_1d14(self):
"align 1d test #14"
y1 = larry([1, 2])
y2 = larry([1, 2], [['a', 'b']])
a1, a2 = align(y1, y2, join='left')
- d1 = larry([1, 2], dtype=float)
+ d1 = larry([1, 2])
d2 = larry([nan, nan])
- msg = "align 1d #14 fail on %s larry"
+ msg = "align 1d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
@@ -264,9 +255,9 @@ def test_1d15(self):
y1 = larry([1, 2])
y2 = larry([1, 2], [['a', 'b']])
a1, a2 = align(y1, y2, join='right')
- d1 = larry([nan, nan], [['a', 'b']], dtype=float)
- d2 = larry([1, 2], [['a', 'b']], dtype=float)
- msg = "align 1d #15 fail on %s larry"
+ d1 = larry([nan, nan], [['a', 'b']])
+ d2 = larry([1, 2], [['a', 'b']])
+ msg = "align 1d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
@@ -275,18 +266,17 @@ def test_1d16(self):
y1 = larry([1, 2])
y2 = larry([1, 2], [['a', 'b']])
a1, a2 = align(y1, y2, join=['left'])
- d1 = larry([1, 2], dtype=float)
+ d1 = larry([1, 2])
d2 = larry([nan, nan])
- msg = "align 1d #16 fail on %s larry"
+ msg = "align 1d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
def test_1d17(self):
"align 1d test #17"
y1 = larry([1, 2])
y2 = larry([1, 2], [['a', 'b']])
- self.failUnlessRaises(TypeError, align, y1, y2, 'outer', 'default',
- False)
+ self.failUnlessRaises(TypeError, align, y1, y2, 'outer', False)
def test_1d18(self):
"align 1d test #18"
@@ -295,7 +285,30 @@ def test_1d18(self):
a1, a2 = align(y1, y2, cast=False)
d1 = larry([1, 2])
d2 = larry([1, 2])
- msg = "align 1d #18 fail on %s larry"
+ msg = "align 1d fail on %s larry"
+ ale(a1, d1, msg % 'left', original=y1)
+ ale(a2, d2, msg % 'right', original=y2)
+
+ def test_1d19(self):
+ "align 1d test #19"
+ y1 = larry([True, False])
+ y2 = larry([True, False, True])
+ a1, a2 = align(y1, y2, join='outer')
+ d1 = larry([1, 0, nan], dtype=float)
+ d2 = larry([True, False, True])
+ msg = "align 1d fail on %s larry"
+ ale(a1, d1, msg % 'left', original=y1)
+ ale(a2, d2, msg % 'right', original=y2)
+
+ def test_1d20(self):
+ "align 1d test #20"
+ d = datetime.date
+ y1 = larry([d(2011,1,1), d(2011,1,2)])
+ y2 = larry([d(2011,1,3), d(2011,1,4), d(2011,1,5)])
+ a1, a2 = align(y1, y2, join='outer')
+ d1 = larry([d(2011,1,1), d(2011,1,2), None])
+ d2 = larry([d(2011,1,3), d(2011,1,4), d(2011,1,5)])
+ msg = "align 1d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
@@ -307,9 +320,9 @@ def test_2d1(self):
y1 = larry([[1, 2], [3, 4]])
y2 = larry([[1, 2, 5], [3, 4, 6]])
a1, a2 = align(y1, y2)
- d1 = larry([[1, 2], [3, 4]], dtype=float)
- d2 = larry([[1, 2], [3, 4]], dtype=float)
- msg = "align 2d #1 fail on %s larry"
+ d1 = larry([[1, 2], [3, 4]])
+ d2 = larry([[1, 2], [3, 4]])
+ msg = "align 2d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
@@ -318,20 +331,20 @@ def test_2d2(self):
y1 = larry([[1, 2], [3, 4]])
y2 = larry([[1, 2, 5], [3, 4, 6]])
a1, a2 = align(y1, y2, join='inner')
- d1 = larry([[1, 2], [3, 4]], dtype=float)
- d2 = larry([[1, 2], [3, 4]], dtype=float)
- msg = "align 2d #2 fail on %s larry"
+ d1 = larry([[1, 2], [3, 4]])
+ d2 = larry([[1, 2], [3, 4]])
+ msg = "align 2d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
def test_2d3(self):
"align 2d test #3"
y1 = larry([[1, 2], [3, 4]])
y2 = larry([[1, 2, 5], [3, 4, 6]])
- a1, a2 = align(y1, y2, join='inner', fill=0)
+ a1, a2 = align(y1, y2, join='inner', cast=False)
d1 = larry([[1, 2], [3, 4]])
d2 = larry([[1, 2], [3, 4]])
- msg = "align 2d #3 fail on %s larry"
+ msg = "align 2d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
@@ -341,30 +354,25 @@ def test_2d4(self):
y2 = larry([[1, 2, 5], [3, 4, 6]])
a1, a2 = align(y1, y2, join='outer')
d1 = larry([[1, 2, nan], [3, 4, nan]], dtype=float)
- d2 = larry([[1, 2, 5], [3, 4, 6]], dtype=float)
- msg = "align 2d #4 fail on %s larry"
+ d2 = larry([[1, 2, 5], [3, 4, 6]])
+ msg = "align 2d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
def test_2d5(self):
"align 2d test #5"
y1 = larry([[1, 2], [3, 4]])
y2 = larry([[1, 2, 5], [3, 4, 6]])
- a1, a2 = align(y1, y2, join='outer', fill=0)
- d1 = larry([[1, 2, 0], [3, 4, 0]])
- d2 = larry([[1, 2, 5], [3, 4, 6]])
- msg = "align 2d #5 fail on %s larry"
- ale(a1, d1, msg % 'left', original=y1)
- ale(a2, d2, msg % 'right', original=y2)
+ self.failUnlessRaises(TypeError, align, y1, y2, 'outer', False)
def test_2d6(self):
"align 2d test #6"
y1 = larry([[1, 2], [3, 4]])
y2 = larry([[1, 2, 5], [3, 4, 6]])
- a1, a2 = align(y1, y2, join=['inner', 'outer'], fill=0)
- d1 = larry([[1, 2, 0], [3, 4, 0]])
- d2 = larry([[1, 2, 5], [3, 4, 6]])
- msg = "align 2d #6 fail on %s larry"
+ a1, a2 = align(y1, y2, join=['inner', 'outer'])
+ d1 = larry([[1, 2, nan], [3, 4, nan]])
+ d2 = larry([[1, 2, 5], [3, 4, 6]])
+ msg = "align 2d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)
@@ -373,7 +381,7 @@ def test_2d7(self):
y1 = larry([[1, 2], [3, 4]])
y2 = larry([[1, 2, 5], [3, 4, 6]])
j = ['outer', 'inner', 'left']
- self.failUnlessRaises(ValueError, align, y1, y2, j, 'default', False)
+ self.failUnlessRaises(ValueError, align, y1, y2, j, False)
def test_2d8(self):
"align 2d test #8"
@@ -382,7 +390,7 @@ def test_2d8(self):
a1, a2 = align(y1, y2, cast=False)
d1 = larry([[1, 2], [3, 4]])
d2 = larry([[1, 2], [3, 4]])
- msg = "align 2d #8 fail on %s larry"
+ msg = "align 2d fail on %s larry"
ale(a1, d1, msg % 'left', original=y1)
ale(a2, d2, msg % 'right', original=y2)

0 comments on commit 8b4a51b

Please sign in to comment.