Skip to content

Commit

Permalink
ENH: Series.align method to leverage faster joins, faster binary ops
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Sep 30, 2011
1 parent 6bac095 commit 56e0c9f
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 23 deletions.
1 change: 1 addition & 0 deletions bench/zoo_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ def sample(values, k):

lx = larry(np.random.randn(100000), [list(indices)])
ly = larry(np.random.randn(subsample_size), [list(y.index)])

40 changes: 24 additions & 16 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,21 +248,24 @@ def union(self, other):
if len(self) == 0:
return _ensure_index(other)

indexer = self.get_indexer(other)
indexer = (indexer == -1).nonzero()[0]

if len(indexer) > 0:
other_diff = other.values.take(indexer)
result = list(self) + list(other_diff)
if self.is_monotonic and other.is_monotonic:
result = lib.outer_join_indexer_object(self, other)[0]
else:
# contained in
result = list(self)
indexer = self.get_indexer(other)
indexer = (indexer == -1).nonzero()[0]

# timsort wins
try:
result.sort()
except Exception:
pass
if len(indexer) > 0:
other_diff = other.values.take(indexer)
result = list(self) + list(other_diff)
else:
# contained in
result = list(self)

# timsort wins
try:
result.sort()
except Exception:
pass

# for subclasses
return self._wrap_union_result(other, result)
Expand Down Expand Up @@ -292,9 +295,14 @@ def intersection(self, other):
if self.equals(other):
return self

indexer = self.get_indexer(other)
indexer = indexer.take((indexer != -1).nonzero()[0])
return self.take(indexer)
other = _ensure_index(other)

if self.is_monotonic and other.is_monotonic:
return Index(lib.inner_join_indexer_object(self, other)[0])
else:
indexer = self.get_indexer(other)
indexer = indexer.take((indexer != -1).nonzero()[0])
return self.take(indexer)

def diff(self, other):
"""
Expand Down
49 changes: 42 additions & 7 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,10 @@ def wrapper(self, other):
if self.index.equals(other.index):
return Series(op(self.values, other.values), index=self.index)

new_index = self.index + other.index
this_reindexed = self.reindex(new_index)
other_reindexed = other.reindex(new_index)
this_reindexed, other_reindexed = self.align(other, join='outer',
copy=False)
arr = op(this_reindexed.values, other_reindexed.values)
return Series(arr, index=new_index)
return Series(arr, index=this_reindexed.index)
elif isinstance(other, DataFrame):
return NotImplemented
else:
Expand Down Expand Up @@ -955,9 +954,8 @@ def _binop(self, other, func, fill_value=None):
this = self

if not self.index.equals(other.index):
new_index = self.index + other.index
this = self.reindex(new_index)
other = other.reindex(new_index)
this, other = self.align(other, join='outer')
new_index = this.index

this_vals = this.values
other_vals = other.values
Expand Down Expand Up @@ -1265,6 +1263,43 @@ def apply(self, func):
except Exception:
return Series([func(x) for x in self], index=self.index)

def align(self, other, join='outer', copy=True):
    """
    Align two Series objects with the specified join method

    Parameters
    ----------
    other : Series
    join : {'outer', 'inner', 'left', 'right'}, default 'outer'
    copy : boolean, default True
        Only consulted for a side whose indexer comes back as None from
        the index join: that side's values are copied when True and
        reused as-is when False

    Returns
    -------
    (left, right) : (Series, Series)
        Aligned Series, both carrying the joined index
    """
    def _conform(values, indexer):
        # No indexer for this side: carry the values over unchanged
        # (presumably this side already matches the joined index --
        # confirm against Index.join), honoring the copy flag.
        if indexer is None:
            return values.copy() if copy else values
        # Otherwise rearrange the values according to the join indexer.
        return common.take_1d(values, indexer)

    join_index, lidx, ridx = self.index.join(other.index, how=join,
                                             return_indexers=True)

    left = Series(_conform(self.values, lidx), join_index)
    right = Series(_conform(other.values, ridx), join_index)
    return left, right

def reindex(self, index=None, method=None, copy=True):
"""Conform Series to new index with optional filling logic, placing
NA/NaN in locations having no value in the previous index. A new object
Expand Down

0 comments on commit 56e0c9f

Please sign in to comment.