Skip to content

Commit

Permalink
Added from_chunks param to rbind all
Browse files (browse the repository at this point in the history)
  • Loading branch information
tgsmith61591 committed Jan 31, 2017
1 parent 41172c5 commit 73bb5c0
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 11 deletions.
6 changes: 4 additions & 2 deletions skutil/h2o/balance.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def balance(self, X):
# since H2O won't allow us to resample (it's considered rearranging)
# we need to rbind at each point of duplication... this can be pretty
# inefficient, so we might need to get clever about this...
Xb = reorder_h2o_frame(frame, _gen_optimized_chunks(sample_idcs))
Xb = reorder_h2o_frame(frame, _gen_optimized_chunks(sample_idcs), from_chunks=True)
return Xb


Expand Down Expand Up @@ -308,5 +308,7 @@ def balance(self, X):
# since there are no feature_names, we can just slice
# the h2o frame as is, given the indices:
idcs = partitioner.get_indices(self.shuffle)
Xb = frame[idcs, :] if not self.shuffle else reorder_h2o_frame(frame, _gen_optimized_chunks(idcs))
Xb = frame[idcs, :] if not self.shuffle else reorder_h2o_frame(frame,
_gen_optimized_chunks(idcs),
from_chunks=True)
return Xb
5 changes: 0 additions & 5 deletions skutil/h2o/fixes.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,6 @@ def rbind_all(*args):
The rbound H2OFrame
"""
# lazily evaluate type on the h2o side
if isinstance(args, (tuple, list)):
lst = args[0]
if len(lst) == 1: # there's only one element
return lst[0]
return lst[0].rbind(lst[1:])
if len(args) == 1:
return args[0]
return args[0].rbind(args[1:])
12 changes: 8 additions & 4 deletions skutil/h2o/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ def _gen_optimized_chunks(idcs):
return [sorted(chunk) for chunk in chunks]


def reorder_h2o_frame(X, idcs):
def reorder_h2o_frame(X, idcs, from_chunks=False):
"""Currently, H2O does not allow us to reorder
frames. This is a hack to rbind rows together in the
order prescribed.
Expand All @@ -399,6 +399,10 @@ def reorder_h2o_frame(X, idcs):
idcs : iterable
The order of the H2OFrame rows to be returned.
from_chunks : bool, optional (default=False)
Whether the elements in ``idcs`` are optimized chunks
generated by ``_gen_optimized_chunks``.
Returns
-------
Expand All @@ -417,7 +421,7 @@ def reorder_h2o_frame(X, idcs):

for i in idcs:
# if it's a chunk from balancer:
if hasattr(i, '__iter__'): # probably a list of indices
if from_chunks: # probably a list of indices
chunks.append(X[i, :])

# otherwise chunks have not been computed
Expand All @@ -438,10 +442,10 @@ def reorder_h2o_frame(X, idcs):
# append the chunk and reset the list
chunks.append(rows)
chunk = []
last_index = i
last_index = np.inf

# print([type(c) for c in chunks]) # couldn't figure out an issue for a while...
return chunks[0] if len(chunks) == 1 else rbind_all(*chunks)
return rbind_all(*chunks)


def shuffle_h2o_frame(X):
Expand Down

0 comments on commit 73bb5c0

Please sign in to comment.