Skip to content

Commit

Permalink
fix tests, do not use numpy in randomSplit, no performance gain
Browse files Browse the repository at this point in the history
  • Loading branch information
Davies Liu committed Nov 13, 2014
1 parent f5fdf63 commit 78bf997
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
6 changes: 3 additions & 3 deletions python/pyspark/rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,11 +325,11 @@ def randomSplit(self, weights, seed=None):
:return: split RDDs in a list
>>> rdd = sc.parallelize(range(5), 1)
- >>> rdd1, rdd2 = rdd.randomSplit([2, 3], 101)
+ >>> rdd1, rdd2 = rdd.randomSplit([2, 3], 17)
  >>> rdd1.collect()
- [2, 3]
+ [1, 3]
  >>> rdd2.collect()
- [0, 1, 4]
+ [0, 2, 4]
"""
s = float(sum(weights))
cweights = [0.0]
Expand Down
1 change: 1 addition & 0 deletions python/pyspark/rddsampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ class RDDRangeSampler(RDDSamplerBase):

def __init__(self, lowerBound, upperBound, seed=None):
RDDSamplerBase.__init__(self, False, seed)
+ self._use_numpy = False # no performance gain from numpy
self._lowerBound = lowerBound
self._upperBound = upperBound

Expand Down

0 comments on commit 78bf997

Please sign in to comment.