diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 0e8920281e842..50535d2711708 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -325,11 +325,11 @@ def randomSplit(self, weights, seed=None): :return: split RDDs in a list >>> rdd = sc.parallelize(range(5), 1) - >>> rdd1, rdd2 = rdd.randomSplit([2, 3], 101) + >>> rdd1, rdd2 = rdd.randomSplit([2, 3], 17) >>> rdd1.collect() - [2, 3] + [1, 3] >>> rdd2.collect() - [0, 1, 4] + [0, 2, 4] """ s = float(sum(weights)) cweights = [0.0] diff --git a/python/pyspark/rddsampler.py b/python/pyspark/rddsampler.py index 4365640040116..558dcfd12d46f 100644 --- a/python/pyspark/rddsampler.py +++ b/python/pyspark/rddsampler.py @@ -119,6 +119,7 @@ class RDDRangeSampler(RDDSamplerBase): def __init__(self, lowerBound, upperBound, seed=None): RDDSamplerBase.__init__(self, False, seed) + self._use_numpy = False # no performance gain from numpy self._lowerBound = lowerBound self._upperBound = upperBound