Skip to content
Browse files

Fixed bugs

I've fixed the bugs detailed in the diff. One of them had already been
fixed in my local copy of the file, but I forgot to commit that change.
  • Loading branch information...
1 parent 42f8847 commit 1ecc221f841d898d831499042f5bd27f667d2ae1 @edisontung edisontung committed Jan 9, 2012
Showing with 16 additions and 7,500,008 deletions.
  1. +16 −14 core/src/main/scala/spark/RDD.scala
  2. +0 −7,499,994 kmeans_data.txt
View
30 core/src/main/scala/spark/RDD.scala
@@ -96,32 +96,34 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
var total = 0
var multiplier = 3.0
var initialCount = count()
-
+ var maxSelected = 0
+
+ if (initialCount > Integer.MAX_VALUE) {
+ maxSelected = Integer.MAX_VALUE
+ }
+ else {
+ maxSelected = initialCount.toInt
+ }
+
if (num > initialCount) {
- total = Math.min(initialCount, Integer.MAX_VALUE)
- total = total.toInt
- fraction = 1.0
+ total = maxSelected
+ fraction = Math.min(multiplier*(maxSelected+1)/initialCount, 1.0)
}
else if (num < 0) {
- throw(new IllegalArgumentException())
+ throw(new IllegalArgumentException("Negative number of elements requested"))
}
else {
- fraction = Math.min(multiplier*(num+1)/count(), 1.0)
+ fraction = Math.min(multiplier*(num+1)/initialCount, 1.0)
total = num.toInt
}
- var r = new SampledRDD(this, withReplacement, fraction, seed)
- var samples = r.collect()
+ var samples = this.sample(withReplacement, fraction, seed).collect()
while (samples.length < total) {
- r = new SampledRDD(this, withReplacement, fraction, seed)
+ samples = this.sample(withReplacement, fraction, seed).collect()
}
- var arr = new Array[T](total)
-
- for (i <- 0 to total - 1) {
- arr(i) = samples(i)
- }
+ val arr = samples.take(total)
return arr
}
View
7,499,994 kmeans_data.txt
0 additions, 7,499,994 deletions not shown because the diff is too large. Please use a local Git client to view these changes.

0 comments on commit 1ecc221

Please sign in to comment.
Something went wrong with that request. Please try again.