Permalink
Browse files

Only group elements ten at a time into SequenceFile records in saveAsObjectFile
  • Loading branch information...
1 parent 3d24281 commit 5a7b3702253cf2d1936ba321680208dccec2095a @mateiz mateiz committed Oct 4, 2012
Showing with 1 addition and 1 deletion.
  1. +1 −1 core/src/main/scala/spark/RDD.scala
@@ -256,7 +256,7 @@ abstract class RDD[T: ClassManifest](@transient sc: SparkContext) extends Serial
}
def saveAsObjectFile(path: String) {
- this.glom
+ this.mapPartitions(iter => iter.grouped(10).map(_.toArray))
.map(x => (NullWritable.get(), new BytesWritable(Utils.serialize(x))))
.saveAsSequenceFile(path)
}

0 comments on commit 5a7b370

Please sign in to comment.