Skip to content

Commit

Permalink
replace random split with sliding
Browse files (browse the repository at this point in the history)
  • Loading branch information
hhbyyh committed Feb 10, 2015
1 parent fa408a8 commit 0d0f3ee
Showing 1 changed file with 2 additions and 2 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -32,6 +32,7 @@ import org.apache.spark.mllib.impl.PeriodicGraphCheckpointer
import org.apache.spark.mllib.linalg.{Vector, DenseVector, SparseVector, Matrices}
import org.apache.spark.rdd.RDD
import org.apache.spark.util.Utils
import org.apache.spark.mllib.rdd.RDDFunctions._


/**
Expand Down Expand Up @@ -430,8 +431,7 @@ private[clustering] object LDA {
else if (D / 1000 < 4) 4
else D / 1000
val batchNumber = (D/batchSize + 1).toInt
// todo: performance killer, need to be replaced
private val batches = documents.randomSplit(Array.fill[Double](batchNumber)(1.0))
private val batches = documents.sliding(batchNumber).collect()

// Initialize the variational distribution q(beta|lambda)
var _lambda = getGammaMatrix(k, vocabSize) // K * V
Expand Down

0 comments on commit 0d0f3ee

Please sign in to comment.