Skip to content

Commit

Permalink
Doc formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
freeman-lab committed Oct 29, 2014
1 parent ea22ec8 commit 1472ec5
Showing 1 changed file with 19 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,10 @@ import org.apache.spark.util.Utils

/**
* :: DeveloperApi ::
*
* StreamingKMeansModel extends MLlib's KMeansModel for streaming
* algorithms, so it can keep track of the number of points assigned
* to each cluster, and also update the model by doing a single iteration
* of the standard KMeans algorithm.
* of the standard k-means algorithm.
*
* The update algorithm uses the "mini-batch" KMeans rule,
* generalized to incorporate forgetfulness (i.e. decay).
Expand All @@ -63,22 +62,13 @@ import org.apache.spark.util.Utils
* if 'batches', behavior will be independent of the number of points per batch;
* if 'points', the expected number of points per batch must be specified.
*
* Use a builder pattern to construct a streaming KMeans analysis
* in an application, like:
*
* val model = new StreamingKMeans()
* .setDecayFactor(0.5)
* .setK(3)
* .setRandomCenters(5)
* .trainOn(DStream)
*
*/
@DeveloperApi
class StreamingKMeansModel(
override val clusterCenters: Array[Vector],
val clusterCounts: Array[Long]) extends KMeansModel(clusterCenters) with Logging {

// do a sequential KMeans update on a batch of data
/** Perform a k-means update on a batch of data. */
def update(data: RDD[Vector], a: Double, units: String): StreamingKMeansModel = {

val centers = clusterCenters
Expand Down Expand Up @@ -125,7 +115,22 @@ class StreamingKMeansModel(
}

}

/**
* :: DeveloperApi ::
* StreamingKMeans provides methods for configuring a
* streaming k-means analysis, training the model on streaming data,
* and using the model to make predictions on streaming data.
* See StreamingKMeansModel for details on the algorithm and update rules.
*
* Use a builder pattern to construct a streaming k-means analysis
* in an application, like:
*
* val model = new StreamingKMeans()
* .setDecayFactor(0.5)
* .setK(3)
* .setRandomCenters(5)
* .trainOn(DStream)
*/
@DeveloperApi
class StreamingKMeans(
var k: Int,
Expand Down Expand Up @@ -171,7 +176,7 @@ class StreamingKMeans(
this
}

/** Specify initial explicitly directly. */
/** Specify initial centers directly. */
def setInitialCenters(initialCenters: Array[Vector]): this.type = {
val clusterCounts = Array.fill(this.k)(0).map(_.toLong)
this.model = new StreamingKMeansModel(initialCenters, clusterCounts)
Expand Down

0 comments on commit 1472ec5

Please sign in to comment.