diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala index 34b9495955134..b0e14cb8296a6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala @@ -464,9 +464,8 @@ final class OnlineLDAOptimizer extends LDAOptimizer { /** * Update alpha based on `gammat`, the inferred topic distributions for documents in the - * current mini-batch. - * Uses Newton's method, - * @see Huang: Maximum Likelihood Estimation of Dirichlet Distribution Parameters + * current mini-batch. Uses Newton-Raphson method. + * @see Section 3.3, Huang: Maximum Likelihood Estimation of Dirichlet Distribution Parameters * (http://jonathan-huang.org/research/dirichlet/dirichlet.pdf) */ private def updateAlpha(gammat: BDM[Double]): Unit = { @@ -478,7 +477,6 @@ final class OnlineLDAOptimizer extends LDAOptimizer { val c = N * trigamma(sum(alpha)) val q = -N * trigamma(alpha) - val b = sum(gradf / q) / (1D / c + sum(1D / q)) val dalpha = -(gradf - b) / q @@ -490,13 +488,13 @@ final class OnlineLDAOptimizer extends LDAOptimizer { } - /** Calculates learning rate rho, which decays as a function of [[iteration]] */ + /** Calculate learning rate rho for the current [[iteration]]. */ private def rho(): Double = { math.pow(getTau0 + this.iteration, -getKappa) } /** - * Get a random matrix to initialize lambda + * Get a random matrix to initialize lambda. */ private def getGammaMatrix(row: Int, col: Int): BDM[Double] = { val randBasis = new RandBasis(new org.apache.commons.math3.random.MersenneTwister(