This repository has been archived by the owner on Jun 29, 2020. It is now read-only.

Commit 472b958: Merge 7efd8b6 into 6c2945a
rjagerman committed Dec 9, 2016 (2 parents: 6c2945a + 7efd8b6)
Showing 11 changed files with 174 additions and 119 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -8,5 +8,5 @@ branches:
only:
- master

script: "travis_wait 30 sbt clean coverage test"
script: "sbt clean coverage test"
after_success: "sbt coverageReport coveralls"
8 changes: 5 additions & 3 deletions build.sbt
@@ -1,18 +1,20 @@
organization := "nl.uva.science.ilps"

name := "ScalaRank"

version := "1.0"

scalaVersion := "2.11.8"


libraryDependencies += "org.nd4j" % "nd4j-native-platform" % "0.6.0" % Test

libraryDependencies += "org.nd4j" %% "nd4s" % "0.6.0"

libraryDependencies += "org.deeplearning4j" % "deeplearning4j-core" % "0.6.0"

libraryDependencies += "org.nd4j" % "nd4j-native-platform" % "0.6.0"

libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.1" % "test"

classpathTypes += "maven-plugin"


parallelExecution in Test := false
src/main/scala/scalarank/ranker/LinearRegressionRanker.scala
@@ -30,7 +30,7 @@ import scalarank.datapoint.{Datapoint, Query, Relevance}
*/
class LinearRegressionRanker[TrainType <: Datapoint with Relevance,RankType <: Datapoint : ClassTag](val features: Int,
val seed: Int = 42,
val iterations: Int = 10,
val iterations: Int = 100,
val learningRate: Double = 1e-3)
extends Ranker[TrainType, RankType] {

129 changes: 61 additions & 68 deletions src/main/scala/scalarank/ranker/RankNetRanker.scala
@@ -35,7 +35,7 @@ class RankNetRanker[TrainType <: Datapoint with Relevance,RankType <: Datapoint
val σ: Double = 1.0,
val hidden: Array[Int] = Array(10),
val seed: Int = 42,
val iterations: Int = 10,
val iterations: Int = 20,
val learningRate: Double = 5e-5)
extends Ranker[TrainType, RankType] {

@@ -65,7 +65,7 @@ class RankNetRanker[TrainType <: Datapoint with Relevance,RankType <: Datapoint
.nIn(in)
.nOut(hidden(h))
.activation("relu")
.weightInit(WeightInit.XAVIER)
.weightInit(WeightInit.RELU)
.build())
in = hidden(h)
}
@@ -88,26 +88,11 @@ class RankNetRanker[TrainType <: Datapoint with Relevance,RankType <: Datapoint
*/
override def train(data: Iterable[Query[TrainType]]): Unit = {

for(t <- 0 until iterations) {
for (t <- 0 until iterations) {
data.foreach { query =>
val datapoints = query.datapoints

// Iterate over datapoints in this query
for (i <- datapoints.indices) {

// Keep data point x_i fixed
val x_i = datapoints(i).features
val y_i = datapoints(i).relevance
val s_i = network.output(x_i)
loss.y_i = y_i
loss.s_i = s_i

// Train on all data points excluding x_i
val otherDatapoints = datapoints.zipWithIndex.filter(_._2 != i).map(_._1)
val X = toMatrix[TrainType](otherDatapoints)
val y = otherDatapoints.map(_.relevance).toNDArray
network.fit(X, y)
}
val X = toMatrix[TrainType](query.datapoints)
val y = query.datapoints.map(_.relevance).toNDArray
network.fit(X, y)
}
}
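The per-pair inner loop is gone: each query is now fed to the network as one batch, and the pairwise bookkeeping moves into RankNetLoss below. A sketch of the shape contract this relies on (the shapes are my reading of the nd4s conventions used here, not something the commit states):

// For one query with n datapoints and d features:
//   X = toMatrix(query.datapoints)                   // n×d, one feature row per datapoint
//   y = query.datapoints.map(_.relevance).toNDArray  // 1×n row vector of labels
// network.fit(X, y) then lets RankNetLoss build the n×n pairwise
// matrices S (label comparisons) and sigma (scaled score differences).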

@@ -145,80 +130,100 @@ class RankNetRanker[TrainType <: Datapoint with Relevance,RankType <: Datapoint
*/
private class RankNetLoss(σ: Double = 1.0) extends ILossFunction {

/**
* Score of the current (pairwise) comparison sample x_i
*/
var s_i: INDArray = Nd4j.zeros(1)

/**
* Label of the current (pairwise) comparison sample x_i
*/
var y_i: Double = 0.0

override def computeGradientAndScore(labels: INDArray,
preOutput: INDArray,
activationFn: String,
mask: INDArray,
average: Boolean): Pair[java.lang.Double, INDArray] = {
val s_j = output(preOutput, activationFn)
val S_ij = Sij(labels)
Pair.create(score(scoreArray(s_j, S_ij), average), gradient(s_j, S_ij))
val S_var = S(labels)
val sigma_var = sigma(output(preOutput, activationFn))
Pair.create(score(S_var, sigma_var, average), gradient(S_var, sigma_var))
}

override def computeGradient(labels: INDArray,
preOutput: INDArray,
activationFn: String,
mask: INDArray): INDArray = {
gradient(output(preOutput, activationFn), Sij(labels))
gradient(S(labels), sigma(output(preOutput, activationFn)))
}

override def computeScoreArray(labels: INDArray,
preOutput: INDArray,
activationFn: String,
mask: INDArray): INDArray = {
scoreArray(output(preOutput, activationFn), Sij(labels))
scoreArray(S(labels), sigma(output(preOutput, activationFn)))
}

override def computeScore(labels: INDArray,
preOutput: INDArray,
activationFn: java.lang.String,
mask: INDArray,
average: Boolean): Double = {
score(scoreArray(output(preOutput, activationFn), Sij(labels)), average)
score(S(labels), sigma(output(preOutput, activationFn)), average)
}

/**
* Computes the gradient for the full ranking
*
* @param S The S_ij matrix, indicating whether certain elements should be ranked higher or lower
* @param sigma The sigma matrix, indicating how scores relate to each other
* @return The gradient
*/
private def gradient(S: INDArray, sigma: INDArray): INDArray = {
Nd4j.mean(((-S + 1)*0.5 - sigmoid(-sigma)) * σ, 0).transpose
}
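For reference, these matrix operations vectorize the standard pairwise RankNet cost and gradient (Burges' formulation; σ is the shape parameter of this loss, s_i the network score of item i, and S_ij the label comparison defined below):

C_{ij} = \frac{1 - S_{ij}}{2}\,\sigma\,(s_i - s_j) + \log\left(1 + e^{-\sigma (s_i - s_j)}\right)

\frac{\partial C_{ij}}{\partial s_i} = \sigma\left(\frac{1 - S_{ij}}{2} - \frac{1}{1 + e^{\sigma (s_i - s_j)}}\right)

gradient averages the second expression over dimension 0 (using sigmoid(-σ(s_i - s_j)) = 1/(1 + e^{σ(s_i - s_j)})), and scoreArray below averages C_{ij} the same way.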

/**
* Computes the score for the full ranking
*
* @param S The S_ij matrix, indicating whether certain elements should be ranked higher or lower
* @param sigma The sigma matrix, indicating how scores relate to each other
* @return The score array
*/
private def scoreArray(S: INDArray, sigma: INDArray): INDArray = {
Nd4j.mean((-S + 1) * 0.5 * sigma + log(exp(-sigma) + 1), 0)
}

/**
* Computes dC / ds_j, the derivative with respect to s_j, the network's outputs
* Computes an aggregate of the score array, either summed or averaged
*
* @param s_j The outputs of the network
* @param S_ij The pairwise labels
* @return The derivative
* @param S The S_ij matrix, indicating whether certain elements should be ranked higher or lower
* @param sigma The sigma matrix, indicating how scores relate to each other
* @param average Whether to average or sum
* @return The score as a single value
*/
private def gradient(s_j: INDArray, S_ij: INDArray): INDArray = {
-(-sigmoid((-s_j + s_i) * -σ) + (-S_ij + 1) * 0.5) * σ
private def score(S: INDArray, sigma: INDArray, average: Boolean): Double = average match {
case true => Nd4j.mean(scoreArray(S, sigma))(0)
case false => Nd4j.sum(scoreArray(S, sigma))(0)
}

/**
* Computes the score as an average or sum
* Computes the matrix S_ij, which indicates whether certain elements should be ranked higher or lower
*
* S_ij = {
* 1.0 if y_i > y_j
* 0.0 if y_i = y_j
* -1.0 if y_i < y_j
* }
*
* @param scoreArray The array of scores
* @param average Whether to average or not
* @return The cost as a single numerical score
* @param labels The labels
* @return The S_ij matrix
*/
private def score(scoreArray: INDArray, average: Boolean): Double = average match {
case true => Nd4j.mean(scoreArray)(0)
case false => Nd4j.sum(scoreArray)(0)
private def S(labels: INDArray): INDArray = {
val labelMatrix = labels.transpose.mmul(Nd4j.ones(labels.rows, labels.columns)) - Nd4j.ones(labels.columns, labels.rows).mmul(labels)
labelMatrix.gt(0) - labelMatrix.lt(0)
}
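A quick illustration (hypothetical values; assumes labels arrive as a 1×n row vector, as elsewhere in this class). For y = [2, 1, 0], y_i − y_j is positive exactly above the diagonal, so:

S(Array(2.0, 1.0, 0.0).toNDArray)
// => [[ 0,  1,  1],
//     [-1,  0,  1],
//     [-1, -1,  0]]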

/**
* Computes the score array
* Computes the sigma matrix, which indicates how scores relate to each other
*
* @param s_j The output of the network for every j
* @param S_ij The label comparison S_ij
* @return The cost function array per sample
* sigma_ij = σ * (s_i - s_j)
*
* @param outputs The signal outputs from the network
* @return The sigma matrix
*/
private def scoreArray(s_j: INDArray, S_ij: INDArray): INDArray = {
((-S_ij - 1) * 0.5 * (-s_j + s_i) * σ) + log(exp((-s_j + s_i) * -σ) + 1)
private def sigma(outputs: INDArray): INDArray = {
(outputs.transpose.mmul(Nd4j.ones(outputs.rows, outputs.columns)) - Nd4j.ones(outputs.columns, outputs.rows).mmul(outputs)) * σ
}

/**
@@ -232,17 +237,5 @@ private class RankNetLoss(σ: Double = 1.0) extends ILossFunction {
Nd4j.getExecutioner.execAndReturn(Nd4j.getOpFactory.createTransform(activationFn, preOutput.dup))
}

/**
* Computes S_ij = {
* 1.0 if y_i < y_j
* 0.0 if y_i = y_j
* -1.0 if y_i > y_j
* }
*
* @param labels The labels y_j
* @return Array with values in {0, -1.0, 1.0}
*/
private def Sij(labels: INDArray): INDArray = labels.gt(y_i) - labels.lt(y_i)

}

12 changes: 12 additions & 0 deletions src/test/scala/scalarank/TestData.scala
@@ -1,6 +1,7 @@
package scalarank

import org.nd4j.linalg.api.ndarray.INDArray
import org.nd4s.Implicits._

import scalarank.datapoint.{Datapoint, Query, Relevance, SVMRankDatapoint}

@@ -30,6 +31,17 @@ object TestData {
}.toIndexedSeq
}

/**
* A test data point with a dense feature vector
*
* @param f The features as an array of doubles
* @param r The relevance
*/
class TestDatapoint(f: Array[Double], r: Double) extends Datapoint with Relevance {
override val features: INDArray = f.toNDArray
override val relevance: Double = r
}
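A hypothetical usage (values are illustrative):

// Dense 3-feature datapoint with relevance 2.0
val dp = new TestDatapoint(Array(0.4, 1.2, 0.0), 2.0)
assert(dp.features.columns == 3 && dp.relevance == 2.0)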

/**
* A datapoint with relevance that does not contain features
*
32 changes: 32 additions & 0 deletions src/test/scala/scalarank/ranker/GradientCheck.scala
@@ -0,0 +1,32 @@
package scalarank.ranker

import org.nd4j.linalg.api.ndarray.INDArray
import org.nd4j.linalg.factory.Nd4j
import org.nd4s.Implicits._

/**
* Test trait for checking gradient functions
*/
trait GradientCheck {

/**
* Checks a gradient against central finite differences: for each step size
* ε ∈ {10, 1, 0.1, 0.01}, sums over coordinates i the absolute error
* |(f(x + ε·e_i) − f(x − ε·e_i))_i / (2ε) − ∇f(x)_i|. The sums should
* shrink toward 0 as ε → 0 when the gradient is correct.
*
* @param gradient The gradient (as a vector)
* @param x The input at which the gradient was computed (as a vector)
* @param function The function whose gradient is being checked
* @return The summed absolute errors, one per step size ε
*/
def gradientLimits(gradient: INDArray, x: INDArray, function: INDArray => INDArray): Array[Double] = {
val rand = Nd4j.randn(x.rows, x.columns)
Array(1e1, 1, 1e-1, 1e-2).map { ε =>
(0 until x.columns).map { i =>
val e = Nd4j.zeros(x.columns)
e(i) = 1.0
val approximateGradient = (function(x + e * ε) - function(x - e * ε)) / (2*ε)
Math.abs(approximateGradient(i) - gradient(i))
}.sum
}
}

}
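A hypothetical spec exercising this trait (the class and function here are illustrative, not part of this commit). For the elementwise square, the central difference is exact in real arithmetic, so every returned error should be near machine precision:

import org.nd4j.linalg.factory.Nd4j
import org.nd4s.Implicits._
import org.scalatest.FlatSpec

class SquareGradientSpec extends FlatSpec with GradientCheck {

  "gradientLimits" should "vanish for the elementwise square" in {
    val x = Nd4j.randn(1, 5)
    // analytic gradient of v => v * v (elementwise) is 2x
    val limits = gradientLimits(x * 2, x, v => v * v)
    assert(limits.forall(_ < 1e-4))
  }
}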
src/test/scala/scalarank/ranker/LinearRegressionRankerSpec.scala
@@ -4,12 +4,15 @@ import org.scalatest.FlatSpec

import scalarank.{TestData, metrics}
import scalarank.datapoint.SVMRankDatapoint
import scalarank.metrics._

/**
* Test specification for the Linear Regression ranker
*/
class LinearRegressionRankerSpec extends FlatSpec {

class LinearRegressionRankerSpec extends RankerSpec {

"A LinearRegression Ranker" should "report appropriate nDCG results on MQ2008 Fold 1" in {
testRanker(new LinearRegressionRanker(featureSize, seed=42), ndcg, "nDCG")
}

}
7 changes: 6 additions & 1 deletion src/test/scala/scalarank/ranker/OracleRankerSpec.scala
@@ -4,11 +4,12 @@ import org.scalatest.FlatSpec

import scalarank.{TestData, metrics}
import scalarank.datapoint.{Datapoint, Relevance}
import scalarank.metrics._

/**
* Test specification for the Oracle ranker
*/
class OracleRankerSpec extends FlatSpec {
class OracleRankerSpec extends RankerSpec {

"An Oracle ranker" should "rank perfectly on our test data" in {
val oracle = new OracleRanker[Datapoint with Relevance]
@@ -26,4 +27,8 @@ class OracleRankerSpec extends FlatSpec {
assert(metrics.ndcg(ranking) == 1.0)
}

it should "report appropriate nDCG results on MQ2008 Fold 1" in {
testRanker(new OracleRanker(), ndcg, "nDCG")
}

}
14 changes: 14 additions & 0 deletions src/test/scala/scalarank/ranker/RandomRankerSpec.scala
@@ -0,0 +1,14 @@
package scalarank.ranker

import scalarank.metrics._

/**
* Test specification for the Random ranker
*/
class RandomRankerSpec extends RankerSpec {

"A random ranker" should "report appropriate nDCG results on MQ2008 Fold 1" in {
testRanker(new RandomRanker(42), ndcg, "nDCG")
}

}
