diff --git a/.travis.yml b/.travis.yml
index 0a0daae..ebf1611 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,5 +8,5 @@ branches:
   only:
   - master
-script: "travis_wait 30 sbt clean coverage test"
+script: "sbt clean coverage test"
 after_success: "sbt coverageReport coveralls"
diff --git a/build.sbt b/build.sbt
index 05391dd..b910be3 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1,3 +1,5 @@
+organization := "nl.uva.science.ilps"
+
 name := "ScalaRank"
 
 version := "1.0"
@@ -5,14 +7,14 @@ version := "1.0"
 
 scalaVersion := "2.11.8"
 
-libraryDependencies += "org.nd4j" % "nd4j-native-platform" % "0.6.0" % Test
-
 libraryDependencies += "org.nd4j" %% "nd4s" % "0.6.0"
 
 libraryDependencies += "org.deeplearning4j" % "deeplearning4j-core" % "0.6.0"
 
+libraryDependencies += "org.nd4j" % "nd4j-native-platform" % "0.6.0"
+
 libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.1" % "test"
 
 classpathTypes += "maven-plugin"
-
+parallelExecution in Test := false
diff --git a/src/main/scala/scalarank/ranker/LinearRegressionRanker.scala b/src/main/scala/scalarank/ranker/LinearRegressionRanker.scala
index ee57e7a..6d2759b 100644
--- a/src/main/scala/scalarank/ranker/LinearRegressionRanker.scala
+++ b/src/main/scala/scalarank/ranker/LinearRegressionRanker.scala
@@ -30,7 +30,7 @@ import scalarank.datapoint.{Datapoint, Query, Relevance}
  */
 class LinearRegressionRanker[TrainType <: Datapoint with Relevance,RankType <: Datapoint : ClassTag](val features: Int,
                              val seed: Int = 42,
-                             val iterations: Int = 10,
+                             val iterations: Int = 100,
                              val learningRate: Double = 1e-3)
   extends Ranker[TrainType, RankType] {
diff --git a/src/main/scala/scalarank/ranker/RankNetRanker.scala b/src/main/scala/scalarank/ranker/RankNetRanker.scala
index a935fd3..4260fd9 100644
--- a/src/main/scala/scalarank/ranker/RankNetRanker.scala
+++ b/src/main/scala/scalarank/ranker/RankNetRanker.scala
@@ -35,7 +35,7 @@ class RankNetRanker[TrainType <: Datapoint with Relevance,RankType <: Datapoint
                     val σ: Double = 1.0,
                     val hidden: Array[Int] = Array(10),
                     val seed: Int = 42,
-                    val iterations: Int = 10,
+                    val iterations: Int = 20,
                     val learningRate: Double = 5e-5)
   extends Ranker[TrainType, RankType] {
@@ -65,7 +65,7 @@
         .nIn(in)
         .nOut(hidden(h))
         .activation("relu")
-        .weightInit(WeightInit.XAVIER)
+        .weightInit(WeightInit.RELU)
         .build())
       in = hidden(h)
     }
@@ -88,26 +88,11 @@
    */
   override def train(data: Iterable[Query[TrainType]]): Unit = {
-    for(t <- 0 until iterations) {
+    for (t <- 0 until iterations) {
       data.foreach { query =>
-        val datapoints = query.datapoints
-
-        // Iterate over datapoints in this query
-        for (i <- datapoints.indices) {
-
-          // Keep data point x_i fixed
-          val x_i = datapoints(i).features
-          val y_i = datapoints(i).relevance
-          val s_i = network.output(x_i)
-          loss.y_i = y_i
-          loss.s_i = s_i
-
-          // Train on all data points excluding x_i
-          val otherDatapoints = datapoints.zipWithIndex.filter(_._2 != i).map(_._1)
-          val X = toMatrix[TrainType](otherDatapoints)
-          val y = otherDatapoints.map(_.relevance).toNDArray
-          network.fit(X, y)
-        }
+        val X = toMatrix[TrainType](query.datapoints)
+        val y = query.datapoints.map(_.relevance).toNDArray
+        network.fit(X, y)
       }
     }
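Note: for reference alongside the loss rewrite below, `scoreArray` and `gradient` now evaluate the standard per-pair RankNet cost (Burges et al.) in matrix form over all pairs in a query. Writing sigma_ij = σ * (s_i - s_j):

    C_ij = (1 - S_ij) / 2 * sigma_ij + log(1 + exp(-sigma_ij))
    ∂C_ij / ∂s_i = σ * ((1 - S_ij) / 2 - 1 / (1 + exp(sigma_ij)))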
@@ -145,38 +130,28 @@
    */
 private class RankNetLoss(σ: Double = 1.0) extends ILossFunction {
 
-  /**
-    * Score of the current (pairwise) comparison sample x_i
-    */
-  var s_i: INDArray = Nd4j.zeros(1)
-
-  /**
-    * Label of the current (pairwise) comparison sample x_i
-    */
-  var y_i: Double = 0.0
-
   override def computeGradientAndScore(labels: INDArray,
                                        preOutput: INDArray,
                                        activationFn: String,
                                        mask: INDArray,
                                        average: Boolean): Pair[java.lang.Double, INDArray] = {
-    val s_j = output(preOutput, activationFn)
-    val S_ij = Sij(labels)
-    Pair.create(score(scoreArray(s_j, S_ij), average), gradient(s_j, S_ij))
+    val S_var = S(labels)
+    val sigma_var = sigma(output(preOutput, activationFn))
+    Pair.create(score(S_var, sigma_var, average), gradient(S_var, sigma_var))
   }
 
   override def computeGradient(labels: INDArray,
                                preOutput: INDArray,
                                activationFn: String,
                                mask: INDArray): INDArray = {
-    gradient(output(preOutput, activationFn), Sij(labels))
+    gradient(S(labels), sigma(output(preOutput, activationFn)))
   }
 
   override def computeScoreArray(labels: INDArray,
                                  preOutput: INDArray,
                                  activationFn: String,
                                  mask: INDArray): INDArray = {
-    scoreArray(output(preOutput, activationFn), Sij(labels))
+    scoreArray(S(labels), sigma(output(preOutput, activationFn)))
   }
 
   override def computeScore(labels: INDArray,
@@ -184,41 +159,71 @@
                             activationFn: java.lang.String,
                             mask: INDArray,
                             average: Boolean): Double = {
-    score(scoreArray(output(preOutput, activationFn), Sij(labels)), average)
+    score(S(labels), sigma(output(preOutput, activationFn)), average)
+  }
+
+  /**
+    * Computes the gradient for the full ranking
+    *
+    * @param S The S_ij matrix, indicating whether certain elements should be ranked higher or lower
+    * @param sigma The sigma matrix, indicating how scores relate to each other
+    * @return The gradient
+    */
+  private def gradient(S: INDArray, sigma: INDArray): INDArray = {
+    Nd4j.mean(((-S + 1) * 0.5 - sigmoid(-sigma)) * σ, 0).transpose
+  }
+
+  /**
+    * Computes the score array for the full ranking
+    *
+    * @param S The S_ij matrix, indicating whether certain elements should be ranked higher or lower
+    * @param sigma The sigma matrix, indicating how scores relate to each other
+    * @return The score array
+    */
+  private def scoreArray(S: INDArray, sigma: INDArray): INDArray = {
+    Nd4j.mean((-S + 1) * 0.5 * sigma + log(exp(-sigma) + 1), 0)
   }
 
   /**
-    * Computes dC / ds_j, the derivative with respect to s_j, the network's outputs
+    * Computes an aggregate of the score, either by summing or averaging
     *
-    * @param s_j The outputs of the network
-    * @param S_ij The pairwise labels
-    * @return The derivative
+    * @param S The S_ij matrix, indicating whether certain elements should be ranked higher or lower
+    * @param sigma The sigma matrix, indicating how scores relate to each other
+    * @param average Whether to average or sum
+    * @return The score as a single value
     */
-  private def gradient(s_j: INDArray, S_ij: INDArray): INDArray = {
-    -(-sigmoid((-s_j + s_i) * -σ) + (-S_ij + 1) * 0.5) * σ
+  private def score(S: INDArray, sigma: INDArray, average: Boolean): Double = average match {
+    case true => Nd4j.mean(scoreArray(S, sigma))(0)
+    case false => Nd4j.sum(scoreArray(S, sigma))(0)
   }
 
   /**
-    * Computes the score as an average or sum
+    * Computes the matrix S_ij, which indicates whether certain elements should be ranked higher or lower
+    *
+    * S_ij = {
+    *    1.0 if y_i > y_j
+    *    0.0 if y_i = y_j
+    *   -1.0 if y_i < y_j
+    * }
     *
-    * @param scoreArray The array of scores
-    * @param average Whether to average or not
-    * @return The cost as a single numerical score
+    * @param labels The labels
+    * @return The S_ij matrix
     */
-  private def score(scoreArray: INDArray, average: Boolean): Double = average match {
-    case true => Nd4j.mean(scoreArray)(0)
-    case false => Nd4j.sum(scoreArray)(0)
+  private def S(labels: INDArray): INDArray = {
+    val labelMatrix = labels.transpose.mmul(Nd4j.ones(labels.rows, labels.columns)) - Nd4j.ones(labels.columns, labels.rows).mmul(labels)
+    labelMatrix.gt(0) - labelMatrix.lt(0)
   }
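Note: `S` builds the full pairwise comparison matrix in one step: labelMatrix(i, j) = y_i - y_j, and its elementwise sign yields S_ij. For example, for labels y = (0, 1, 0):

    S = |  0  -1   0 |
        |  1   0   1 |
        |  0  -1   0 |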
 
   /**
-    * Computes the score array
+    * Computes the sigma matrix, which indicates how scores relate to each other
     *
-    * @param s_j The output of the network for every j
-    * @param S_ij The label comparison S_ij
-    * @return The cost function array per sample
+    * sigma_ij = σ * (s_i - s_j)
+    *
+    * @param outputs The signal outputs from the network
+    * @return The sigma matrix
     */
-  private def scoreArray(s_j: INDArray, S_ij: INDArray): INDArray = {
-    ((-S_ij - 1) * 0.5 * (-s_j + s_i) * σ) + log(exp((-s_j + s_i) * -σ) + 1)
+  private def sigma(outputs: INDArray): INDArray = {
+    (outputs.transpose.mmul(Nd4j.ones(outputs.rows, outputs.columns)) - Nd4j.ones(outputs.columns, outputs.rows).mmul(outputs)) * σ
   }
 
   /**
@@ -232,17 +237,5 @@
     Nd4j.getExecutioner.execAndReturn(Nd4j.getOpFactory.createTransform(activationFn, preOutput.dup))
   }
 
-  /**
-    * Computes S_ij = {
-    *    1.0 if y_i < y_j
-    *    0.0 if y_i = y_j
-    *   -1.0 if y_i > y_j
-    * }
-    *
-    * @param labels The labels y_j
-    * @return Array with values in {0, -1.0, 1.0}
-    */
-  private def Sij(labels: INDArray): INDArray = labels.gt(y_i) - labels.lt(y_i)
-
 }
diff --git a/src/test/scala/scalarank/TestData.scala b/src/test/scala/scalarank/TestData.scala
index 40f5817..bfb5cfa 100644
--- a/src/test/scala/scalarank/TestData.scala
+++ b/src/test/scala/scalarank/TestData.scala
@@ -1,6 +1,7 @@
 package scalarank
 
 import org.nd4j.linalg.api.ndarray.INDArray
+import org.nd4s.Implicits._
 
 import scalarank.datapoint.{Datapoint, Query, Relevance, SVMRankDatapoint}
 
@@ -30,6 +31,17 @@
     }.toIndexedSeq
   }
 
+  /**
+    * A test data point with a dense feature vector
+    *
+    * @param f The features as an array of doubles
+    * @param r The relevance
+    */
+  class TestDatapoint(f: Array[Double], r: Double) extends Datapoint with Relevance {
+    override val features: INDArray = f.toNDArray
+    override val relevance: Double = r
+  }
+
   /**
     * A datapoint with relevance that does not contain features
     *
diff --git a/src/test/scala/scalarank/ranker/GradientCheck.scala b/src/test/scala/scalarank/ranker/GradientCheck.scala
new file mode 100644
index 0000000..eb6c4b9
--- /dev/null
+++ b/src/test/scala/scalarank/ranker/GradientCheck.scala
@@ -0,0 +1,32 @@
+package scalarank.ranker
+
+import org.nd4j.linalg.api.ndarray.INDArray
+import org.nd4j.linalg.factory.Nd4j
+import org.nd4s.Implicits._
+
+/**
+  * Test trait for checking gradient functions
+  */
+trait GradientCheck {
+
+  /**
+    * Approximates the gradient limit lim h→0 (‖f(x+h) - f(x) - ∇f(x) · h‖ / ‖h‖)
+    * with central differences, once for each step size ε in a decreasing sequence
+    *
+    * @param gradient The gradient (as a vector)
+    * @param x The input at which to check said gradient (as a vector)
+    * @param function The function over which the gradient is computed
+    * @return The approximation errors, one per step size ε
+    */
+  def gradientLimits(gradient: INDArray, x: INDArray, function: INDArray => INDArray): Array[Double] = {
+    Array(1e1, 1, 1e-1, 1e-2).map { ε =>
+      (0 until x.columns).map { i =>
+        val e = Nd4j.zeros(x.columns)
+        e(i) = 1.0
+        val approximateGradient = (function(x + e * ε) - function(x - e * ε)) / (2 * ε)
+        Math.abs(approximateGradient(i) - gradient(i))
+      }.sum
+    }
+  }
+
+}
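Note: `gradientLimits` is a central-difference check: for each step size ε it sums |(f(x + ε·e_i) - f(x - ε·e_i)) / 2ε - ∇f(x)_i| over all coordinates i, and a correct gradient should make these sums shrink as ε shrinks. A minimal standalone sketch of its use (the values and the elementwise cube are illustrative only; a cube is chosen because central differences are exact on quadratics, making the per-coordinate error exactly ε²):

    import org.nd4j.linalg.factory.Nd4j
    import org.nd4s.Implicits._
    import scalarank.ranker.GradientCheck

    object GradientCheckDemo extends GradientCheck {
      def main(args: Array[String]): Unit = {
        val x = Nd4j.create(Array(0.5, -1.5, 2.0))
        val grad = x * x * 3.0                               // d/dx of x^3, elementwise
        val limits = gradientLimits(grad, x, v => v * v * v) // f(x) = x^3, elementwise
        println(limits.mkString(" > "))                      // ≈ 300 > 3 > 0.03 > 0.0003
      }
    }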
diff --git a/src/test/scala/scalarank/ranker/LinearRegressionRankerSpec.scala b/src/test/scala/scalarank/ranker/LinearRegressionRankerSpec.scala
index 9786aeb..6ce4bb4 100644
--- a/src/test/scala/scalarank/ranker/LinearRegressionRankerSpec.scala
+++ b/src/test/scala/scalarank/ranker/LinearRegressionRankerSpec.scala
@@ -4,12 +4,15 @@
 import org.scalatest.FlatSpec
 
 import scalarank.{TestData, metrics}
 import scalarank.datapoint.SVMRankDatapoint
+import scalarank.metrics._
 
 /**
  * Test specification for the Linear Regression ranker
  */
-class LinearRegressionRankerSpec extends FlatSpec {
-
+class LinearRegressionRankerSpec extends RankerSpec {
+  "A LinearRegression ranker" should "report appropriate nDCG results on MQ2008 Fold 1" in {
+    testRanker(new LinearRegressionRanker(featureSize, seed=42), ndcg, "nDCG")
+  }
 }
diff --git a/src/test/scala/scalarank/ranker/OracleRankerSpec.scala b/src/test/scala/scalarank/ranker/OracleRankerSpec.scala
index 094fccb..85f0d94 100644
--- a/src/test/scala/scalarank/ranker/OracleRankerSpec.scala
+++ b/src/test/scala/scalarank/ranker/OracleRankerSpec.scala
@@ -4,11 +4,12 @@
 import org.scalatest.FlatSpec
 
 import scalarank.{TestData, metrics}
 import scalarank.datapoint.{Datapoint, Relevance}
+import scalarank.metrics._
 
 /**
  * Test specification for the Oracle ranker
  */
-class OracleRankerSpec extends FlatSpec {
+class OracleRankerSpec extends RankerSpec {
 
   "An Oracle ranker" should "rank perfectly on our test data" in {
     val oracle = new OracleRanker[Datapoint with Relevance]
@@ -26,4 +27,8 @@
     assert(metrics.ndcg(ranking) == 1.0)
   }
 
+  it should "report appropriate nDCG results on MQ2008 Fold 1" in {
+    testRanker(new OracleRanker(), ndcg, "nDCG")
+  }
+
 }
diff --git a/src/test/scala/scalarank/ranker/RandomRankerSpec.scala b/src/test/scala/scalarank/ranker/RandomRankerSpec.scala
new file mode 100644
index 0000000..947cb14
--- /dev/null
+++ b/src/test/scala/scalarank/ranker/RandomRankerSpec.scala
@@ -0,0 +1,14 @@
+package scalarank.ranker
+
+import scalarank.metrics._
+
+/**
+  * Test specification for the Random ranker
+  */
+class RandomRankerSpec extends RankerSpec {
+
+  "A random ranker" should "report appropriate nDCG results on MQ2008 Fold 1" in {
+    testRanker(new RandomRanker(42), ndcg, "nDCG")
+  }
+
+}
diff --git a/src/test/scala/scalarank/ranker/RankNetRankerSpec.scala b/src/test/scala/scalarank/ranker/RankNetRankerSpec.scala
index c2295f0..7474df7 100644
--- a/src/test/scala/scalarank/ranker/RankNetRankerSpec.scala
+++ b/src/test/scala/scalarank/ranker/RankNetRankerSpec.scala
@@ -3,61 +3,71 @@
 import org.nd4j.linalg.api.ndarray.INDArray
 import org.nd4j.linalg.factory.Nd4j
 import org.nd4s.Implicits._
-import org.scalatest.FlatSpec
+import org.scalatest.{FlatSpec, Matchers}
 
 import scalarank.datapoint.SVMRankDatapoint
+import scalarank.metrics._
 import scalarank.{TestData, metrics}
 
 /**
  * Test specification for the Linear Regression ranker
  */
-class RankNetRankerSpec extends FlatSpec {
+class RankNetRankerSpec extends RankerSpec with GradientCheck with Matchers {
 
-  "A RankNet loss function" should "be close to 0 when correctly predicted" in {
+  "A RankNet ranker" should "report appropriate nDCG results on MQ2008 Fold 1" in {
+    testRanker(new RankNetRanker(featureSize, seed=42), ndcg, "nDCG")
+  }
 
-    // For x_i output 5.0 and set its label as 5.0
-    val s_i = Nd4j.create(Array(5.0))
-    val y_i = 5.0
+  "A RankNet loss function" should "be approximately log(2) when correctly predicted" in {
 
     // Create loss
     val loss = new RankNetLoss()
-    loss.s_i = s_i
-    loss.y_i = y_i
 
-    // For x_j values predict 0.0 and set outputs to 0.0
+    // Single correctly predicted value
    val labels = Nd4j.create(Array(0.0, 0.0, 0.0))
     val outputs = Nd4j.create(Array(0.0, 0.0, 0.0))
 
     // Compute cost
     val cost = loss.computeScore(labels, outputs, "identity", null, true)
-    assert(cost < 0.01)
+    assert(Math.abs(cost - Math.log(2.0)) < 0.0000001)
 
   }
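Note: the tightened assertion follows directly from the loss definition. With all labels equal and all outputs equal, every pair has S_ij = 0 and sigma_ij = 0, so each entry of the cost matrix is

    C_ij = (1 - 0) / 2 * 0 + log(1 + exp(0)) = log 2

and the averaged score is exactly log 2.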
 
-  it should "have a gradient ∇ for which: lim h→0 (‖f(x+h) - f(x) - ∇f(x) · h‖ / ‖h‖) ≈ 0" in {
-
-    // For x_i output 5.0 and set its label as 5.0
-    val s_i = Nd4j.create(Array(5.0))
-    val y_i = 5.0
+  it should "successfully perform the gradient limit check" in {
 
     // Create loss
     val loss = new RankNetLoss()
-    loss.s_i = s_i
-    loss.y_i = y_i
 
     // Set up labels and x sample data
     val labels = Nd4j.create(Array(0.0, 1.0, 0.0, 4.0))
     val x = Nd4j.create(Array(0.1, -2.0, 7.0, 3.4))
 
-    // Set up gradient check
-    val ε = 1e-5
-    val grad = loss.computeGradient(labels, x, "identity", null) + 1 // + 1 for identity activation function
+    // Check gradient
+    val grad = -loss.computeGradient(labels, x, "identity", null)
     def f(x: INDArray): INDArray = loss.computeScoreArray(labels, x, "identity", null)
-    val h = Nd4j.create(Array(1.0, -1.0, 1.3, -2.0)) * ε
+    val limits = gradientLimits(grad, x, f)
+    info(limits.mkString(" > "))
+    limits.sliding(2).foreach { case Array(l1, l2) => assert(l1 > l2) }
+
+  }
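Note: the leading minus compensates for a sign convention rather than being part of the check. `gradient` returns the usual RankNet form ∂C_ij / ∂s_i = σ * ((1 - S_ij) / 2 - sigmoid(-sigma_ij)), averaged over i, while `computeScoreArray` is differentiated here with respect to s_j; since sigma_ij = σ * (s_i - s_j), the two derivatives differ exactly by a sign:

    ∂C_ij / ∂s_j = -∂C_ij / ∂s_i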
 
+  it should "successfully compute both the gradient and cost" in {
+
+    // Create loss
+    val loss = new RankNetLoss()
+
+    // Set up labels and x sample data
+    val labels = Nd4j.create(Array(0.0, 1.0, 0.0, 4.0))
+    val x = Nd4j.create(Array(0.1, -2.0, 7.0, 3.4))
+
+    // Compute the gradient and score
+    val gradient = loss.computeGradient(labels, x, "identity", null)
+    val score = loss.computeScore(labels, x, "identity", null, average=true)
+    val gradientAndScore = loss.computeGradientAndScore(labels, x, "identity", null, average=true)
 
-    // Check limit approximate to 0
-    val lim = Nd4j.norm2(f(x+h) - f(x) - grad * h) / Nd4j.norm2(h)
-    assert(lim < 0.01)
+    // Check computation
+    gradientAndScore.getFirst shouldBe score
+    gradientAndScore.getSecond shouldBe gradient
 
   }
diff --git a/src/test/scala/scalarank/ranker/RankerSpec.scala b/src/test/scala/scalarank/ranker/RankerSpec.scala
index 2e94b3d..982f23b 100644
--- a/src/test/scala/scalarank/ranker/RankerSpec.scala
+++ b/src/test/scala/scalarank/ranker/RankerSpec.scala
@@ -1,6 +1,6 @@
 package scalarank.ranker
 
-import org.scalatest.FreeSpec
+import org.scalatest.FlatSpec
 
 import scala.collection.mutable
 import scala.reflect.ClassTag
@@ -11,35 +11,19 @@
 /**
  * Testing ranker performance
  */
-class RankerSpec extends FreeSpec {
+class RankerSpec extends FlatSpec {
 
   val trainData = TestData.sampleTrainData
   val testData = TestData.sampleTestData
   val featureSize = trainData(0).datapoints(0).features.length()
 
-  "Testing Oracle ranker performance (nDCG)" in {
-    testRanker(new OracleRanker(), ndcg, "nDCG")
-  }
-
-  "Testing Random ranker performance (nDCG)" in {
-    testRanker(new RandomRanker(42), ndcg, "nDCG")
-  }
-
-  "Testing Linear Regression ranker performance (nDCG)" in {
-    testRanker(new LinearRegressionRanker(featureSize, seed=42), ndcg, "nDCG")
-  }
-
-  "Testing RankNet ranker performance (nDCG)" in {
-    testRanker(new RankNetRanker(featureSize, seed=42), ndcg, "nDCG")
-  }
-
   /**
    * Tests a ranker by training it on our training set and testing it on our test set
    *
    * @param ranker The ranker to train and evaluate
    * @param metric The metric to score by
    */
-  def testRanker(ranker: Ranker[SVMRankDatapoint, SVMRankDatapoint],
+  protected def testRanker(ranker: Ranker[SVMRankDatapoint, SVMRankDatapoint],
                  metric: Seq[SVMRankDatapoint] => Double,
                  metricName: String = ""): Unit = {
     ranker.train(trainData)
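Note: with the performance tests moved out of RankerSpec, each ranker now gets its own spec that extends RankerSpec and calls the protected `testRanker` helper. A minimal sketch of the pattern for a future ranker (`MyRanker` is a hypothetical placeholder, not part of this patch):

    package scalarank.ranker

    import scalarank.metrics._

    class MyRankerSpec extends RankerSpec {

      "My ranker" should "report appropriate nDCG results on MQ2008 Fold 1" in {
        testRanker(new MyRanker(featureSize, seed = 42), ndcg, "nDCG")
      }

    }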