Skip to content

Commit

Permalink
speed-up the ndcg calc on eval (#1215)
Browse files Browse the repository at this point in the history
  • Loading branch information
shuttie committed Jan 15, 2024
1 parent 42219af commit 6702ac9
Showing 1 changed file with 40 additions and 34 deletions.
74 changes: 40 additions & 34 deletions src/main/scala/ai/metarank/ml/rank/LambdaMARTRanker.scala
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import io.github.metarank.ltrlib.ranking.pairwise.LambdaMART
import org.apache.commons.io.FileUtils

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}
import java.util
import scala.util.{Failure, Random, Success, Try}

object LambdaMARTRanker {
Expand All @@ -52,14 +53,10 @@ object LambdaMARTRanker {
_ <- checkDatasetSize(split.train)
result <- IO(makeBooster(split))
model <- IO.pure(LambdaMARTModel(name, config, result))
ndcg20 <- IO(model.eval(split.test, NDCG(20, nolabels = 1.0, relpow = true)))
_ <- info(s"NDCG20: source=${ndcg20.noopValue} reranked=${ndcg20.value} random=${ndcg20.randomValue}")
ndcg10 <- IO(model.eval(split.test, NDCG(10, nolabels = 1.0, relpow = true)))
_ <- info(s"NDCG10: source=${ndcg10.noopValue} reranked=${ndcg10.value} random=${ndcg10.randomValue}")
ndcg5 <- IO(model.eval(split.test, NDCG(5, nolabels = 1.0, relpow = true)))
_ <- info(s"NDCG5: source=${ndcg5.noopValue} reranked=${ndcg5.value} random=${ndcg5.randomValue}")
mrr <- IO(model.eval(split.test, MRR))
_ <- info(s"MRR: source=${mrr.noopValue} reranked=${mrr.value} random=${mrr.randomValue}")
ndcg10 <- model.eval(split.test, NDCG(10, nolabels = 1.0, relpow = true))
_ <- info(
s"NDCG10: source=${ndcg10.noopValue} reranked=${ndcg10.value} random=${ndcg10.randomValue} (took ${ndcg10.took}ms)"
)
} yield {
model
}
Expand Down Expand Up @@ -282,39 +279,48 @@ object LambdaMARTRanker {
result.toMap
}

def eval(dataset: Dataset, metric: Metric): MetricValue = {
val metricValue = booster.eval(dataset, metric)
val y = dataset.groups.map(_.labels).toArray
val yrand = for {
group <- dataset.groups
def eval(dataset: Dataset, metric: Metric): IO[MetricValue] = {
val start = System.currentTimeMillis()
val predictedScores = dataset.groups.map(g => booster.predictMat(g.values, g.rows, g.columns))
val randomScores = dataset.groups.map(g => randomArray(g.labels.length))
val noopScores = dataset.groups.map(g => noopArray(g.labels.length))
val labels = dataset.groups.map(_.labels)

for {
predictValue <- evalOne(metric, predictedScores, labels)
noopValue <- evalOne(metric, noopScores, labels)
randomValue <- evalOne(metric, randomScores, labels)
} yield {
val indices = new Array[Double](group.labels.length)
var i = 0
while (i < indices.length) {
indices(i) = Random.nextDouble()
i += 1
}
indices
MetricValue(predictValue, noopValue, randomValue, took = System.currentTimeMillis() - start)
}
val metricRandom = metric.eval(y, yrand.toArray)
val yhat = for {
group <- dataset.groups
} yield {
val indices = new Array[Double](group.labels.length)
var i = 0
while (i < indices.length) {
indices(i) = (indices.length - i) / indices.length.toDouble
i += 1
}
indices
}
def randomArray(len: Int): Array[Double] = {
val result = new Array[Double](len)
var i = 0
while (i < len) {
result(i) = Random.nextDouble()
i += 1
}
val metricNoop = metric.eval(y, yhat.toArray)
result
}

MetricValue(metricValue, metricNoop, metricRandom)
def noopArray(len: Int): Array[Double] = {
val result = new Array[Double](len)
var i = 0
val dlen = len.toDouble
while (i < len) {
result(i) = (len - i) / dlen
i += 1
}
result
}

def evalOne(metric: Metric, scores: List[Array[Double]], labels: List[Array[Double]]): IO[Double] = IO(
metric.eval(labels.toArray, scores.toArray)
)

}
case class MetricValue(value: Double, noopValue: Double, randomValue: Double)
case class MetricValue(value: Double, noopValue: Double, randomValue: Double, took: Long)

implicit val lmDecoder: Decoder[LambdaMARTConfig] = Decoder.instance(c =>
for {
Expand Down

0 comments on commit 6702ac9

Please sign in to comment.