This repository has been archived by the owner on Jun 29, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
TestData.scala
55 lines (45 loc) · 1.71 KB
/
TestData.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
package scalarank
import org.nd4j.linalg.api.ndarray.INDArray
import org.nd4s.Implicits._
import scalarank.datapoint.{Datapoint, Query, Relevance, SVMRankDatapoint}
/**
  * Fixtures shared by the tests: a hand-written list of relevance labels without
  * features, reference values associated with that list, and the sample SVMRank
  * data sets loaded from the test resources.
  */
object TestData {

  /**
    * Twenty-two datapoints that carry only a relevance label (their `features` are `null`),
    * in the order given below.
    */
  val featureless: Array[Datapoint with Relevance] = Array(
    4.0, 3.0, 4.0, 3.0, 1.0, 2.0, 1.0, 4.0, 0.0, 4.0, 0.0, 2.0, 2.0, 2.0, 1.0, 3.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0
  ).map(r => new FeaturelessDatapointRelevance(r))

  // Reference values for `featureless`.
  // NOTE(review): presumably the expected outputs of the corresponding ranking metrics
  // evaluated on the full list; the metric implementations are not visible here - confirm.

  /** Equals 16 / 22: sixteen of the twenty-two labels above are non-zero. */
  val featurelessPrecision = 0.7272727272727273
  val featurelessAveragePrecision = 0.9343461583351291
  val featurelessReciprocalRank = 1.000000
  val featurelessDCG = 16.31221516353917
  val featurelessnDCG = 0.937572811083981

  /** Queries parsed from the classpath resource `/train.txt` when this object is initialised. */
  val sampleTrainData: IndexedSeq[Query[SVMRankDatapoint]] = readSVMRank("/train.txt")

  /** Queries parsed from the classpath resource `/test.txt` when this object is initialised. */
  val sampleTestData: IndexedSeq[Query[SVMRankDatapoint]] = readSVMRank("/test.txt")

  /**
    * Reads a classpath resource line by line, turns every line into an
    * [[scalarank.datapoint.SVMRankDatapoint]] and groups the datapoints by their `qid`
    * into one [[scalarank.datapoint.Query]] per distinct id.
    *
    * The order of the returned queries is whatever the intermediate `groupBy` map yields;
    * it is not guaranteed to follow the order of the file.
    *
    * @param file Resource name, resolved through `getClass.getResourceAsStream`
    * @return One query per distinct `qid` found in the resource
    * @throws java.io.FileNotFoundException if the resource cannot be found on the classpath
    */
  def readSVMRank(file: String): IndexedSeq[Query[SVMRankDatapoint]] = {
    // getResourceAsStream signals a missing resource with null; fail fast with a clear
    // message instead of a NullPointerException surfacing later while reading.
    val stream = Option(getClass.getResourceAsStream(file)).getOrElse(
      throw new java.io.FileNotFoundException(s"Test resource not found on the classpath: $file")
    )
    val source = scala.io.Source.fromInputStream(stream)
    // getLines() is lazy, so all lines must be consumed before the source is closed.
    val datapoints =
      try source.getLines().map(l => SVMRankDatapoint(l)).toArray
      finally source.close()
    datapoints.groupBy(d => d.qid).map { case (qid, ds) =>
      new Query[SVMRankDatapoint](qid, ds)
    }.toIndexedSeq
  }

  /**
    * A test data point with a dense feature vector
    *
    * @param f The features as an array of doubles, converted with nd4s' `toNDArray`
    * @param r The relevance
    */
  class TestDatapoint(f: Array[Double], r: Double) extends Datapoint with Relevance {
    override val features: INDArray = f.toNDArray
    override val relevance: Double = r
  }

  /**
    * A datapoint with relevance that does not contain features
    *
    * `features` is deliberately `null`, so instances must only be handed to code that
    * never reads the feature vector.
    *
    * @param r The relevance label
    */
  class FeaturelessDatapointRelevance(r: Double) extends Datapoint with Relevance {
    override val features: INDArray = null
    override val relevance: Double = r
  }
}