/
IRRA12.php
66 lines (51 loc) · 1.83 KB
/
IRRA12.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
<?php
namespace Basset\Models;
use Basset\Models\Contracts\{
ProbabilisticModelInterface,
WeightedModelInterface
};
use Basset\{
Metric\VectorSimilarity,
Models\TermCount
};
/**
 * IRRA12 term weighting model.
 *
 * Implements the DFI-based (divergence from independence) term weighting evaluated in IRRA's
 * experimental TREC 2012 system. The model is developed on the basis of Shannon's information
 * theory (Shannon, 1949) and is combined with a heuristic factor that is expected to improve
 * early precision when used together with DFI term weighting.
 *
 * @see http://trec.nist.gov/pubs/trec21/papers/irra.web.nb.pdf
 *
 * @author Jericko Tejido <jtbibliomania@gmail.com>
 */
class IRRA12 extends WeightedModel implements WeightedModelInterface, ProbabilisticModelInterface
{
    /**
     * Runs the parent constructor, then assigns a TermCount instance as the query model
     * and a VectorSimilarity instance as the metric.
     */
    public function __construct()
    {
        parent::__construct();
        $this->queryModel = new TermCount();
        $this->metric = new VectorSimilarity();
    }

    /**
     * Computes the per-term document weight ∆(Iij) × Λij.
     *
     * The full ranking formula is ∑ qtf × ∆(Iij) × Λij; the qtf factor and the summation over
     * query terms are not applied in this method.
     *
     * The weight is 0.0 whenever the observed frequency does not exceed the frequency expected
     * under independence, and also whenever the collection statistics make that expectation
     * undefined or non-positive (no tokens in the collection, zero collection term frequency,
     * or an empty document). The latter cases would otherwise end in a division by zero.
     *
     * @param int $tf              Frequency of the term in the document.
     * @param int $docLength       Length of the document, used here as a token count.
     * @param int $docUniqueLength Not used by this model; kept for signature compatibility.
     * @return float The term weight, or 0.0 when the term carries no positive evidence.
     */
    public function score(int $tf, int $docLength, int $docUniqueLength): float
    {
        $collectionTf = $this->getTermFrequency();
        $totalTokens = $this->getNumberOfTokens();

        // Without any tokens in the collection the expected frequency cannot be computed.
        if ($totalTokens <= 0) {
            return 0.0;
        }

        // eij: expected frequency of the term in this document.
        $expected = ($collectionTf * $docLength) / $totalTokens;

        // A non-positive expectation would make tf / sqrt(eij) undefined; an observed frequency
        // at or below the expectation contributes nothing. Passing this guard also implies
        // $tf > 0 and $docLength != 0, so the divisions below are safe.
        if ($expected <= 0 || $tf <= $expected) {
            return 0.0;
        }

        // eij+: the same expectation after adding one occurrence to every count.
        $expectedPlus = (($collectionTf + 1) * ($docLength + 1)) / ($totalTokens + 1);

        // NOTE(review): $tf > $docLength would make $alpha negative and pow() yield NAN;
        // callers are assumed to pass $tf <= $docLength.
        $alpha = ($docLength - $tf) / $docLength;
        $beta = (2 / 3) * (($tf + 1) / $tf);

        // Λij
        $suppressJunk = pow($alpha, 3 / 4) * pow($beta, 1 / 4);

        // ∆(Iij)
        $information = (($tf + 1) * log(($tf + 1) / sqrt($expectedPlus), 2))
            - ($tf * log($tf / sqrt($expected), 2));

        return $information * $suppressJunk;
    }
}