Skip to content

Commit

Permalink
TFIDF and IDF scoring functions implemented
Browse files Browse the repository at this point in the history
  • Loading branch information
Nima Asadi committed Jan 27, 2012
1 parent 979ae55 commit 6fb350f
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 0 deletions.
72 changes: 72 additions & 0 deletions src/java/main/ivory/smrf/model/score/IDFScoringFunction.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Ivory: A Hadoop toolkit for web-scale information retrieval
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package ivory.smrf.model.score;

import ivory.core.util.XMLTools;
import ivory.smrf.model.GlobalEvidence;
import ivory.smrf.model.GlobalTermEvidence;

import org.w3c.dom.Node;

/**
 * Scoring function whose score is the inverse document frequency (IDF) of the term alone:
 * {@link #getScore(int, int)} ignores both the term frequency and the document length.
 *
 * <p>The IDF variant is selected in {@link #configure(Node)} from the {@code idf} attribute
 * of the configuration node. With {@code N = globalEvidence.numDocs} and
 * {@code df = termEvidence.getDf()}, and {@code log} being the natural logarithm:
 * <ul>
 *   <li>{@code "none"}: constant {@code 1}</li>
 *   <li>{@code "classic"}: {@code log(N / df)}</li>
 *   <li>{@code "okapi-positive"}: {@code log((N + 0.5) / (df + 0.5))}</li>
 *   <li>any other value, including the initial {@code "okapi"}:
 *       {@code log((N - df + 0.5) / (df + 0.5))}</li>
 * </ul>
 *
 * @author Nima Asadi
 */
public class IDFScoringFunction extends ScoringFunction {
  /** Name of the IDF variant; starts out as "okapi" until {@link #configure(Node)} is called. */
  private String idfType = "okapi";

  /** IDF of the current term, recomputed by every call to {@code initialize}. */
  protected float idf;

  /**
   * Reads the IDF variant name from the {@code idf} attribute of the given node.
   *
   * @param domNode configuration node; "okapi" is handed to {@code XMLTools.getAttributeValue}
   *        as its third argument (presumably the fallback used when the attribute is absent)
   */
  @Override
  public void configure(Node domNode) {
    idfType = XMLTools.getAttributeValue(domNode, "idf", "okapi");
  }

  /**
   * Computes and caches the IDF for a term from the collection statistics.
   *
   * @param termEvidence supplies the term's document frequency via {@code getDf()}
   * @param globalEvidence supplies the collection size via {@code numDocs}
   */
  @Override
  public void initialize(GlobalTermEvidence termEvidence, GlobalEvidence globalEvidence) {
    super.initialize(termEvidence, globalEvidence);

    // "none" needs no collection statistics at all, so handle it before touching them.
    if ("none".equals(idfType)) {
      idf = 1;
      return;
    }

    // All remaining variants work on the same two statistics, in single precision.
    final float numDocs = (float) globalEvidence.numDocs;
    final float docFreq = (float) termEvidence.getDf();

    // NOTE(review): for "classic", docFreq == 0 divides by zero and yields an infinite
    // (or NaN) idf -- confirm whether callers can pass a term with df == 0.
    final float ratio;
    if ("classic".equals(idfType)) {
      ratio = numDocs / docFreq;
    } else if ("okapi-positive".equals(idfType)) {
      ratio = (numDocs + 0.5f) / (docFreq + 0.5f);
    } else {
      // Every unrecognized name falls back to the "Okapi" idf.
      ratio = (numDocs - docFreq + 0.5f) / (docFreq + 0.5f);
    }

    idf = (float) Math.log(ratio);
  }

  /**
   * Returns the cached IDF, regardless of the arguments.
   *
   * @param tf term frequency (unused)
   * @param docLen document length (unused)
   * @return the IDF computed by the most recent {@code initialize} call
   */
  @Override
  public float getScore(int tf, int docLen) {
    return idf;
  }

  /**
   * Returns a lower bound on the score.
   *
   * @return {@link Float#NEGATIVE_INFINITY}, i.e. no useful bound yet
   */
  @Override
  public float getMinScore() {
    // TODO: make this bound tighter
    return Float.NEGATIVE_INFINITY;
  }

  /** Returns the XML fragment that identifies this scoring function. */
  @Override
  public String toString() {
    return "<scoringfunction>IDF</scoringfunction>\n";
  }
}
66 changes: 66 additions & 0 deletions src/java/main/ivory/smrf/model/score/TFIDFScoringFunction.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Ivory: A Hadoop toolkit for web-scale information retrieval
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package ivory.smrf.model.score;

import ivory.core.util.XMLTools;
import ivory.smrf.model.GlobalEvidence;
import ivory.smrf.model.GlobalTermEvidence;

import org.w3c.dom.Node;

/**
 * Computes a TF-IDF score: the raw term frequency multiplied by the term's inverse
 * document frequency (IDF). Document length is not used.
 *
 * <p>The IDF variant is selected in {@link #configure(Node)} from the {@code idf} attribute
 * of the configuration node. With {@code N = globalEvidence.numDocs} and
 * {@code df = termEvidence.getDf()}, and {@code log} being the natural logarithm:
 * <ul>
 *   <li>{@code "none"}: constant {@code 1}, so the score is the raw term frequency</li>
 *   <li>{@code "classic"}: {@code log(N / df)}</li>
 *   <li>{@code "okapi-positive"}: {@code log((N + 0.5) / (df + 0.5))}</li>
 *   <li>any other value, including the initial {@code "okapi"}:
 *       {@code log((N - df + 0.5) / (df + 0.5))}</li>
 * </ul>
 *
 * @author Nima Asadi
 */
public class TFIDFScoringFunction extends ScoringFunction {
  /** Name of the IDF variant; starts out as "okapi" until {@link #configure(Node)} is called. */
  private String idfType = "okapi";

  /** IDF of the current term, recomputed by every call to {@code initialize}. */
  protected float idf;

  /**
   * Reads the IDF variant name from the {@code idf} attribute of the given node.
   *
   * @param domNode configuration node; "okapi" is handed to {@code XMLTools.getAttributeValue}
   *        as its third argument (presumably the fallback used when the attribute is absent)
   */
  @Override
  public void configure(Node domNode) {
    idfType = XMLTools.getAttributeValue(domNode, "idf", "okapi");
  }

  /**
   * Scores a document as term frequency times the cached IDF.
   *
   * @param tf frequency of the term in the document
   * @param docLen document length (unused)
   * @return {@code tf * idf}
   */
  @Override
  public float getScore(int tf, int docLen) {
    // NOTE(review): if idf is infinite (e.g. "classic" with df == 0), tf == 0 makes this
    // product NaN -- confirm whether callers can initialize with a term whose df is 0.
    return tf * idf;
  }

  /** Returns the XML fragment that identifies this scoring function. */
  @Override
  public String toString() {
    return "<scoringfunction>TFIDF</scoringfunction>\n";
  }

  /**
   * Computes and caches the IDF for a term from the collection statistics.
   *
   * @param termEvidence supplies the term's document frequency via {@code getDf()}
   * @param globalEvidence supplies the collection size via {@code numDocs}
   */
  @Override
  public void initialize(GlobalTermEvidence termEvidence, GlobalEvidence globalEvidence) {
    super.initialize(termEvidence, globalEvidence);

    if ("none".equals(idfType)) {
      // IDF disabled: leave term frequency unweighted.
      idf = 1;
      return;
    }

    // Same single-precision statistics as the original per-branch casts.
    final float numDocs = (float) globalEvidence.numDocs;
    final float docFreq = (float) termEvidence.getDf();

    if ("classic".equals(idfType)) {
      idf = (float) Math.log(numDocs / docFreq);
    } else if ("okapi-positive".equals(idfType)) {
      idf = (float) Math.log((numDocs + 0.5f) / (docFreq + 0.5f));
    } else {
      // Defaults to "Okapi" idf for any unrecognized variant name.
      idf = (float) Math.log((numDocs - docFreq + 0.5f) / (docFreq + 0.5f));
    }
  }
}

0 comments on commit 6fb350f

Please sign in to comment.