Skip to content

Commit

Permalink
Merge branch 'master' into clpwsim-lsh
Browse files Browse the repository at this point in the history
  • Loading branch information
jimmy0017 committed Jan 29, 2012
2 parents 9e14af5 + f156b0c commit b1a592c
Show file tree
Hide file tree
Showing 15 changed files with 496 additions and 139 deletions.
117 changes: 0 additions & 117 deletions 404.html

This file was deleted.

58 changes: 36 additions & 22 deletions src/java/main/ivory/ltr/ExtractFeatures.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@

import javax.xml.parsers.ParserConfigurationException;

import com.google.common.collect.Maps;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.w3c.dom.Node;
Expand All @@ -52,9 +54,14 @@

import edu.umd.cloud9.collection.DocnoMapping;

import ivory.ltr.operator.Operator;
import ivory.ltr.operator.OperatorUtil;
import ivory.ltr.operator.Sum;

/**
* @author Don Metzler
*
* Modified by Nima Asadi
*/
public class ExtractFeatures {

Expand All @@ -69,12 +76,24 @@ public class ExtractFeatures {
private Map<String, String> queries = null; // query id -> query text mapping
private DocnoMapping docnoMapping = null; // docno mapping

private Map<String, Operator> operators = null;

public ExtractFeatures(String [] args, FileSystem fs) throws SAXException, IOException, ParserConfigurationException, NotBoundException, Exception {
loadQueryRunner(args, fs);
env = runner.getRetrievalEnvironment();
queries = runner.getQueries();
docnoMapping = env.getDocnoMapping();

operators = Maps.newHashMap();
for(String configPath: args) {
Map<String, Operator> ops = OperatorUtil.parseOperators(configPath);
if(ops.size() > 0) {
for(String key: ops.keySet()) {
operators.put(key, ops.get(key));
}
}
}

Map<String, String> finalQueries = new HashMap<String, String>();
for(Entry<String, String> queryEntry : queries.entrySet()) {
String queryKey = queryEntry.getKey();
Expand Down Expand Up @@ -169,7 +188,7 @@ private void extract() throws Exception {
// extract features query-by-query
for(Entry<String, String> queryEntry : queries.entrySet()) {
// feature map (docname -> feature name -> operator that aggregates the feature's scores)
SortedMap<String,SortedMap<String,Double>> featureValues = new TreeMap<String,SortedMap<String,Double>>();
SortedMap<String,SortedMap<String,Operator>> featureValues = new TreeMap<String,SortedMap<String,Operator>>();

// query id and text
String qid = queryEntry.getKey();
Expand Down Expand Up @@ -231,17 +250,18 @@ private void extract() throws Exception {
String docName = docIdToNameMap.get(docid);

// get feature map for this docname
SortedMap<String,Double> docFeatures = featureValues.get(docName);
SortedMap<String,Operator> docFeatures = featureValues.get(docName);
if(docFeatures == null) {
docFeatures = new TreeMap<String,Double>();
docFeatures = new TreeMap<String,Operator>();
featureValues.put(docName, docFeatures);
}

// document judgment
double judgment = judgmentEntry.getValue();

// set judgment feature
docFeatures.put(JUDGMENT_FEATURE_NAME, judgment);
docFeatures.put(JUDGMENT_FEATURE_NAME, new Sum());
docFeatures.get(JUDGMENT_FEATURE_NAME).addScore(judgment);

// initialize doc nodes
for(DocumentNode node : docNodes) {
Expand All @@ -266,13 +286,10 @@ private void extract() throws Exception {
double score = model.computeFeatureValue(c.getConcept(), metaFeat) * c.getPotential();

// update feature values
Double curVal = docFeatures.get(featId);
if(curVal == null) {
docFeatures.put(featId, score);
}
else {
docFeatures.put(featId, curVal + score);
if(!docFeatures.containsKey(featId)) {
docFeatures.put(featId, operators.get(modelName + "-" + paramId).newInstance());
}
docFeatures.get(featId).addScore(score);
}
}
}
Expand All @@ -284,27 +301,24 @@ private void extract() throws Exception {
double score = c.getPotential();

// update feature values
Double curVal = docFeatures.get(featId);
if(curVal == null) {
docFeatures.put(featId, score);
if(!docFeatures.containsKey(featId)) {
docFeatures.put(featId, operators.get(featId).newInstance());
}
else {
docFeatures.put(featId, curVal + score);
}

docFeatures.get(featId).addScore(score);
}
}
}

// print feature values for current query
for(Entry<String, SortedMap<String, Double>> featureEntry : featureValues.entrySet()) {
for(Entry<String, SortedMap<String, Operator>> featureEntry : featureValues.entrySet()) {
String docName = featureEntry.getKey();
System.out.print(qid + "\t" + docName);
Map<String,Double> docFeatures = featureEntry.getValue();
Map<String,Operator> docFeatures = featureEntry.getValue();
for(String featureName : featureNames) {
Double featVal = docFeatures.get(featureName);
if(featVal == null) {
featVal = DEFAULT_FEATURE_VALUE;
Operator op = docFeatures.get(featureName);
double featVal = DEFAULT_FEATURE_VALUE;
if(op != null) {
featVal = op.getFinalScore();
}
System.out.print("\t" + featVal);
}
Expand Down
17 changes: 17 additions & 0 deletions src/java/main/ivory/ltr/operator/BooleanCount.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package ivory.ltr.operator;

/**
 * Operator whose final value is the number of recorded scores that are
 * strictly greater than zero.
 */
public class BooleanCount extends Operator {
  /**
   * Counts the strictly positive scores.
   *
   * @return the count of scores greater than zero, as a double; 0 when no
   *         scores have been recorded
   */
  @Override
  public double getFinalScore() {
    int positives = 0;
    for (double score : scores) {
      positives += (score > 0) ? 1 : 0;
    }
    return positives;
  }

  /**
   * @return a new, separate {@code BooleanCount}
   */
  @Override
  public Operator newInstance() {
    return new BooleanCount();
  }
}
21 changes: 21 additions & 0 deletions src/java/main/ivory/ltr/operator/BooleanRatio.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package ivory.ltr.operator;

/**
 * Operator whose final value is the fraction of recorded scores that are
 * strictly greater than zero.
 */
public class BooleanRatio extends Operator {
  /**
   * Computes the share of strictly positive scores.
   *
   * @return (number of scores greater than zero) / (number of scores), or 0
   *         when no scores have been recorded
   */
  @Override
  public double getFinalScore() {
    // Guard first so the division below never sees a zero denominator.
    if (scores.isEmpty()) {
      return 0;
    }

    int positives = 0;
    for (double score : scores) {
      if (score > 0) {
        positives++;
      }
    }

    double numerator = positives;
    return numerator / scores.size();
  }

  /**
   * @return a new, separate {@code BooleanRatio}
   */
  @Override
  public Operator newInstance() {
    return new BooleanRatio();
  }
}
17 changes: 17 additions & 0 deletions src/java/main/ivory/ltr/operator/Max.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package ivory.ltr.operator;

/**
 * Operator whose final value is the largest recorded score.
 */
public class Max extends Operator {
  /**
   * Scans the recorded scores for the largest one.
   *
   * @return the maximum score, or {@link Double#NEGATIVE_INFINITY} when no
   *         scores have been recorded
   */
  @Override
  public double getFinalScore() {
    double best = Double.NEGATIVE_INFINITY;
    for (double candidate : scores) {
      // Explicit comparison (rather than Math.max) so a NaN score never
      // replaces the running maximum.
      best = (candidate > best) ? candidate : best;
    }
    return best;
  }

  /**
   * @return a new, separate {@code Max}
   */
  @Override
  public Operator newInstance() {
    return new Max();
  }
}
20 changes: 20 additions & 0 deletions src/java/main/ivory/ltr/operator/Mean.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package ivory.ltr.operator;

/**
 * Operator whose final value is the arithmetic mean of the recorded scores.
 */
public class Mean extends Operator {
  /**
   * Averages the recorded scores.
   *
   * @return the sum of the scores divided by their count, or 0 when no
   *         scores have been recorded
   */
  @Override
  public double getFinalScore() {
    // Guard first so the division below never sees a zero denominator.
    if (scores.isEmpty()) {
      return 0;
    }

    double total = 0;
    for (double score : scores) {
      total = total + score;
    }

    return total / scores.size();
  }

  /**
   * @return a new, separate {@code Mean}
   */
  @Override
  public Operator newInstance() {
    return new Mean();
  }
}
17 changes: 17 additions & 0 deletions src/java/main/ivory/ltr/operator/Min.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package ivory.ltr.operator;

/**
 * Operator whose final value is the smallest recorded score.
 */
public class Min extends Operator {
  /**
   * Scans the recorded scores for the smallest one.
   *
   * @return the minimum score, or {@link Double#POSITIVE_INFINITY} when no
   *         scores have been recorded
   */
  @Override
  public double getFinalScore() {
    double best = Double.POSITIVE_INFINITY;
    for (double candidate : scores) {
      // Explicit comparison (rather than Math.min) so a NaN score never
      // replaces the running minimum.
      best = (candidate < best) ? candidate : best;
    }
    return best;
  }

  /**
   * @return a new, separate {@code Min}
   */
  @Override
  public Operator newInstance() {
    return new Min();
  }
}
46 changes: 46 additions & 0 deletions src/java/main/ivory/ltr/operator/Operator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package ivory.ltr.operator;

import java.util.ArrayList;
import java.util.List;

import com.google.common.collect.Lists;

/**
 * Holds the scores recorded for a feature and computes the final value of
 * that feature from them.
 *
 * <p>Subclasses decide how the recorded scores are combined by implementing
 * {@link #getFinalScore()}, and provide a way to obtain another operator via
 * {@link #newInstance()}.
 *
 * @author Nima Asadi
 */
public abstract class Operator {
  /** Scores recorded so far, in the order they were added; read by subclasses. */
  protected List<Double> scores;

  /**
   * Creates an operator with no recorded scores.
   */
  protected Operator() {
    // The JDK constructor is all that is needed for an empty list; no
    // third-party factory method is required here.
    scores = new ArrayList<Double>();
  }

  /**
   * Adds a new score
   *
   * @param score Score to append to the recorded scores
   */
  public void addScore(double score) {
    scores.add(score);
  }

  /**
   * Clears the scores, leaving the operator with nothing recorded.
   */
  public void clear() {
    scores.clear();
  }

  /**
   * Computes the final feature value from the scores recorded so far.
   *
   * @return Feature value
   */
  public abstract double getFinalScore();

  /**
   * @return New instance
   */
  public abstract Operator newInstance();
}
Loading

0 comments on commit b1a592c

Please sign in to comment.