Navigation Menu

Skip to content

Commit

Permalink
Merge branch 'master' of github.com:nasadi/Ivory into nasadi-master
Browse files Browse the repository at this point in the history
  • Loading branch information
jimmy0017 committed Jan 29, 2012
2 parents 2b44e05 + d33df49 commit 662a95b
Show file tree
Hide file tree
Showing 14 changed files with 496 additions and 22 deletions.
58 changes: 36 additions & 22 deletions src/java/main/ivory/ltr/ExtractFeatures.java
Expand Up @@ -44,6 +44,8 @@

import javax.xml.parsers.ParserConfigurationException;

import com.google.common.collect.Maps;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.w3c.dom.Node;
Expand All @@ -52,9 +54,14 @@

import edu.umd.cloud9.collection.DocnoMapping;

import ivory.ltr.operator.Operator;
import ivory.ltr.operator.OperatorUtil;
import ivory.ltr.operator.Sum;

/**
* @author Don Metzler
*
* Modified by Nima Asadi
*/
public class ExtractFeatures {

Expand All @@ -69,12 +76,24 @@ public class ExtractFeatures {
private Map<String, String> queries = null; // query id -> query text mapping
private DocnoMapping docnoMapping = null; // docno mapping

private Map<String, Operator> operators = null;

public ExtractFeatures(String [] args, FileSystem fs) throws SAXException, IOException, ParserConfigurationException, NotBoundException, Exception {
loadQueryRunner(args, fs);
env = runner.getRetrievalEnvironment();
queries = runner.getQueries();
docnoMapping = env.getDocnoMapping();

operators = Maps.newHashMap();
for(String configPath: args) {
Map<String, Operator> ops = OperatorUtil.parseOperators(configPath);
if(ops.size() > 0) {
for(String key: ops.keySet()) {
operators.put(key, ops.get(key));
}
}
}

Map<String, String> finalQueries = new HashMap<String, String>();
for(Entry<String, String> queryEntry : queries.entrySet()) {
String queryKey = queryEntry.getKey();
Expand Down Expand Up @@ -169,7 +188,7 @@ private void extract() throws Exception {
// extract features query-by-query
for(Entry<String, String> queryEntry : queries.entrySet()) {
// feature map (docname -> feature name -> feature value)
SortedMap<String,SortedMap<String,Double>> featureValues = new TreeMap<String,SortedMap<String,Double>>();
SortedMap<String,SortedMap<String,Operator>> featureValues = new TreeMap<String,SortedMap<String,Operator>>();

// query id and text
String qid = queryEntry.getKey();
Expand Down Expand Up @@ -231,17 +250,18 @@ private void extract() throws Exception {
String docName = docIdToNameMap.get(docid);

// get feature map for this docname
SortedMap<String,Double> docFeatures = featureValues.get(docName);
SortedMap<String,Operator> docFeatures = featureValues.get(docName);
if(docFeatures == null) {
docFeatures = new TreeMap<String,Double>();
docFeatures = new TreeMap<String,Operator>();
featureValues.put(docName, docFeatures);
}

// document judgment
double judgment = judgmentEntry.getValue();

// set judgment feature
docFeatures.put(JUDGMENT_FEATURE_NAME, judgment);
docFeatures.put(JUDGMENT_FEATURE_NAME, new Sum());
docFeatures.get(JUDGMENT_FEATURE_NAME).addScore(judgment);

// initialize doc nodes
for(DocumentNode node : docNodes) {
Expand All @@ -266,13 +286,10 @@ private void extract() throws Exception {
double score = model.computeFeatureValue(c.getConcept(), metaFeat) * c.getPotential();

// update feature values
Double curVal = docFeatures.get(featId);
if(curVal == null) {
docFeatures.put(featId, score);
}
else {
docFeatures.put(featId, curVal + score);
if(!docFeatures.containsKey(featId)) {
docFeatures.put(featId, operators.get(modelName + "-" + paramId).newInstance());
}
docFeatures.get(featId).addScore(score);
}
}
}
Expand All @@ -284,27 +301,24 @@ private void extract() throws Exception {
double score = c.getPotential();

// update feature values
Double curVal = docFeatures.get(featId);
if(curVal == null) {
docFeatures.put(featId, score);
if(!docFeatures.containsKey(featId)) {
docFeatures.put(featId, operators.get(featId).newInstance());
}
else {
docFeatures.put(featId, curVal + score);
}

docFeatures.get(featId).addScore(score);
}
}
}

// print feature values for current query
for(Entry<String, SortedMap<String, Double>> featureEntry : featureValues.entrySet()) {
for(Entry<String, SortedMap<String, Operator>> featureEntry : featureValues.entrySet()) {
String docName = featureEntry.getKey();
System.out.print(qid + "\t" + docName);
Map<String,Double> docFeatures = featureEntry.getValue();
Map<String,Operator> docFeatures = featureEntry.getValue();
for(String featureName : featureNames) {
Double featVal = docFeatures.get(featureName);
if(featVal == null) {
featVal = DEFAULT_FEATURE_VALUE;
Operator op = docFeatures.get(featureName);
double featVal = DEFAULT_FEATURE_VALUE;
if(op != null) {
featVal = op.getFinalScore();
}
System.out.print("\t" + featVal);
}
Expand Down
17 changes: 17 additions & 0 deletions src/java/main/ivory/ltr/operator/BooleanCount.java
@@ -0,0 +1,17 @@
package ivory.ltr.operator;

/**
 * Operator whose final value is the number of accumulated scores that are
 * strictly greater than zero.
 */
public class BooleanCount extends Operator {
  /**
   * @return Count of strictly positive scores, as a double
   */
  @Override
  public double getFinalScore() {
    int positives = 0;
    for (int i = 0; i < scores.size(); i++) {
      double value = scores.get(i);
      positives += (value > 0) ? 1 : 0;
    }
    // int -> double widening; same value the explicit cast would produce
    return positives;
  }

  /**
   * @return A fresh, empty BooleanCount
   */
  @Override
  public Operator newInstance() {
    return new BooleanCount();
  }
}
21 changes: 21 additions & 0 deletions src/java/main/ivory/ltr/operator/BooleanRatio.java
@@ -0,0 +1,21 @@
package ivory.ltr.operator;

/**
 * Operator whose final value is the fraction of accumulated scores that are
 * strictly greater than zero.
 */
public class BooleanRatio extends Operator {
  /**
   * @return Number of strictly positive scores divided by the total number
   *         of scores, or 0 when no score has been added
   */
  @Override
  public double getFinalScore() {
    final int total = scores.size();
    if (total == 0) {
      return 0;
    }

    int positives = 0;
    for (int i = 0; i < total; i++) {
      double value = scores.get(i);
      if (value > 0) {
        positives++;
      }
    }
    // cast before dividing so the division is done in floating point
    return ((double) positives) / total;
  }

  /**
   * @return A fresh, empty BooleanRatio
   */
  @Override
  public Operator newInstance() {
    return new BooleanRatio();
  }
}
17 changes: 17 additions & 0 deletions src/java/main/ivory/ltr/operator/Max.java
@@ -0,0 +1,17 @@
package ivory.ltr.operator;

/**
 * Operator whose final value is the largest accumulated score.
 */
public class Max extends Operator {
  /**
   * @return The largest score added so far;
   *         {@code Double.NEGATIVE_INFINITY} when no score has been added
   */
  @Override
  public double getFinalScore() {
    double best = Double.NEGATIVE_INFINITY;
    for (int i = 0; i < scores.size(); i++) {
      double candidate = scores.get(i);
      // explicit comparison: a NaN candidate never compares greater, so it is skipped
      best = (candidate > best) ? candidate : best;
    }
    return best;
  }

  /**
   * @return A fresh, empty Max
   */
  @Override
  public Operator newInstance() {
    return new Max();
  }
}
20 changes: 20 additions & 0 deletions src/java/main/ivory/ltr/operator/Mean.java
@@ -0,0 +1,20 @@
package ivory.ltr.operator;

/**
 * Operator whose final value is the arithmetic mean of the accumulated scores.
 */
public class Mean extends Operator {
  /**
   * @return Sum of all scores divided by their count, or 0 when no score
   *         has been added
   */
  @Override
  public double getFinalScore() {
    final int count = scores.size();
    if (count == 0) {
      return 0;
    }

    double total = 0;
    for (int i = 0; i < count; i++) {
      total += scores.get(i);
    }
    return total / count;
  }

  /**
   * @return A fresh, empty Mean
   */
  @Override
  public Operator newInstance() {
    return new Mean();
  }
}
17 changes: 17 additions & 0 deletions src/java/main/ivory/ltr/operator/Min.java
@@ -0,0 +1,17 @@
package ivory.ltr.operator;

/**
 * Operator whose final value is the smallest accumulated score.
 */
public class Min extends Operator {
  /**
   * @return The smallest score added so far;
   *         {@code Double.POSITIVE_INFINITY} when no score has been added
   */
  @Override
  public double getFinalScore() {
    double best = Double.POSITIVE_INFINITY;
    for (int i = 0; i < scores.size(); i++) {
      double candidate = scores.get(i);
      // explicit comparison: a NaN candidate never compares smaller, so it is skipped
      best = (candidate < best) ? candidate : best;
    }
    return best;
  }

  /**
   * @return A fresh, empty Min
   */
  @Override
  public Operator newInstance() {
    return new Min();
  }
}
46 changes: 46 additions & 0 deletions src/java/main/ivory/ltr/operator/Operator.java
@@ -0,0 +1,46 @@
package ivory.ltr.operator;

import java.util.List;

import com.google.common.collect.Lists;

/**
 * Accumulates the individual scores observed for a feature and reduces them
 * to a single feature value. Concrete subclasses define the reduction.
 *
 * @author Nima Asadi
 */
public abstract class Operator {
  /** Scores recorded so far, in the order they were added. */
  protected List<Double> scores = Lists.newArrayList();

  /** Creates an operator with no recorded scores. */
  protected Operator() {
  }

  /**
   * Records one more score.
   *
   * @param score Score to append to the recorded scores
   */
  public void addScore(double score) {
    scores.add(score);
  }

  /**
   * Discards every recorded score.
   */
  public void clear() {
    scores.clear();
  }

  /**
   * Reduces the recorded scores to the final feature value.
   *
   * @return Feature value
   */
  public abstract double getFinalScore();

  /**
   * Creates another operator of the same concrete type.
   *
   * @return New instance
   */
  public abstract Operator newInstance();
}
74 changes: 74 additions & 0 deletions src/java/main/ivory/ltr/operator/OperatorUtil.java
@@ -0,0 +1,74 @@
package ivory.ltr.operator;

import java.io.InputStream;
import java.io.IOException;
import java.io.File;
import java.util.Map;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import com.google.common.io.InputSupplier;
import com.google.common.io.Files;

import org.xml.sax.SAXException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

/**
 * Provides auxiliary functions to parse operators.
 *
 * @author Nima Asadi
 */
public class OperatorUtil {
  /**
   * Loads the feature-to-operator mapping from an XML file.
   *
   * @param featurePath Path to the file containing the feature descriptions
   * @return Map of feature id to operator
   * @throws Exception if the file cannot be read or parsed, or if an operator
   *         class cannot be loaded or instantiated
   */
  public static Map<String, Operator> parseOperators(String featurePath)
      throws Exception {
    return OperatorUtil.loadOperators(Files.newInputStreamSupplier(new File(featurePath)));
  }

  /**
   * Reads a feature set in an XML format as follows (the name of the root
   * element is not checked):
   *
   * <pre>
   * &lt;root&gt;
   *   &lt;model id="MODEL_ID"&gt;
   *     &lt;feature id="FEATURE_ID" operator="fully.qualified.OperatorClass" /&gt;
   *     &lt;feature id="OTHER_FEATURE_ID" /&gt;
   *   &lt;/model&gt;
   * &lt;/root&gt;
   * </pre>
   *
   * Every {@code feature} element nested in a {@code model} element yields one
   * entry keyed by {@code MODEL_ID + "-" + FEATURE_ID}. The value is a new
   * instance of the class named by the {@code operator} attribute, or a
   * {@link Sum} when that attribute is absent.
   *
   * @param featureInputSupplier An input supplier that provides the feature descriptions
   * @return A map of feature id to Operator; empty (never null) when no
   *         feature is described
   * @throws ParserConfigurationException if no XML parser can be created
   * @throws SAXException if the input is not well-formed XML
   * @throws IOException if the input cannot be read or closed
   * @throws Exception if an operator class cannot be loaded, is not an
   *         {@link Operator}, or cannot be instantiated
   */
  public static Map<String, Operator> loadOperators(InputSupplier<? extends InputStream> featureInputSupplier)
      throws ParserConfigurationException, SAXException, IOException, Exception {
    Preconditions.checkNotNull(featureInputSupplier);

    Map<String, Operator> operators = Maps.newHashMap();

    // The supplier hands out a fresh stream that this method owns, so it must
    // be closed here whether or not parsing succeeds.
    Document dom;
    InputStream in = featureInputSupplier.getInput();
    try {
      dom = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(in);
    } finally {
      if (in != null) {
        in.close();
      }
    }

    NodeList nodeList = dom.getDocumentElement().getElementsByTagName("model");
    if (nodeList == null) {
      // Return an empty map rather than null so callers can use the result directly.
      return operators;
    }

    for (int i = 0; i < nodeList.getLength(); i++) {
      Element element = (Element) nodeList.item(i);
      String modelName = element.getAttribute("id");
      NodeList featureList = element.getElementsByTagName("feature");
      for (int j = 0; j < featureList.getLength(); j++) {
        Element felement = (Element) featureList.item(j);
        String fid = modelName + "-" + felement.getAttribute("id");
        if (felement.hasAttribute("operator")) {
          String className = felement.getAttribute("operator");
          // asSubclass rejects a non-Operator class before its constructor runs
          Class<? extends Operator> operatorClass =
              Class.forName(className).asSubclass(Operator.class);
          operators.put(fid, operatorClass.getDeclaredConstructor().newInstance());
        } else {
          // features without an explicit operator are summed
          operators.put(fid, new Sum());
        }
      }
    }
    return operators;
  }
}
15 changes: 15 additions & 0 deletions src/java/main/ivory/ltr/operator/Sum.java
@@ -0,0 +1,15 @@
package ivory.ltr.operator;

/**
 * Operator whose final value is the sum of the accumulated scores.
 */
public class Sum extends Operator {
  /**
   * @return Sum of all scores added so far, or 0 when no score has been added
   */
  @Override
  public double getFinalScore() {
    double total = 0;
    // accumulate in insertion order so floating-point rounding is unchanged
    for (int i = 0, n = scores.size(); i < n; i++) {
      total += scores.get(i);
    }
    return total;
  }

  /**
   * @return A fresh, empty Sum
   */
  @Override
  public Operator newInstance() {
    return new Sum();
  }
}
26 changes: 26 additions & 0 deletions src/java/main/ivory/ltr/operator/Variance.java
@@ -0,0 +1,26 @@
package ivory.ltr.operator;

/**
 * Operator whose final value is the population variance of the accumulated
 * scores (sum of squared deviations from the mean, divided by the count).
 */
public class Variance extends Operator {
  /**
   * @return Population variance of the scores, or 0 when no score has been added
   */
  @Override
  public double getFinalScore() {
    final int count = scores.size();
    if (count == 0) {
      return 0;
    }

    // first pass: mean of the scores
    double total = 0;
    for (int i = 0; i < count; i++) {
      total += scores.get(i);
    }
    final double mean = total / count;

    // second pass: accumulate squared deviations from the mean
    double squaredDeviations = 0;
    for (int i = 0; i < count; i++) {
      double deviation = scores.get(i) - mean;
      squaredDeviations += Math.pow(deviation, 2);
    }

    return squaredDeviations / count;
  }

  /**
   * @return A fresh, empty Variance
   */
  @Override
  public Operator newInstance() {
    return new Variance();
  }
}

0 comments on commit 662a95b

Please sign in to comment.