Permalink
Browse files

Plugging the collector query that uses str fields

  • Loading branch information...
1 parent f76a24b commit e3d4cb2f3c47d77d38d11ddb4ffb9e307e844a76 @romanchyla committed Jun 21, 2012
@@ -1,7 +1,6 @@
package org.apache.lucene.search;
import java.io.IOException;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
@@ -11,19 +10,21 @@
public class CitesCollector extends Collector implements SetCollector {
- private Scorer scorer;
- private IndexReader reader;
- private int docBase;
- private String indexField;
- private Map<Integer, Integer> fieldCache;
- private Set<Integer> recids;
+ protected Scorer scorer;
+ protected IndexReader reader;
+ protected int docBase;
+ protected String indexField;
+ protected Map<Integer, Integer> fieldCache = null;
+ protected Set<Integer> recids;
public CitesCollector(Map<Integer, Integer> cache, String field) {
super();
fieldCache = cache;
indexField = field;
recids = new HashSet<Integer>();
}
+
+
@Override
public void setScorer(Scorer scorer) throws IOException {
@@ -0,0 +1,84 @@
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+
+/**
+ * Collector that, for every document it is asked to collect, loads the
+ * document, reads all values of {@link #indexField} and translates each value
+ * through {@link #fieldCache}. The translated integers are accumulated in a
+ * set that is exposed through {@link #getHits()}.
+ *
+ * This is the String-keyed counterpart of the Integer-keyed cites collector:
+ * the translation table is keyed by the raw field value.
+ */
+public class CitesCollectorString extends Collector implements SetCollector {
+
+  protected Scorer scorer;
+  protected IndexReader reader;
+  protected int docBase;
+  protected String indexField;
+  protected Map<String, Integer> fieldCache = null;
+  protected Set<Integer> recids;
+
+  /**
+   * @param cache translation table from a field value to the integer that is
+   *        recorded as a hit
+   * @param field name of the document field whose values are looked up in
+   *        {@code cache}
+   */
+  public CitesCollectorString(Map<String, Integer> cache, String field) {
+    super();
+    fieldCache = cache;
+    indexField = field;
+    recids = new HashSet<Integer>();
+  }
+
+  @Override
+  public void setScorer(Scorer scorer) throws IOException {
+    this.scorer = scorer;
+  }
+
+  /**
+   * Loads the document and records the translation of every value of
+   * {@link #indexField} that is present in {@link #fieldCache}.
+   */
+  @Override
+  public void collect(int doc) throws IOException {
+    // NOTE(review): the reader handed to setNextReader() is normally the
+    // per-segment reader and 'doc' is relative to it; confirm that adding
+    // docBase here is what the calling CollectorQuery expects.
+    Document document = reader.document(docBase + doc);
+    for (String value : document.getValues(indexField)) {
+      // single lookup; values without a translation are skipped
+      Integer recid = fieldCache.get(value);
+      if (recid != null) {
+        recids.add(recid);
+      }
+    }
+  }
+
+  @Override
+  public void setNextReader(IndexReader reader, int docBase)
+      throws IOException {
+    this.reader = reader;
+    this.docBase = docBase;
+  }
+
+  @Override
+  public boolean acceptsDocsOutOfOrder() {
+    return true;
+  }
+
+  /** Returns the set of translated ids collected so far. */
+  public Set<Integer> getHits() {
+    return recids;
+  }
+
+  @Override
+  public String toString() {
+    return "cites[using:" + indexField + "]";
+  }
+
+  /** Returns a hash code value for this object. */
+  @Override
+  public int hashCode() {
+    int fieldHash = (indexField == null) ? 0 : indexField.hashCode();
+    int cacheHash = (fieldCache == null) ? 0 : fieldCache.hashCode();
+    return fieldHash ^ cacheHash;
+  }
+
+  /**
+   * Returns true iff <code>o</code> is a CitesCollectorString that uses the
+   * same field and an equal translation table.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    // must test against this class; it is unrelated to CitesCollector
+    if (!(o instanceof CitesCollectorString)) {
+      return false;
+    }
+    CitesCollectorString other = (CitesCollectorString) o;
+    // compare the state itself: equal hash codes alone do not imply equality
+    return sameOrBothNull(indexField, other.indexField)
+        && sameOrBothNull(fieldCache, other.fieldCache);
+  }
+
+  /** Null-safe equality check. */
+  private static boolean sameOrBothNull(Object a, Object b) {
+    return (a == null) ? (b == null) : a.equals(b);
+  }
+
+}
@@ -42,8 +42,8 @@
private HashMap<String, Map<Integer, int[]>>
cache = new HashMap<String, Map<Integer, int[]>>(4);
- private HashMap<String, Map<Integer, Integer>>
- translation_cache = new HashMap<String, Map<Integer, Integer>>(2);
+ private HashMap<String, Object>
+ translation_cache = new HashMap<String, Object>(2);
private HashMap<String, Integer>
translation_cache_tracker = new HashMap<String, Integer>(2);
@@ -73,6 +73,17 @@ public void setCache(String name, Map<Integer, int[]> value) {
return fromFieldToLuceneId;
}
+ /**
+  * Builds a lookup table that maps every string of <code>idMapping</code>
+  * to its position in the array. When the same string occurs more than
+  * once, the last position wins.
+  *
+  * @param idMapping array whose indexes become the map values
+  * @return map from array element to array index
+  */
+ public Map<String, Integer> buildCacheStr(String[] idMapping) throws IOException {
+   final int size = idMapping.length;
+   Map<String, Integer> positions = new HashMap<String, Integer>(size);
+   for (int pos = 0; pos < size; pos++) {
+     positions.put(idMapping[pos], pos);
+   }
+   return positions;
+ }
+
/**
* Provides the mapping <b>from</b> the external source ids <b>into</b>
* lucene doc ids. This class is thread safe if you read data only.
@@ -97,12 +108,33 @@ public void setCache(String name, Map<Integer, int[]> value) {
Map<Integer, Integer> translTable = buildCache(idMapping);
translation_cache.put(field, translTable);
translation_cache_tracker.put(field, h);
+ return translTable;
}
}
- return translation_cache.get(field);
+ return (Map<Integer, Integer>) translation_cache.get(field);
}
+ /**
+  * Returns the translation table built from the string values of
+  * <code>refField</code> (value -> position in the array returned by
+  * <code>FieldCache.DEFAULT.getStrings</code>). The table is cached under
+  * the key <code>idField + refField</code> and rebuilt whenever the hash of
+  * the lucene cache for <code>idField</code> differs from the one recorded
+  * at the last build.
+  *
+  * @param reader   index reader the caches are read from
+  * @param idField  field whose lucene cache is used to detect index changes
+  * @param refField field whose string values are the keys of the table
+  * @return the (possibly freshly built) translation table
+  */
+ public Map<String, Integer> getTranslationCacheString(IndexReader reader, String idField, String refField) throws IOException {
+   int[] idMapping = getLuceneCache(reader, idField);
+   // NOTE(review): hashCode() of an array is identity based, so this only
+   // detects that getLuceneCache() handed back a different array instance.
+   Integer h = idMapping.hashCode();
+   String cacheKey = idField + refField;
+
+   // get() yields null when nothing was recorded yet, which never equals h
+   Integer old_hash = translation_cache_tracker.get(cacheKey);
+   if (!h.equals(old_hash)) {
+     synchronized (translation_cache_tracker) {
+       String[] strCache = FieldCache.DEFAULT.getStrings(reader, refField);
+       Map<String, Integer> translTable = buildCacheStr(strCache);
+       translation_cache.put(cacheKey, translTable);
+       // record the hash under the same key it is looked up with; storing
+       // it under idField forced a rebuild on every call and also marked
+       // the Integer translation cache of idField as up to date
+       translation_cache_tracker.put(cacheKey, h);
+       return translTable;
+     }
+   }
+   // only this method stores values under cacheKey, always as Map<String, Integer>
+   @SuppressWarnings("unchecked")
+   Map<String, Integer> cached = (Map<String, Integer>) translation_cache.get(cacheKey);
+   return cached;
+ }
+
/**
* Uninverts the lucene index, it grabs all the values from the index and discovers what
@@ -180,6 +212,46 @@ public String toString() {
}
+ public int[][] getUnInvertedDocidsStrField(IndexReader reader, String field, String externalIds) throws IOException {
+
+ // first check that the index wasn't updated
+ Integer old_hash = null;
+ if (translation_cache_tracker.containsKey(externalIds))
+ old_hash = translation_cache_tracker.get(externalIds);
+
+ boolean indexUnchanged = old_hash.equals(getLuceneCache(reader, externalIds).hashCode());
+
+ if (invertedCache.containsKey(field) && indexUnchanged) {
+ return (int[][]) invertedCache.get(field);
+ }
+
+ final Map<Integer, Integer> idMapping = getTranslationCache(reader, externalIds);
+
+
+
+ Object val = unInvertField(reader, new Entry(field, new FieldCache.IntParser() {
+ public int parseInt(String value) {
+ int v = Integer.parseInt(value);
+ if (idMapping.containsKey(v)) {
+ return idMapping.get(v);
+ }
+ else {
+ return -1;
+ }
+ }
+ protected Object readResolve() {
+ return FieldCache.DEFAULT_INT_PARSER;
+ }
+ @Override
+ public String toString() {
+ return FieldCache.class.getName()+".UNINVERTING_INT_PARSER";
+ }
+ }));
+ invertedCache.put(field, val);
+ return (int[][]) val;
+
+ }
+
/*
* A temporary hack to get uninverted values from the index
* the solr-4.0 already has a solution for the problem of
@@ -86,11 +86,15 @@ private void adoc(String... fields) throws IOException {
public void testCitationQueries() throws Exception {
- runCollectorQuery("references", "id", 0);
- //runCollectorQuery("breferences", "bibcode", 10);
- }
-
- public void runCollectorQuery(String refField, String idField, Integer idPrefix) throws Exception {
+
+
+ // for the queries that use the Integer values
+ // -------------------------------------------
+
+ String refField = "references";
+ String idField = "id";
+ int idPrefix = 0;
+
TermQuery q1 = new TermQuery(new Term("id", String.valueOf(idPrefix + 1)));
TermQuery q2 = new TermQuery(new Term("id", String.valueOf(idPrefix + 2)));
TermQuery q3 = new TermQuery(new Term("id", String.valueOf(idPrefix + 3)));
@@ -189,6 +193,110 @@ public void runCollectorQuery(String refField, String idField, Integer idPrefix)
assertTrue(c3.equals(c4));
+ // for the queries that use the String values
+ // ------------------------------------------
+
+ refField = "references";
+ idField = "id";
+ idPrefix = 10;
+
+ q1 = new TermQuery(new Term("id", String.valueOf(idPrefix + 1)));
+ q2 = new TermQuery(new Term("id", String.valueOf(idPrefix + 2)));
+ q3 = new TermQuery(new Term("id", String.valueOf(idPrefix + 3)));
+ q4 = new TermQuery(new Term("id", String.valueOf(idPrefix + 4)));
+ q5 = new TermQuery(new Term("id", String.valueOf(idPrefix + 5)));
+ q6 = new TermQuery(new Term("id", String.valueOf(idPrefix + 6)));
+ q7 = new TermQuery(new Term("id", String.valueOf(idPrefix + 7)));
+ q99 = new TermQuery(new Term("id", String.valueOf(idPrefix + 99)));
+
+ bq13 = new BooleanQuery();
+ bq13.add(q1, Occur.SHOULD);
+ bq13.add(q3, Occur.SHOULD);
+
+ bq123 = new BooleanQuery();
+ bq123.add(q1, Occur.SHOULD);
+ bq123.add(q2, Occur.SHOULD);
+ bq123.add(q3, Occur.SHOULD);
+
+ bq1234 = new BooleanQuery();
+ bq1234.add(q1, Occur.SHOULD);
+ bq1234.add(q2, Occur.SHOULD);
+ bq1234.add(q3, Occur.SHOULD);
+ bq1234.add(q4, Occur.SHOULD);
+
+ bq15 = new BooleanQuery();
+ bq15.add(q1, Occur.SHOULD);
+ bq15.add(q5, Occur.SHOULD);
+
+ // just a test that index is OK
+ assertEquals(1, searcher.search(q1, 10).totalHits);
+ assertEquals(1, searcher.search(q2, 10).totalHits);
+ assertEquals(1, searcher.search(q3, 10).totalHits);
+ assertEquals(0, searcher.search(q99, 10).totalHits);
+ assertEquals(2, searcher.search(bq13, 10).totalHits);
+
+
+ // now test of references ( X --> (x))
+ Map<String, Integer> scache = DictionaryRecIdCache.INSTANCE.getTranslationCacheString(searcher.getIndexReader(), idField, refField);
+ scache = DictionaryRecIdCache.INSTANCE.getTranslationCacheString(searcher.getIndexReader(), idField, refField);
+ scache = DictionaryRecIdCache.INSTANCE.getTranslationCacheString(searcher.getIndexReader(), idField, refField);
+
+ assertEquals(3, searcher.search(new CollectorQuery(q1, new CitesCollectorString(scache, refField)), 10).totalHits);
+ assertEquals(0, searcher.search(new CollectorQuery(q2, new CitesCollectorString(scache, refField)), 10).totalHits);
+ assertEquals(2, searcher.search(new CollectorQuery(q3, new CitesCollectorString(scache, refField)), 10).totalHits);
+ assertEquals(2, searcher.search(new CollectorQuery(q4, new CitesCollectorString(scache, refField)), 10).totalHits);
+ assertEquals(0, searcher.search(new CollectorQuery(q5, new CitesCollectorString(scache, refField)), 10).totalHits);
+ assertEquals(0, searcher.search(new CollectorQuery(q6, new CitesCollectorString(scache, refField)), 10).totalHits);
+ assertEquals(0, searcher.search(new CollectorQuery(q99, new CitesCollectorString(scache, refField)), 10).totalHits);
+ assertEquals(5, searcher.search(new CollectorQuery(bq13, new CitesCollectorString(scache, refField)), 10).totalHits);
+
+ docs = searcher.search(new CollectorQuery(bq13, new CitesCollectorString(scache, refField)), 10).scoreDocs;
+
+ ar = new ArrayList<Integer>();
+ for (ScoreDoc d: docs) {
+ Document doc = reader.document(d.doc);
+ ar.add(Integer.valueOf(doc.get("id")));
+ }
+ er = Arrays.asList(idPrefix + 2, idPrefix + 3, idPrefix + 4, idPrefix + 5, idPrefix + 6);
+ assertTrue(ar.containsAll(er));
+
+
+ int[][] invCache = DictionaryRecIdCache.INSTANCE.getUnInvertedDocids(reader, refField, idField);
+ invCache = DictionaryRecIdCache.INSTANCE.getUnInvertedDocids(reader, refField, idField);
+ invCache = DictionaryRecIdCache.INSTANCE.getUnInvertedDocids(reader, refField, idField);
+
+ assertEquals(1, searcher.search(new CollectorQuery(q1, new CitedByCollector(invCache, refField)), 10).totalHits);
+ assertEquals(2, searcher.search(new CollectorQuery(q2, new CitedByCollector(invCache, refField)), 10).totalHits);
+ assertEquals(1, searcher.search(new CollectorQuery(q3, new CitedByCollector(invCache, refField)), 10).totalHits);
+ assertEquals(1, searcher.search(new CollectorQuery(q4, new CitedByCollector(invCache, refField)), 10).totalHits);
+ assertEquals(2, searcher.search(new CollectorQuery(q5, new CitedByCollector(invCache, refField)), 10).totalHits);
+ assertEquals(1, searcher.search(new CollectorQuery(q6, new CitedByCollector(invCache, refField)), 10).totalHits);
+ assertEquals(0, searcher.search(new CollectorQuery(q99, new CitedByCollector(invCache, refField)), 10).totalHits);
+
+ assertEquals(2, searcher.search(new CollectorQuery(bq13, new CitedByCollector(invCache, refField)), 10).totalHits);
+ assertEquals(2, searcher.search(new CollectorQuery(bq123, new CitedByCollector(invCache, refField)), 10).totalHits);
+ assertEquals(2, searcher.search(new CollectorQuery(bq1234, new CitedByCollector(invCache, refField)), 10).totalHits);
+ assertEquals(3, searcher.search(new CollectorQuery(bq15, new CitedByCollector(invCache, refField)), 10).totalHits);
+
+
+ ar = new ArrayList<Integer>();
+ for (ScoreDoc d: searcher.search(new CollectorQuery(bq15, new CitedByCollector(invCache, refField)), 10).scoreDocs) {
+ Document doc = reader.document(d.doc);
+ ar.add(Integer.valueOf(doc.get("id")));
+ }
+ er = Arrays.asList(idPrefix + 3, idPrefix + 4, idPrefix + 7);
+ assertTrue(ar.containsAll(er));
+
+
+
+ CollectorQuery c1 = new CollectorQuery(bq15, new CitedByCollector(invCache, refField));
+ CollectorQuery c2 = new CollectorQuery(bq15, new CitedByCollector(invCache, refField));
+ assertTrue(c1.equals(c2));
+
+ CollectorQuery c3 = new CollectorQuery(bq13, new CitesCollector(cache, refField));
+ CollectorQuery c4 = new CollectorQuery(bq13, new CitesCollector(cache, refField));
+ assertTrue(c3.equals(c4));
+
}

0 comments on commit e3d4cb2

Please sign in to comment.