Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

When executing MLT with multiple fields, it should consider them all #1

Open
wants to merge 1 commit into
base: lucene_solr_5_3
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -656,13 +656,10 @@ private PriorityQueue<ScoreTerm> createQueue(Map<String, Int> words) throws IOEx
}

// go through all the fields and find the largest document frequency
String topField = fieldNames[0];
int docFreq = 0;
for (String fieldName : fieldNames) {
int freq = ir.docFreq(new Term(fieldName, word));
topField = (freq > docFreq) ? fieldName : topField;
docFreq = (freq > docFreq) ? freq : docFreq;
}
String[] array = word.split(":");
String fieldName = array[0];
word = array[1];
int docFreq = ir.docFreq(new Term(fieldName, word));

if (minDocFreq > 0 && docFreq < minDocFreq) {
continue; // filter out words that don't occur in enough docs
Expand All @@ -681,11 +678,11 @@ private PriorityQueue<ScoreTerm> createQueue(Map<String, Int> words) throws IOEx

if (queue.size() < limit) {
// there is still space in the queue
queue.add(new ScoreTerm(word, topField, score, idf, docFreq, tf));
queue.add(new ScoreTerm(word, fieldName, score, idf, docFreq, tf));
} else {
ScoreTerm term = queue.top();
if (term.score < score) { // update the smallest in the queue in place and update the queue.
term.update(word, topField, score, idf, docFreq, tf);
term.update(word, fieldName, score, idf, docFreq, tf);
queue.updateTop();
}
}
Expand Down Expand Up @@ -741,7 +738,7 @@ private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
}
}
} else {
addTermFrequencies(termFreqMap, vector);
addTermFrequencies(termFreqMap, vector, fieldName);
}
}

Expand Down Expand Up @@ -773,7 +770,7 @@ private PriorityQueue<ScoreTerm> retrieveTerms(Map<String, Collection<Object>> f
* @param termFreqMap a Map of terms and their frequencies
* @param vector List of terms and their frequencies for a doc/field
*/
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, String fieldName) throws IOException {
final TermsEnum termsEnum = vector.iterator();
final CharsRefBuilder spare = new CharsRefBuilder();
BytesRef text;
Expand All @@ -786,10 +783,11 @@ private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) thro
final int freq = (int) termsEnum.totalTermFreq();

// increment frequency
Int cnt = termFreqMap.get(term);
String mapKey = fieldName + ":" + term;
Int cnt = termFreqMap.get(mapKey);
if (cnt == null) {
cnt = new Int();
termFreqMap.put(term, cnt);
termFreqMap.put(mapKey, cnt);
cnt.x = freq;
} else {
cnt.x += freq;
Expand Down Expand Up @@ -826,9 +824,10 @@ private void addTermFrequencies(Reader r, Map<String, Int> termFreqMap, String f
}

// increment frequency
Int cnt = termFreqMap.get(word);
String mapKey = fieldName + ":" + word;
Int cnt = termFreqMap.get(mapKey);
if (cnt == null) {
termFreqMap.put(word, new Int());
termFreqMap.put(mapKey, new Int());
} else {
cnt.x++;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ public void setUp() throws Exception {
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

// Add series of docs with specific information for MoreLikeThis
Document doc = new Document();
doc.add(newTextField("text", "lucene", Field.Store.YES));
doc.add(newTextField("foobar", "foobared", Field.Store.YES));
doc.add(newTextField("text", "bar", Field.Store.YES));
doc.add(newTextField("foobar", "bar", Field.Store.YES));
writer.addDocument(doc);

addDoc(writer, "lucene");
addDoc(writer, "lucene release");
addDoc(writer, "apache");
Expand Down Expand Up @@ -80,6 +87,7 @@ private void addDoc(RandomIndexWriter writer, String[] texts) throws IOException
Document doc = new Document();
for (String text : texts) {
doc.add(newTextField("text", text, Field.Store.YES));
doc.add(newTextField("foobar", text, Field.Store.YES));
}
writer.addDocument(doc);
}
Expand Down Expand Up @@ -152,7 +160,17 @@ public void testMultiFields() throws Exception {
mlt.setMinTermFreq(1);
mlt.setMinWordLen(1);
mlt.setFieldNames(new String[] {"text", "foobar"});
mlt.like("foobar", new StringReader("this is a test"));

BooleanQuery query = (BooleanQuery) mlt.like(0);
Collection<BooleanClause> clauses = query.clauses();
assertEquals("Expected 4 clauses only!", 4, clauses.size());
for (BooleanClause clause : clauses) {
Term term = ((TermQuery) clause.getQuery()).getTerm();
assertTrue(Arrays.asList(new Term("text", "lucene"),
new Term("foobar", "foobared"),
new Term("text", "bar"),
new Term("foobar", "bar")).contains(term));
}
analyzer.close();
}

Expand Down