Skip to content

Commit

Permalink
Return correct term statistics when a field is not found in a shard
Browse files Browse the repository at this point in the history
If you ask for the term vectors of an artificial document with
term_statistics=true, but a shard does not have any terms of the doc's
field(s), it returns the doc's term vector values as the shard-level
term statistics. This commit fixes that to return 0 for ttf and also for
the field-level aggregated statistics.

This closes elastic#21906
  • Loading branch information
shaie committed Dec 1, 2016
1 parent 6522538 commit 2465853
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 1 deletion.
Expand Up @@ -71,7 +71,7 @@ void setFields(Fields termVectorsByField, Set<String> selectedFields, EnumSet<Fl

// if no terms found in this shard, fall back to an empty Terms so that all statistics are reported as 0
if (topLevelTerms == null) {
topLevelTerms = fieldTermVector;
topLevelTerms = EMPTY_TERMS;
}

TermsEnum topLevelIterator = topLevelTerms.iterator();
Expand Down Expand Up @@ -292,4 +292,30 @@ private void writePotentiallyNegativeVLong(long value) throws IOException {
// further...
output.writeVLong(Math.max(0, value + 1));
}

/**
 * A {@link Terms} instance that contains no terms at all.
 * <p>
 * Every aggregated statistic is reported as {@code 0}, no per-term feature
 * (frequencies, offsets, positions, payloads) is advertised, and
 * {@link #iterator()} hands out the shared {@code EMPTY_TERMS_ENUM}.
 */
private static final Terms EMPTY_TERMS = new Terms() {

    // --- iteration -------------------------------------------------------

    @Override
    public TermsEnum iterator() throws IOException {
        return EMPTY_TERMS_ENUM;
    }

    // --- field-level statistics: all zero --------------------------------

    @Override
    public long size() throws IOException {
        return 0;
    }

    @Override
    public long getSumTotalTermFreq() throws IOException {
        return 0;
    }

    @Override
    public long getSumDocFreq() throws IOException {
        return 0;
    }

    @Override
    public int getDocCount() throws IOException {
        return 0;
    }

    // --- capabilities: none ----------------------------------------------

    @Override
    public boolean hasFreqs() {
        return false;
    }

    @Override
    public boolean hasOffsets() {
        return false;
    }

    @Override
    public boolean hasPositions() {
        return false;
    }

    @Override
    public boolean hasPayloads() {
        return false;
    }
};

/**
 * A {@link TermsEnum} that never yields a term.
 * <p>
 * {@link #next()} and {@link #term()} return {@code null},
 * {@link #seekCeil(BytesRef)} always reports {@link SeekStatus#END},
 * seeking by ordinal is a no-op, all per-term statistics are {@code 0},
 * and no postings are available ({@code null}).
 */
private static final TermsEnum EMPTY_TERMS_ENUM = new TermsEnum() {

    // --- positioning -----------------------------------------------------

    @Override
    public BytesRef next() throws IOException {
        // there is nothing to iterate over
        return null;
    }

    @Override
    public SeekStatus seekCeil(BytesRef text) throws IOException {
        return SeekStatus.END;
    }

    @Override
    public void seekExact(long ord) throws IOException {
        // intentionally empty: there is no term to position on
    }

    // --- current-term accessors ------------------------------------------

    @Override
    public BytesRef term() throws IOException {
        return null;
    }

    @Override
    public long ord() throws IOException {
        return 0;
    }

    @Override
    public int docFreq() throws IOException {
        return 0;
    }

    @Override
    public long totalTermFreq() throws IOException {
        return 0;
    }

    @Override
    public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
        return null;
    }
};

}
Expand Up @@ -848,6 +848,16 @@ public void testArtificialNoDoc() throws IOException {
.get();
assertThat(resp.isExists(), equalTo(true));
checkBrownFoxTermVector(resp.getFields(), "field1", false);

// Since the index is empty, all of the artificial document's "term_statistics" should be 0/absent
Terms terms = resp.getFields().terms("field1");
assertEquals("sumDocFreq should be 0 for a non-existing field!", 0, terms.getSumDocFreq());
assertEquals("sumTotalTermFreq should be 0 for a non-existing field!", 0, terms.getSumTotalTermFreq());
TermsEnum termsEnum = terms.iterator(); // we're guaranteed to receive terms for that field
while (termsEnum.next() != null) {
String term = termsEnum.term().utf8ToString();
assertEquals("term [" + term + "] does not exist in the index; ttf should be 0!", 0, termsEnum.totalTermFreq());
}
}

public void testPerFieldAnalyzer() throws IOException {
Expand Down

0 comments on commit 2465853

Please sign in to comment.