Skip to content

Commit

Permalink
Highlight only queried fields (#1887)
Browse files Browse the repository at this point in the history
Expert queries highlight only the fields which are used in the query.
  • Loading branch information
sdorra committed Dec 7, 2021
1 parent ad5bbfe commit 9375d26
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 9 deletions.
2 changes: 2 additions & 0 deletions gradle/changelog/highlight_queried_fields.yaml
@@ -0,0 +1,2 @@
- type: fixed
description: Highlight only queried fields ([#1887](https://github.com/scm-manager/scm-manager/pull/1887))
23 changes: 22 additions & 1 deletion scm-webapp/src/main/java/sonia/scm/search/LuceneHighlighter.java
Expand Up @@ -26,10 +26,17 @@

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public final class LuceneHighlighter {

Expand All @@ -42,11 +49,25 @@ public final class LuceneHighlighter {
private final Analyzer analyzer;
private final Highlighter highlighter;

private final Set<String> queriedFields = new HashSet<>();

/**
 * Creates a highlighter for the given query and records the names of the
 * fields which the query reports to a {@link QueryVisitor}.
 *
 * @param analyzer analyzer kept by this instance
 * @param query query used for scoring fragments and for collecting the queried field names
 */
public LuceneHighlighter(Analyzer analyzer, Query query) {
  this.analyzer = analyzer;

  // the same scorer instance is shared by the highlighter and the fragmenter
  QueryScorer queryScorer = new QueryScorer(query);
  SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(PRE_TAG, POST_TAG);
  this.highlighter = new Highlighter(formatter, queryScorer);
  SimpleSpanFragmenter fragmenter = new SimpleSpanFragmenter(queryScorer, FRAGMENT_SIZE);
  this.highlighter.setTextFragmenter(fragmenter);

  // remember every field name which is offered to the visitor,
  // isHighlightable(...) consults this set later on
  QueryVisitor fieldCollector = new QueryVisitor() {
    @Override
    public boolean acceptField(String field) {
      queriedFields.add(field);
      boolean accepted = super.acceptField(field);
      return accepted;
    }
  };
  query.visit(fieldCollector);
}

/**
 * Tells whether fragments should be created for the given field.
 *
 * @param field field to check
 * @return {@code true} only if the field is flagged as highlighted and its name
 *         was collected from the query passed to the constructor
 */
public boolean isHighlightable(LuceneSearchableField field) {
  // the name is only looked up for fields which are flagged as highlighted
  if (!field.isHighlighted()) {
    return false;
  }
  String fieldName = field.getName();
  return queriedFields.contains(fieldName);
}

public String[] highlight(String fieldName, Indexed.Analyzer fieldAnalyzer, String value) throws InvalidTokenOffsetsException, IOException {
Expand Down
Expand Up @@ -132,16 +132,16 @@ private Query createExpertQuery(LuceneSearchableType searchableType, QueryParams
}

public Query createBestGuessQuery(LuceneSearchableType searchableType, QueryBuilder.QueryParams queryParams) throws QueryNodeException, IOException {
String[] fieldNames = searchableType.getFieldNames();
if (fieldNames == null || fieldNames.length == 0) {
String[] defaultFieldNames = searchableType.getDefaultFieldNames();
if (defaultFieldNames == null || defaultFieldNames.length == 0) {
throw new NoDefaultQueryFieldsFoundException(searchableType.getType());
}

String queryString = queryParams.getQueryString();
boolean hasWildcard = containsWildcard(queryString);

BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (String fieldName : fieldNames) {
for (String fieldName : defaultFieldNames) {
Query query;
if (!hasWildcard) {
query = createWildcardQuery(fieldName, queryString);
Expand Down
Expand Up @@ -29,7 +29,6 @@
import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
Expand All @@ -47,7 +46,7 @@ public class LuceneSearchableType implements SearchableType {
String name;
String permission;
List<LuceneSearchableField> fields;
String[] fieldNames;
String[] defaultFieldNames;
Map<String, Float> boosts;
Map<String, PointsConfig> pointsConfig;
TypeConverter typeConverter;
Expand All @@ -57,7 +56,7 @@ public LuceneSearchableType(Class<?> type, @Nonnull IndexedType annotation, List
this.name = Names.create(type, annotation);
this.permission = Strings.emptyToNull(annotation.permission());
this.fields = fields;
this.fieldNames = fieldNames(fields);
this.defaultFieldNames = defaultFieldNames(fields);
this.boosts = boosts(fields);
this.pointsConfig = pointsConfig(fields);
this.typeConverter = TypeConverters.create(type);
Expand All @@ -67,7 +66,7 @@ public Optional<String> getPermission() {
return Optional.ofNullable(permission);
}

private String[] fieldNames(List<LuceneSearchableField> fields) {
private String[] defaultFieldNames(List<LuceneSearchableField> fields) {
return fields.stream()
.filter(LuceneSearchableField::isDefaultQuery)
.map(LuceneSearchableField::getName)
Expand Down
Expand Up @@ -74,7 +74,7 @@ private Hit createHit(ScoreDoc scoreDoc) throws IOException, InvalidTokenOffsets
private Optional<Hit.Field> field(Document document, LuceneSearchableField field) throws IOException, InvalidTokenOffsetsException {
Object value = field.value(document);
if (value != null) {
if (field.isHighlighted()) {
if (highlighter.isHighlightable(field)) {
String[] fragments = createFragments(field, value.toString());
if (fragments.length > 0) {
return of(new Hit.HighlightedField(fragments));
Expand Down
Expand Up @@ -30,16 +30,25 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

import java.io.IOException;
import java.net.URL;
import java.nio.charset.StandardCharsets;

import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;

@ExtendWith(MockitoExtension.class)
class LuceneHighlighterTest {



@Test
void shouldHighlightText() throws InvalidTokenOffsetsException, IOException {
StandardAnalyzer analyzer = new StandardAnalyzer();
Expand Down Expand Up @@ -99,6 +108,45 @@ void shouldHighlightLastCodeLine() throws InvalidTokenOffsetsException, IOExcept
assertThat(snippets).hasSize(1);
}

/**
 * Tests for {@code LuceneHighlighter.isHighlightable}, using a highlighter
 * which was created from a term query on the field "content".
 */
@Nested
class IsHighlightableTests {

  @Mock
  private LuceneSearchableField field;

  private LuceneHighlighter highlighter;

  @BeforeEach
  void setUpHighlighter() {
    // the only field referenced by the query is "content"
    Term queriedTerm = new Term("content", "ka");
    highlighter = new LuceneHighlighter(new StandardAnalyzer(), new TermQuery(queriedTerm));
  }

  @Test
  void shouldReturnFalseForNonHighlightedField() {
    when(field.isHighlighted()).thenReturn(false);

    boolean highlightable = highlighter.isHighlightable(field);

    assertThat(highlightable).isFalse();
  }

  @Test
  void shouldReturnFalseIfNotInQuery() {
    // flagged as highlighted, but "name" is not part of the query
    when(field.isHighlighted()).thenReturn(true);
    when(field.getName()).thenReturn("name");

    boolean highlightable = highlighter.isHighlightable(field);

    assertThat(highlightable).isFalse();
  }

  @Test
  void shouldReturnTrue() {
    // flagged as highlighted and "content" is the queried field
    when(field.isHighlighted()).thenReturn(true);
    when(field.getName()).thenReturn("content");

    boolean highlightable = highlighter.isHighlightable(field);

    assertThat(highlightable).isTrue();
  }

}

private String[] highlightCode(String resource, String search) throws IOException, InvalidTokenOffsetsException {
NonNaturalLanguageAnalyzer analyzer = new NonNaturalLanguageAnalyzer();
Query query = new TermQuery(new Term("content", search));
Expand Down

0 comments on commit 9375d26

Please sign in to comment.