Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Keep whole lines for code highlighting in search #1871

Merged
4 commits merged into from Nov 25, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions gradle/changelog/search_highlighter.yaml
@@ -0,0 +1,2 @@
- type: changed
description: Keep whole lines for code highlighting in search ([#1871](https://github.com/scm-manager/scm-manager/pull/1871))
97 changes: 97 additions & 0 deletions scm-webapp/src/main/java/sonia/scm/search/LuceneHighlighter.java
@@ -0,0 +1,97 @@
/*
* MIT License
*
* Copyright (c) 2020-present Cloudogu GmbH and Contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

package sonia.scm.search;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.*;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public final class LuceneHighlighter {

private static final String PRE_TAG = "<|[[--";
private static final String POST_TAG = "--]]|>";

private static final int MAX_NUM_FRAGMENTS = 5;
private static final int FRAGMENT_SIZE = 200;

private final Analyzer analyzer;
private final Highlighter highlighter;

public LuceneHighlighter(Analyzer analyzer, Query query) {
this.analyzer = analyzer;
QueryScorer scorer = new QueryScorer(query);
this.highlighter = new Highlighter(new SimpleHTMLFormatter(PRE_TAG, POST_TAG), scorer);
this.highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, FRAGMENT_SIZE));
}

public String[] highlight(String fieldName, Indexed.Analyzer fieldAnalyzer, String value) throws InvalidTokenOffsetsException, IOException {
String[] fragments = highlighter.getBestFragments(analyzer, fieldName, value, MAX_NUM_FRAGMENTS);
if (fieldAnalyzer == Indexed.Analyzer.CODE) {
fragments = keepWholeLine(value, fragments);
}
return Arrays.stream(fragments)
.map(fragment -> fragment.replace(PRE_TAG, "<>").replace(POST_TAG, "</>"))
.toArray(String[]::new);
}

private String[] keepWholeLine(String value, String[] fragments) {
List<String> snippets = new ArrayList<>();
for (String fragment : fragments) {
snippets.add(keepWholeLine(value, fragment));
}
return snippets.toArray(new String[0]);
sdorra marked this conversation as resolved.
Show resolved Hide resolved
}

private String keepWholeLine(String content, String fragment) {
String raw = fragment.replace(PRE_TAG, "").replace(POST_TAG, "");
int index = content.indexOf(raw);

int c = index;
while (c > 0) {
c--;
if (content.charAt(c) == '\n') {
break;
}
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use public int lastIndexOf(int ch, int fromIndex)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is not possible here to use lastIndexOf, because we need not the next line ending from the position. We need the previous (c-- vs c++).


String snippet = content.substring(c, index) + fragment;

c = index + raw.length();
while (c < content.length()) {
c++;
if (content.charAt(c) == '\n') {
break;
}
}
sdorra marked this conversation as resolved.
Show resolved Hide resolved

return snippet + content.substring(index + raw.length(), c) + "\n";
}

}
18 changes: 3 additions & 15 deletions scm-webapp/src/main/java/sonia/scm/search/QueryResultFactory.java
Expand Up @@ -30,10 +30,7 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -47,23 +44,14 @@

public class QueryResultFactory {

private final Analyzer analyzer;
private final Highlighter highlighter;
private final LuceneHighlighter highlighter;
private final IndexSearcher searcher;
private final LuceneSearchableType searchableType;

public QueryResultFactory(Analyzer analyzer, IndexSearcher searcher, LuceneSearchableType searchableType, Query query) {
this.analyzer = analyzer;
this.searcher = searcher;
this.searchableType = searchableType;
this.highlighter = createHighlighter(query);
}

private Highlighter createHighlighter(Query query) {
return new Highlighter(
new SimpleHTMLFormatter("<>", "</>"),
new QueryScorer(query)
);
this.highlighter = new LuceneHighlighter(analyzer, query);
}

public QueryResult create(TopDocs topDocs) throws IOException, InvalidTokenOffsetsException {
Expand Down Expand Up @@ -98,7 +86,7 @@ private Optional<Hit.Field> field(Document document, LuceneSearchableField field
}

private String[] createFragments(LuceneSearchableField field, String value) throws InvalidTokenOffsetsException, IOException {
return highlighter.getBestFragments(analyzer, field.getName(), value, 5);
return highlighter.highlight(field.getName(), field.getAnalyzer(), value);
}

}
118 changes: 118 additions & 0 deletions scm-webapp/src/test/java/sonia/scm/search/LuceneHighlighterTest.java
@@ -0,0 +1,118 @@
/*
* MIT License
*
* Copyright (c) 2020-present Cloudogu GmbH and Contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

package sonia.scm.search;

import com.google.common.io.Resources;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.net.URL;
import java.nio.charset.StandardCharsets;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for {@link LuceneHighlighter}, covering plain text highlighting and
 * the whole line handling for fields analyzed as code.
 */
class LuceneHighlighterTest {

  private static final String FIELD = "content";

  @Test
  void shouldHighlightText() throws InvalidTokenOffsetsException, IOException {
    Query termQuery = new TermQuery(new Term(FIELD, "golgafrinchan"));
    String text = content("content");

    String[] snippets = new LuceneHighlighter(new StandardAnalyzer(), termQuery)
      .highlight(FIELD, Indexed.Analyzer.DEFAULT, text);

    assertThat(snippets).hasSize(1).allSatisfy(
      snippet -> assertThat(snippet).contains("<>Golgafrinchan</>")
    );
  }

  @Test
  void shouldHighlightCodeAndKeepLines() throws IOException, InvalidTokenOffsetsException {
    // lines which surround the match must be contained completely
    String[] expectedLines = {
      "\t\t\t\tint neighbors= getNeighbors(above, same, below);",
      "\t\t\t\tif(neighbors < 2 || neighbors > 3){",
      "\t\t\t\t\tnewGen[row]+= \"_\";//<2 or >3 neighbors -> <>die</>",
      "\t\t\t\t}else if(neighbors == 3){",
      "\t\t\t\t\tnewGen[row]+= \"#\";//3 neighbors -> spawn/live"
    };

    String[] snippets = highlightCode("GameOfLife.java", "die");

    assertThat(snippets).hasSize(1).allSatisfy(
      snippet -> assertThat(snippet.split("\n")).contains(expectedLines)
    );
  }

  @Test
  void shouldHighlightCodeInTsx() throws IOException, InvalidTokenOffsetsException {
    // the content itself contains "<>" and "</>", which must not confuse the highlighter
    String[] expectedLines = {
      "}) => {",
      " const renderIcon = () => {",
      " return <>{icon ? <Icon name={icon} color=\"<>inherit</>\" className=\"is-medium pr-1\" /> : null}</>;",
      " };"
    };

    String[] snippets = highlightCode("Button.tsx", "inherit");

    assertThat(snippets).hasSize(1).allSatisfy(
      snippet -> assertThat(snippet.split("\n")).contains(expectedLines)
    );
  }

  @Test
  void shouldHighlightFirstCodeLine() throws InvalidTokenOffsetsException, IOException {
    assertThat(highlightCode("GameOfLife.java", "gameoflife")).hasSize(1);
  }

  @Test
  void shouldHighlightLastCodeLine() throws InvalidTokenOffsetsException, IOException {
    assertThat(highlightCode("Button.tsx", "default")).hasSize(1);
  }

  /**
   * Searches the term in the given resource and returns the snippets,
   * which are created for a field that is analyzed as code.
   */
  private String[] highlightCode(String resource, String search) throws IOException, InvalidTokenOffsetsException {
    Query termQuery = new TermQuery(new Term(FIELD, search));
    String code = content(resource);

    LuceneHighlighter codeHighlighter = new LuceneHighlighter(new NonNaturalLanguageAnalyzer(), termQuery);
    return codeHighlighter.highlight(FIELD, Indexed.Analyzer.CODE, code);
  }

  /**
   * Reads the test resource with the given name (without the ".txt" suffix) as UTF-8 string.
   */
  @SuppressWarnings("UnstableApiUsage")
  private String content(String resource) throws IOException {
    String path = "sonia/scm/search/" + resource + ".txt";
    URL location = Resources.getResource(path);
    return Resources.toString(location, StandardCharsets.UTF_8);
  }

}
120 changes: 120 additions & 0 deletions scm-webapp/src/test/resources/sonia/scm/search/Button.tsx.txt
@@ -0,0 +1,120 @@
/*
* MIT License
*
* Copyright (c) 2020-present Cloudogu GmbH and Contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
import React, { FC, MouseEvent, ReactNode, KeyboardEvent } from "react";
import classNames from "classnames";
import { Link } from "react-router-dom";
import Icon from "../Icon";
import { createAttributesForTesting } from "../devBuild";

export type ButtonProps = {
label?: string;
title?: string;
loading?: boolean;
disabled?: boolean;
action?: (event: MouseEvent | KeyboardEvent) => void;
link?: string;
className?: string;
icon?: string;
fullWidth?: boolean;
reducedMobile?: boolean;
children?: ReactNode;
testId?: string;
};

type Props = ButtonProps & {
type?: "button" | "submit" | "reset";
color?: string;
};

const Button: FC<Props> = ({
link,
className,
icon,
fullWidth,
reducedMobile,
testId,
children,
label,
type = "button",
title,
loading,
disabled,
action,
color = "primary",
}) => {
const renderIcon = () => {
return <>{icon ? <Icon name={icon} color="inherit" className="is-medium pr-1" /> : null}</>;
};

if (link && !disabled) {
return (
<Link
className={classNames(
"button",
"is-" + color,
{ "is-loading": loading },
{ "is-fullwidth": fullWidth },
{ "is-reduced-mobile": reducedMobile },
className
)}
to={link}
aria-label={label}
>
{renderIcon()}{" "}
{(label || children) && (
<>
{label} {children}
</>
)}
</Link>
);
}

return (
<button
type={type}
title={title}
disabled={disabled}
onClick={(event) => action && action(event)}
className={classNames(
"button",
"is-" + color,
{ "is-loading": loading },
{ "is-fullwidth": fullWidth },
{ "is-reduced-mobile": reducedMobile },
className
)}
{...createAttributesForTesting(testId)}
>
{renderIcon()}{" "}
{(label || children) && (
<>
{label} {children}
</>
)}
</button>
);
};

export default Button;