Skip to content

Commit

Permalink
fix annotation regression (#4516)
Browse files Browse the repository at this point in the history
use Document to get the file type when checking if it is eligible for annotation

use document as a negative annotation capability check. if no document, fallback to repository check.

fixes #4515
  • Loading branch information
vladak committed Jan 30, 2024
1 parent ba9e18d commit 6bb48a9
Show file tree
Hide file tree
Showing 5 changed files with 161 additions and 54 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*/

/*
* Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2024, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017, 2021, Chris Fraire <cfraire@me.com>.
*/
package org.opengrok.indexer.analysis;
Expand Down Expand Up @@ -624,9 +624,9 @@ public void populateDocument(Document doc, File file, String path, AbstractAnaly
}

if (fa != null) {
AbstractAnalyzer.Genre g = fa.getGenre();
if (g == AbstractAnalyzer.Genre.PLAIN || g == AbstractAnalyzer.Genre.XREFABLE || g == AbstractAnalyzer.Genre.HTML) {
doc.add(new Field(QueryBuilder.T, g.typeName(), string_ft_stored_nanalyzed_norms));
AbstractAnalyzer.Genre genre = fa.getGenre();
if (isXrefable(genre.typeName())) {
doc.add(new Field(QueryBuilder.T, genre.typeName(), string_ft_stored_nanalyzed_norms));
}
fa.analyze(doc, StreamSource.fromFile(file), xrefOut);

Expand All @@ -635,6 +635,16 @@ public void populateDocument(Document doc, File file, String path, AbstractAnaly
}
}

/**
* @param genreName genre name
* @return whether it is possible to produce xref for the genre
*/
public static boolean isXrefable(String genreName) {
return (genreName.equals(AbstractAnalyzer.Genre.PLAIN.typeName())
|| genreName.equals(AbstractAnalyzer.Genre.XREFABLE.typeName())
|| genreName.equals(AbstractAnalyzer.Genre.HTML.typeName()));
}

private static void populateDocumentHistory(Document doc, File file) {
try {
HistoryGuru histGuru = HistoryGuru.getInstance();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*/

/*
* Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2024, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
*/
package org.opengrok.indexer.history;
Expand Down Expand Up @@ -47,10 +47,10 @@
import java.util.logging.Logger;
import java.util.stream.Collectors;

import org.apache.lucene.document.Document;
import org.apache.lucene.queryparser.classic.ParseException;
import org.jetbrains.annotations.Nullable;
import org.jetbrains.annotations.VisibleForTesting;
import org.opengrok.indexer.analysis.AbstractAnalyzer;
import org.opengrok.indexer.analysis.AnalyzerGuru;
import org.opengrok.indexer.configuration.CommandTimeoutType;
import org.opengrok.indexer.configuration.Configuration;
import org.opengrok.indexer.configuration.Configuration.RemoteSCM;
Expand All @@ -59,11 +59,15 @@
import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.logger.LoggerFactory;
import org.opengrok.indexer.search.DirectoryEntry;
import org.opengrok.indexer.search.QueryBuilder;
import org.opengrok.indexer.util.ForbiddenSymlinkException;
import org.opengrok.indexer.util.PathUtils;
import org.opengrok.indexer.util.Progress;
import org.opengrok.indexer.util.Statistics;

import static org.opengrok.indexer.analysis.AnalyzerGuru.isXrefable;
import static org.opengrok.indexer.index.IndexDatabase.getDocument;

/**
* The HistoryGuru is used to implement an transparent layer to the various
* source control systems.
Expand Down Expand Up @@ -229,10 +233,14 @@ public String getAnnotationCacheInfo() {
*/
@Nullable
private Annotation getAnnotation(File file, @Nullable String rev, boolean fallback) throws IOException {
Annotation annotation;

Repository repository = getRepository(file);
if (annotationCache != null && repository != null && repository.isAnnotationCacheEnabled()) {
if (repository == null) {
LOGGER.log(Level.FINER, "no repository found for ''{0}'' to check for annotation", file);
return null;
}

Annotation annotation;
if (annotationCache != null && repository.isAnnotationCacheEnabled()) {
try {
annotation = annotationCache.get(file, rev);
if (annotation != null) {
Expand All @@ -248,9 +256,13 @@ private Annotation getAnnotation(File file, @Nullable String rev, boolean fallba
return null;
}

if (!HistoryGuru.getInstance().hasAnnotation(file)) {
LOGGER.log(Level.FINER, "skipped getting annotation for file ''{0}}''", file);
return null;
}

// Fall back to repository based annotation.
// It might be possible to store the annotation to the annotation cache here, needs further thought.
annotation = getAnnotationFromRepository(file, rev);
annotation = getAnnotationFromRepository(file, rev, repository);
if (annotation != null) {
annotation.setRevision(LatestRevisionUtil.getLatestRevision(file));
}
Expand All @@ -260,24 +272,21 @@ private Annotation getAnnotation(File file, @Nullable String rev, boolean fallba

/**
* Annotate given file using repository method. Makes sure that the resulting annotation has the revision set.
* Assumes the {@link HistoryGuru#hasAnnotation(File)} check was already done.
* @param file file object to generate the annotation for
* @param rev revision to get the annotation for or {@code null} for latest revision of given file
* @param repository {@link Repository} instance
* @return annotation object or {@code null}
* @throws IOException on error when getting the annotation
*/
@Nullable
private Annotation getAnnotationFromRepository(File file, @Nullable String rev) throws IOException {
private Annotation getAnnotationFromRepository(File file, @Nullable String rev, Repository repository) throws IOException {
if (!env.getPathAccepter().accept(file)) {
LOGGER.log(Level.FINEST, "file ''{0}'' not accepted for annotation", file);
return null;
}

Repository repository = getRepository(file);
if (repository != null && hasAnnotation(file)) {
return repository.annotate(file, rev);
}

return null;
return repository.annotate(file, rev);
}

/**
Expand Down Expand Up @@ -681,35 +690,60 @@ public boolean hasHistoryCacheForFile(File file) {
}

/**
* Check if we can annotate the specified file.
*
* Check if annotation can be produced for the specified file. If related document is specified,
* it will be used for negative check. If the document indicates that the type of file is xref-able
* or the document is {@code null}, the capability to produce annotation for the file will be checked
* in related repository.
* @param file the file to check
* @return whether the file is under version control, can be annotated and the
* version control system supports annotation
* @param document {@link Document} object related to the file, can be {@code null}.
* @return whether the file can be annotated
*/
public boolean hasAnnotation(File file) {
public boolean hasAnnotation(File file, @Nullable Document document) {
if (file.isDirectory()) {
LOGGER.log(Level.FINEST, "no annotations for directories (''{0}'') to check annotation presence",
file);
LOGGER.log(Level.FINEST, "no annotations for directories (''{0}'')", file);
return false;
}

AbstractAnalyzer.Genre genre = AnalyzerGuru.getGenre(file.toString());
if (genre == null) {
LOGGER.log(Level.INFO, "will not produce annotation for ''{0}'' with unknown genre", file);
return false;
if (document != null) {
// The "T" field is added to the document currently only for xref-able input data,
// however it does not hurt to check in case this will change.
String fileType = document.get(QueryBuilder.T);
if (fileType == null || !isXrefable(fileType)) {
LOGGER.log(Level.FINEST, "no file type found in document for ''{0}'' or not xref-able", file);
return false;
}
}
if (genre.equals(AbstractAnalyzer.Genre.DATA) || genre.equals(AbstractAnalyzer.Genre.IMAGE)) {
LOGGER.log(Level.INFO, "no sense to produce annotation for binary file ''{0}''", file);

return hasAnnotationInRepo(file);
}

/**
* Check if annotation can be produced for the specified file. Wrapper of {@link #hasAnnotation(File, Document)}
* @param file the file to check
* @return whether the file can be annotated
*/
public boolean hasAnnotation(File file) {
if (file.isDirectory()) {
LOGGER.log(Level.FINEST, "no annotations for directories (''{0}'')", file);
return false;
}

Document document = null;
try {
document = getDocument(file);
} catch (ParseException | IOException e) {
LOGGER.log(Level.FINEST, String.format("cannot get document for '%s' to check annotation", file), e);
}

return hasAnnotation(file, document);
}

private boolean hasAnnotationInRepo(File file) {
Repository repo = getRepository(file);
if (repo == null) {
LOGGER.log(Level.FINEST, "cannot find repository for ''{0}'' to check annotation presence", file);
return false;
}

if (!repo.isWorking()) {
LOGGER.log(Level.FINEST, "repository {0} for ''{1}'' is not working to check annotation presence",
new Object[]{repo, file});
Expand Down Expand Up @@ -1116,7 +1150,7 @@ public void createAnnotationCache(File file, String latestRev) throws CacheExcep
LOGGER.log(Level.FINEST, "creating annotation cache for ''{0}''", file);
try {
Statistics statistics = new Statistics();
Annotation annotation = getAnnotationFromRepository(file, null);
Annotation annotation = getAnnotationFromRepository(file, null, repository);
statistics.report(LOGGER, Level.FINEST, String.format("retrieved annotation for ''%s''", file),
"annotation.retrieve.latency");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*/

/*
* Copyright (c) 2008, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2008, 2024, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
*/
package org.opengrok.indexer.index;
Expand Down Expand Up @@ -1229,23 +1229,32 @@ private void addFile(File file, String path, Ctags ctags) throws IOException, In

setDirty();

createAnnotationCache(file, doc);

for (IndexChangedListener listener : listeners) {
listener.fileAdded(path, fa.getClass().getSimpleName());
}
}

private static void createAnnotationCache(File file, Document doc) {
if (!HistoryGuru.getInstance().hasAnnotation(file, doc)) {
LOGGER.log(Level.FINER, "skipped creating annotation cache for file ''{0}}''", file);
return;
}

String lastRev = doc.get(QueryBuilder.LASTREV);
if (lastRev != null) {
try {
// The last revision should be fresh. Using LatestRevisionUtil#getLatestRevision()
// would not work here, because it uses IndexDatabase#getDocument() and the index searcher used therein
// does not know about updated document yet, so stale revision would be returned.
// does not know about the updated document yet, so stale revision would be returned.
// Instead, use the last revision (retrieved from the history in the populateDocument()
// call above) directly.
HistoryGuru.getInstance().createAnnotationCache(file, lastRev);
} catch (CacheException e) {
LOGGER.log(e.getLevel(), "failed to create annotation", e);
}
}

for (IndexChangedListener listener : listeners) {
listener.fileAdded(path, fa.getClass().getSimpleName());
}
}

private AbstractAnalyzer getAnalyzerFor(File file, String path)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@
*/

/*
* Copyright (c) 2008, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2008, 2024, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2019, 2020, Chris Fraire <cfraire@me.com>.
*/
package org.opengrok.indexer.history;

import static org.junit.jupiter.api.Assertions.assertAll;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
Expand All @@ -46,15 +47,24 @@
import java.util.List;
import java.util.stream.Collectors;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexNotFoundException;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.opengrok.indexer.analysis.AbstractAnalyzer;
import org.opengrok.indexer.analysis.AnalyzerGuru;
import org.opengrok.indexer.condition.EnabledForRepository;
import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.index.IndexDatabase;
import org.opengrok.indexer.search.DirectoryEntry;
import org.opengrok.indexer.search.QueryBuilder;
import org.opengrok.indexer.util.FileUtilities;
import org.opengrok.indexer.util.TestRepository;

Expand All @@ -71,6 +81,8 @@ class HistoryGuruTest {

private static int savedNestingMaximum;

HistoryGuru instance = HistoryGuru.getInstance();

@BeforeAll
static void setUpClass() throws Exception {
env = RuntimeEnvironment.getInstance();
Expand Down Expand Up @@ -139,14 +151,57 @@ void testBug16465() throws HistoryException, IOException {
}
}

@Test
void testAnnotationSmokeTest() throws Exception {
HistoryGuru instance = HistoryGuru.getInstance();
for (File f : FILES) {
if (instance.hasAnnotation(f)) {
assertNotNull(instance.annotate(f, null));
}
@ParameterizedTest
@ValueSource(booleans = {true, false})
void testHasAnnotationWithDocument(boolean isXrefable) {
File file = Paths.get(repository.getSourceRoot(), "git", "main.c").toFile();
assertTrue(file.isFile());
Document document = new Document();
String typeName;
if (isXrefable) {
typeName = AbstractAnalyzer.Genre.PLAIN.typeName();
} else {
typeName = AbstractAnalyzer.Genre.DATA.typeName();
}
assertEquals(isXrefable, AnalyzerGuru.isXrefable(typeName));
document.add(new Field(QueryBuilder.T, typeName, new FieldType(StringField.TYPE_STORED)));

/*
* This test class does not perform the 2nd phase of indexing, therefore getDocument() for any file
* should result in exception. This differentiates the two hasAnnotation() implementations.
*/
assertThrows(IndexNotFoundException.class, () -> IndexDatabase.getDocument(file));
assertTrue(instance.hasAnnotation(file));
assertEquals(isXrefable, instance.hasAnnotation(file, document));
}

/**
* Check that {@link HistoryGuru#hasAnnotation(File, Document)} falls back to repository check
* if the document is {@code null}. Complements the {@link #testHasAnnotationWithDocument(boolean)} test.
*/
@Test
void testHasAnnotationWithoutDocument() {
File file = Paths.get(repository.getSourceRoot(), "git", "main.c").toFile();
assertTrue(file.isFile());
Repository repositoryForGit = HistoryGuru.getInstance().getRepository(file);
assertNotNull(repositoryForGit);
assertTrue(repositoryForGit.isWorking());
var repoHasAnnotation = repositoryForGit.fileHasAnnotation(file);
assertAll(() -> assertEquals(repoHasAnnotation, instance.hasAnnotation(file, null)),
() -> assertEquals(repoHasAnnotation, instance.hasAnnotation(file)));
}

/**
* Simple smoke test for hasAnnotation(). Because of the way how {@link HistoryGuru#hasAnnotation(File)}
* works and given this test class does not perform 2nd phase of the indexing, there will be some binary
* files in the list. These should not be included if the 2nd indexing phase was done as the
* {@link HistoryGuru#hasAnnotation(File)} should use the index document for negative check.
*/
@Test
void testHasAnnotationSmokeTest() {
// If hasAnnotation() returns true for a file, it should be possible to actually construct the annotation.
List<File> filesWithAnnotation = FILES.stream().filter(instance::hasAnnotation).collect(Collectors.toList());
assertAll(filesWithAnnotation.stream().map(f -> () -> assertNotNull(instance.annotate(f, null))));
}

/**
Expand Down Expand Up @@ -184,6 +239,7 @@ void testAnnotationFallback() throws Exception {
assertNotNull(latestRev);
instance.createAnnotationCache(file, latestRev);
Repository repository = instance.getRepository(file);
assertNotNull(repository);

// Ensure the annotation is loaded from the cache by moving the dot git directory away.
final String tmpDirName = "gitdisabled";
Expand Down
Loading

0 comments on commit 6bb48a9

Please sign in to comment.