Skip to content

Commit

Permalink
Show WARNINGs about content classified as Huge Text
Browse files Browse the repository at this point in the history
  • Loading branch information
idodeclare committed Apr 21, 2020
1 parent 63525bd commit 5dca6b3
Show file tree
Hide file tree
Showing 16 changed files with 87 additions and 75 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

/*
* Copyright (c) 2013, 2018 Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2018, Chris Fraire <cfraire@me.com>.
* Portions Copyright (c) 2018, 2020, Chris Fraire <cfraire@me.com>.
*/
package org.opengrok.indexer.analysis;

Expand Down Expand Up @@ -52,6 +52,11 @@ public abstract class StreamSource {
*/
public abstract InputStream getStream() throws IOException;

/**
* Gets a reportable identifier of the source.
* @return a string identifying this source, for use in reports such as
* log messages
*/
public abstract String getSourceIdentifier();

/**
* Helper method that creates a {@code StreamSource} instance that
* reads data from a file.
Expand All @@ -65,6 +70,11 @@ public static StreamSource fromFile(final File file) {
public InputStream getStream() throws IOException {
    // Open the captured file and wrap the raw stream in a buffer.
    final InputStream rawFileStream = new FileInputStream(file);
    return new BufferedInputStream(rawFileStream);
}

/**
 * Identifies this source by the absolute path of the captured file.
 */
@Override
public String getSourceIdentifier() {
    final String absolutePath = file.getAbsolutePath();
    return absolutePath;
}
};
}

Expand All @@ -82,6 +92,11 @@ public static StreamSource fromString(final String str) {
public InputStream getStream() throws IOException {
    // Every call returns a new in-memory stream over the captured bytes.
    final InputStream inMemory = new ByteArrayInputStream(sbuf);
    return inMemory;
}

/**
 * Identifies this source by a fixed label.
 */
@Override
public String getSourceIdentifier() {
    final String identifier = "String";
    return identifier;
}
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@ public InputStream getStream() throws IOException {
throw new IOException("Not BZIP2 format");
}
}

/**
 * Reports the identifier of {@code src}.
 */
@Override
public String getSourceIdentifier() {
    final String delegateIdentifier = src.getSourceIdentifier();
    return delegateIdentifier;
}
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,14 @@
import org.opengrok.indexer.analysis.StreamSource;
import org.opengrok.indexer.analysis.data.HugeTextAnalyzerFactory;
import org.opengrok.indexer.configuration.RuntimeEnvironment;
import org.opengrok.indexer.logger.LoggerFactory;
import org.opengrok.indexer.search.QueryBuilder;

import java.io.IOException;
import java.io.InputStream;
import java.io.Writer;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* Represents a base for compressed formats (e.g. gzip or bzip2) but not for
Expand All @@ -46,6 +49,8 @@
*/
public abstract class CompressedAnalyzer extends FileAnalyzer {

private static final Logger LOGGER = LoggerFactory.getLogger(CompressedAnalyzer.class);

private static final int CHUNK_SIZE = 8 * 1024;

protected Genre g;
Expand All @@ -68,8 +73,13 @@ protected void analyzeUncompressed(

if (fa.getGenre() == Genre.PLAIN) {
if (meetsHugeTextThreshold(compressedSrc)) {
String origFileTypeName = fa.getFileTypeName();
fa = HugeTextAnalyzerFactory.DEFAULT_INSTANCE.getAnalyzer();
g = Genre.DATA;
if (LOGGER.isLoggable(Level.WARNING)) {
LOGGER.log(Level.WARNING, "{0} is compressed huge text: {1}",
new Object[]{origFileTypeName, compressedSrc.getSourceIdentifier()});
}
} else {
g = Genre.XREFABLE;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ public InputStream getStream() throws IOException {
return new BufferedInputStream(
new GZIPInputStream(src.getStream()));
}

/**
 * Reports the identifier of {@code src}, the source whose stream is
 * decompressed by this wrapper.
 */
@Override
public String getSourceIdentifier() {
    final String delegateIdentifier = src.getSourceIdentifier();
    return delegateIdentifier;
}
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -736,7 +736,12 @@ private void addFile(File file, String path, Ctags ctags)

if (AbstractAnalyzer.Genre.PLAIN.equals(fa.getGenre()) &&
file.length() >= env.getHugeTextThresholdBytes()) {
String origFileTypeName = fa.getFileTypeName();
fa = HugeTextAnalyzerFactory.DEFAULT_INSTANCE.getAnalyzer();
if (LOGGER.isLoggable(Level.WARNING)) {
LOGGER.log(Level.WARNING, "{0} is huge text: {1}",
new Object[]{origFileTypeName, path});
}
}

for (IndexChangedListener listener : listeners) {
Expand Down Expand Up @@ -1832,8 +1837,8 @@ private boolean checkSettings(File file, String path) throws IOException {
// If it is a Huge Text file, re-check constraints.
if (AnalyzerGuru.getHugeTextFileTypeName().equals(fileTypeName) &&
file.length() < env.getHugeTextThresholdBytes()) {
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.log(Level.FINE, "{0} no longer qualifies: {1}",
if (LOGGER.isLoggable(Level.WARNING)) {
LOGGER.log(Level.WARNING, "{0} no longer qualifies: {1}",
new Object[]{fileTypeName, path});
}
return false;
Expand All @@ -1843,8 +1848,8 @@ private boolean checkSettings(File file, String path) throws IOException {
// If the Genre is PLAIN, re-check Huge Text file constraints.
if (AbstractAnalyzer.Genre.PLAIN.equals(fa.getGenre()) &&
file.length() >= env.getHugeTextThresholdBytes()) {
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.log(Level.FINE, "{0} is now a huge text file: {1}",
if (LOGGER.isLoggable(Level.WARNING)) {
LOGGER.log(Level.WARNING, "{0} is now huge text: {1}",
new Object[]{fileTypeName, path});
}
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

/*
* Copyright (c) 2010, 2019, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017-2019, Chris Fraire <cfraire@me.com>.
* Portions Copyright (c) 2017-2020, Chris Fraire <cfraire@me.com>.
*/

package org.opengrok.indexer.analysis;
Expand Down Expand Up @@ -505,6 +505,11 @@ public void testJavaClassAnalyzer() throws Exception {
".class";
return StringWriter.class.getResourceAsStream(path);
}

/**
 * Identifies this test source by a fixed label.
 */
@Override
public String getSourceIdentifier() {
    final String identifier = "StringWriter.class";
    return identifier;
}
};
Document doc = new Document();
StringWriter out = new StringWriter();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

/*
* Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017, Chris Fraire <cfraire@me.com>.
* Portions Copyright (c) 2017, 2020, Chris Fraire <cfraire@me.com>.
*/
package org.opengrok.indexer.analysis;

Expand Down Expand Up @@ -49,6 +49,11 @@ private static StreamSource getStreamSource(final byte[] bytes) {
public InputStream getStream() throws IOException {
    // Every call returns a new in-memory stream over the captured array.
    final InputStream inMemory = new ByteArrayInputStream(bytes);
    return inMemory;
}

/**
 * Identifies this test source by a fixed label.
 */
@Override
public String getSourceIdentifier() {
    final String identifier = "byte[]";
    return identifier;
}
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

/*
* Copyright (c) 2015, 2018 Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017-2019, Chris Fraire <cfraire@me.com>.
* Portions Copyright (c) 2017-2020, Chris Fraire <cfraire@me.com>.
*/
package org.opengrok.indexer.analysis.c;

Expand Down Expand Up @@ -60,15 +60,6 @@ public class CAnalyzerFactoryTest {
private static TestRepository repository;
private static AbstractAnalyzer analyzer;

/**
 * Creates a {@code StreamSource} whose stream is opened from the file
 * with the specified name.
 */
private static StreamSource getStreamSource(final String fname) {
    final StreamSource fileBacked = new StreamSource() {
        @Override
        public InputStream getStream() throws IOException {
            final InputStream opened = new FileInputStream(fname);
            return opened;
        }
    };
    return fileBacked;
}

@BeforeClass
public static void setUpClass() throws Exception {
ctags = new Ctags();
Expand Down Expand Up @@ -110,7 +101,7 @@ public void testScopeAnalyzer() throws Exception {
StringWriter xrefOut = new StringWriter();
analyzer.setCtags(ctags);
analyzer.setScopesEnabled(true);
analyzer.analyze(doc, getStreamSource(path), xrefOut);
analyzer.analyze(doc, StreamSource.fromFile(f), xrefOut);

IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
assertNotNull(scopesField);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

/*
* Copyright (c) 2015, 2018 Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017-2019, Chris Fraire <cfraire@me.com>.
* Portions Copyright (c) 2017-2020, Chris Fraire <cfraire@me.com>.
*/
package org.opengrok.indexer.analysis.c;

Expand Down Expand Up @@ -60,15 +60,6 @@ public class CxxAnalyzerFactoryTest {
private static TestRepository repository;
private static AbstractAnalyzer analyzer;

/**
 * Creates a {@code StreamSource} whose stream is opened from the file
 * with the specified name.
 */
private static StreamSource getStreamSource(final String fname) {
    final StreamSource fileBacked = new StreamSource() {
        @Override
        public InputStream getStream() throws IOException {
            final InputStream opened = new FileInputStream(fname);
            return opened;
        }
    };
    return fileBacked;
}

@BeforeClass
public static void setUpClass() throws Exception {
ctags = new Ctags();
Expand Down Expand Up @@ -111,7 +102,7 @@ public void testScopeAnalyzer() throws Exception {
analyzer.setScopesEnabled(true);
System.out.println(path);

analyzer.analyze(doc, getStreamSource(path), xrefOut);
analyzer.analyze(doc, StreamSource.fromFile(f), xrefOut);

IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
assertNotNull(scopesField);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

/*
* Copyright (c) 2016, 2018 Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017-2019, Chris Fraire <cfraire@me.com>.
* Portions Copyright (c) 2017-2020, Chris Fraire <cfraire@me.com>.
*/
package org.opengrok.indexer.analysis.clojure;

Expand Down Expand Up @@ -57,15 +57,6 @@ public class ClojureAnalyzerFactoryTest {
private static TestRepository repository;
private static AbstractAnalyzer analyzer;

/**
 * Creates a {@code StreamSource} whose stream is opened from the file
 * with the specified name.
 */
private static StreamSource getStreamSource(final String fname) {
    final StreamSource fileBacked = new StreamSource() {
        @Override
        public InputStream getStream() throws IOException {
            final InputStream opened = new FileInputStream(fname);
            return opened;
        }
    };
    return fileBacked;
}

@BeforeClass
public static void setUpClass() throws Exception {
ctags = new Ctags();
Expand Down Expand Up @@ -106,7 +97,7 @@ public void testScopeAnalyzer() throws Exception {
string_ft_nstored_nanalyzed_norms));
StringWriter xrefOut = new StringWriter();
analyzer.setCtags(ctags);
analyzer.analyze(doc, getStreamSource(path), xrefOut);
analyzer.analyze(doc, StreamSource.fromFile(f), xrefOut);

Definitions definitions = Definitions.deserialize(doc.getField(QueryBuilder.TAGS).binaryValue().bytes);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

/*
* Copyright (c) 2015, 2018 Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017-2019, Chris Fraire <cfraire@me.com>.
* Portions Copyright (c) 2017-2020, Chris Fraire <cfraire@me.com>.
*/
package org.opengrok.indexer.analysis.csharp;

Expand Down Expand Up @@ -56,15 +56,6 @@ public class CSharpAnalyzerFactoryTest {
private static TestRepository repository;
private static AbstractAnalyzer analyzer;

/**
 * Creates a {@code StreamSource} whose stream is opened from the file
 * with the specified name.
 */
private static StreamSource getStreamSource(final String fname) {
    final StreamSource fileBacked = new StreamSource() {
        @Override
        public InputStream getStream() throws IOException {
            final InputStream opened = new FileInputStream(fname);
            return opened;
        }
    };
    return fileBacked;
}

@BeforeClass
public static void setUpClass() throws Exception {
ctags = new Ctags();
Expand Down Expand Up @@ -105,7 +96,7 @@ public void testScopeAnalyzer() throws Exception {
StringWriter xrefOut = new StringWriter();
analyzer.setCtags(ctags);
analyzer.setScopesEnabled(true);
analyzer.analyze(doc, getStreamSource(path), xrefOut);
analyzer.analyze(doc, StreamSource.fromFile(f), xrefOut);

IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
assertNotNull(scopesField);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
/*
* Copyright (c) 2009, 2018, Oracle and/or its affiliates. All rights reserved.
* Portions copyright 2009 - 2011 Jens Elkner.
* Portions Copyright (c) 2020, Chris Fraire <cfraire@me.com>.
*/
package org.opengrok.indexer.analysis.document;

Expand Down Expand Up @@ -124,6 +125,11 @@ public void testAnalyze() throws IOException {
public InputStream getStream() throws IOException {
    // NOTE(review): getBytes() without an argument encodes with the
    // platform default charset -- confirm that is intended for this test.
    final byte[] encodedContent = content.getBytes();
    return new ByteArrayInputStream(encodedContent);
}

/**
 * Identifies this test source by a fixed label.
 */
@Override
public String getSourceIdentifier() {
    final String identifier = "String";
    return identifier;
}
}, xrefOut);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

/*
* Copyright (c) 2015, 2018 Oracle and/or its affiliates. All rights reserved.
* Portions Copyright (c) 2017-2019, Chris Fraire <cfraire@me.com>.
* Portions Copyright (c) 2017-2020, Chris Fraire <cfraire@me.com>.
*/
package org.opengrok.indexer.analysis.java;

Expand Down Expand Up @@ -60,15 +60,6 @@ public class JavaAnalyzerFactoryTest {
private static TestRepository repository;
private static AbstractAnalyzer analyzer;

/**
 * Creates a {@code StreamSource} whose stream is opened from the file
 * with the specified name.
 */
private static StreamSource getStreamSource(final String fname) {
    final StreamSource fileBacked = new StreamSource() {
        @Override
        public InputStream getStream() throws IOException {
            final InputStream opened = new FileInputStream(fname);
            return opened;
        }
    };
    return fileBacked;
}

@BeforeClass
public static void setUpClass() throws Exception {
ctags = new Ctags();
Expand Down Expand Up @@ -108,7 +99,7 @@ public void testScopeAnalyzer() throws Exception {
StringWriter xrefOut = new StringWriter();
analyzer.setCtags(ctags);
analyzer.setScopesEnabled(true);
analyzer.analyze(doc, getStreamSource(path), xrefOut);
analyzer.analyze(doc, StreamSource.fromFile(f), xrefOut);

IndexableField scopesField = doc.getField(QueryBuilder.SCOPES);
assertNotNull(scopesField);
Expand Down
Loading

0 comments on commit 5dca6b3

Please sign in to comment.