Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[core] Add a filter possibility to FileCollector #4956

Merged
merged 3 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/pages/release_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ This is a {{ site.pmd.release_type }} release.
### 🐛 Fixed Issues
* core
* [#494](https://github.com/pmd/pmd/issues/494): \[core] Adopt JApiCmp to enforce control over API changes
* [#4942](https://github.com/pmd/pmd/issues/4942): \[core] CPD: `--skip-duplicate-files` has no effect (7.0.0 regression)
* cli
* [#4791](https://github.com/pmd/pmd/issues/4791): \[cli] Could not find or load main class
* [#4913](https://github.com/pmd/pmd/issues/4913): \[cli] cpd-gui closes immediately
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,21 @@

import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.SQLException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import net.sourceforge.pmd.AbstractConfiguration;
import net.sourceforge.pmd.cpd.CPDConfiguration;
import net.sourceforge.pmd.lang.document.FileCollector;
import net.sourceforge.pmd.lang.document.FileId;
import net.sourceforge.pmd.lang.document.InternalApiBridge;
Expand All @@ -36,6 +41,35 @@ private FileCollectionUtil() {

}

public static void collectFiles(CPDConfiguration cpdConfiguration, FileCollector collector) {
if (cpdConfiguration.isSkipDuplicates()) {
final Set<String> alreadyAddedFileNamesWithSize = new HashSet<>();
collector.setFileFilter(fileId -> {
Path path = Paths.get(fileId.getAbsolutePath());
if (!Files.isRegularFile(path)) {
// file is not a simple file, maybe inside a ZIP archive
// don't filter these out
LOG.debug("Path {} is not a regular file, skipping in fileFilter", path);
return true;
}

try {
String signature = path.getFileName() + "_" + Files.size(path);
if (!alreadyAddedFileNamesWithSize.add(signature)) {
LOG.info("Skipping {} since it appears to be a duplicate file and --skip-duplicate-files is set",
path);
return false;
}
} catch (IOException e) {
throw new UncheckedIOException(e);
}
return true;
});
}

collectFiles((AbstractConfiguration) cpdConfiguration, collector);
}

public static void collectFiles(AbstractConfiguration configuration, FileCollector collector) {
if (configuration.getSourceEncoding() != null) {
collector.setCharset(configuration.getSourceEncoding());
Expand Down Expand Up @@ -77,23 +111,23 @@ public static void collectFiles(FileCollector collector, List<Path> filePaths) {
try {
addRoot(collector, rootLocation);
} catch (IOException e) {
collector.getReporter().errorEx("Error collecting " + rootLocation, e);
collector.getReporter().errorEx("Error collecting {0}", new Object[]{ rootLocation }, e);
}
}
}

public static void collectFileList(FileCollector collector, Path fileList) {
LOG.debug("Reading file list {}.", fileList);
if (!Files.exists(fileList)) {
collector.getReporter().error("No such file {}", fileList);
collector.getReporter().error("No such file {0}", fileList);
return;
}

List<Path> filePaths;
try {
filePaths = FileUtil.readFilelistEntries(fileList);
} catch (IOException e) {
collector.getReporter().errorEx("Error reading {}", new Object[] { fileList }, e);
collector.getReporter().errorEx("Error reading {0}", new Object[] { fileList }, e);
adangel marked this conversation as resolved.
Show resolved Hide resolved
return;
}
collectFiles(collector, filePaths);
Expand Down Expand Up @@ -135,15 +169,15 @@ public static void collectDB(FileCollector collector, URI uri) {
String source = IOUtil.readToString(sourceCode);
collector.addSourceFile(FileId.fromPathLikeString(falseFilePath), source);
} catch (SQLException ex) {
collector.getReporter().warnEx("Cannot get SourceCode for {} - skipping ...",
collector.getReporter().warnEx("Cannot get SourceCode for {0} - skipping ...",
new Object[] { falseFilePath },
ex);
}
}
} catch (ClassNotFoundException e) {
collector.getReporter().errorEx("Cannot get files from DB - probably missing database JDBC driver", e);
} catch (Exception e) {
collector.getReporter().errorEx("Cannot get files from DB - ''{}''", new Object[] { uri }, e);
collector.getReporter().errorEx("Cannot get files from DB - ''{0}''", new Object[] { uri }, e);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.function.Predicate;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -60,6 +61,7 @@ public final class FileCollector implements AutoCloseable {
private final FileId outerFsPath;
private boolean closed;
private boolean recursive = true;
private Predicate<FileId> fileFilter = file -> true;

// construction

Expand Down Expand Up @@ -206,6 +208,12 @@ && addFileImpl(TextFile.builderForCharSeq(sourceContents, fileId, version)

private boolean addFileImpl(TextFile textFile) {
LOG.trace("Adding file {} (lang: {}) ", textFile.getFileId().getAbsolutePath(), textFile.getLanguageVersion().getTerseName());

if (!fileFilter.test(textFile.getFileId())) {
LOG.trace("File was skipped due to fileFilter...");
return false;
}

if (allFilesToProcess.add(textFile)) {
return true;
}
Expand Down Expand Up @@ -377,6 +385,17 @@ public void setCharset(Charset charset) {
this.charset = Objects.requireNonNull(charset);
}

/**
* Sets an additional filter that is being called before adding the
* file to the list.
*
* @param fileFilter the filter should return {@code true} if the file
* should be collected and analyzed.
* @throws NullPointerException if {@code fileFilter} is {@code null}.
*/
public void setFileFilter(Predicate<FileId> fileFilter) {
this.fileFilter = Objects.requireNonNull(fileFilter);
}

// filtering

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,25 @@
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import static org.junit.jupiter.api.Assumptions.assumeTrue;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang3.SystemUtils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledOnOs;
import org.junit.jupiter.api.condition.OS;
import org.junit.jupiter.api.io.TempDir;

import net.sourceforge.pmd.lang.DummyLanguageModule;
import net.sourceforge.pmd.lang.document.FileId;
import net.sourceforge.pmd.lang.document.TextFile;

/**
* Unit test for {@link CpdAnalysis}
Expand All @@ -29,14 +36,13 @@ class CpdAnalysisTest {
private static final String BASE_TEST_RESOURCE_PATH = "src/test/resources/net/sourceforge/pmd/cpd/files/";
private static final String TARGET_TEST_RESOURCE_PATH = "target/classes/net/sourceforge/pmd/cpd/files/";

@TempDir
private Path tempDir;

// Symlinks are not well supported under Windows - so the tests are
// simply executed only on linux.
private boolean canTestSymLinks = SystemUtils.IS_OS_UNIX;
CPDConfiguration config = new CPDConfiguration();
private CPDConfiguration config = new CPDConfiguration();

@BeforeEach
void setup() throws Exception {
void setup() {
config.setOnlyRecognizeLanguage(DummyLanguageModule.getInstance());
config.setMinimumTileSize(10);
}
Expand All @@ -49,8 +55,6 @@ void setup() throws Exception {
* any error
*/
private void prepareSymLinks() throws Exception {
assumeTrue(canTestSymLinks, "Skipping unit tests with symlinks.");

Runtime runtime = Runtime.getRuntime();
if (!new File(TARGET_TEST_RESOURCE_PATH, "symlink-for-real-file.txt").exists()) {
runtime.exec(new String[] { "ln", "-s", BASE_TEST_RESOURCE_PATH + "real-file.txt",
Expand All @@ -70,6 +74,7 @@ private void prepareSymLinks() throws Exception {
* any error
*/
@Test
@EnabledOnOs(OS.LINUX) // Symlinks are not well supported under Windows
void testFileSectionWithBrokenSymlinks() throws Exception {
prepareSymLinks();

Expand All @@ -91,6 +96,7 @@ void testFileSectionWithBrokenSymlinks() throws Exception {
* any error
*/
@Test
@EnabledOnOs(OS.LINUX) // Symlinks are not well supported under Windows
void testFileAddedAsSymlinkAndReal() throws Exception {
prepareSymLinks();

Expand All @@ -109,6 +115,7 @@ void testFileAddedAsSymlinkAndReal() throws Exception {
* A file should be not be added via a sym link.
*/
@Test
@EnabledOnOs(OS.LINUX) // Symlinks are not well supported under Windows
void testNoFileAddedAsSymlink() throws Exception {
prepareSymLinks();

Expand Down Expand Up @@ -180,6 +187,29 @@ void testFileOrderRelevance() throws Exception {
}
}

@Test
void duplicatedFilesShouldBeSkipped() throws IOException {
String filename = "file1.dummy";
Path aFile1 = Files.createDirectory(tempDir.resolve("a")).resolve(filename).toAbsolutePath();
Path bFile1 = Files.createDirectory(tempDir.resolve("b")).resolve(filename).toAbsolutePath();

Files.write(aFile1, "Same content".getBytes(StandardCharsets.UTF_8));
Files.write(bFile1, "Same content".getBytes(StandardCharsets.UTF_8));

config.setSkipDuplicates(true);
config.setInputPathList(Arrays.asList(tempDir));
try (CpdAnalysis cpd = CpdAnalysis.create(config)) {
List<TextFile> collectedFiles = cpd.files().getCollectedFiles();
collectedFiles.stream().map(TextFile::getFileId).map(FileId::getAbsolutePath).forEach(System.out::println);
assertEquals(1, collectedFiles.size());

// the order of directory traversal is most likely not defined, so either one
// of the two files might be added, but not both
String collectedFile = collectedFiles.get(0).getFileId().getAbsolutePath();
assertTrue(collectedFile.equals(aFile1.toString()) || collectedFile.equals(bFile1.toString()));
}
}

/**
* Simple listener that fails, if too many files were added and not skipped.
*/
Expand Down