Skip to content

Commit

Permalink
[cpd] Add ignore literals and identifiers capability to C++
Browse files Browse the repository at this point in the history
  • Loading branch information
jdupak committed May 24, 2024
1 parent 2c64541 commit 47d5fbf
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 4 deletions.
4 changes: 2 additions & 2 deletions docs/pages/pmd/userdocs/cpd/cpd.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ exactly identical.
{% include custom/cli_option_row.html options="--ignore-literals"
description="Ignore literal values such as numbers and strings when comparing text.
By default, literals are not ignored."
languages="Java"
languages="Java, C++"
%}
{% include custom/cli_option_row.html options="--ignore-literal-sequences"
description="Ignore sequences of literals such as list initializers.
Expand All @@ -168,7 +168,7 @@ exactly identical.
{% include custom/cli_option_row.html options="--ignore-identifiers"
description="Ignore names of classes, methods, variables, constants, etc. when comparing text.
By default, identifier names are not ignored."
languages="Java"
languages="Java, C++"
%}
{% include custom/cli_option_row.html options="--ignore-annotations"
description="Ignore language annotations (Java) or attributes (C#) when comparing text.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ public LanguagePropertyBundle newPropertyBundle() {
LanguagePropertyBundle bundle = super.newPropertyBundle();
bundle.definePropertyDescriptor(CpdLanguageProperties.CPD_IGNORE_LITERAL_SEQUENCES);
bundle.definePropertyDescriptor(CpdLanguageProperties.CPD_IGNORE_LITERAL_AND_IDENTIFIER_SEQUENCES);
bundle.definePropertyDescriptor(CpdLanguageProperties.CPD_ANONYMIZE_IDENTIFIERS);
bundle.definePropertyDescriptor(CpdLanguageProperties.CPD_ANONYMIZE_LITERALS);
bundle.definePropertyDescriptor(CPD_SKIP_BLOCKS);
return bundle;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
import org.apache.commons.lang3.StringUtils;

import net.sourceforge.pmd.cpd.CpdLanguageProperties;
import net.sourceforge.pmd.cpd.impl.CpdLexerBase;
import net.sourceforge.pmd.cpd.TokenFactory;
import net.sourceforge.pmd.cpd.impl.JavaCCTokenFilter;
import net.sourceforge.pmd.cpd.impl.JavaccCpdLexer;
import net.sourceforge.pmd.lang.LanguagePropertyBundle;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.impl.javacc.CharStream;
Expand All @@ -26,17 +27,21 @@
*
* <p>Note: This class was called CPPTokenizer in PMD 6.</p>
*/
public class CppCpdLexer extends CpdLexerBase<JavaccToken> {
public class CppCpdLexer extends JavaccCpdLexer {

private boolean skipBlocks;
private Pattern skipBlocksStart;
private Pattern skipBlocksEnd;
private final boolean ignoreIdentifierAndLiteralSeqences;
private final boolean ignoreLiteralSequences;
private final boolean ignoreLiterals;
private final boolean ignoreIdentifiers;

public CppCpdLexer(LanguagePropertyBundle cppProperties) {
ignoreLiteralSequences = cppProperties.getProperty(CpdLanguageProperties.CPD_IGNORE_LITERAL_SEQUENCES);
ignoreIdentifierAndLiteralSeqences = cppProperties.getProperty(CpdLanguageProperties.CPD_IGNORE_LITERAL_AND_IDENTIFIER_SEQUENCES);
ignoreLiterals = cppProperties.getProperty(CpdLanguageProperties.CPD_ANONYMIZE_LITERALS);
ignoreIdentifiers = cppProperties.getProperty(CpdLanguageProperties.CPD_ANONYMIZE_IDENTIFIERS);
String skipBlocksPattern = cppProperties.getProperty(CppLanguageModule.CPD_SKIP_BLOCKS);
if (StringUtils.isNotBlank(skipBlocksPattern)) {
skipBlocks = true;
Expand Down Expand Up @@ -73,6 +78,23 @@ protected TokenManager<JavaccToken> filterTokenStream(final TokenManager<JavaccT
return new CppTokenFilter(tokenManager, ignoreLiteralSequences, ignoreIdentifierAndLiteralSeqences);
}

/**
 * Records one token, replacing its image with the generic description of
 * its kind when the token is a literal and literals are being ignored, or
 * when it is an identifier and identifiers are being ignored.
 *
 * @param tokenEntries sink that receives the (possibly anonymized) token
 * @param currentToken token to record
 */
@Override
protected void processToken(TokenFactory tokenEntries, JavaccToken currentToken) {
    final int tokenKind = currentToken.getKind();
    final String originalImage = currentToken.getImage();

    // A token is anonymized when it falls in a category the user asked to ignore.
    final boolean anonymizeAsLiteral = ignoreLiterals && isLiteralKind(tokenKind);
    final boolean anonymizeAsIdentifier = ignoreIdentifiers && tokenKind == CppTokenKinds.ID;

    final String recordedImage;
    if (anonymizeAsLiteral || anonymizeAsIdentifier) {
        recordedImage = CppTokenKinds.describe(tokenKind);
    } else {
        recordedImage = originalImage;
    }

    tokenEntries.recordToken(recordedImage, currentToken.getReportLocation());
}

/** Tells whether the given token kind is one of the literal kinds checked by {@code processToken}. */
private static boolean isLiteralKind(int tokenKind) {
    return tokenKind == CppTokenKinds.STRING
        || tokenKind == CppTokenKinds.CHARACTER
        || tokenKind == CppTokenKinds.DECIMAL_INT_LITERAL
        || tokenKind == CppTokenKinds.HEXADECIMAL_INT_LITERAL
        || tokenKind == CppTokenKinds.OCTAL_INT_LITERAL
        || tokenKind == CppTokenKinds.FLOAT_LITERAL
        || tokenKind == CppTokenKinds.BINARY_INT_LITERAL
        || tokenKind == CppTokenKinds.ZERO;
}

private static class CppTokenFilter extends JavaCCTokenFilter {

private final boolean ignoreLiteralSequences;
Expand Down

0 comments on commit 47d5fbf

Please sign in to comment.