Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cpp] Add option to ignore sequences of literals #2963

Merged
merged 5 commits into from Jan 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 10 additions & 1 deletion pmd-core/src/main/java/net/sourceforge/pmd/cpd/GUI.java
Expand Up @@ -167,7 +167,16 @@ public boolean canIgnoreUsings() {

@Override
public boolean canIgnoreLiteralSequences() {
return "cs".equals(terseName);
if (terseName == null) {
return false;
}
switch(terseName) {
case "cpp":
case "cs":
return true;
default:
return false;
}
}
};
}
Expand Down
Expand Up @@ -132,6 +132,7 @@ private void computeEndCoords() {
}
}

/**
 * Returns the kind of this token, which is whatever the wrapped
 * {@code token} reports from {@code getType()}.
 */
@Override
public int getKind() {
return token.getType();
}
Expand Down
Expand Up @@ -4,6 +4,8 @@

package net.sourceforge.pmd.lang.ast;

import net.sourceforge.pmd.annotation.Experimental;

/**
* Represents a language-independent token such as constants, values, language reserved keywords, or comments.
*/
Expand Down Expand Up @@ -51,4 +53,12 @@ public interface GenericToken {
* @return a non-negative integer containing the end column
*/
int getEndColumn();

/**
* Gets a unique integer representing the kind of token this is.
*
* <p>The semantics of this kind depend on the language.
*
* @return the language-specific kind of this token
*/
@Experimental
int getKind();
adangel marked this conversation as resolved.
Show resolved Hide resolved
}
Expand Up @@ -63,6 +63,11 @@ public int getBeginColumn() {
public int getEndColumn() {
return 0;
}

/**
 * This implementation does not distinguish token kinds and always returns {@code 0}.
 */
@Override
public int getKind() {
return 0;
}
}

class StringTokenManager implements TokenManager {
Expand Down
4 changes: 3 additions & 1 deletion pmd-cpp/etc/grammar/cpp.jj
Expand Up @@ -284,12 +284,14 @@ TOKEN :

TOKEN:
{
< #DECIMALDIGIT: ["0"-"9"] >
< #BINARYDIGIT: ["0"-"1"] >
| < #OCTALDIGIT: ["0"-"7"] >
| < #DECIMALDIGIT: ["0"-"9"] >
| < #HEXDIGIT: ["a"-"f", "A"-"F", "0"-"9"] >
| < #INT_SUFFIX: ["u", "U", "l", "L"] | "uL" | "Ul" | "UL" | "ul" | "lu" | "Lu" | "lU" | "LU" >

| < ZERO: "0" >
| < BINARY_INT_LITERAL: "0" ["b", "B"] ("'" | <BINARYDIGIT>)+ (<INT_SUFFIX>)? >
| < OCTAL_INT_LITERAL: "0" ("'" | <OCTALDIGIT>)+ (<INT_SUFFIX>)? >
| < DECIMAL_INT_LITERAL: ["1"-"9"] ("'" | <DECIMALDIGIT>)* (<INT_SUFFIX>)? >
| < HEXADECIMAL_INT_LITERAL: "0" ["x", "X"] <HEXDIGIT> ("'" | <HEXDIGIT>)+ (<INT_SUFFIX>)? >
Expand Down
5 changes: 5 additions & 0 deletions pmd-cpp/src/main/ant/alljavacc.xml
Expand Up @@ -88,6 +88,11 @@ public class Token implements GenericToken, java.io.Serializable]]></replacevalu
return endColumn;
}

/**
 * Returns the value of this token's {@code kind} field.
 */
@Override
public int getKind() {
return kind;
}

]]></replacevalue>
</replace>

Expand Down
91 changes: 89 additions & 2 deletions pmd-cpp/src/main/java/net/sourceforge/pmd/cpd/CPPTokenizer.java
Expand Up @@ -11,8 +11,12 @@

import net.sourceforge.pmd.PMD;
import net.sourceforge.pmd.cpd.internal.JavaCCTokenizer;
import net.sourceforge.pmd.cpd.token.JavaCCTokenFilter;
import net.sourceforge.pmd.cpd.token.TokenFilter;
import net.sourceforge.pmd.lang.TokenManager;
import net.sourceforge.pmd.lang.ast.GenericToken;
import net.sourceforge.pmd.lang.cpp.CppTokenManager;
import net.sourceforge.pmd.lang.cpp.ast.CppParserConstants;
import net.sourceforge.pmd.util.IOUtil;

/**
Expand All @@ -23,14 +27,15 @@ public class CPPTokenizer extends JavaCCTokenizer {
private boolean skipBlocks = true;
private String skipBlocksStart;
private String skipBlocksEnd;
private boolean ignoreLiteralSequences = false;

/**
* Sets the possible options for the C++ tokenizer.
*
* @param properties
* the properties
* @param properties the properties
* @see #OPTION_SKIP_BLOCKS
* @see #OPTION_SKIP_BLOCKS_PATTERN
adangel marked this conversation as resolved.
Show resolved Hide resolved
* @see #OPTION_IGNORE_LITERAL_SEQUENCES
*/
public void setProperties(Properties properties) {
skipBlocks = Boolean.parseBoolean(properties.getProperty(OPTION_SKIP_BLOCKS, Boolean.TRUE.toString()));
Expand All @@ -44,6 +49,7 @@ public void setProperties(Properties properties) {
skipBlocksEnd = split[1];
}
}
ignoreLiteralSequences = Boolean.parseBoolean(properties.getProperty(OPTION_IGNORE_LITERAL_SEQUENCES, "false"));
}

private String maybeSkipBlocks(String test) throws IOException {
Expand Down Expand Up @@ -80,4 +86,85 @@ protected TokenManager getLexerForSource(SourceCode sourceCode) {
throw new RuntimeException(e);
}
}

/**
 * Creates the token filter used for the given token manager: a {@link CppTokenFilter}
 * configured with the current {@code ignoreLiteralSequences} setting.
 */
@Override
protected TokenFilter getTokenFilter(final TokenManager tokenManager) {
return new CppTokenFilter(tokenManager, ignoreLiteralSequences);
}

/**
 * Token filter that can drop brace-enclosed sequences of numeric literals
 * (for example large array initializers) from the token stream.
 *
 * <p>When an opening curly brace is seen, the remaining tokens are scanned ahead.
 * If only numeric literals, commas and balanced curly braces follow up to the
 * matching closing brace, and at least one literal was seen, discarding stays
 * active until that closing brace (inclusive) has been analyzed.
 */
private static class CppTokenFilter extends JavaCCTokenFilter {
    /** Whether sequences of literals are filtered at all. */
    private final boolean ignoreLiteralSequences;
    /** The closing brace that ends the sequence being discarded; {@code null} when idle. */
    private GenericToken discardingLiteralsUntil = null;
    /** Marks the token that ends a discarded sequence, so that it is reported as discarded too. */
    private boolean discardCurrent = false;

    CppTokenFilter(final TokenManager tokenManager, final boolean ignoreLiteralSequences) {
        super(tokenManager);
        this.ignoreLiteralSequences = ignoreLiteralSequences;
    }

    @Override
    protected void analyzeTokens(final GenericToken currentToken, final Iterable<GenericToken> remainingTokens) {
        // The flag only ever applies to a single token, so reset it for each new one.
        discardCurrent = false;
        skipLiteralSequences(currentToken, remainingTokens);
    }

    /**
     * Updates the discarding state for the current token: either finishes an
     * ongoing discard when its end token is reached, or starts a new one when the
     * current token opens a sequence of literals.
     */
    private void skipLiteralSequences(final GenericToken currentToken, final Iterable<GenericToken> remainingTokens) {
        if (!ignoreLiteralSequences) {
            return;
        }
        final int currentKind = currentToken.getKind();
        if (isDiscardingLiterals()) {
            if (currentToken == discardingLiteralsUntil) { // NOPMD - intentional check for reference equality
                // Reached the closing brace: stop discarding, but still drop the brace itself.
                discardingLiteralsUntil = null;
                discardCurrent = true;
            }
            return;
        }
        if (currentKind == CppParserConstants.LCURLYBRACE) {
            // null when what follows is not a pure sequence of literals
            discardingLiteralsUntil = findEndOfSequenceOfLiterals(remainingTokens);
        }
    }

    /**
     * Scans ahead for the closing brace of a list consisting solely of numeric
     * literals, commas and balanced nested braces.
     *
     * @param remainingTokens the tokens following an opening curly brace
     * @return the closing brace token if the list contained at least one literal;
     *         {@code null} if any other token is encountered, no literal was seen,
     *         or the tokens run out before the list is closed
     */
    private static GenericToken findEndOfSequenceOfLiterals(final Iterable<GenericToken> remainingTokens) {
        boolean literalFound = false;
        int nestingDepth = 0;
        for (final GenericToken candidate : remainingTokens) {
            final int candidateKind = candidate.getKind();
            if (isNumericLiteral(candidateKind)) {
                literalFound = true;
            } else if (candidateKind == CppParserConstants.COMMA) {
                // separators are part of the sequence; move on to the next token
                continue;
            } else if (candidateKind == CppParserConstants.LCURLYBRACE) {
                // nested braces are fine as long as they end up balanced
                nestingDepth++;
            } else if (candidateKind == CppParserConstants.RCURLYBRACE) {
                nestingDepth--;
                if (nestingDepth < 0) {
                    // this brace closes the list itself
                    return literalFound ? candidate : null;
                }
            } else {
                // anything else means this is not a sequence of literals
                return null;
            }
        }
        return null;
    }

    /** Tells whether the given token kind is one of the numeric literal kinds. */
    private static boolean isNumericLiteral(final int tokenKind) {
        return tokenKind == CppParserConstants.BINARY_INT_LITERAL
                || tokenKind == CppParserConstants.DECIMAL_INT_LITERAL
                || tokenKind == CppParserConstants.FLOAT_LITERAL
                || tokenKind == CppParserConstants.HEXADECIMAL_INT_LITERAL
                || tokenKind == CppParserConstants.OCTAL_INT_LITERAL
                || tokenKind == CppParserConstants.ZERO;
    }

    private boolean isDiscardingLiterals() {
        return discardingLiteralsUntil != null;
    }

    @Override
    protected boolean isLanguageSpecificDiscarding() {
        // True while inside a discarded sequence and for the token that terminates it.
        return discardCurrent || isDiscardingLiterals();
    }
}
}
Expand Up @@ -129,25 +129,39 @@ public void testTabWidth() {
doTest("tabWidth");
}

@Test
public void testLongListsOfNumbersAreNotIgnored() {
// No properties are passed here, so presumably the tokenizer defaults apply
// and literal sequences stay in the output - confirm against doTest(String).
doTest("listOfNumbers");
}

@Test
public void testLongListsOfNumbersAreIgnored() {
// Same fixture, but with the ignore-literal-sequences option switched on;
// "_ignored" presumably selects a separate expected-output file - confirm against doTest.
doTest("listOfNumbers", "_ignored", skipLiteralSequences());
}

private static Properties skipBlocks(String skipPattern) {
return properties(true, skipPattern);
return properties(true, skipPattern, false);
}

/** Builds the block-skipping properties without a custom skip pattern (passes {@code null} as the pattern). */
private static Properties skipBlocks() {
return skipBlocks(null);
}

private static Properties dontSkipBlocks() {
return properties(false, null);
return properties(false, null, false);
}

/** Builds properties with block skipping disabled, no skip pattern, and ignoring of literal sequences enabled. */
private static Properties skipLiteralSequences() {
return properties(false, null, true);
}

private static Properties properties(boolean skipBlocks, String skipPattern) {
private static Properties properties(boolean skipBlocks, String skipPattern, boolean skipLiteralSequences) {
Properties properties = new Properties();
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS, Boolean.toString(skipBlocks));
if (skipPattern != null) {
properties.setProperty(Tokenizer.OPTION_SKIP_BLOCKS_PATTERN, skipPattern);
}
properties.setProperty(Tokenizer.OPTION_IGNORE_LITERAL_SEQUENCES, Boolean.toString(skipLiteralSequences));
return properties;
}
}
@@ -0,0 +1,26 @@
#include <iostream>
int main() {
int a[50] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
double b[14] = {
157, // decimal literal
0377, // octal literal
36'000'000, // literal with digit separators
0x3fff, // hexadecimal literal
0X3FFF, // same hexadecimal literal
328u, // unsigned value
0x7FFFFFL, // long value
0776745ul, // unsigned long value
18.46, // double with number after decimal point
38., // double without number after decimal point
18.46e0, // double with exponent
18.46e1, // double with exponent
0B001101, // C++ 14 binary literal
0b000001, // C++ 14 binary literal
};
int c[3][4] = {{0,1,2,3},{4,5,6,7},{8,9,10,11}}; // multi-dimensional array
int d[3] = {a, a, a}; // identifiers should not be filtered out
int e[1][3] = {{a, a, a}}; // identifiers in multi-dimensional array
int f[1] = {main()}; // method invocations should not be filtered out
int g[1][1] = {{main()}}; // method invocation in multi-dimensional array
return 0;
}