From ba776a42affccf0aa013b44a24de4e23c1477069 Mon Sep 17 00:00:00 2001 From: JimLaskey Date: Tue, 28 Mar 2023 16:18:38 -0300 Subject: [PATCH 1/7] Clean up JavadocTokenizer --- .../sun/tools/javac/parser/JavaTokenizer.java | 180 +++++++++--------- .../tools/javac/parser/JavadocTokenizer.java | 152 +++++++++++++-- .../com/sun/tools/javac/parser/Tokens.java | 6 +- .../sun/tools/javac/parser/UnicodeReader.java | 135 +++---------- 4 files changed, 248 insertions(+), 225 deletions(-) diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java index 833d945642159..e2bbf7ec4e805 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java @@ -307,6 +307,15 @@ protected boolean acceptOneOfThenPut(char ch1, char ch2) { return false; } + /** + * Test if the current character is a line terminator. + * + * @return true if current character is a line terminator. + */ + private boolean isEOLN() { + return isOneOf('\n', '\r'); + } + /** * Skip and process a line terminator sequence. */ @@ -1085,7 +1094,7 @@ public Token readToken() { if (scannerDebug) { System.out.println("nextToken(" + pos + "," + endPos + ")=|" + - getRawString(pos, endPos) + new String(getRawCharacters(pos, endPos)) + "|"); } } @@ -1137,11 +1146,13 @@ protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) if (scannerDebug) { System.out.println("processComment(" + pos + "," + endPos + "," + style + ")=|" - + getRawString(pos, endPos) + + new String(getRawCharacters(pos, endPos)) + "|"); } - return new BasicComment(style,this, pos, endPos); + char[] buf = getRawCharacters(pos, endPos); + + return new BasicComment(style, fac, buf, pos); } /** @@ -1156,8 +1167,8 @@ protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) protected void processWhiteSpace(int pos, int endPos) { if (scannerDebug) { System.out.println("processWhitespace(" + pos - + "," + endPos + ")=|" - + getRawString(pos, endPos) + + "," + endPos + ")=|" + + new String(getRawCharacters(pos, endPos)) + "|"); } } @@ -1171,8 +1182,8 @@ protected void processWhiteSpace(int pos, int endPos) { protected void processLineTerminator(int pos, int endPos) { if (scannerDebug) { System.out.println("processTerminator(" + pos - + "," + endPos + ")=|" - + getRawString(pos, endPos) + + "," + endPos + ")=|" + + new String(getRawCharacters(pos, endPos)) + "|"); } } @@ -1195,6 +1206,9 @@ public Position.LineMap getLineMap() { protected static class BasicComment extends PositionTrackingReader implements Comment { /** * Style of comment + * LINE starting with // + * BLOCK starting with /* + * JAVADOC starting with /** */ CommentStyle cs; @@ -1211,13 +1225,13 @@ protected static class BasicComment extends PositionTrackingReader implements Co /** * Constructor. * - * @param cs comment style - * @param reader existing reader - * @param pos start of meaningful content in buffer. - * @param endPos end of meaningful content in buffer. + * @param cs comment style + * @param sf Scan factory. + * @param array Array containing contents of source. + * @param offset Position offset in original source buffer. */ - protected BasicComment(CommentStyle cs, UnicodeReader reader, int pos, int endPos) { - super(reader, pos, endPos); + protected BasicComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) { + super(sf, array, offset); this.cs = cs; } @@ -1233,7 +1247,8 @@ public String getText() { /** * Return buffer position in original buffer mapped from buffer position in comment. * - * @param pos buffer position in comment. + * @param pos buffer position in comment. + * * @return buffer position in original buffer. */ public int getSourcePos(int pos) { @@ -1242,8 +1257,11 @@ public int getSourcePos(int pos) { /** * Return style of comment. + * LINE starting with // + * BLOCK starting with /* + * JAVADOC starting with /** * - * @return style of comment. + * @return */ public CommentStyle getStyle() { return cs; @@ -1255,104 +1273,76 @@ public CommentStyle getStyle() { * @return true if comment contains @deprecated. */ public boolean isDeprecated() { - if (!scanned) { + if (!scanned && cs == CommentStyle.JAVADOC) { scanDocComment(); } + return deprecatedFlag; } /** - * Detect the deprecated tag. - * - * @param line line reader - * - * @return true if deprecated tag is present. + * Scan JAVADOC comment for details. */ - protected boolean hasDeprecated(UnicodeReader line) { - return line.accept("@deprecated") && - (line.isWhitespace() || line.is('*') || line.isEOLN()); - } - - /** - * Remove closing star(s) slash from comment. - * - * @param line line reader - * - * @return new line reader if detected otherwise original line reader. - */ - UnicodeReader trimEndOfComment(UnicodeReader line) { - int pos = line.position(); + protected void scanDocComment() { + try { + boolean deprecatedPrefix = false; + accept("/**"); - while (line.isAvailable()) { - if (line.is('*')) { - int endPos = line.position(); - line.skip('*'); + forEachLine: + while (isAvailable()) { + // Skip optional WhiteSpace at beginning of line + skipWhitespace(); - if (line.is('/')) { - return line.lineReader(pos, endPos); + // Skip optional consecutive Stars + while (accept('*')) { + if (is('/')) { + return; + } } - } else { - line.next(); - } - } - line.reset(pos); + // Skip optional WhiteSpace after Stars + skipWhitespace(); - return line; - } + // At beginning of line in the JavaDoc sense. + deprecatedPrefix = deprecatedFlag || accept("@deprecated"); - /** - * Trim the first part of the JavaDoc comment. - * - * @param line line reader - * - * @return modified line reader - */ - UnicodeReader trimJavadocComment(UnicodeReader line) { - int pos = line.position(); - line.skipWhitespace(); + if (deprecatedPrefix && isAvailable()) { + if (Character.isWhitespace(get())) { + deprecatedFlag = true; + } else if (accept('*')) { + if (is('/')) { + deprecatedFlag = true; + return; + } + } + } - if (line.skip('*') == 0) { - line.reset(pos); - } + // Skip rest of line + while (isAvailable()) { + switch (get()) { + case '*': + next(); - return trimEndOfComment(line); - } + if (is('/')) { + return; + } - /** - * Put the line into the buffer. - * - * @param line line reader - */ - protected void putLine(UnicodeReader line) { - // ignore overridden in subclass - } + break; + case '\r': // (Spec 3.4) + case '\n': // (Spec 3.4) + accept('\r'); + accept('\n'); + continue forEachLine; - /** - * Scan document comment for content. - */ - protected void scanDocComment() { - if (!scanned) { - deprecatedFlag = false; + default: + next(); + break; + } + } // rest of line + } // forEachLine + return; + } finally { scanned = true; - - if (!accept("/**")) { - return; - } - - while (isAvailable()) { - UnicodeReader line = lineReader(); - line = trimJavadocComment(line); - - // If standalone @deprecated tag - int pos = line.position(); - if (hasDeprecated(line)) { - deprecatedFlag = true; - } - - line.reset(pos); - putLine(line); - } } } } diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java index c8e00e1abc4d0..748f0d9875ec0 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java @@ -79,7 +79,8 @@ protected JavadocTokenizer(ScannerFactory fac, char[] array, int length) { @Override protected Comment processComment(int pos, int endPos, CommentStyle style) { - return new JavadocComment(style, this, pos, endPos); + char[] buf = getRawCharacters(pos, endPos); + return new JavadocComment(style, fac, buf, pos); } /** @@ -87,6 +88,13 @@ protected Comment processComment(int pos, int endPos, CommentStyle style) { * of a Javadoc comment. */ protected static class JavadocComment extends BasicComment { + /** + * Pattern used to detect a well formed @deprecated tag in a Javadoc + * comment. + */ + private static final Pattern DEPRECATED_PATTERN = + Pattern.compile("(?sm).*^\\s*@deprecated( |$).*"); + /** * The relevant portion of the comment that is of interest to Javadoc. * Produced by invoking scanDocComment. @@ -104,35 +112,45 @@ protected static class JavadocComment extends BasicComment { */ OffsetMap offsetMap = new OffsetMap(); - JavadocComment(CommentStyle cs, UnicodeReader reader, int pos, int endPos) { - super(cs, reader, pos, endPos); + JavadocComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) { + super( cs, sf, array, offset); this.sb = new StringBuilder(); } /** - * Add current character or code point from line to the extraction buffer. + * Add a character to the extraction buffer. * - * @param line line reader + * @param ch character to add. */ - protected void putLine(UnicodeReader line) { - while (line.isAvailable()) { - offsetMap.add(sb.length(), line.position()); + protected void put(char ch) { + offsetMap.add(sb.length(), offsetPosition()); + sb.append(ch); + } - if (line.isSurrogate()) { - sb.appendCodePoint(line.getCodepoint()); - } else { - sb.append(line.get()); - } + /** + * Add a code point to the extraction buffer. + * + * @param codePoint code point to add. + */ + protected void putCodePoint(int codePoint) { + offsetMap.add(sb.length(), offsetPosition()); + sb.appendCodePoint(codePoint); + } - line.next(); + /** + * Add current character or code point to the extraction buffer. + */ + protected void put() { + if (isSurrogate()) { + putCodePoint(getCodepoint()); + } else { + put(get()); } - offsetMap.add(sb.length(), line.position()); - sb.append('\n'); } @Override public String getText() { - if (!scanned) { + if (!scanned && cs == CommentStyle.JAVADOC) { scanDocComment(); } return docComment; @@ -153,10 +171,104 @@ public int getSourcePos(int pos) { @Override protected void scanDocComment() { - try { - super.scanDocComment(); + try { + boolean firstLine = true; + + // Skip over /* + accept("/*"); + + // Consume any number of stars + skip('*'); + + // Is the comment in the form /**/, /***/, /****/, etc. ? + if (is('/')) { + docComment = ""; + return; + } + + // Skip line terminator on the first line of the comment. + if (isOneOf('\n', '\r')) { + accept('\r'); + accept('\n'); + firstLine = false; + } + + outerLoop: + // The outerLoop processes the doc comment, looping once + // for each line. For each line, it first strips off + // whitespace, then it consumes any stars, then it + // puts the rest of the line into the extraction buffer. + while (isAvailable()) { + int begin_pos = position(); + // Consume whitespace from the beginning of each line. + skipWhitespace(); + // Are there stars here? If so, consume them all + // and check for the end of comment. + if (is('*')) { + // skip all of the stars + skip('*'); + + // check for the closing slash. + if (accept('/')) { + // We're done with the Javadoc comment + break outerLoop; + } + } else if (!firstLine) { + // The current line does not begin with a '*' so we will + // treat it as comment + reset(begin_pos); + } + + textLoop: + // The textLoop processes the rest of the characters + // on the line, adding them to the extraction buffer. + while (isAvailable()) { + if (accept("*/")) { + // This is the end of the comment, return + // the contents of the extraction buffer. + break outerLoop; + } else if (isOneOf('\n', '\r')) { + // We've seen a newline. Add it to our + // buffer and break out of this loop, + // starting fresh on a new line. + put('\n'); + accept('\r'); + accept('\n'); + break textLoop; + } else if (is('\f')){ + next(); + break textLoop; // treat as end of line + + } else { + // Add the character to our buffer. + put(); + next(); + } + } // end textLoop + firstLine = false; + } // end outerLoop + + // If extraction buffer is not empty. + if (sb.length() > 0) { + // Remove trailing asterisks. + int i = sb.length() - 1; + while (i > -1 && sb.charAt(i) == '*') { + i--; + } + sb.setLength(i + 1) ; + + // Store the text of the doc comment + docComment = sb.toString(); + } else { + docComment = ""; + } } finally { - docComment = sb.toString(); + scanned = true; + + // Check if comment contains @deprecated comment. + if (docComment != null && DEPRECATED_PATTERN.matcher(docComment).matches()) { + deprecatedFlag = true; + } } } } diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/Tokens.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/Tokens.java index e513cab734c69..c5924c44aa399 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/Tokens.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/Tokens.java @@ -273,9 +273,9 @@ public boolean test(TokenKind that) { public interface Comment { enum CommentStyle { - LINE, // Starting with // - BLOCK, // starting with /* - JAVADOC, // starting with /** + LINE, + BLOCK, + JAVADOC, } String getText(); diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java index 20dda34d1f8d8..c21d08e9d86a1 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java @@ -104,39 +104,15 @@ public class UnicodeReader { */ @SuppressWarnings("this-escape") protected UnicodeReader(ScannerFactory sf, char[] array, int length) { - this(sf.log, array, length); - } - - /** - * Constructor. - * - * @param log Log for error reporting. - * @param array array containing contents of source. - * @param length length of meaningful content in buffer. - */ - protected UnicodeReader(Log log, char[] array, int length) { - this(log, array, 0, length); - } - - /** - * Constructor. - * - * @param log Log for error reporting. - * @param array array containing contents of source. - * @param pos start of meaningful content in buffer. - * @param endPos end of meaningful content in buffer. - */ - @SuppressWarnings("this-escape") - protected UnicodeReader(Log log, char[] array, int pos, int endPos) { this.buffer = array; - this.length = endPos; - this.position = pos; + this.length = length; + this.position = 0; this.width = 0; this.character = '\0'; this.codepoint = 0; this.wasBackslash = false; this.wasUnicodeEscape = false; - this.log = log; + this.log = sf.log; nextCodePoint(); } @@ -450,68 +426,25 @@ protected boolean acceptOneOf(char ch1, char ch2) { return false; } - /** - * Match one of the arguments and advance if a match. Returns true if a match. - */ protected boolean acceptOneOf(char ch1, char ch2, char ch3) { if (isOneOf(ch1, ch2, ch3)) { next(); return true; } - return false; - } - - /** - * Return a reader which is bracketed by the currect position - * and the next line terminator. - * - * @return a new reader - */ - protected UnicodeReader lineReader() { - int pos = position; - skipToEOLN(); - int endPos = position; - accept('\r'); - accept('\n'); - return lineReader(pos, endPos); - } - /** - * Return a reader which is bracketed by the {@code pos} - * and {@code endPos}. - * - * @param pos initial position - * @param endPos end position - * - * @return a new reader - */ - protected UnicodeReader lineReader(int pos, int endPos) { - return new UnicodeReader(log, buffer, pos, endPos); + return false; } /** * Skip over all occurrences of character. * * @param ch character to accept. - * - * @return number of characters skipped */ - protected int skip(char ch) { - int count = 0; + protected void skip(char ch) { while (accept(ch)) { - count++; + // next } - return count; - } - - /** - * Is ASCII white space character. - * - * @return true if is ASCII white space character - */ - protected boolean isWhitespace() { - return isOneOf(' ', '\t', '\f'); } /** @@ -523,26 +456,18 @@ protected void skipWhitespace() { } } - /** - * Is ASCII line terminator. - * - * @return true if is ASCII white space character - */ - protected boolean isEOLN() { - return isOneOf('\r', '\n'); - } - /** * Skip to end of line. */ protected void skipToEOLN() { while (isAvailable()) { - if (isEOLN()) { + if (isOneOf('\r', '\n')) { break; } next(); } + } /** @@ -640,25 +565,6 @@ public char[] getRawCharacters(int beginIndex, int endIndex) { return Arrays.copyOfRange(buffer, beginIndex, endIndex); } - /** - * Returns a string subset of the input buffer. - * The returned string begins at the {@code beginIndex} and - * extends to the character at index {@code endIndex - 1}. - * Thus the length of the substring is {@code endIndex-beginIndex}. - * This behavior is like - * {@code String.substring(beginIndex, endIndex)}. - * Unicode escape sequences are not translated. - * - * @param beginIndex the beginning index, inclusive. - * @param endIndex the ending index, exclusive. - * - * @throws ArrayIndexOutOfBoundsException if either offset is outside of the - * array bounds - */ - public String getRawString(int beginIndex, int endIndex) { - return new String(buffer, beginIndex, endIndex - beginIndex); - } - /** * This is a specialized version of UnicodeReader that keeps track of the * column position within a given character stream. Used for Javadoc @@ -666,6 +572,11 @@ public String getRawString(int beginIndex, int endIndex) { * to positions in the source file. */ static class PositionTrackingReader extends UnicodeReader { + /** + * Offset from the beginning of the original reader buffer. + */ + private final int offset; + /** * Current column in the comment. */ @@ -674,12 +585,13 @@ static class PositionTrackingReader extends UnicodeReader { /** * Constructor. * - * @param reader existing reader - * @param pos start of meaningful content in buffer. - * @param endPos end of meaningful content in buffer. + * @param sf Scan factory. + * @param array Array containing contents of source. + * @param offset Position offset in original source buffer. */ - protected PositionTrackingReader(UnicodeReader reader, int pos, int endPos) { - super(reader.log, reader.buffer, pos, endPos); + protected PositionTrackingReader(ScannerFactory sf, char[] array, int offset) { + super(sf, array, array.length); + this.offset = offset; this.column = 0; } @@ -711,6 +623,15 @@ protected char next() { protected int column() { return column; } + + /** + * Returns position relative to the original source buffer. + * + * @return + */ + protected int offsetPosition() { + return position() + offset; + } } } From cda5a5aa7bd19a6486f21971335d10c8a0288c1d Mon Sep 17 00:00:00 2001 From: JimLaskey Date: Tue, 28 Mar 2023 16:46:26 -0300 Subject: [PATCH 2/7] Clean up JavadocTokenizer --- .../sun/tools/javac/parser/JavaTokenizer.java | 180 +++++++++--------- .../tools/javac/parser/JavadocTokenizer.java | 152 ++------------- .../com/sun/tools/javac/parser/Tokens.java | 6 +- .../sun/tools/javac/parser/UnicodeReader.java | 135 ++++++++++--- 4 files changed, 225 insertions(+), 248 deletions(-) diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java index e2bbf7ec4e805..833d945642159 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java @@ -307,15 +307,6 @@ protected boolean acceptOneOfThenPut(char ch1, char ch2) { return false; } - /** - * Test if the current character is a line terminator. - * - * @return true if current character is a line terminator. - */ - private boolean isEOLN() { - return isOneOf('\n', '\r'); - } - /** * Skip and process a line terminator sequence. */ @@ -1094,7 +1085,7 @@ public Token readToken() { if (scannerDebug) { System.out.println("nextToken(" + pos + "," + endPos + ")=|" + - new String(getRawCharacters(pos, endPos)) + getRawString(pos, endPos) + "|"); } } @@ -1146,13 +1137,11 @@ protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) if (scannerDebug) { System.out.println("processComment(" + pos + "," + endPos + "," + style + ")=|" - + new String(getRawCharacters(pos, endPos)) + + getRawString(pos, endPos) + "|"); } - char[] buf = getRawCharacters(pos, endPos); - - return new BasicComment(style, fac, buf, pos); + return new BasicComment(style,this, pos, endPos); } /** @@ -1167,8 +1156,8 @@ protected Tokens.Comment processComment(int pos, int endPos, CommentStyle style) protected void processWhiteSpace(int pos, int endPos) { if (scannerDebug) { System.out.println("processWhitespace(" + pos - + "," + endPos + ")=|" + - new String(getRawCharacters(pos, endPos)) + + "," + endPos + ")=|" + + getRawString(pos, endPos) + "|"); } } @@ -1182,8 +1171,8 @@ protected void processWhiteSpace(int pos, int endPos) { protected void processLineTerminator(int pos, int endPos) { if (scannerDebug) { System.out.println("processTerminator(" + pos - + "," + endPos + ")=|" + - new String(getRawCharacters(pos, endPos)) + + "," + endPos + ")=|" + + getRawString(pos, endPos) + "|"); } } @@ -1206,9 +1195,6 @@ public Position.LineMap getLineMap() { protected static class BasicComment extends PositionTrackingReader implements Comment { /** * Style of comment - * LINE starting with // - * BLOCK starting with /* - * JAVADOC starting with /** */ CommentStyle cs; @@ -1225,13 +1211,13 @@ protected static class BasicComment extends PositionTrackingReader implements Co /** * Constructor. * - * @param cs comment style - * @param sf Scan factory. - * @param array Array containing contents of source. - * @param offset Position offset in original source buffer. + * @param cs comment style + * @param reader existing reader + * @param pos start of meaningful content in buffer. + * @param endPos end of meaningful content in buffer. */ - protected BasicComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) { - super(sf, array, offset); + protected BasicComment(CommentStyle cs, UnicodeReader reader, int pos, int endPos) { + super(reader, pos, endPos); this.cs = cs; } @@ -1247,8 +1233,7 @@ public String getText() { /** * Return buffer position in original buffer mapped from buffer position in comment. * - * @param pos buffer position in comment. - * + * @param pos buffer position in comment. * @return buffer position in original buffer. */ public int getSourcePos(int pos) { @@ -1257,11 +1242,8 @@ public int getSourcePos(int pos) { /** * Return style of comment. - * LINE starting with // - * BLOCK starting with /* - * JAVADOC starting with /** * - * @return + * @return style of comment. */ public CommentStyle getStyle() { return cs; @@ -1273,76 +1255,104 @@ public CommentStyle getStyle() { * @return true if comment contains @deprecated. */ public boolean isDeprecated() { - if (!scanned && cs == CommentStyle.JAVADOC) { + if (!scanned) { scanDocComment(); } - return deprecatedFlag; } /** - * Scan JAVADOC comment for details. + * Detect the deprecated tag. + * + * @param line line reader + * + * @return true if deprecated tag is present. */ - protected void scanDocComment() { - try { - boolean deprecatedPrefix = false; - accept("/**"); + protected boolean hasDeprecated(UnicodeReader line) { + return line.accept("@deprecated") && + (line.isWhitespace() || line.is('*') || line.isEOLN()); + } - forEachLine: - while (isAvailable()) { - // Skip optional WhiteSpace at beginning of line - skipWhitespace(); + /** + * Remove closing star(s) slash from comment. + * + * @param line line reader + * + * @return new line reader if detected otherwise original line reader. + */ + UnicodeReader trimEndOfComment(UnicodeReader line) { + int pos = line.position(); - // Skip optional consecutive Stars - while (accept('*')) { - if (is('/')) { - return; - } + while (line.isAvailable()) { + if (line.is('*')) { + int endPos = line.position(); + line.skip('*'); + + if (line.is('/')) { + return line.lineReader(pos, endPos); } + } else { + line.next(); + } + } - // Skip optional WhiteSpace after Stars - skipWhitespace(); + line.reset(pos); - // At beginning of line in the JavaDoc sense. - deprecatedPrefix = deprecatedFlag || accept("@deprecated"); + return line; + } - if (deprecatedPrefix && isAvailable()) { - if (Character.isWhitespace(get())) { - deprecatedFlag = true; - } else if (accept('*')) { - if (is('/')) { - deprecatedFlag = true; - return; - } - } - } + /** + * Trim the first part of the JavaDoc comment. + * + * @param line line reader + * + * @return modified line reader + */ + UnicodeReader trimJavadocComment(UnicodeReader line) { + int pos = line.position(); + line.skipWhitespace(); - // Skip rest of line - while (isAvailable()) { - switch (get()) { - case '*': - next(); + if (line.skip('*') == 0) { + line.reset(pos); + } - if (is('/')) { - return; - } + return trimEndOfComment(line); + } - break; - case '\r': // (Spec 3.4) - case '\n': // (Spec 3.4) - accept('\r'); - accept('\n'); - continue forEachLine; + /** + * Put the line into the buffer. + * + * @param line line reader + */ + protected void putLine(UnicodeReader line) { + // ignore overridden in subclass + } - default: - next(); - break; - } - } // rest of line - } // forEachLine - return; - } finally { + /** + * Scan document comment for content. + */ + protected void scanDocComment() { + if (!scanned) { + deprecatedFlag = false; scanned = true; + + if (!accept("/**")) { + return; + } + + while (isAvailable()) { + UnicodeReader line = lineReader(); + line = trimJavadocComment(line); + + // If standalone @deprecated tag + int pos = line.position(); + if (hasDeprecated(line)) { + deprecatedFlag = true; + } + + line.reset(pos); + putLine(line); + } } } } diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java index 748f0d9875ec0..c8e00e1abc4d0 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java @@ -79,8 +79,7 @@ protected JavadocTokenizer(ScannerFactory fac, char[] array, int length) { @Override protected Comment processComment(int pos, int endPos, CommentStyle style) { - char[] buf = getRawCharacters(pos, endPos); - return new JavadocComment(style, fac, buf, pos); + return new JavadocComment(style, this, pos, endPos); } /** @@ -88,13 +87,6 @@ protected Comment processComment(int pos, int endPos, CommentStyle style) { * of a Javadoc comment. */ protected static class JavadocComment extends BasicComment { - /** - * Pattern used to detect a well formed @deprecated tag in a Javadoc - * comment. - */ - private static final Pattern DEPRECATED_PATTERN = - Pattern.compile("(?sm).*^\\s*@deprecated( |$).*"); - /** * The relevant portion of the comment that is of interest to Javadoc. * Produced by invoking scanDocComment. @@ -112,45 +104,35 @@ protected static class JavadocComment extends BasicComment { */ OffsetMap offsetMap = new OffsetMap(); - JavadocComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) { - super( cs, sf, array, offset); + JavadocComment(CommentStyle cs, UnicodeReader reader, int pos, int endPos) { + super(cs, reader, pos, endPos); this.sb = new StringBuilder(); } /** - * Add a character to the extraction buffer. + * Add current character or code point from line to the extraction buffer. * - * @param ch character to add. + * @param line line reader */ - protected void put(char ch) { - offsetMap.add(sb.length(), offsetPosition()); - sb.append(ch); - } + protected void putLine(UnicodeReader line) { + while (line.isAvailable()) { + offsetMap.add(sb.length(), line.position()); - /** - * Add a code point to the extraction buffer. - * - * @param codePoint code point to add. - */ - protected void putCodePoint(int codePoint) { - offsetMap.add(sb.length(), offsetPosition()); - sb.appendCodePoint(codePoint); - } + if (line.isSurrogate()) { + sb.appendCodePoint(line.getCodepoint()); + } else { + sb.append(line.get()); + } - /** - * Add current character or code point to the extraction buffer. - */ - protected void put() { - if (isSurrogate()) { - putCodePoint(getCodepoint()); - } else { - put(get()); + line.next(); } + offsetMap.add(sb.length(), line.position()); + sb.append('\n'); } @Override public String getText() { - if (!scanned && cs == CommentStyle.JAVADOC) { + if (!scanned) { scanDocComment(); } return docComment; @@ -171,104 +153,10 @@ public int getSourcePos(int pos) { @Override protected void scanDocComment() { - try { - boolean firstLine = true; - - // Skip over /* - accept("/*"); - - // Consume any number of stars - skip('*'); - - // Is the comment in the form /**/, /***/, /****/, etc. ? - if (is('/')) { - docComment = ""; - return; - } - - // Skip line terminator on the first line of the comment. - if (isOneOf('\n', '\r')) { - accept('\r'); - accept('\n'); - firstLine = false; - } - - outerLoop: - // The outerLoop processes the doc comment, looping once - // for each line. For each line, it first strips off - // whitespace, then it consumes any stars, then it - // puts the rest of the line into the extraction buffer. - while (isAvailable()) { - int begin_pos = position(); - // Consume whitespace from the beginning of each line. - skipWhitespace(); - // Are there stars here? If so, consume them all - // and check for the end of comment. - if (is('*')) { - // skip all of the stars - skip('*'); - - // check for the closing slash. - if (accept('/')) { - // We're done with the Javadoc comment - break outerLoop; - } - } else if (!firstLine) { - // The current line does not begin with a '*' so we will - // treat it as comment - reset(begin_pos); - } - - textLoop: - // The textLoop processes the rest of the characters - // on the line, adding them to the extraction buffer. - while (isAvailable()) { - if (accept("*/")) { - // This is the end of the comment, return - // the contents of the extraction buffer. - break outerLoop; - } else if (isOneOf('\n', '\r')) { - // We've seen a newline. Add it to our - // buffer and break out of this loop, - // starting fresh on a new line. - put('\n'); - accept('\r'); - accept('\n'); - break textLoop; - } else if (is('\f')){ - next(); - break textLoop; // treat as end of line - - } else { - // Add the character to our buffer. - put(); - next(); - } - } // end textLoop - firstLine = false; - } // end outerLoop - - // If extraction buffer is not empty. - if (sb.length() > 0) { - // Remove trailing asterisks. - int i = sb.length() - 1; - while (i > -1 && sb.charAt(i) == '*') { - i--; - } - sb.setLength(i + 1) ; - - // Store the text of the doc comment - docComment = sb.toString(); - } else { - docComment = ""; - } + try { + super.scanDocComment(); } finally { - scanned = true; - - // Check if comment contains @deprecated comment. - if (docComment != null && DEPRECATED_PATTERN.matcher(docComment).matches()) { - deprecatedFlag = true; - } + docComment = sb.toString(); } } } diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/Tokens.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/Tokens.java index c5924c44aa399..e513cab734c69 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/Tokens.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/Tokens.java @@ -273,9 +273,9 @@ public boolean test(TokenKind that) { public interface Comment { enum CommentStyle { - LINE, - BLOCK, - JAVADOC, + LINE, // Starting with // + BLOCK, // starting with /* + JAVADOC, // starting with /** } String getText(); diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java index c21d08e9d86a1..20dda34d1f8d8 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java @@ -104,15 +104,39 @@ public class UnicodeReader { */ @SuppressWarnings("this-escape") protected UnicodeReader(ScannerFactory sf, char[] array, int length) { + this(sf.log, array, length); + } + + /** + * Constructor. + * + * @param log Log for error reporting. + * @param array array containing contents of source. + * @param length length of meaningful content in buffer. + */ + protected UnicodeReader(Log log, char[] array, int length) { + this(log, array, 0, length); + } + + /** + * Constructor. + * + * @param log Log for error reporting. + * @param array array containing contents of source. + * @param pos start of meaningful content in buffer. + * @param endPos end of meaningful content in buffer. + */ + @SuppressWarnings("this-escape") + protected UnicodeReader(Log log, char[] array, int pos, int endPos) { this.buffer = array; - this.length = length; - this.position = 0; + this.length = endPos; + this.position = pos; this.width = 0; this.character = '\0'; this.codepoint = 0; this.wasBackslash = false; this.wasUnicodeEscape = false; - this.log = sf.log; + this.log = log; nextCodePoint(); } @@ -426,25 +450,68 @@ protected boolean acceptOneOf(char ch1, char ch2) { return false; } + /** + * Match one of the arguments and advance if a match. Returns true if a match. + */ protected boolean acceptOneOf(char ch1, char ch2, char ch3) { if (isOneOf(ch1, ch2, ch3)) { next(); return true; } - return false; } + /** + * Return a reader which is bracketed by the currect position + * and the next line terminator. + * + * @return a new reader + */ + protected UnicodeReader lineReader() { + int pos = position; + skipToEOLN(); + int endPos = position; + accept('\r'); + accept('\n'); + return lineReader(pos, endPos); + } + + /** + * Return a reader which is bracketed by the {@code pos} + * and {@code endPos}. + * + * @param pos initial position + * @param endPos end position + * + * @return a new reader + */ + protected UnicodeReader lineReader(int pos, int endPos) { + return new UnicodeReader(log, buffer, pos, endPos); + } + /** * Skip over all occurrences of character. * * @param ch character to accept. + * + * @return number of characters skipped */ - protected void skip(char ch) { + protected int skip(char ch) { + int count = 0; while (accept(ch)) { - // next + count++; } + return count; + } + + /** + * Is ASCII white space character. + * + * @return true if is ASCII white space character + */ + protected boolean isWhitespace() { + return isOneOf(' ', '\t', '\f'); } /** @@ -456,18 +523,26 @@ protected void skipWhitespace() { } } + /** + * Is ASCII line terminator. + * + * @return true if is ASCII white space character + */ + protected boolean isEOLN() { + return isOneOf('\r', '\n'); + } + /** * Skip to end of line. */ protected void skipToEOLN() { while (isAvailable()) { - if (isOneOf('\r', '\n')) { + if (isEOLN()) { break; } next(); } - } /** @@ -565,6 +640,25 @@ public char[] getRawCharacters(int beginIndex, int endIndex) { return Arrays.copyOfRange(buffer, beginIndex, endIndex); } + /** + * Returns a string subset of the input buffer. + * The returned string begins at the {@code beginIndex} and + * extends to the character at index {@code endIndex - 1}. + * Thus the length of the substring is {@code endIndex-beginIndex}. + * This behavior is like + * {@code String.substring(beginIndex, endIndex)}. + * Unicode escape sequences are not translated. + * + * @param beginIndex the beginning index, inclusive. + * @param endIndex the ending index, exclusive. + * + * @throws ArrayIndexOutOfBoundsException if either offset is outside of the + * array bounds + */ + public String getRawString(int beginIndex, int endIndex) { + return new String(buffer, beginIndex, endIndex - beginIndex); + } + /** * This is a specialized version of UnicodeReader that keeps track of the * column position within a given character stream. Used for Javadoc @@ -572,11 +666,6 @@ public char[] getRawCharacters(int beginIndex, int endIndex) { * to positions in the source file. */ static class PositionTrackingReader extends UnicodeReader { - /** - * Offset from the beginning of the original reader buffer. - */ - private final int offset; - /** * Current column in the comment. */ @@ -585,13 +674,12 @@ static class PositionTrackingReader extends UnicodeReader { /** * Constructor. * - * @param sf Scan factory. - * @param array Array containing contents of source. - * @param offset Position offset in original source buffer. + * @param reader existing reader + * @param pos start of meaningful content in buffer. + * @param endPos end of meaningful content in buffer. */ - protected PositionTrackingReader(ScannerFactory sf, char[] array, int offset) { - super(sf, array, array.length); - this.offset = offset; + protected PositionTrackingReader(UnicodeReader reader, int pos, int endPos) { + super(reader.log, reader.buffer, pos, endPos); this.column = 0; } @@ -623,15 +711,6 @@ protected char next() { protected int column() { return column; } - - /** - * Returns position relative to the original source buffer. - * - * @return - */ - protected int offsetPosition() { - return position() + offset; - } } } From c3e9b00413379489f6aa6af0d640e63e1aa28007 Mon Sep 17 00:00:00 2001 From: JimLaskey Date: Wed, 29 Mar 2023 09:01:13 -0300 Subject: [PATCH 3/7] Skip whitespace before @deprecated --- .../sun/tools/javac/parser/JavaTokenizer.java | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java index 833d945642159..b285120e2941a 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java @@ -1261,18 +1261,6 @@ public boolean isDeprecated() { return deprecatedFlag; } - /** - * Detect the deprecated tag. - * - * @param line line reader - * - * @return true if deprecated tag is present. - */ - protected boolean hasDeprecated(UnicodeReader line) { - return line.accept("@deprecated") && - (line.isWhitespace() || line.is('*') || line.isEOLN()); - } - /** * Remove closing star(s) slash from comment. * @@ -1346,7 +1334,10 @@ protected void scanDocComment() { // If standalone @deprecated tag int pos = line.position(); - if (hasDeprecated(line)) { + line.skipWhitespace(); + + if (line.accept("@deprecated") && + (line.isWhitespace() || line.is('*') || line.isEOLN())) { deprecatedFlag = true; } From 83e68b383b0788620093126ba370945d5b35868d Mon Sep 17 00:00:00 2001 From: JimLaskey Date: Wed, 29 Mar 2023 11:47:31 -0300 Subject: [PATCH 4/7] Clean up end of comment detection --- .../sun/tools/javac/parser/JavaTokenizer.java | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java index b285120e2941a..4baad4622035a 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java @@ -1272,13 +1272,10 @@ UnicodeReader trimEndOfComment(UnicodeReader line) { int pos = line.position(); while (line.isAvailable()) { - if (line.is('*')) { - int endPos = line.position(); - line.skip('*'); + int endPos = line.position(); - if (line.is('/')) { - return line.lineReader(pos, endPos); - } + if (line.skip('*') != 0 && line.is('/')) { + return line.lineReader(pos, endPos); } else { line.next(); } @@ -1302,9 +1299,13 @@ UnicodeReader trimJavadocComment(UnicodeReader line) { if (line.skip('*') == 0) { line.reset(pos); - } - return trimEndOfComment(line); + return line; + } else if (line.is('/')) { + return line.lineReader(pos, pos); + } else { + return trimEndOfComment(line); + } } /** From c0860de93ea7c847ba5b46275ad6305af32426a9 Mon Sep 17 00:00:00 2001 From: JimLaskey Date: Wed, 29 Mar 2023 16:04:03 -0300 Subject: [PATCH 5/7] Handle line terminators. --- .../share/classes/com/sun/tools/javac/parser/JavaTokenizer.java | 2 +- .../share/classes/com/sun/tools/javac/parser/UnicodeReader.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java index 4baad4622035a..102a91353259c 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java @@ -1338,7 +1338,7 @@ protected void scanDocComment() { line.skipWhitespace(); if (line.accept("@deprecated") && - (line.isWhitespace() || line.is('*') || line.isEOLN())) { + (line.isWhitespace() || line.isEOLN() || line.get() == 0x1A)) { deprecatedFlag = true; } diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java index 20dda34d1f8d8..e5aee6cee7ccf 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java @@ -526,7 +526,7 @@ protected void skipWhitespace() { /** * Is ASCII line terminator. * - * @return true if is ASCII white space character + * @return true if is ASCII line terminator. */ protected boolean isEOLN() { return isOneOf('\r', '\n'); From 0fd05c940e4a8f0b7b111affdb25af9874712ab8 Mon Sep 17 00:00:00 2001 From: JimLaskey Date: Thu, 30 Mar 2023 10:15:19 -0300 Subject: [PATCH 6/7] Tweaks to get tests running --- .../sun/tools/javac/parser/JavaTokenizer.java | 32 +++++++++++++------ .../tools/javac/parser/JavadocTokenizer.java | 12 +++++-- .../sun/tools/javac/parser/UnicodeReader.java | 16 ++++++++++ 3 files changed, 49 insertions(+), 11 deletions(-) diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java index 102a91353259c..23fc4076caf49 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java @@ -1270,13 +1270,15 @@ public boolean isDeprecated() { */ UnicodeReader trimEndOfComment(UnicodeReader line) { int pos = line.position(); + boolean allWhitespace = true; while (line.isAvailable()) { int endPos = line.position(); if (line.skip('*') != 0 && line.is('/')) { - return line.lineReader(pos, endPos); + return line.lineReader(allWhitespace ? endPos : pos, endPos); } else { + allWhitespace = allWhitespace && line.isWhitespace(); line.next(); } } @@ -1294,18 +1296,19 @@ UnicodeReader trimEndOfComment(UnicodeReader line) { * @return modified line reader */ UnicodeReader trimJavadocComment(UnicodeReader line) { + line = trimEndOfComment(line); int pos = line.position(); line.skipWhitespace(); + if (!line.isAvailable()) { + return line; + } + if (line.skip('*') == 0) { line.reset(pos); - - return line; - } else if (line.is('/')) { - return line.lineReader(pos, pos); - } else { - return trimEndOfComment(line); } + + return line; } /** @@ -1314,7 +1317,7 @@ UnicodeReader trimJavadocComment(UnicodeReader line) { * @param line line reader */ protected void putLine(UnicodeReader line) { - // ignore overridden in subclass + // ignore, overridden in subclass } /** @@ -1329,6 +1332,14 @@ protected void scanDocComment() { return; } + skip('*'); + skipWhitespace(); + + if (isEOLN()) { + accept('\r'); + accept('\n'); + } + while (isAvailable()) { UnicodeReader line = lineReader(); line = trimJavadocComment(line); @@ -1338,7 +1349,10 @@ protected void scanDocComment() { line.skipWhitespace(); if (line.accept("@deprecated") && - (line.isWhitespace() || line.isEOLN() || line.get() == 0x1A)) { + (!line.isAvailable() || + line.isWhitespace() || + line.isEOLN() || + line.get() == EOI)) { deprecatedFlag = true; } diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java index c8e00e1abc4d0..f6e4cfdcfb98f 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java @@ -98,6 +98,11 @@ protected static class JavadocComment extends BasicComment { */ private final StringBuilder sb; + /** + * Indicates if newline is required. + */ + private boolean firstLine = true; + /** * Map used to map the extracted Javadoc comment's character positions back to * the original source. @@ -115,6 +120,11 @@ protected static class JavadocComment extends BasicComment { * @param line line reader */ protected void putLine(UnicodeReader line) { + if (firstLine) { + firstLine = false; + } else { + sb.append('\n'); + } while (line.isAvailable()) { offsetMap.add(sb.length(), line.position()); @@ -126,8 +136,6 @@ protected void putLine(UnicodeReader line) { line.next(); } - offsetMap.add(sb.length(), line.position()); - sb.append('\n'); } @Override diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java index e5aee6cee7ccf..62a0b22492799 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java @@ -659,6 +659,22 @@ public String getRawString(int beginIndex, int endIndex) { return new String(buffer, beginIndex, endIndex - beginIndex); } + /** + * Returns a string subset of the input buffer. + * The returned string begins at the {@code position} and + * extends to the character at index {@code length - 1}. + * Thus the length of the substring is {@code length-position}. + * This behavior is like + * {@code String.substring(position, length)}. + * Unicode escape sequences are not translated. + * + * @throws ArrayIndexOutOfBoundsException if either offset is outside of the + * array bounds + */ + public String getRawString() { + return getRawString(position, length); + } + /** * This is a specialized version of UnicodeReader that keeps track of the * column position within a given character stream. Used for Javadoc From a9d5ba06a95551502fae105f9002199a76d9160d Mon Sep 17 00:00:00 2001 From: JimLaskey Date: Fri, 31 Mar 2023 07:22:02 -0300 Subject: [PATCH 7/7] Correct position for end of line --- .../classes/com/sun/tools/javac/parser/JavadocTokenizer.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java index f6e4cfdcfb98f..4cd0e1c2fcdcb 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java @@ -124,6 +124,7 @@ protected void putLine(UnicodeReader line) { firstLine = false; } else { sb.append('\n'); + offsetMap.add(sb.length(), line.position()); } while (line.isAvailable()) { offsetMap.add(sb.length(), line.position());