Skip to content

Commit

Permalink
YAML: Support other cases of lists of lists (#4187)
Browse files Browse the repository at this point in the history
The commit 264e8e8 introduced support for list of lists of scalars.
But this didn't fix lists of lists of other blocks (like mappings).
This commit will also fix these cases.

The underlying problem was that some tokens were interpreted twice.
This was caused by YamlParser.parseFromInput. This method calculates the
prefix of a token by using the substring yamlSource[lastEnd:tokenIdx].
Normally, the characters that were newly interpreted should be marked as
used by incrementing the index `lastEnd`. But for sequences this index
wasn't updated in every case. It was only updated if there was an anchor
or an openingBracketIndex. Because of this, the prefix was used for
multiple consecutive sequences in other cases.
To fix this, update `lastEnd` in every case for sequences.

The yaml-parser has inconsistent behaviour when parsing sequences with
dashes. If a dash-sequence has indentation the start- and the event-mark
point to the same character: the dash. If a dash-sequence has no
indentation the event-mark points to the character AFTER the dash.
Because of this, the `lastEnd` would get an offset and one which means
the YamlParser would skip one character.
Introduce a workaround to correct the `lastEnd` if this faulty case.

With these changes, every dash is only interpreted once. That means
that in the SequenceBuilder.push method the rawPrefix will contain at
most one dash. Because of this, the old fix isn't needed anymore.
Remove it.

Fixes #4176
  • Loading branch information
pdulich committed May 11, 2024
1 parent 93076a3 commit 2122964
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 29 deletions.
39 changes: 18 additions & 21 deletions rewrite-yaml/src/main/java/org/openrewrite/yaml/YamlParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,10 @@ private Yaml.Documents parseFromInput(Path sourceFile, EncodingDetectingInputStr
if (openingBracketIndex != -1) {
int startIndex = commentAwareIndexOf(':', fullPrefix) + 1;
startBracketPrefix = fullPrefix.substring(startIndex, openingBracketIndex);
lastEnd = event.getEndMark().getIndex();
}
lastEnd = event.getEndMark().getIndex();
if (shouldUseYamlParserBugWorkaround(sse)) {
lastEnd--;
}
blockStack.push(new SequenceBuilder(fmt, startBracketPrefix, anchor));
break;
Expand Down Expand Up @@ -375,6 +378,17 @@ private Yaml.Documents parseFromInput(Path sourceFile, EncodingDetectingInputStr
}
}

/*
The yaml-parser library unfortunately returns inconsistent marks.
If the dashes of the sequence have an indentation, the end mark and the start mark point to the dash.
If the dashes of the sequence do not have an indentation, the end mark will point to the character AFTER the dash.
*/
private boolean shouldUseYamlParserBugWorkaround(SequenceStartEvent event) {
int startChar = event.getStartMark().getBuffer()[event.getStartMark().getIndex()];
int endChar = event.getEndMark().getBuffer()[event.getEndMark().getIndex()];
return startChar == '-' && endChar != '-';
}

private Yaml.Anchor buildYamlAnchor(FormatPreservingReader reader, int lastEnd, String eventPrefix, String anchorKey, int eventEndIndex, boolean isForScalar) {
int anchorLength = isForScalar ? anchorKey.length() + 1 : anchorKey.length();
String whitespaceAndScalar = reader.prefix(
Expand All @@ -398,16 +412,7 @@ private Yaml.Anchor buildYamlAnchor(FormatPreservingReader reader, int lastEnd,
}

private static int commentAwareIndexOf(char target, String s) {
return commentAwareIndexOf(target, s, FindIndexStrategy.FIRST);
}

/**
* Return the first or last index of the target character that appears in a non-comment portion of the String,
* or -1 if it does not appear.
*/
private static int commentAwareIndexOf(char target, String s, FindIndexStrategy strategy) {
boolean inComment = false;
int lastFoundIndex = -1;
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
if (inComment) {
Expand All @@ -416,21 +421,13 @@ private static int commentAwareIndexOf(char target, String s, FindIndexStrategy
}
} else {
if (c == target) {
if (strategy == FindIndexStrategy.FIRST) {
return i;
}
lastFoundIndex = i;
return i;
} else if (c == '#') {
inComment = true;
}
}
}
return lastFoundIndex;
}

private enum FindIndexStrategy {
FIRST,
LAST
return -1;
}

@Override
Expand Down Expand Up @@ -527,7 +524,7 @@ public void push(Yaml.Block block) {

public void push(Yaml.Block block, @Nullable String commaPrefix) {
String rawPrefix = block.getPrefix();
int dashIndex = commentAwareIndexOf('-', rawPrefix, FindIndexStrategy.LAST);
int dashIndex = commentAwareIndexOf('-', rawPrefix);
String entryPrefix;
String blockPrefix;
boolean hasDash = dashIndex != -1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,18 +67,38 @@ void fourBytesUnicode() {

@Test
@Issue("https://github.com/openrewrite/rewrite/issues/4176")
void listOfLists() {
void listsAndListsOfLists() {
rewriteRun(
yaml(
"""
root:
listOfLists:
- - a
- b
- - c
- d
- - e
- f
normalListOfScalars:
- a
- b
normalListOfScalarsWithIndentation:
- a
- b
normalListOfMappings:
- a: b
c: d
- e: f
normalListOfSquareBracketLists:
- [ mno, pqr]
- [stu , vwx]
squareList: [x, y, z]
listOfListsOfScalars:
- - a
- b
listOfListsOfScalarsWithIndentation:
- - a
- b
listOfListsOfMappings:
- - a: b
c: d
- e: f
listOfListsOfSquareBracketLists:
- - [mno, pqr ]
- [stu , vwx]
"""
)
);
Expand Down

0 comments on commit 2122964

Please sign in to comment.