diff --git a/org.eclipse.tm4e.core.tests/src/main/java/org/eclipse/tm4e/core/grammar/GrammarSuiteTest.java b/org.eclipse.tm4e.core.tests/src/main/java/org/eclipse/tm4e/core/grammar/GrammarSuiteTest.java index b6537659..8f79e430 100644 --- a/org.eclipse.tm4e.core.tests/src/main/java/org/eclipse/tm4e/core/grammar/GrammarSuiteTest.java +++ b/org.eclipse.tm4e.core.tests/src/main/java/org/eclipse/tm4e/core/grammar/GrammarSuiteTest.java @@ -36,7 +36,6 @@ * * @see * github.com/Microsoft/vscode-textmate/blob/master/src/tests/tests.ts - * */ public class GrammarSuiteTest { @@ -45,7 +44,7 @@ public class GrammarSuiteTest { // TODO: fix thoses tests: // It seems that problem comes from with encoding. OnigString should support UTF-16 like // https://github.com/atom/node-oniguruma/blob/master/src/onig-string.cc - private static final List IGNORE_TESTS = List.of("TEST #24", "TEST #66", "Issue #82", "Issue #119"); + private static final List IGNORE_TESTS = List.of("TEST #24", "TEST #66"); @TestFactory @DisplayName("Tokenization /first-mate/") diff --git a/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/grammar/StackElement.java b/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/grammar/StackElement.java index d92c4dda..af526844 100644 --- a/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/grammar/StackElement.java +++ b/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/grammar/StackElement.java @@ -1,5 +1,5 @@ /** - * Copyright (c) 2015-2017 Angelo ZERR. + * Copyright (c) 2015-2017 Angelo ZERR. 
* This program and the accompanying materials are made * available under the terms of the Eclipse Public License 2.0 * which is available at https://www.eclipse.org/legal/epl-2.0/ @@ -11,8 +11,8 @@ * Initial license: MIT * * Contributors: - * - Microsoft Corporation: Initial code, written in TypeScript, licensed under MIT license - * - Angelo Zerr - translation and adaptation to Java + * - Microsoft Corporation: Initial code, written in TypeScript, licensed under MIT license + * - Angelo Zerr - translation and adaptation to Java */ package org.eclipse.tm4e.core.grammar; @@ -20,6 +20,7 @@ import java.util.List; import java.util.Objects; +import org.eclipse.jdt.annotation.Nullable; import org.eclipse.tm4e.core.internal.grammar.ScopeListElement; import org.eclipse.tm4e.core.internal.rule.IRuleRegistry; import org.eclipse.tm4e.core.internal.rule.Rule; @@ -33,7 +34,7 @@ */ public class StackElement { - public static final StackElement NULL = new StackElement(null, 0, 0, null, null, null); + public static final StackElement NULL = new StackElement(null, 0, 0, 0, false, null, null, null); /** * The position on the current line where this state was pushed. @@ -42,10 +43,19 @@ public class StackElement { */ private int enterPosition; + /** + * The captured anchor position when this stack element was pushed. + * This is relevant only while tokenizing a line, to restore the anchor position when popping. + * Its value is meaningless across lines. + */ + private int anchorPos; + /** * The previous state on the stack (or null for the root state). */ + @Nullable public final StackElement parent; + /** * The depth of the stack. */ @@ -55,13 +65,21 @@ public class StackElement { * The state (rule) that this element represents. */ public final int ruleId; + + /** + * The state has entered and captured \n. This means that the next line should have an anchorPosition of 0. 
+ */ + public final boolean beginRuleCapturedEOL; + /** * The "pop" (end) condition for this state in case that it was dynamically generated through captured text. */ public final String endRule; + /** * The list of scopes containing the "name" for this state. */ + public final ScopeListElement nameScopesList; /** * The list of scopes containing the "contentName" (besides "name") for this state. @@ -69,11 +87,21 @@ public class StackElement { */ public final ScopeListElement contentNameScopesList; - public StackElement(StackElement parent, int ruleId, int enterPos, String endRule, ScopeListElement nameScopesList, ScopeListElement contentNameScopesList) { + public StackElement( + @Nullable StackElement parent, + int ruleId, + int enterPos, + int anchorPos, + boolean beginRuleCapturedEOL, + @Nullable String endRule, + ScopeListElement nameScopesList, + ScopeListElement contentNameScopesList) { this.parent = parent; this.depth = (this.parent != null ? this.parent.depth + 1 : 1); this.ruleId = ruleId; this.enterPosition = enterPos; + this.anchorPos = anchorPos; + this.beginRuleCapturedEOL = beginRuleCapturedEOL; this.endRule = endRule; this.nameScopesList = nameScopesList; this.contentNameScopesList = contentNameScopesList; @@ -82,44 +110,71 @@ public StackElement(StackElement parent, int ruleId, int enterPos, String endRul /** * A structural equals check. Does not take into account `scopes`. 
*/ - private static boolean structuralEquals(StackElement a, StackElement b) { + private static boolean structuralEquals(@Nullable StackElement a, @Nullable StackElement b) { + do { + if (a == b) { + return true; + } + + if (a == null && b == null) { + // End of list reached for both + return true; + } + + if (a == null || b == null) { + // End of list reached only for one + return false; + } + + if (a.depth != b.depth || a.ruleId != b.ruleId || !Objects.equals(a.endRule, b.endRule)) { + return false; + } + + // Go to previous pair + a = a.parent; + b = b.parent; + } while (true); + } + + @SuppressWarnings("null") + private static boolean equals(@Nullable StackElement a, @Nullable StackElement b) { if (a == b) { return true; } - if (a == null || b == null) { + if (!structuralEquals(a, b)) { return false; } - return a.depth == b.depth && a.ruleId == b.ruleId && Objects.equals(a.endRule, b.endRule) && structuralEquals(a.parent, b.parent); + return a.contentNameScopesList.equals(b.contentNameScopesList); } @Override public boolean equals(Object other) { - if (other == this) { - return true; - } - if (other == null) { + if (other == null || other.getClass() != StackElement.class) { return false; } - if (!(other instanceof StackElement)) { - return false; - } - StackElement stackElement = (StackElement)other; - return structuralEquals(this, stackElement) && this.contentNameScopesList.equals(stackElement.contentNameScopesList); + return equals(this, (StackElement) other); } @Override public int hashCode() { - return Objects.hash(depth, ruleId, endRule, parent, contentNameScopesList); + final int prime = 31; + int result = 1; + result = prime * result + Objects.hash(endRule, parent, contentNameScopesList); + result = prime * result + depth; + result = prime * result + ruleId; + return result; } public void reset() { StackElement el = this; while (el != null) { el.enterPosition = -1; + el.anchorPos = -1; el = el.parent; } } + @Nullable public StackElement pop() { return 
this.parent; } @@ -131,8 +186,19 @@ public StackElement safePop() { return this; } - public StackElement push(int ruleId, int enterPos, String endRule, ScopeListElement nameScopesList, ScopeListElement contentNameScopesList) { - return new StackElement(this, ruleId, enterPos, endRule, nameScopesList, contentNameScopesList); + public StackElement push(int ruleId, + int enterPos, + int anchorPos, + boolean beginRuleCapturedEOL, + @Nullable String endRule, + ScopeListElement nameScopesList, + ScopeListElement contentNameScopesList) { + return new StackElement(this, ruleId, enterPos, anchorPos, beginRuleCapturedEOL, endRule, nameScopesList, + contentNameScopesList); + } + + public int getAnchorPos() { + return anchorPos; } public int getEnterPos() { @@ -147,7 +213,8 @@ private void appendString(List res) { if (this.parent != null) { this.parent.appendString(res); } - res.add('(' + Integer.toString(this.ruleId) + ')'); //, TODO-${this.nameScopesList}, TODO-${this.contentNameScopesList})`; + res.add('(' + Integer.toString(this.ruleId) + ')'); // , TODO-${this.nameScopesList}, + // TODO-${this.contentNameScopesList})`; } @Override @@ -161,14 +228,18 @@ public StackElement setContentNameScopesList(ScopeListElement contentNameScopesL if (this.contentNameScopesList.equals(contentNameScopesList)) { return this; } - return this.parent.push(this.ruleId, this.enterPosition, this.endRule, this.nameScopesList, contentNameScopesList); + final var parent = this.parent; + assert parent != null; + return parent.push(this.ruleId, this.enterPosition, this.anchorPos, this.beginRuleCapturedEOL, this.endRule, + this.nameScopesList, contentNameScopesList); } public StackElement setEndRule(String endRule) { if (this.endRule != null && this.endRule.equals(endRule)) { return this; } - return new StackElement(this.parent, this.ruleId, this.enterPosition, endRule, this.nameScopesList, this.contentNameScopesList); + return new StackElement(this.parent, this.ruleId, this.enterPosition, 
this.anchorPos, this.beginRuleCapturedEOL, + endRule /* the requested end rule, not the current this.endRule, so the copy really carries the change */, this.nameScopesList, this.contentNameScopesList); } public boolean hasSameRuleAs(final StackElement other) { diff --git a/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/Grammar.java b/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/Grammar.java index e67221fd..692d26c0 100644 --- a/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/Grammar.java +++ b/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/Grammar.java @@ -50,9 +50,9 @@ /** * TextMate grammar implementation. * - * @see + * @see * github.com/Microsoft/vscode-textmate/blob/master/src/grammar.ts - * */ public final class Grammar implements IGrammar, IRuleFactoryHelper { @@ -198,7 +198,7 @@ public ITokenizeLineResult2 tokenizeLine2(String lineText, StackElement prevStat } @SuppressWarnings("unchecked") - private T tokenize(String lineText, StackElement prevState, boolean emitBinaryTokens) { + private T tokenize(String lineText, @Nullable StackElement prevState, boolean emitBinaryTokens) { if (this.rootId == -1) { this.rootId = RuleFactory.getCompiledRuleId(this.rawGrammar.getRepository().getSelf(), this, this.rawGrammar.getRepository()); @@ -219,7 +219,7 @@ private T tokenize(String lineText, StackElement prevState, boolean emitBina ScopeListElement scopeList = new ScopeListElement(null, rootScopeName, rootMetadata); - prevState = new StackElement(null, this.rootId, -1, null, scopeList, scopeList); + prevState = new StackElement(null, this.rootId, -1, -1, false, null, scopeList, scopeList); } else { isFirstLine = false; prevState.reset(); diff --git a/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/LineTokenizer.java b/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/LineTokenizer.java index 9e2800f5..26ea4a5b 100644 --- 
a/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/LineTokenizer.java +++ b/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/grammar/LineTokenizer.java @@ -35,6 +35,11 @@ import org.eclipse.tm4e.core.internal.rule.MatchRule; import org.eclipse.tm4e.core.internal.rule.Rule; +/** + * @see + * github.com/Microsoft/vscode-textmate/blob/master/src/grammar.ts + */ final class LineTokenizer { private static final Logger LOGGER = System.getLogger(LineTokenizer.class.getName()); @@ -145,8 +150,9 @@ private void scanNext() { lineTokens.produce(stack, captureIndices[0].getEnd()); // pop - StackElement popped = stack; + final var popped = stack; stack = stack.pop(); + anchorPosition = popped.getAnchorPos(); if (!hasAdvanced && popped.getEnterPos() == linePos) { // Grammar pushed & popped a rule without advancing @@ -171,7 +177,8 @@ private void scanNext() { // push it on the stack rule String scopeName = rule.getName(lineText.string, captureIndices); ScopeListElement nameScopesList = stack.contentNameScopesList.push(grammar, scopeName); - stack = stack.push(matchedRuleId, linePos, null, nameScopesList, nameScopesList); + stack = stack.push(matchedRuleId, linePos, anchorPosition, + captureIndices[0].getEnd() == lineText.bytesCount, null, nameScopesList, nameScopesList); if (rule instanceof BeginEndRule) { BeginEndRule pushedRule = (BeginEndRule) rule; @@ -450,11 +457,11 @@ private void handleCaptures(Grammar grammar, OnigString lineText, boolean isFirs ScopeListElement contentNameScopesList = nameScopesList.push(grammar, contentName); // the capture requires additional matching - StackElement stackClone = stack.push(retokenizeCapturedWithRuleId, captureIndex.getStart(), null, - nameScopesList, contentNameScopesList); - tokenizeString(grammar, - OnigString.of(lineText.string.substring(0, captureIndex.getEnd())), - (isFirstLine && captureIndex.getStart() == 0), captureIndex.getStart(), stackClone, lineTokens); + StackElement stackClone 
= stack.push(retokenizeCapturedWithRuleId, captureIndex.getStart(), -1, false, + null, nameScopesList, contentNameScopesList); + final var onigSubStr = OnigString.of(lineText.string.substring(0, captureIndex.getEnd())); + tokenizeString(grammar, onigSubStr, (isFirstLine && captureIndex.getStart() == 0), + captureIndex.getStart(), stackClone, lineTokens); continue; } @@ -485,7 +492,7 @@ private void handleCaptures(Grammar grammar, OnigString lineText, boolean isFirs */ private WhileCheckResult checkWhileConditions(Grammar grammar, OnigString lineText, boolean isFirstLine, int linePos, StackElement stack, LineTokens lineTokens) { - int currentanchorPosition = -1; + int currentanchorPosition = stack.beginRuleCapturedEOL ? 0 : -1; List whileRules = new ArrayList<>(); for (StackElement node = stack; node != null; node = node.pop()) { Rule nodeRule = node.getRule(grammar);