diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b11c08..15f2c95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,8 @@ By https://github.com/polvalente -- Fix 'ignoreInDelimiters' nesting +- Fix 'ignoreInDelimiters' nesting (#20) +- Refactor code to include new parsing engine (#21) ## 0.7.3 diff --git a/package.json b/package.json index 4999cad..4422bb3 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "license": "Apache-2.0", "displayName": "Rainbow End", "description": "This extension allows to identify keyword / end with colours.", - "version": "0.7.2", + "version": "0.8.0", "icon": "images/logo.png", "engines": { "vscode": "^1.29.0" @@ -71,4 +71,4 @@ "typescript": "^2.6.1", "vscode": "^1.1.26" } -} \ No newline at end of file +} diff --git a/src/extension.ts b/src/extension.ts index 2ed8d82..2e69cf1 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -2,37 +2,30 @@ import * as vscode from "vscode"; import { languages } from "./languages"; +import { tokenize, loadRegexes, Token, TokenizeParams } from "./tokenizer"; +import { parse, deepDecorations } from "./parser"; -const deepDecorations = [ - vscode.window.createTextEditorDecorationType({ - color: { id: "rainbowend.deep1" } - }), - vscode.window.createTextEditorDecorationType({ - color: { id: "rainbowend.deep2" } - }), - vscode.window.createTextEditorDecorationType({ - color: { id: "rainbowend.deep3" } - }) -]; +export function activate(context: vscode.ExtensionContext) { + let regExps: { + [index: string]: TokenizeParams; + } = {}; -let timeout: NodeJS.Timer | null = null; -let regExs: { [index: string]: RegExp } = {}; + let timeout: NodeJS.Timer | null = null; -export function activate(context: vscode.ExtensionContext) { Object.keys(languages).forEach(language => { - regExs[language] = buildRegex(language); + regExps[language] = loadRegexes(languages[language]); }); let activeEditor = vscode.window.activeTextEditor; if (activeEditor) { - triggerUpdateDecorations(activeEditor); + triggerUpdateDecorations(timeout, regExps); } vscode.window.onDidChangeActiveTextEditor( editor => { activeEditor = editor; if (activeEditor) { - triggerUpdateDecorations(activeEditor); + triggerUpdateDecorations(timeout, regExps); } }, null, @@ -42,7 +35,7 @@ export function activate(context: vscode.ExtensionContext) { vscode.workspace.onDidChangeTextDocument( event => { if (activeEditor && event.document === activeEditor.document) { - triggerUpdateDecorations(activeEditor); + timeout = triggerUpdateDecorations(timeout, regExps); } }, null, @@ -50,184 +43,46 @@ export function activate(context: vscode.ExtensionContext) { ); } -function triggerUpdateDecorations(activeEditor: vscode.TextEditor) { - if (timeout) { - clearTimeout(timeout); +function triggerUpdateDecorations( + timeout: NodeJS.Timer | null, + regExps: { + [index: string]: TokenizeParams; } - timeout = setTimeout(updateDecorations, 250); -} - -function buildRegex(language: string) { - const languageConfiguration = languages[language]; - let tokens: Array = languageConfiguration["openTokens"]; - tokens = tokens.concat(languageConfiguration["inlineOpenTokens"]); - tokens = tokens.concat(languageConfiguration["closeTokens"]); - tokens = tokens.concat(languageConfiguration["neutralTokens"]); - return RegExp("(\\b)(" + tokens.join("|") + ")(\\b)", "gm"); -} - -function ignoreInDelimiters( - token_pairs: - | Array<{ - open: string; - close: string; - }> - | undefined, - text: string ) { - /* This function replaces text inside each token pair with spaces, - so as to ignore the text 
between delimiters */ - if (token_pairs) { - token_pairs.forEach(({ open: open_delim, close: close_delim }) => { - /* Only allow nesting if delimiters are different */ - if (open_delim == close_delim) { - let regexp = RegExp( - `${open_delim}[^${close_delim}]*${close_delim}`, - "gm" - ); - text = text.replace(regexp, match => { - return " ".repeat(match.length); - }); - } else { - let openRegexp = RegExp(`${open_delim}`, "gm"); - let closeRegexp = RegExp(`${close_delim}`, "gm"); - - let indices = []; - - let match = openRegexp.exec(text); - if (match == null) { - return; - } - - while (match != null) { - indices.push({ index: match.index, type: "open" }); - match = openRegexp.exec(text); - } - - match = closeRegexp.exec(text); - if (match == null) { - return; - } - - while (match != null) { - indices.push({ index: match.index, type: "close" }); - match = closeRegexp.exec(text); - } - - /* Sort by index */ - indices = indices.sort(({ index: a }, { index: b }) => a - b); - - let ignore_env_counter = 0; - let first_index = indices[0].index; - - let index: number; - let type: string; - - /* This isn't so inefficient in that it is - O(indices.length), instead of O(text.length). - Also, the list is already ordered, which is really helpful */ - for ({ index, type } of indices) { - /* skip current token if trying to close when there is no open block - cannot just break because '\n' can be both a closing token and a - normal line end - */ - if (type == "close" && ignore_env_counter == 0) { - continue; - } - - /* if counter is zero, should begin an ignore block */ - if (ignore_env_counter == 0) { - first_index = index; - } - - if (type == "open") { - /* if it is an open token, always increment env counter */ - ignore_env_counter++; - } else { - ignore_env_counter--; - /* if counter has reached zero after a closing token, - end ignore block */ - let last_index = index; - - /* Set ignore block slice as whitespace and keep the rest */ - text = - text.slice(0, first_index) + - " ".repeat(last_index - first_index + 1) + - text.slice(last_index + 1); - } - } - - if (ignore_env_counter != 0) { - /* Didn't close last block */ - text = - text.slice(0, first_index) + - " ".repeat(text.length - first_index + 1); - } - } - }); + if (timeout) { + clearTimeout(timeout); } - return text; + return setTimeout(() => updateDecorations(regExps), 250); } -function updateDecorations() { +function updateDecorations(regExps: { [index: string]: TokenizeParams }) { const activeEditor = vscode.window.activeTextEditor; if (!activeEditor) { return; } - const languageConfiguration = languages[activeEditor.document.languageId]; + let lang = activeEditor.document.languageId; + const languageConfiguration = languages[lang]; + + if (!languageConfiguration) { + return; + } let text = activeEditor.document.getText(); const options: vscode.DecorationOptions[][] = []; - deepDecorations.forEach(d => { + deepDecorations.forEach((d: any) => { options.push([]); }); - let match; - let deep = 0; - // if we are not case sensitive, then ensure the case of text matches then keyworkd matches + // if we are not case sensitive, then ensure the case of text matches the keyword matches if (!languageConfiguration.caseSensitive) { text = text.toLowerCase(); } - // substitute all ignore intervals with spaces - // this ensures commented code or - // keywords inside strings are ignored properly - - // also, prepend a whitespace to allow matching the first character in document - // if needed - text = - " " + 
ignoreInDelimiters(languageConfiguration.ignoreInDelimiters, text); - while ((match = regExs[activeEditor.document.languageId].exec(text))) { - const startIndex = match.index + match[1].length - 1; // Decrement to compensate for added character - const startPos = activeEditor.document.positionAt(startIndex); - const endPos = activeEditor.document.positionAt( - startIndex + match[2].length - ); - const decoration: vscode.DecorationOptions = { - range: new vscode.Range(startPos, endPos) - }; + let tokens: Token[] = tokenize(text, regExps[lang]); - if (languageConfiguration.closeTokens.indexOf(match[2]) > -1) { - if (deep > 0) { - deep -= 1; - } - options[deep % deepDecorations.length].push(decoration); - } else if (languageConfiguration.neutralTokens.indexOf(match[2]) > -1) { - if (deep > 0) { - options[(deep - 1) % deepDecorations.length].push(decoration); - } - } else if (languageConfiguration.openTokens.indexOf(match[2]) > -1) { - options[deep % deepDecorations.length].push(decoration); - deep += 1; - } else { - if (match[1].length === 0 || match[1].match("^[\\s\n]+$")) { - options[deep % deepDecorations.length].push(decoration); - deep += 1; - } - } - } + parse({ activeEditor, options, tokens }); - deepDecorations.forEach((deepDecoration, i) => { + deepDecorations.forEach((deepDecoration: any, i: number) => { activeEditor.setDecorations(deepDecoration, options[i]); }); } diff --git a/src/languages.ts b/src/languages.ts index ab9499d..c45da03 100644 --- a/src/languages.ts +++ b/src/languages.ts @@ -1,22 +1,28 @@ +export interface LangParams { + caseSensitive: boolean; + ignoreBlocks?: Array<{ + open: string; + close?: string; + singleline?: boolean; + }>; + listComprehensions?: Array<{ + open: string; + close: string; + }>; + openTokens: Array; + closeTokens: Array; + neutralTokens: Array; +} + export const languages: { - [index: string]: { - caseSensitive: boolean; - ignoreInDelimiters?: Array<{ - open: string; - close: string; - }>; - inlineOpenTokens: Array; - openTokens: Array; - closeTokens: Array; - neutralTokens: Array; - }; + [index: string]: LangParams; } = { ruby: { caseSensitive: true, - ignoreInDelimiters: [ + ignoreBlocks: [ { open: "#", - close: "\n" + singleline: true }, { open: "=begin", close: "=end" }, { @@ -24,11 +30,6 @@ export const languages: { close: '"' } ], - inlineOpenTokens: [ - // Allow stuff like return toto if tutu - "if", - "unless" - ], openTokens: [ "class", "module", @@ -44,7 +45,7 @@ export const languages: { }, lua: { caseSensitive: true, - ignoreInDelimiters: [ + ignoreBlocks: [ { open: '"', close: '"' @@ -53,26 +54,27 @@ export const languages: { open: "'", close: "'" }, - { - open: "--\\[\\[", - close: "--\\]\\]" - }, { open: "--", - close: "\n" + singleline: true } ], - inlineOpenTokens: [], openTokens: ["function", "if", "while", "for"], closeTokens: ["end"], neutralTokens: ["do", "then", "else", "elseif"] }, elixir: { caseSensitive: true, - ignoreInDelimiters: [ + listComprehensions: [ + { + open: "\\[", + close: "\\]" + } + ], + ignoreBlocks: [ { open: "#", - close: "\n" + singleline: true }, { open: '"""', @@ -85,16 +87,19 @@ export const languages: { { open: "'", close: "'" + }, + { + open: "\\(", + close: "\\)" } ], - inlineOpenTokens: [], openTokens: [ "fn", "defmodule", - "defmacro(?=.+do)", - "defmacrop(?=.+do)", - "def(?=.+do)", - "defp(?=.+do)", + "defmacro", + "defmacrop", + "def", + "defp", "if", "while", "for", @@ -108,18 +113,18 @@ export const languages: { "defimpl", "schema", "embedded_schema", - "resources(?=.+do)", - 
"scope(?=.+do)" + "resources", + "scope" ], - closeTokens: ["end", ", do"], - neutralTokens: ["do", "else", "elseif", "rescue", "after"] + closeTokens: ["end", "do:"], + neutralTokens: ["do", "else", "elseif", "rescue", "after", "->", "<-"] }, julia: { caseSensitive: true, - ignoreInDelimiters: [ + ignoreBlocks: [ { open: "#", - close: "\n" + singleline: true }, { open: '"""', @@ -134,7 +139,12 @@ export const languages: { close: "'" } ], - inlineOpenTokens: [], + listComprehensions: [ + { + open: "\\[", + close: "\\]" + } + ], openTokens: [ "if", "struct", @@ -151,10 +161,10 @@ export const languages: { }, shellscript: { caseSensitive: true, - ignoreInDelimiters: [ + ignoreBlocks: [ { open: "#", - close: "\n" + singleline: true }, { open: '"', @@ -165,55 +175,50 @@ export const languages: { close: "'" } ], - inlineOpenTokens: [], openTokens: ["for", "if", "while", "until"], closeTokens: ["fi", "done"], neutralTokens: ["do", "in", "then", "else"] }, verilog: { caseSensitive: true, - ignoreInDelimiters: [ + ignoreBlocks: [ { open: "/\\*", close: "\\*/" }, { open: "//", - close: "\n" + singleline: true } ], - inlineOpenTokens: [], openTokens: ["module", "case", "begin"], closeTokens: ["end", "endmodule", "endcase"], neutralTokens: [] }, vhdl: { caseSensitive: true, - ignoreInDelimiters: [ + ignoreBlocks: [ { open: "--", - close: "\n" + singleline: true } ], - inlineOpenTokens: [], openTokens: ["entity", "component", "case", "begin"], closeTokens: ["end", "endcase"], neutralTokens: [] }, crystal: { caseSensitive: true, - ignoreInDelimiters: [ + ignoreBlocks: [ { open: '"', close: '"' }, { open: "#", - close: "\n" + singleline: true } ], - - inlineOpenTokens: [], openTokens: [ "class", "module", @@ -234,7 +239,6 @@ export const languages: { }, COBOL: { caseSensitive: false, - inlineOpenTokens: [], openTokens: [ "program-id", "perform", diff --git a/src/parser.ts b/src/parser.ts new file mode 100644 index 0000000..ff2db5f --- /dev/null +++ b/src/parser.ts @@ -0,0 +1,148 @@ +"use strict"; + +import * as vscode from "vscode"; +import { Token } from "./tokenizer"; + +interface ParseParams { + activeEditor: vscode.TextEditor; + options: vscode.DecorationOptions[][]; + tokens: Token[]; +} + +interface SubParserParams { + decoration: vscode.DecorationOptions; + options: vscode.DecorationOptions[][]; + token: Token; + decorationDepth: number; +} + +interface SubParserResult { + options: vscode.DecorationOptions[][]; + decorationDepth: number; +} + +export const deepDecorations = [ + vscode.window.createTextEditorDecorationType({ + color: { id: "rainbowend.deep1" } + }), + vscode.window.createTextEditorDecorationType({ + color: { id: "rainbowend.deep2" } + }), + vscode.window.createTextEditorDecorationType({ + color: { id: "rainbowend.deep3" } + }) +]; + +const DEFAULT = 0; +const IGNORE = 1; +const COMPREHENSION = 2; + +export function parse({ activeEditor, options, tokens }: ParseParams) { + let decorationDepth = 0; + let mode = DEFAULT; + let comprehensionDepthStack = []; + + for (let token of tokens) { + let { pos, length, type } = token; + /* Switch parsing modes if any of the mode delimiters has been reached */ + if (type === "OPEN IGNORE") { + mode = IGNORE; + continue; + } else if (type === "OPEN COMPREHENSION") { + mode = COMPREHENSION; + comprehensionDepthStack.push(1); + continue; + } else if (type === "CLOSE IGNORE" || type === "CLOSE COMPREHENSION") { + comprehensionDepthStack.pop(); + + if (comprehensionDepthStack.length > 0) { + continue; + } + mode = DEFAULT; + continue; + } + const 
startPos = activeEditor.document.positionAt(pos); + const endPos = activeEditor.document.positionAt(pos + length); + const decoration: vscode.DecorationOptions = { + range: new vscode.Range(startPos, endPos) + }; + + let result = { decorationDepth, options }; + switch (mode) { + // case IGNORE: + /* A new parseInComment function could be implemented to allow for different highlighting + instead of just ignoring */ + // result = parseInComment({ decoration, decorationDepth, options, token }); + // break; + case COMPREHENSION: + result = parseInComprehension({ + decoration, + decorationDepth, + options, + token + }); + break; + case DEFAULT: + result = parseDefault({ decoration, decorationDepth, options, token }); + break; + default: + break; + } + decorationDepth = result.decorationDepth; + options = result.options; + } +} + +function parseDefault(params: SubParserParams): SubParserResult { + let { decoration, token, decorationDepth, options } = params; + switch (token.type) { + case "OPEN BLOCK": + // If beginning a new block, push new decoration and increment decorationDepth + options[decorationDepth % deepDecorations.length].push(decoration); + decorationDepth++; + break; + case "CLOSE BLOCK": + // If closing a block, decrement decorationDepth + decorationDepth = decorationDepth > 0 ? decorationDepth - 1 : 0; + options[decorationDepth % deepDecorations.length].push(decoration); + break; + default: + if (decorationDepth > 0) { + // As default, if the token is in non-zero decorationDepth, it is a continuation token and should keep the same color as the opening token + options[(decorationDepth - 1) % deepDecorations.length].push( + decoration + ); + } + break; + } + + return { decorationDepth, options }; +} + +function parseInComprehension(params: SubParserParams): SubParserResult { + /* For simplicity, in comprehensions, + all open-block and close-block tokens will be highlighted with the same decorationDepth color + The color is the next down from the previous block + + i.e.: + if + [ + for x if + ] + */ + + let { decoration, token, decorationDepth, options } = params; + let comprehensionDecorationDepth = decorationDepth + 1; + + if ( + token.type === "OPEN BLOCK" || + token.type === "CLOSE BLOCK" || + token.type === "NEUTRAL" + ) { + options[comprehensionDecorationDepth % deepDecorations.length].push( + decoration + ); + } + + return { decorationDepth, options }; +} diff --git a/src/tokenizer.ts b/src/tokenizer.ts new file mode 100644 index 0000000..580c046 --- /dev/null +++ b/src/tokenizer.ts @@ -0,0 +1,203 @@ +"use strict"; + +import { LangParams } from "./languages"; +export interface Token { + pos: number; + length: number; + type: string; + content: string; +} + +export interface TokenizeParams { + openRegExp: RegExp; + closeRegExp: RegExp; + neutralRegExp: RegExp; + ignoreRegExp: RegExp | null; + singleLineIgnoreRegExp: RegExp | null; + openListComprehensionRegExp: RegExp | null; + closeListComprehensionRegExp: RegExp | null; +} + +export function loadRegexes(langParams: LangParams) { + const { + ignoreBlocks, + openTokens, + closeTokens, + neutralTokens, + listComprehensions + } = langParams; + + let ignoreTokens = null; + let singleLineIgnoreTokens = null; + let ignoreRegExp = null; + let singleLineIgnoreRegExp = null; + if (ignoreBlocks) { + ignoreTokens = ignoreBlocks + .filter(token => !token.singleline) + .map(({ open, close }) => `${open}[^(${close})]*${close}`) + .join("|"); + ignoreRegExp = RegExp(`${ignoreTokens}`, "gm"); + + singleLineIgnoreTokens = ignoreBlocks + 
.filter(token => token.singleline) + .map(({ open }) => `${open}`) + .join("|"); + singleLineIgnoreRegExp = RegExp(`(${singleLineIgnoreTokens}).*`, "g"); + } + + /* + The `regexpPrefix` and `regexpSuffix` separators are used instead of \b to ensure that any regexp + provided as the configurable tokens can be matched. This is relaxed so that words preceded or followed by + parentheses, square brackets or curly brackets are also matched. + Previously, there was an issue involving the ':' character + */ + + const regexpPrefix = "(^|\\s)"; + const regexpSuffix = "($|\\s)"; + + let openRegExp = RegExp( + `(?<=${regexpPrefix})(${openTokens.join("|")})(?=${regexpSuffix})`, + "gm" + ); + let closeRegExp = RegExp( + `(?<=${regexpPrefix})(${closeTokens.join("|")})(?=${regexpSuffix})`, + "gm" + ); + let neutralRegExp = RegExp( + `(?<=${regexpPrefix})(${neutralTokens.join("|")})(?=${regexpSuffix})`, + "gm" + ); + + let openListComprehensionRegExp = null; + let closeListComprehensionRegExp = null; + + if (listComprehensions) { + let openListComprehensionTokens = listComprehensions + .map(({ open }) => `${open}`) + .join("|"); + openListComprehensionRegExp = RegExp( + `(${openListComprehensionTokens})`, + "gm" + ); + let closeListComprehensionTokens = listComprehensions + .map(({ close }) => `${close}`) + .join("|"); + closeListComprehensionRegExp = RegExp( + `(${closeListComprehensionTokens})`, + "gm" + ); + } + + return { + openRegExp, + closeRegExp, + ignoreRegExp, + singleLineIgnoreRegExp, + neutralRegExp, + openListComprehensionRegExp, + closeListComprehensionRegExp + }; +} + +function findAllMatches(str: string, regexp: RegExp | null, type: string) { + if (!regexp) { + return []; + } + + let matches = []; + let m: any = {}; + + while ((m = regexp.exec(str))) { + matches.push(m); + } + + return matches.map(match => { + return { + content: match[0], + pos: match.index, + length: match[0].length, + keep: true, + type + }; + }); +} + +export function tokenize( + text: string, + { + openRegExp, + closeRegExp, + neutralRegExp, + ignoreRegExp, + singleLineIgnoreRegExp, + openListComprehensionRegExp, + closeListComprehensionRegExp + }: TokenizeParams +): Token[] { + let openMatches = findAllMatches(text, openRegExp, "OPEN BLOCK"); + let closeMatches = findAllMatches(text, closeRegExp, "CLOSE BLOCK"); + let neutralMatches = findAllMatches(text, neutralRegExp, "NEUTRAL"); + let ignoreMatches = findAllMatches(text, ignoreRegExp, "COMMENT"); + let openListComprehensionMatches = findAllMatches( + text, + openListComprehensionRegExp, + "OPEN COMPREHENSION" + ); + let closeListComprehensionMatches = findAllMatches( + text, + closeListComprehensionRegExp, + "CLOSE COMPREHENSION" + ); + + let singleLineIgnoreMatches = findAllMatches( + text, + singleLineIgnoreRegExp, + "SINGLE LINE COMMENT" + ); + + const matchReducer = function(acc: Token[], token: Token, suffix: string) { + let { pos, length } = token; + + let open = { ...token, length: 1, type: `OPEN ${suffix}` }; + + let close = { + ...token, + length: 1, + pos: pos + length - 1, + type: `CLOSE ${suffix}` + }; + + return [...acc, open, close]; + }; + + const convertedIgnoreMatches = [ + ...singleLineIgnoreMatches, + ...ignoreMatches + ].reduce( + (acc: Token[], token: Token) => matchReducer(acc, token, "IGNORE"), + [] + ); + + let matches = [ + ...openListComprehensionMatches, + ...closeListComprehensionMatches, + ...convertedIgnoreMatches, + ...openMatches, + ...closeMatches, + ...neutralMatches + ]; + + let tokens = matches.sort(({ pos: posX }, { pos: posY 
}) => { + if (posX < posY) { + return -1; + } + + if (posX > posY) { + return 1; + } + + return 0; + }); + + return tokens; +}
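
Note (editor's sketch, not part of the patch): the refactor splits decoration into three steps. `loadRegexes` builds the per-language regular expressions once at activation, `tokenize` turns the document text into a position-sorted token list, and `parse` assigns each token a nesting depth and pushes the corresponding decoration. Below is a minimal sketch of that flow using the names introduced by the patch, with the editor wiring simplified; the `decorateOnce` helper is illustrative only and does not exist in the extension.

import * as vscode from "vscode";
import { languages } from "./languages";
import { tokenize, Token, TokenizeParams } from "./tokenizer";
import { parse, deepDecorations } from "./parser";

// Illustrative helper: decorate one editor once, without the debounce logic.
function decorateOnce(
  editor: vscode.TextEditor,
  regExps: { [index: string]: TokenizeParams }
) {
  const languageConfiguration = languages[editor.document.languageId];
  if (!languageConfiguration) {
    return; // language not supported by the extension
  }

  let text = editor.document.getText();
  if (!languageConfiguration.caseSensitive) {
    text = text.toLowerCase();
  }

  // One bucket of decoration options per rainbow colour
  const options: vscode.DecorationOptions[][] = [];
  deepDecorations.forEach(() => options.push([]));

  // Tokenize the whole document, then let the parser assign depths
  const tokens: Token[] = tokenize(text, regExps[editor.document.languageId]);
  parse({ activeEditor: editor, options, tokens });

  deepDecorations.forEach((decoration, i) => {
    editor.setDecorations(decoration, options[i]);
  });
}

At activation the extension fills `regExps` by calling `loadRegexes(languages[language])` once per supported language, so each edit only pays for the `tokenize`/`parse` pass.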
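
The update itself stays debounced, but the timer is now threaded through explicitly: `triggerUpdateDecorations` receives the previous timer, clears it, and returns the new one for the caller to store (in the patch, the `onDidChangeTextDocument` handler keeps the returned handle). The contract is roughly the following sketch; `debounce` is a name chosen here for illustration, not an identifier from the patch.

function debounce(
  previous: NodeJS.Timer | null,
  run: () => void,
  delayMs: number = 250
): NodeJS.Timer {
  if (previous) {
    clearTimeout(previous); // drop the still-pending update
  }
  return setTimeout(run, delayMs); // the caller stores this handle for the next call
}

// e.g. inside the text-change handler:
// timeout = debounce(timeout, () => updateDecorations(regExps));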
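
Inside `parse`, the default mode (`parseDefault`) is what produces the rainbow: an open token is coloured with the current depth and then increments it, a close token decrements first and reuses that colour, and neutral tokens keep the colour of their enclosing block. A small self-contained trace of that counting, assuming the three decorations defined in parser.ts:

const colours = ["rainbowend.deep1", "rainbowend.deep2", "rainbowend.deep3"];
let depth = 0;
// e.g. Ruby: module / class / def ... end / end / end
for (const type of ["OPEN", "OPEN", "OPEN", "CLOSE", "CLOSE", "CLOSE"]) {
  if (type === "OPEN") {
    console.log(`${type} -> ${colours[depth % colours.length]}`); // colour, then go deeper
    depth++;
  } else {
    depth = depth > 0 ? depth - 1 : 0; // come back up, then colour
    console.log(`${type} -> ${colours[depth % colours.length]}`);
  }
}
// the opens print deep1, deep2, deep3 and the matching ends print deep3, deep2, deep1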
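
Finally, on the tokenizer: comments and strings are no longer blanked out of the text the way the old `ignoreInDelimiters` did. Instead, each match of `ignoreRegExp` or `singleLineIgnoreRegExp` is reduced by `matchReducer` to a pair of one-character OPEN IGNORE / CLOSE IGNORE tokens, and the parser skips block tokens while it is between them. By inspection of the regexes, a one-line Lua document should tokenize roughly as sketched below; the positions are worked out by hand and shown only to illustrate the shape of the output.

import { languages } from "./languages";
import { loadRegexes, tokenize } from "./tokenizer";

const regExps = loadRegexes(languages["lua"]);
const tokens = tokenize("while x do end -- end here", regExps);
console.log(tokens.map(t => `${t.type}@${t.pos}`).join(", "));
// expected shape: OPEN BLOCK@0 (while), NEUTRAL@8 (do), CLOSE BLOCK@11 (end),
// OPEN IGNORE@15 (--), CLOSE BLOCK@18 (the "end" inside the comment, which the
// parser's IGNORE mode then discards), CLOSE IGNORE@25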