From db81f652a576f2a0129cbbe168d7df82a0bb3354 Mon Sep 17 00:00:00 2001 From: Paulo Valente Date: Mon, 7 Oct 2019 07:24:52 -0300 Subject: [PATCH 01/13] refactor: clean-up string matching engine --- src/extension.ts | 229 ++++++++++++++++------------------------------- src/languages.ts | 27 +++--- src/tokenizer.ts | 76 ++++++++++++++++ 3 files changed, 169 insertions(+), 163 deletions(-) create mode 100644 src/tokenizer.ts diff --git a/src/extension.ts b/src/extension.ts index 2ed8d82..0c70ac9 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -2,6 +2,7 @@ import * as vscode from "vscode"; import { languages } from "./languages"; +import { tokenize, Token } from "./tokenizer"; const deepDecorations = [ vscode.window.createTextEditorDecorationType({ @@ -16,11 +17,61 @@ const deepDecorations = [ ]; let timeout: NodeJS.Timer | null = null; -let regExs: { [index: string]: RegExp } = {}; +let regExps: { + [index: string]: { + openRegExp: RegExp; + closeRegExp: RegExp; + neutralRegExp: RegExp; + ignoreRegExp: RegExp | null; + singleLineIgnoreRegExp: RegExp | null; + }; +} = {}; + +function loadRegexes(language: string) { + const { + ignoreInDelimiters, + openTokens, + closeTokens, + neutralTokens + } = languages[language]; + + let ignoreTokens = null; + let singleLineIgnoreTokens = null; + let ignoreRegExp = null; + let singleLineIgnoreRegExp = null; + if (ignoreInDelimiters) { + ignoreTokens = ignoreInDelimiters + .filter(token => !token.singleline) + .map(({ open, close }) => `${open}[^(${close})]*${close}`) + .join("|"); + ignoreRegExp = RegExp(`${ignoreTokens}`, "gm"); + + singleLineIgnoreTokens = ignoreInDelimiters + .filter(token => !token.singleline) + .map(({ open, close }) => `${open}[^(${close})]*${close}`) + .join("|"); + singleLineIgnoreRegExp = RegExp(`${singleLineIgnoreTokens}`, "g"); + } + + let openRegExp = RegExp(`(^|\\s)(${openTokens.join("|")})(?=($|\\s))`, "g"); + let closeRegExp = RegExp(`(^|\\s)(${closeTokens.join("|")})(?=($|\\s))`, "g"); + let neutralRegExp = RegExp( + `(^|\\s)(${neutralTokens.join("|")})(?=($|\\s))`, + "g" + ); + + return { + openRegExp, + closeRegExp, + ignoreRegExp, + singleLineIgnoreRegExp, + neutralRegExp + }; +} export function activate(context: vscode.ExtensionContext) { Object.keys(languages).forEach(language => { - regExs[language] = buildRegex(language); + regExps[language] = loadRegexes(language); }); let activeEditor = vscode.window.activeTextEditor; @@ -57,176 +108,54 @@ function triggerUpdateDecorations(activeEditor: vscode.TextEditor) { timeout = setTimeout(updateDecorations, 250); } -function buildRegex(language: string) { - const languageConfiguration = languages[language]; - let tokens: Array = languageConfiguration["openTokens"]; - tokens = tokens.concat(languageConfiguration["inlineOpenTokens"]); - tokens = tokens.concat(languageConfiguration["closeTokens"]); - tokens = tokens.concat(languageConfiguration["neutralTokens"]); - return RegExp("(\\b)(" + tokens.join("|") + ")(\\b)", "gm"); -} - -function ignoreInDelimiters( - token_pairs: - | Array<{ - open: string; - close: string; - }> - | undefined, - text: string -) { - /* This function replaces text inside each token pair with spaces, - so as to ignore the text between delimiters */ - if (token_pairs) { - token_pairs.forEach(({ open: open_delim, close: close_delim }) => { - /* Only allow nesting if delimiters are different */ - if (open_delim == close_delim) { - let regexp = RegExp( - `${open_delim}[^${close_delim}]*${close_delim}`, - "gm" - ); - text = text.replace(regexp, match => { - return " ".repeat(match.length); - }); - } else { - let openRegexp = RegExp(`${open_delim}`, "gm"); - let closeRegexp = RegExp(`${close_delim}`, "gm"); - - let indices = []; - - let match = openRegexp.exec(text); - if (match == null) { - return; - } - - while (match != null) { - indices.push({ index: match.index, type: "open" }); - match = openRegexp.exec(text); - } - - match = closeRegexp.exec(text); - if (match == null) { - return; - } - - while (match != null) { - indices.push({ index: match.index, type: "close" }); - match = closeRegexp.exec(text); - } - - /* Sort by index */ - indices = indices.sort(({ index: a }, { index: b }) => a - b); - - let ignore_env_counter = 0; - let first_index = indices[0].index; - - let index: number; - let type: string; - - /* This isn't so inefficient in that it is - O(indices.length), instead of O(text.length). - Also, the list is already ordered, which is really helpful */ - for ({ index, type } of indices) { - /* skip current token if trying to close when there is no open block - cannot just break because '\n' can be both a closing token and a - normal line end - */ - if (type == "close" && ignore_env_counter == 0) { - continue; - } - - /* if counter is zero, should begin an ignore block */ - if (ignore_env_counter == 0) { - first_index = index; - } - - if (type == "open") { - /* if it is an open token, always increment env counter */ - ignore_env_counter++; - } else { - ignore_env_counter--; - /* if counter has reached zero after a closing token, - end ignore block */ - let last_index = index; - - /* Set ignore block slice as whitespace and keep the rest */ - text = - text.slice(0, first_index) + - " ".repeat(last_index - first_index + 1) + - text.slice(last_index + 1); - } - } - - if (ignore_env_counter != 0) { - /* Didn't close last block */ - text = - text.slice(0, first_index) + - " ".repeat(text.length - first_index + 1); - } - } - }); - } - return text; -} - function updateDecorations() { const activeEditor = vscode.window.activeTextEditor; if (!activeEditor) { return; } - const languageConfiguration = languages[activeEditor.document.languageId]; + let lang = activeEditor.document.languageId; + const languageConfiguration = languages[lang]; let text = activeEditor.document.getText(); const options: vscode.DecorationOptions[][] = []; deepDecorations.forEach(d => { options.push([]); }); - let match; - let deep = 0; + let depth = 0; // if we are not case sensitive, then ensure the case of text matches then keyworkd matches if (!languageConfiguration.caseSensitive) { text = text.toLowerCase(); } - // substitute all ignore intervals with spaces - // this ensures commented code or - // keywords inside strings are ignored properly - - // also, prepend a whitespace to allow matching the first character in document - // if needed - - text = - " " + ignoreInDelimiters(languageConfiguration.ignoreInDelimiters, text); - while ((match = regExs[activeEditor.document.languageId].exec(text))) { - const startIndex = match.index + match[1].length - 1; // Decrement to compensate for added character - const startPos = activeEditor.document.positionAt(startIndex); - const endPos = activeEditor.document.positionAt( - startIndex + match[2].length - ); + + let tokens: Token[] = tokenize(text, regExps[lang]); + + for (let { pos, length, type } of tokens) { + const startPos = activeEditor.document.positionAt(pos); + const endPos = activeEditor.document.positionAt(pos + length); const decoration: vscode.DecorationOptions = { range: new vscode.Range(startPos, endPos) }; - if (languageConfiguration.closeTokens.indexOf(match[2]) > -1) { - if (deep > 0) { - deep -= 1; - } - options[deep % deepDecorations.length].push(decoration); - } else if (languageConfiguration.neutralTokens.indexOf(match[2]) > -1) { - if (deep > 0) { - options[(deep - 1) % deepDecorations.length].push(decoration); - } - } else if (languageConfiguration.openTokens.indexOf(match[2]) > -1) { - options[deep % deepDecorations.length].push(decoration); - deep += 1; - } else { - if (match[1].length === 0 || match[1].match("^[\\s\n]+$")) { - options[deep % deepDecorations.length].push(decoration); - deep += 1; - } + switch (type) { + case "OPEN BLOCK": + // If beginning a new block, push new decoration and increment depth + options[depth % deepDecorations.length].push(decoration); + depth++; + break; + case "CLOSE BLOCK": + // If closing a block, decrement depth + depth = depth > 0 ? depth - 1 : 0; + options[depth % deepDecorations.length].push(decoration); + break; + default: + if (depth > 0) { + // As default, if the token is in non-zero depth, it is a continuation token and should keep the same color as the opening token + options[(depth - 1) % deepDecorations.length].push(decoration); + } + break; } } - deepDecorations.forEach((deepDecoration, i) => { activeEditor.setDecorations(deepDecoration, options[i]); }); diff --git a/src/languages.ts b/src/languages.ts index ab9499d..0a1c7d1 100644 --- a/src/languages.ts +++ b/src/languages.ts @@ -3,7 +3,8 @@ export const languages: { caseSensitive: boolean; ignoreInDelimiters?: Array<{ open: string; - close: string; + close?: string; + singleline?: boolean; }>; inlineOpenTokens: Array; openTokens: Array; @@ -16,7 +17,7 @@ export const languages: { ignoreInDelimiters: [ { open: "#", - close: "\n" + singleline: true }, { open: "=begin", close: "=end" }, { @@ -59,7 +60,7 @@ export const languages: { }, { open: "--", - close: "\n" + singleline: true } ], inlineOpenTokens: [], @@ -72,7 +73,7 @@ export const languages: { ignoreInDelimiters: [ { open: "#", - close: "\n" + singleline: true }, { open: '"""', @@ -106,20 +107,20 @@ export const languages: { "with", "defprotocol", "defimpl", - "schema", - "embedded_schema", + "schema(?=.+do)", + "embedded_schema(?=.+do)", "resources(?=.+do)", "scope(?=.+do)" ], - closeTokens: ["end", ", do"], - neutralTokens: ["do", "else", "elseif", "rescue", "after"] + closeTokens: ["end", "do:"], + neutralTokens: ["do", "else", "elseif", "rescue", "after", "->", "<-"] }, julia: { caseSensitive: true, ignoreInDelimiters: [ { open: "#", - close: "\n" + singleline: true }, { open: '"""', @@ -154,7 +155,7 @@ export const languages: { ignoreInDelimiters: [ { open: "#", - close: "\n" + singleline: true }, { open: '"', @@ -179,7 +180,7 @@ export const languages: { }, { open: "//", - close: "\n" + singleline: true } ], inlineOpenTokens: [], @@ -192,7 +193,7 @@ export const languages: { ignoreInDelimiters: [ { open: "--", - close: "\n" + singleline: true } ], inlineOpenTokens: [], @@ -209,7 +210,7 @@ export const languages: { }, { open: "#", - close: "\n" + singleline: true } ], diff --git a/src/tokenizer.ts b/src/tokenizer.ts new file mode 100644 index 0000000..370a61c --- /dev/null +++ b/src/tokenizer.ts @@ -0,0 +1,76 @@ +export interface Token { + pos: number; + length: number; + type: string; +} + +interface TokenizeParams { + openRegExp: RegExp; + closeRegExp: RegExp; + neutralRegExp: RegExp; + ignoreRegExp: RegExp | null; + singleLineIgnoreRegExp: RegExp | null; +} + +function findAllMatches(str: any, regexp: RegExp | null, type: string) { + if (!regexp) { + return []; + } + + let matches = str.matchAll(regexp); + + return [...matches].map(match => { + return { + pos: match.index, + length: match[0].length, + keep: true, + type + }; + }); +} + +export function tokenize( + text: string, + { + openRegExp, + closeRegExp, + neutralRegExp, + ignoreRegExp, + singleLineIgnoreRegExp + }: TokenizeParams +): Token[] { + let openMatches = findAllMatches(text, openRegExp, "OPEN BLOCK"); + let closeMatches = findAllMatches(text, closeRegExp, "CLOSE BLOCK"); + let neutralMatches = findAllMatches(text, neutralRegExp, "NEUTRAL"); + let ignoreMatches = findAllMatches(text, ignoreRegExp, "IGNORE").concat( + findAllMatches(text, singleLineIgnoreRegExp, "IGNORE") + ); + + let matches = openMatches.concat(closeMatches).concat(neutralMatches); + matches.sort(({ pos: posX }, { pos: posY }) => { + if (posX < posY) { + return -1; + } + + if (posX > posY) { + return 1; + } + + return 0; + }); + + for (let { pos: begin, length: length } of ignoreMatches) { + let end = begin + length; + for (let match of matches) { + let { pos } = match; + if (begin <= pos && pos < end) { + /* token is inside an ignore block and should be ignored */ + match.keep = false; + } + } + } + + let tokens = matches.filter(({ keep }) => keep); + + return tokens; +} From 8fd74d19c78760c2c143c845e57ccdc804a8469b Mon Sep 17 00:00:00 2001 From: Paulo Valente Date: Mon, 7 Oct 2019 07:46:12 -0300 Subject: [PATCH 02/13] fix: single line ignore --- package.json | 4 ++-- src/extension.ts | 4 ++-- src/tokenizer.ts | 15 ++++++++++++--- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/package.json b/package.json index 4999cad..4422bb3 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "license": "Apache-2.0", "displayName": "Rainbow End", "description": "This extension allows to identify keyword / end with colours.", - "version": "0.7.2", + "version": "0.8.0", "icon": "images/logo.png", "engines": { "vscode": "^1.29.0" @@ -71,4 +71,4 @@ "typescript": "^2.6.1", "vscode": "^1.1.26" } -} \ No newline at end of file +} diff --git a/src/extension.ts b/src/extension.ts index 0c70ac9..776aa8d 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -48,9 +48,9 @@ function loadRegexes(language: string) { singleLineIgnoreTokens = ignoreInDelimiters .filter(token => !token.singleline) - .map(({ open, close }) => `${open}[^(${close})]*${close}`) + .map(({ open, close }) => `${open}`) .join("|"); - singleLineIgnoreRegExp = RegExp(`${singleLineIgnoreTokens}`, "g"); + singleLineIgnoreRegExp = RegExp(`(${singleLineIgnoreTokens}).*${}`, "g"); } let openRegExp = RegExp(`(^|\\s)(${openTokens.join("|")})(?=($|\\s))`, "g"); diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 370a61c..9a2bedd 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -12,14 +12,19 @@ interface TokenizeParams { singleLineIgnoreRegExp: RegExp | null; } -function findAllMatches(str: any, regexp: RegExp | null, type: string) { +function findAllMatches(str: string, regexp: RegExp | null, type: string) { if (!regexp) { return []; } - let matches = str.matchAll(regexp); + let matches = []; + let m: any = {}; - return [...matches].map(match => { + while ((m = regexp.exec(str))) { + matches.push(m); + } + + return matches.map(match => { return { pos: match.index, length: match[0].length, @@ -58,6 +63,9 @@ export function tokenize( return 0; }); + console.log(matches); + + console.log(ignoreMatches); for (let { pos: begin, length: length } of ignoreMatches) { let end = begin + length; @@ -71,6 +79,7 @@ export function tokenize( } let tokens = matches.filter(({ keep }) => keep); + console.log(tokens); return tokens; } From c64de8699ef0de09494000dcef34ea625b3068f9 Mon Sep 17 00:00:00 2001 From: Paulo Valente Date: Mon, 7 Oct 2019 08:16:57 -0300 Subject: [PATCH 03/13] fix: match elixir properly --- src/extension.ts | 21 +++++++++++++++------ src/languages.ts | 16 ++++++++-------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/extension.ts b/src/extension.ts index 776aa8d..ad1cdcf 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -47,17 +47,26 @@ function loadRegexes(language: string) { ignoreRegExp = RegExp(`${ignoreTokens}`, "gm"); singleLineIgnoreTokens = ignoreInDelimiters - .filter(token => !token.singleline) - .map(({ open, close }) => `${open}`) + .filter(token => token.singleline) + .map(({ open }) => `${open}`) .join("|"); - singleLineIgnoreRegExp = RegExp(`(${singleLineIgnoreTokens}).*${}`, "g"); + singleLineIgnoreRegExp = RegExp(`(${singleLineIgnoreTokens}).*`, "g"); + console.log(singleLineIgnoreRegExp); } - let openRegExp = RegExp(`(^|\\s)(${openTokens.join("|")})(?=($|\\s))`, "g"); - let closeRegExp = RegExp(`(^|\\s)(${closeTokens.join("|")})(?=($|\\s))`, "g"); + /* + The (^|\s) and ($|\s) separators are used instead of \b to ensure that any regexp + provided as the configurable tokens can be matched. + Previously, there was an issue involving the ':' character + */ + let openRegExp = RegExp(`(^|\\s)(${openTokens.join("|")})(?=($|\\s))`, "gm"); + let closeRegExp = RegExp( + `(^|\\s)(${closeTokens.join("|")})(?=($|\\s))`, + "gm" + ); let neutralRegExp = RegExp( `(^|\\s)(${neutralTokens.join("|")})(?=($|\\s))`, - "g" + "gm" ); return { diff --git a/src/languages.ts b/src/languages.ts index 0a1c7d1..218b03e 100644 --- a/src/languages.ts +++ b/src/languages.ts @@ -92,10 +92,10 @@ export const languages: { openTokens: [ "fn", "defmodule", - "defmacro(?=.+do)", - "defmacrop(?=.+do)", - "def(?=.+do)", - "defp(?=.+do)", + "defmacro", + "defmacrop", + "def", + "defp", "if", "while", "for", @@ -107,10 +107,10 @@ export const languages: { "with", "defprotocol", "defimpl", - "schema(?=.+do)", - "embedded_schema(?=.+do)", - "resources(?=.+do)", - "scope(?=.+do)" + "schema", + "embedded_schema", + "resources", + "scope" ], closeTokens: ["end", "do:"], neutralTokens: ["do", "else", "elseif", "rescue", "after", "->", "<-"] From 3ba91c8b78f21b2b818df1f267347335ade4dd2d Mon Sep 17 00:00:00 2001 From: Paulo Valente Date: Tue, 8 Oct 2019 01:37:59 -0300 Subject: [PATCH 04/13] wip --- TODO.md | 6 ++++ src/languages.ts | 18 ++++++------ src/tokenizer.ts | 75 ++++++++++++++++++++++++++++++++++-------------- 3 files changed, 68 insertions(+), 31 deletions(-) create mode 100644 TODO.md diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..e4e5be0 --- /dev/null +++ b/TODO.md @@ -0,0 +1,6 @@ +- [ ] rewrite languages.ts to support list comprehensions where it is due +- [ ] rewrite parser in extension.ts to allow for 3 differente environments: + * default: cycles colors through nesting code blocks + * comments: enter upon an open ignore-block token. ignores everything inside until the closing token + * comprehensions: matches diferrently, keeping its own color counter, + allowing just for inline tokens. should support nesting diff --git a/src/languages.ts b/src/languages.ts index 218b03e..8e31b82 100644 --- a/src/languages.ts +++ b/src/languages.ts @@ -1,7 +1,7 @@ export const languages: { [index: string]: { caseSensitive: boolean; - ignoreInDelimiters?: Array<{ + ignoreBlocks?: Array<{ open: string; close?: string; singleline?: boolean; @@ -14,7 +14,7 @@ export const languages: { } = { ruby: { caseSensitive: true, - ignoreInDelimiters: [ + ignoreBlocks: [ { open: "#", singleline: true @@ -45,7 +45,7 @@ export const languages: { }, lua: { caseSensitive: true, - ignoreInDelimiters: [ + ignoreBlocks: [ { open: '"', close: '"' @@ -70,7 +70,7 @@ export const languages: { }, elixir: { caseSensitive: true, - ignoreInDelimiters: [ + ignoreBlocks: [ { open: "#", singleline: true @@ -117,7 +117,7 @@ export const languages: { }, julia: { caseSensitive: true, - ignoreInDelimiters: [ + ignoreBlocks: [ { open: "#", singleline: true @@ -152,7 +152,7 @@ export const languages: { }, shellscript: { caseSensitive: true, - ignoreInDelimiters: [ + ignoreBlocks: [ { open: "#", singleline: true @@ -173,7 +173,7 @@ export const languages: { }, verilog: { caseSensitive: true, - ignoreInDelimiters: [ + ignoreBlocks: [ { open: "/\\*", close: "\\*/" @@ -190,7 +190,7 @@ export const languages: { }, vhdl: { caseSensitive: true, - ignoreInDelimiters: [ + ignoreBlocks: [ { open: "--", singleline: true @@ -203,7 +203,7 @@ export const languages: { }, crystal: { caseSensitive: true, - ignoreInDelimiters: [ + ignoreBlocks: [ { open: '"', close: '"' diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 9a2bedd..d88ea24 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1,3 +1,5 @@ +import { close } from "fs"; + export interface Token { pos: number; length: number; @@ -10,6 +12,8 @@ interface TokenizeParams { neutralRegExp: RegExp; ignoreRegExp: RegExp | null; singleLineIgnoreRegExp: RegExp | null; + openListComprehensionRegExp: RegExp | null; + closeListComprehensionRegExp: RegExp | null; } function findAllMatches(str: string, regexp: RegExp | null, type: string) { @@ -41,18 +45,62 @@ export function tokenize( closeRegExp, neutralRegExp, ignoreRegExp, - singleLineIgnoreRegExp + singleLineIgnoreRegExp, + openListComprehensionRegExp, + closeListComprehensionRegExp }: TokenizeParams ): Token[] { let openMatches = findAllMatches(text, openRegExp, "OPEN BLOCK"); let closeMatches = findAllMatches(text, closeRegExp, "CLOSE BLOCK"); let neutralMatches = findAllMatches(text, neutralRegExp, "NEUTRAL"); - let ignoreMatches = findAllMatches(text, ignoreRegExp, "IGNORE").concat( - findAllMatches(text, singleLineIgnoreRegExp, "IGNORE") + let ignoreMatches = findAllMatches(text, ignoreRegExp, "COMMENT"); + let openListComprehensionMatches = findAllMatches( + text, + openListComprehensionRegExp, + "OPEN COMPREHENSION" + ); + let closeListComprehensionMatches = findAllMatches( + text, + closeListComprehensionRegExp, + "CLOSE COMPREHENSION" + ); + + let singleLineIgnoreMatches = findAllMatches( + text, + singleLineIgnoreRegExp, + "SINGLE LINE COMMENT" ); - let matches = openMatches.concat(closeMatches).concat(neutralMatches); - matches.sort(({ pos: posX }, { pos: posY }) => { + const ignoreMatchReducer = function(acc: Token[], token: Token) { + let { pos, length } = token; + + let open = { ...token, length: 1, type: "OPEN IGNORE" }; + + let close = { + ...token, + length: 1, + pos: pos + length - 1, + type: "CLOSE IGNORE" + }; + + return [...acc, open, close]; + }; + + const convertedIgnoreMatches = [ + ...singleLineIgnoreMatches, + ...ignoreMatches + ].reduce(ignoreMatchReducer, []); + + let matches = [ + ...convertedIgnoreMatches, + ...openMatches, + ...closeMatches, + ...neutralMatches, + ...openListComprehensionMatches, + ...closeListComprehensionMatches + ]; + + let tokens = matches.sort(({ pos: posX }, { pos: posY }) => { if (posX < posY) { return -1; } @@ -63,23 +111,6 @@ export function tokenize( return 0; }); - console.log(matches); - - console.log(ignoreMatches); - - for (let { pos: begin, length: length } of ignoreMatches) { - let end = begin + length; - for (let match of matches) { - let { pos } = match; - if (begin <= pos && pos < end) { - /* token is inside an ignore block and should be ignored */ - match.keep = false; - } - } - } - - let tokens = matches.filter(({ keep }) => keep); - console.log(tokens); return tokens; } From f7345311d7b76c2332c29d4a06744b3554e8ad7f Mon Sep 17 00:00:00 2001 From: Paulo Valente Date: Sun, 13 Oct 2019 19:11:03 -0300 Subject: [PATCH 05/13] wip --- src/extension.ts | 64 ++++++++++++++---------------------------------- src/parser.ts | 59 ++++++++++++++++++++++++++++++++++++++++++++ src/tokenizer.ts | 5 ++-- 3 files changed, 79 insertions(+), 49 deletions(-) create mode 100644 src/parser.ts diff --git a/src/extension.ts b/src/extension.ts index ad1cdcf..46c43d0 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -2,7 +2,8 @@ import * as vscode from "vscode"; import { languages } from "./languages"; -import { tokenize, Token } from "./tokenizer"; +import { tokenize, Token, TokenizeParams } from "./tokenizer"; +import { parse } from "./parser"; const deepDecorations = [ vscode.window.createTextEditorDecorationType({ @@ -18,35 +19,26 @@ const deepDecorations = [ let timeout: NodeJS.Timer | null = null; let regExps: { - [index: string]: { - openRegExp: RegExp; - closeRegExp: RegExp; - neutralRegExp: RegExp; - ignoreRegExp: RegExp | null; - singleLineIgnoreRegExp: RegExp | null; - }; + [index: string]: TokenizeParams; } = {}; function loadRegexes(language: string) { - const { - ignoreInDelimiters, - openTokens, - closeTokens, - neutralTokens - } = languages[language]; + const { ignoreBlocks, openTokens, closeTokens, neutralTokens } = languages[ + language + ]; let ignoreTokens = null; let singleLineIgnoreTokens = null; let ignoreRegExp = null; let singleLineIgnoreRegExp = null; - if (ignoreInDelimiters) { - ignoreTokens = ignoreInDelimiters + if (ignoreBlocks) { + ignoreTokens = ignoreBlocks .filter(token => !token.singleline) .map(({ open, close }) => `${open}[^(${close})]*${close}`) .join("|"); ignoreRegExp = RegExp(`${ignoreTokens}`, "gm"); - singleLineIgnoreTokens = ignoreInDelimiters + singleLineIgnoreTokens = ignoreBlocks .filter(token => token.singleline) .map(({ open }) => `${open}`) .join("|"); @@ -69,12 +61,17 @@ function loadRegexes(language: string) { "gm" ); + let openListComprehensionRegExp = null; + let closeListComprehensionRegExp = null; + return { openRegExp, closeRegExp, ignoreRegExp, singleLineIgnoreRegExp, - neutralRegExp + neutralRegExp, + openListComprehensionRegExp, + closeListComprehensionRegExp }; } @@ -130,41 +127,16 @@ function updateDecorations() { deepDecorations.forEach(d => { options.push([]); }); - let depth = 0; - // if we are not case sensitive, then ensure the case of text matches then keyworkd matches + // if we are not case sensitive, then ensure the case of text matches the keyword matches if (!languageConfiguration.caseSensitive) { text = text.toLowerCase(); } let tokens: Token[] = tokenize(text, regExps[lang]); - for (let { pos, length, type } of tokens) { - const startPos = activeEditor.document.positionAt(pos); - const endPos = activeEditor.document.positionAt(pos + length); - const decoration: vscode.DecorationOptions = { - range: new vscode.Range(startPos, endPos) - }; - - switch (type) { - case "OPEN BLOCK": - // If beginning a new block, push new decoration and increment depth - options[depth % deepDecorations.length].push(decoration); - depth++; - break; - case "CLOSE BLOCK": - // If closing a block, decrement depth - depth = depth > 0 ? depth - 1 : 0; - options[depth % deepDecorations.length].push(decoration); - break; - default: - if (depth > 0) { - // As default, if the token is in non-zero depth, it is a continuation token and should keep the same color as the opening token - options[(depth - 1) % deepDecorations.length].push(decoration); - } - break; - } - } + parse({ activeEditor, options, tokens }); + deepDecorations.forEach((deepDecoration, i) => { activeEditor.setDecorations(deepDecoration, options[i]); }); diff --git a/src/parser.ts b/src/parser.ts new file mode 100644 index 0000000..3f000eb --- /dev/null +++ b/src/parser.ts @@ -0,0 +1,59 @@ +"use strict"; + +import * as vscode from "vscode"; +import { Token } from "./tokenizer"; + +interface ParseParams { + activeEditor: vscode.TextEditor; + options: vscode.DecorationOptions[][]; + tokens: Token[]; +} + +const DEFAULT = 0; +const COMMENT = 1; +const COMPREHENSION = 2; + +export function parse({ activeEditor, options, tokens }: ParseParams) { + let depth = 0; + for (let { pos, length, type } of tokens) { + let mode = null; + if (type == "COMMENT" || type == "SINGLE LINE COMMENT") { + mode = COMMENT; + } else if (type == "OPEN COMPREHENSION" || type == "CLOSE COMPREHENSION") { + mode = COMPREHENSION; + } else { + mode = DEFAULT; + } + /* + + + + + + */ + const startPos = activeEditor.document.positionAt(pos); + const endPos = activeEditor.document.positionAt(pos + length); + const decoration: vscode.DecorationOptions = { + range: new vscode.Range(startPos, endPos) + }; + + switch (type) { + case "OPEN BLOCK": + // If beginning a new block, push new decoration and increment depth + options[depth % deepDecorations.length].push(decoration); + depth++; + break; + case "CLOSE BLOCK": + // If closing a block, decrement depth + depth = depth > 0 ? depth - 1 : 0; + options[depth % deepDecorations.length].push(decoration); + break; + default: + if (depth > 0) { + // As default, if the token is in non-zero depth, it is a continuation token and should keep the same color as the opening token + options[(depth - 1) % deepDecorations.length].push(decoration); + } + break; + } + } +} diff --git a/src/tokenizer.ts b/src/tokenizer.ts index d88ea24..140f854 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1,12 +1,11 @@ -import { close } from "fs"; - +"use strict"; export interface Token { pos: number; length: number; type: string; } -interface TokenizeParams { +export interface TokenizeParams { openRegExp: RegExp; closeRegExp: RegExp; neutralRegExp: RegExp; From 211ceae514d13ef4316ea3060e0dec55925da713 Mon Sep 17 00:00:00 2001 From: Paulo Valente Date: Tue, 15 Oct 2019 04:41:59 -0300 Subject: [PATCH 06/13] feat: add nesting comprehension parsing --- src/extension.ts | 67 +++++++++++++++-------- src/languages.ts | 10 ++++ src/parser.ts | 137 ++++++++++++++++++++++++++++++++++++++--------- src/tokenizer.ts | 20 ++++--- 4 files changed, 178 insertions(+), 56 deletions(-) diff --git a/src/extension.ts b/src/extension.ts index 46c43d0..046942f 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -3,19 +3,7 @@ import * as vscode from "vscode"; import { languages } from "./languages"; import { tokenize, Token, TokenizeParams } from "./tokenizer"; -import { parse } from "./parser"; - -const deepDecorations = [ - vscode.window.createTextEditorDecorationType({ - color: { id: "rainbowend.deep1" } - }), - vscode.window.createTextEditorDecorationType({ - color: { id: "rainbowend.deep2" } - }), - vscode.window.createTextEditorDecorationType({ - color: { id: "rainbowend.deep3" } - }) -]; +import { parse, deepDecorations } from "./parser"; let timeout: NodeJS.Timer | null = null; let regExps: { @@ -23,9 +11,13 @@ let regExps: { } = {}; function loadRegexes(language: string) { - const { ignoreBlocks, openTokens, closeTokens, neutralTokens } = languages[ - language - ]; + const { + ignoreBlocks, + openTokens, + closeTokens, + neutralTokens, + listComprehensions + } = languages[language]; let ignoreTokens = null; let singleLineIgnoreTokens = null; @@ -47,23 +39,48 @@ function loadRegexes(language: string) { } /* - The (^|\s) and ($|\s) separators are used instead of \b to ensure that any regexp - provided as the configurable tokens can be matched. + The `regexpPrefix` and `regexpSuffix` separators are used instead of \b to ensure that any regexp + provided as the configurable tokens can be matched. This is relaxed so that words preceded or followed by + parentheses, square brackets or curly brackets are also matched. Previously, there was an issue involving the ':' character */ - let openRegExp = RegExp(`(^|\\s)(${openTokens.join("|")})(?=($|\\s))`, "gm"); + + const regexpPrefix = "(^|\\s)"; + const regexpSuffix = "($|\\s)"; + + let openRegExp = RegExp( + `(?<=${regexpPrefix})(${openTokens.join("|")})(?=${regexpSuffix})`, + "gm" + ); let closeRegExp = RegExp( - `(^|\\s)(${closeTokens.join("|")})(?=($|\\s))`, + `(?<=${regexpPrefix})(${closeTokens.join("|")})(?=${regexpSuffix})`, "gm" ); let neutralRegExp = RegExp( - `(^|\\s)(${neutralTokens.join("|")})(?=($|\\s))`, + `(?<=${regexpPrefix})(${neutralTokens.join("|")})(?=${regexpSuffix})`, "gm" ); let openListComprehensionRegExp = null; let closeListComprehensionRegExp = null; + if (listComprehensions) { + let openListComprehensionTokens = listComprehensions + .map(({ open }) => `${open}`) + .join("|"); + openListComprehensionRegExp = RegExp( + `(${openListComprehensionTokens})`, + "gm" + ); + let closeListComprehensionTokens = listComprehensions + .map(({ close }) => `${close}`) + .join("|"); + closeListComprehensionRegExp = RegExp( + `(${closeListComprehensionTokens})`, + "gm" + ); + } + return { openRegExp, closeRegExp, @@ -122,9 +139,13 @@ function updateDecorations() { let lang = activeEditor.document.languageId; const languageConfiguration = languages[lang]; + if (!languageConfiguration) { + return; + } + let text = activeEditor.document.getText(); const options: vscode.DecorationOptions[][] = []; - deepDecorations.forEach(d => { + deepDecorations.forEach((d: any) => { options.push([]); }); @@ -137,7 +158,7 @@ function updateDecorations() { parse({ activeEditor, options, tokens }); - deepDecorations.forEach((deepDecoration, i) => { + deepDecorations.forEach((deepDecoration: any, i: number) => { activeEditor.setDecorations(deepDecoration, options[i]); }); } diff --git a/src/languages.ts b/src/languages.ts index 8e31b82..683d06f 100644 --- a/src/languages.ts +++ b/src/languages.ts @@ -6,6 +6,10 @@ export const languages: { close?: string; singleline?: boolean; }>; + listComprehensions?: Array<{ + open: string; + close: string; + }>; inlineOpenTokens: Array; openTokens: Array; closeTokens: Array; @@ -88,6 +92,12 @@ export const languages: { close: "'" } ], + listComprehensions: [ + { + open: "\\[", + close: "\\]" + } + ], inlineOpenTokens: [], openTokens: [ "fn", diff --git a/src/parser.ts b/src/parser.ts index 3f000eb..e968796 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -9,51 +9,136 @@ interface ParseParams { tokens: Token[]; } +interface SubParserParams { + decoration: vscode.DecorationOptions; + options: vscode.DecorationOptions[][]; + token: Token; + depth: number; +} + +interface SubParserResult { + options: vscode.DecorationOptions[][]; + depth: number; +} + +export const deepDecorations = [ + vscode.window.createTextEditorDecorationType({ + color: { id: "rainbowend.deep1" } + }), + vscode.window.createTextEditorDecorationType({ + color: { id: "rainbowend.deep2" } + }), + vscode.window.createTextEditorDecorationType({ + color: { id: "rainbowend.deep3" } + }) +]; + const DEFAULT = 0; -const COMMENT = 1; +const IGNORE = 1; const COMPREHENSION = 2; export function parse({ activeEditor, options, tokens }: ParseParams) { let depth = 0; - for (let { pos, length, type } of tokens) { - let mode = null; - if (type == "COMMENT" || type == "SINGLE LINE COMMENT") { - mode = COMMENT; - } else if (type == "OPEN COMPREHENSION" || type == "CLOSE COMPREHENSION") { + let comprehensionDepth = 0; + let mode = DEFAULT; + + for (let token of tokens) { + let { pos, length, type } = token; + /* Switch parsing modes if any of the mode delimiters has been reached */ + if (type === "OPEN IGNORE") { + mode = IGNORE; + continue; + } else if (type === "OPEN COMPREHENSION") { mode = COMPREHENSION; - } else { + comprehensionDepth++; + continue; + } else if (type === "CLOSE IGNORE" || type === "CLOSE COMPREHENSION") { + comprehensionDepth--; + if (comprehensionDepth > 0) { + continue; + } + comprehensionDepth = 0; mode = DEFAULT; + continue; } - /* - - - - - */ const startPos = activeEditor.document.positionAt(pos); const endPos = activeEditor.document.positionAt(pos + length); const decoration: vscode.DecorationOptions = { range: new vscode.Range(startPos, endPos) }; - switch (type) { - case "OPEN BLOCK": - // If beginning a new block, push new decoration and increment depth - options[depth % deepDecorations.length].push(decoration); - depth++; + let result = { depth, options }; + console.log(mode); + switch (mode) { + // case IGNORE: + /* A new parseInComment function could be implemented to allow for different highlighting + instead of just ignoring */ + // result = parseInComment({ decoration, depth, options, token }); + // break; + case COMPREHENSION: + console.log(token); + result = parseInComprehension({ decoration, depth, options, token }); break; - case "CLOSE BLOCK": - // If closing a block, decrement depth - depth = depth > 0 ? depth - 1 : 0; - options[depth % deepDecorations.length].push(decoration); + case DEFAULT: + console.log(token); + result = parseDefault({ decoration, depth, options, token }); break; default: - if (depth > 0) { - // As default, if the token is in non-zero depth, it is a continuation token and should keep the same color as the opening token - options[(depth - 1) % deepDecorations.length].push(decoration); - } + console.log("default: skip"); break; } + depth = result.depth; + options = result.options; } } + +function parseDefault(params: SubParserParams): SubParserResult { + let { decoration, token, depth, options } = params; + switch (token.type) { + case "OPEN BLOCK": + // If beginning a new block, push new decoration and increment depth + options[depth % deepDecorations.length].push(decoration); + depth++; + break; + case "CLOSE BLOCK": + // If closing a block, decrement depth + depth = depth > 0 ? depth - 1 : 0; + options[depth % deepDecorations.length].push(decoration); + break; + default: + if (depth > 0) { + // As default, if the token is in non-zero depth, it is a continuation token and should keep the same color as the opening token + options[(depth - 1) % deepDecorations.length].push(decoration); + } + break; + } + + return { depth, options }; +} + +function parseInComprehension(params: SubParserParams): SubParserResult { + /* For simplicity, in comprehensions, + all open-block and close-block tokens will be highlighted with the same depth color + The color is the next down from the previous block + + i.e.: + if + [ + for x if + ] + */ + + let { decoration, token, depth, options } = params; + let comprehensionDepth = depth + 1; + + if ( + token.type === "OPEN BLOCK" || + token.type === "CLOSE BLOCK" || + token.type === "NEUTRAL" + ) { + options[comprehensionDepth % deepDecorations.length].push(decoration); + } + + return { depth, options }; +} diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 140f854..8fa1724 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -3,6 +3,7 @@ export interface Token { pos: number; length: number; type: string; + content: string; } export interface TokenizeParams { @@ -29,6 +30,7 @@ function findAllMatches(str: string, regexp: RegExp | null, type: string) { return matches.map(match => { return { + content: match[0], pos: match.index, length: match[0].length, keep: true, @@ -70,16 +72,16 @@ export function tokenize( "SINGLE LINE COMMENT" ); - const ignoreMatchReducer = function(acc: Token[], token: Token) { + const matchReducer = function(acc: Token[], token: Token, suffix: string) { let { pos, length } = token; - let open = { ...token, length: 1, type: "OPEN IGNORE" }; + let open = { ...token, length: 1, type: `OPEN ${suffix}` }; let close = { ...token, length: 1, pos: pos + length - 1, - type: "CLOSE IGNORE" + type: `CLOSE ${suffix}` }; return [...acc, open, close]; @@ -88,15 +90,18 @@ export function tokenize( const convertedIgnoreMatches = [ ...singleLineIgnoreMatches, ...ignoreMatches - ].reduce(ignoreMatchReducer, []); + ].reduce( + (acc: Token[], token: Token) => matchReducer(acc, token, "IGNORE"), + [] + ); let matches = [ + ...openListComprehensionMatches, + ...closeListComprehensionMatches, ...convertedIgnoreMatches, ...openMatches, ...closeMatches, - ...neutralMatches, - ...openListComprehensionMatches, - ...closeListComprehensionMatches + ...neutralMatches ]; let tokens = matches.sort(({ pos: posX }, { pos: posY }) => { @@ -111,5 +116,6 @@ export function tokenize( return 0; }); + console.log(tokens); return tokens; } From 77ad0c6e4be6e9828fcfdcd8fdc40b4fd84640e4 Mon Sep 17 00:00:00 2001 From: Paulo Valente Date: Tue, 15 Oct 2019 04:46:56 -0300 Subject: [PATCH 07/13] chore: remove TODO --- TODO.md | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 TODO.md diff --git a/TODO.md b/TODO.md deleted file mode 100644 index e4e5be0..0000000 --- a/TODO.md +++ /dev/null @@ -1,6 +0,0 @@ -- [ ] rewrite languages.ts to support list comprehensions where it is due -- [ ] rewrite parser in extension.ts to allow for 3 differente environments: - * default: cycles colors through nesting code blocks - * comments: enter upon an open ignore-block token. ignores everything inside until the closing token - * comprehensions: matches diferrently, keeping its own color counter, - allowing just for inline tokens. should support nesting From 5723dc98c0987c39786f8fab15808d5a20540efa Mon Sep 17 00:00:00 2001 From: Paulo Valente Date: Tue, 15 Oct 2019 04:55:29 -0300 Subject: [PATCH 08/13] chore: reorganize code --- src/extension.ts | 110 ++++++++--------------------------------------- src/tokenizer.ts | 82 +++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 93 deletions(-) diff --git a/src/extension.ts b/src/extension.ts index 046942f..9a103b2 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -2,111 +2,30 @@ import * as vscode from "vscode"; import { languages } from "./languages"; -import { tokenize, Token, TokenizeParams } from "./tokenizer"; +import { tokenize, loadRegexes, Token, TokenizeParams } from "./tokenizer"; import { parse, deepDecorations } from "./parser"; -let timeout: NodeJS.Timer | null = null; -let regExps: { - [index: string]: TokenizeParams; -} = {}; - -function loadRegexes(language: string) { - const { - ignoreBlocks, - openTokens, - closeTokens, - neutralTokens, - listComprehensions - } = languages[language]; - - let ignoreTokens = null; - let singleLineIgnoreTokens = null; - let ignoreRegExp = null; - let singleLineIgnoreRegExp = null; - if (ignoreBlocks) { - ignoreTokens = ignoreBlocks - .filter(token => !token.singleline) - .map(({ open, close }) => `${open}[^(${close})]*${close}`) - .join("|"); - ignoreRegExp = RegExp(`${ignoreTokens}`, "gm"); - - singleLineIgnoreTokens = ignoreBlocks - .filter(token => token.singleline) - .map(({ open }) => `${open}`) - .join("|"); - singleLineIgnoreRegExp = RegExp(`(${singleLineIgnoreTokens}).*`, "g"); - console.log(singleLineIgnoreRegExp); - } - - /* - The `regexpPrefix` and `regexpSuffix` separators are used instead of \b to ensure that any regexp - provided as the configurable tokens can be matched. This is relaxed so that words preceded or followed by - parentheses, square brackets or curly brackets are also matched. - Previously, there was an issue involving the ':' character - */ - - const regexpPrefix = "(^|\\s)"; - const regexpSuffix = "($|\\s)"; - - let openRegExp = RegExp( - `(?<=${regexpPrefix})(${openTokens.join("|")})(?=${regexpSuffix})`, - "gm" - ); - let closeRegExp = RegExp( - `(?<=${regexpPrefix})(${closeTokens.join("|")})(?=${regexpSuffix})`, - "gm" - ); - let neutralRegExp = RegExp( - `(?<=${regexpPrefix})(${neutralTokens.join("|")})(?=${regexpSuffix})`, - "gm" - ); - - let openListComprehensionRegExp = null; - let closeListComprehensionRegExp = null; - - if (listComprehensions) { - let openListComprehensionTokens = listComprehensions - .map(({ open }) => `${open}`) - .join("|"); - openListComprehensionRegExp = RegExp( - `(${openListComprehensionTokens})`, - "gm" - ); - let closeListComprehensionTokens = listComprehensions - .map(({ close }) => `${close}`) - .join("|"); - closeListComprehensionRegExp = RegExp( - `(${closeListComprehensionTokens})`, - "gm" - ); - } +export function activate(context: vscode.ExtensionContext) { + let regExps: { + [index: string]: TokenizeParams; + } = {}; - return { - openRegExp, - closeRegExp, - ignoreRegExp, - singleLineIgnoreRegExp, - neutralRegExp, - openListComprehensionRegExp, - closeListComprehensionRegExp - }; -} + let timeout: NodeJS.Timer | null = null; -export function activate(context: vscode.ExtensionContext) { Object.keys(languages).forEach(language => { regExps[language] = loadRegexes(language); }); let activeEditor = vscode.window.activeTextEditor; if (activeEditor) { - triggerUpdateDecorations(activeEditor); + triggerUpdateDecorations(timeout, regExps); } vscode.window.onDidChangeActiveTextEditor( editor => { activeEditor = editor; if (activeEditor) { - triggerUpdateDecorations(activeEditor); + triggerUpdateDecorations(timeout, regExps); } }, null, @@ -116,7 +35,7 @@ export function activate(context: vscode.ExtensionContext) { vscode.workspace.onDidChangeTextDocument( event => { if (activeEditor && event.document === activeEditor.document) { - triggerUpdateDecorations(activeEditor); + timeout = triggerUpdateDecorations(timeout, regExps); } }, null, @@ -124,14 +43,19 @@ export function activate(context: vscode.ExtensionContext) { ); } -function triggerUpdateDecorations(activeEditor: vscode.TextEditor) { +function triggerUpdateDecorations( + timeout: NodeJS.Timer | null, + regExps: { + [index: string]: TokenizeParams; + } +) { if (timeout) { clearTimeout(timeout); } - timeout = setTimeout(updateDecorations, 250); + return setTimeout(() => updateDecorations(regExps), 250); } -function updateDecorations() { +function updateDecorations(regExps: { [index: string]: TokenizeParams }) { const activeEditor = vscode.window.activeTextEditor; if (!activeEditor) { return; diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 8fa1724..7679b63 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -16,6 +16,88 @@ export interface TokenizeParams { closeListComprehensionRegExp: RegExp | null; } +export function loadRegexes(language: string) { + const { + ignoreBlocks, + openTokens, + closeTokens, + neutralTokens, + listComprehensions + } = languages[language]; + + let ignoreTokens = null; + let singleLineIgnoreTokens = null; + let ignoreRegExp = null; + let singleLineIgnoreRegExp = null; + if (ignoreBlocks) { + ignoreTokens = ignoreBlocks + .filter(token => !token.singleline) + .map(({ open, close }) => `${open}[^(${close})]*${close}`) + .join("|"); + ignoreRegExp = RegExp(`${ignoreTokens}`, "gm"); + + singleLineIgnoreTokens = ignoreBlocks + .filter(token => token.singleline) + .map(({ open }) => `${open}`) + .join("|"); + singleLineIgnoreRegExp = RegExp(`(${singleLineIgnoreTokens}).*`, "g"); + console.log(singleLineIgnoreRegExp); + } + + /* + The `regexpPrefix` and `regexpSuffix` separators are used instead of \b to ensure that any regexp + provided as the configurable tokens can be matched. This is relaxed so that words preceded or followed by + parentheses, square brackets or curly brackets are also matched. + Previously, there was an issue involving the ':' character + */ + + const regexpPrefix = "(^|\\s)"; + const regexpSuffix = "($|\\s)"; + + let openRegExp = RegExp( + `(?<=${regexpPrefix})(${openTokens.join("|")})(?=${regexpSuffix})`, + "gm" + ); + let closeRegExp = RegExp( + `(?<=${regexpPrefix})(${closeTokens.join("|")})(?=${regexpSuffix})`, + "gm" + ); + let neutralRegExp = RegExp( + `(?<=${regexpPrefix})(${neutralTokens.join("|")})(?=${regexpSuffix})`, + "gm" + ); + + let openListComprehensionRegExp = null; + let closeListComprehensionRegExp = null; + + if (listComprehensions) { + let openListComprehensionTokens = listComprehensions + .map(({ open }) => `${open}`) + .join("|"); + openListComprehensionRegExp = RegExp( + `(${openListComprehensionTokens})`, + "gm" + ); + let closeListComprehensionTokens = listComprehensions + .map(({ close }) => `${close}`) + .join("|"); + closeListComprehensionRegExp = RegExp( + `(${closeListComprehensionTokens})`, + "gm" + ); + } + + return { + openRegExp, + closeRegExp, + ignoreRegExp, + singleLineIgnoreRegExp, + neutralRegExp, + openListComprehensionRegExp, + closeListComprehensionRegExp + }; +} + function findAllMatches(str: string, regexp: RegExp | null, type: string) { if (!regexp) { return []; From b0e19e2827e31b4e6768c8a2b72f93c8fbd310da Mon Sep 17 00:00:00 2001 From: Paulo Valente Date: Tue, 15 Oct 2019 04:56:51 -0300 Subject: [PATCH 09/13] docs: update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b11c08..15f2c95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,8 @@ By https://github.com/polvalente -- Fix 'ignoreInDelimiters' nesting +- Fix 'ignoreInDelimiters' nesting (#20) +- Refactor code to include new parsing engine (#21) ## 0.7.3 From cb9e400549a2280cabcb5cba540af5d0ba06f8e4 Mon Sep 17 00:00:00 2001 From: Paulo Valente Date: Tue, 15 Oct 2019 05:02:17 -0300 Subject: [PATCH 10/13] chore: update language defs --- src/languages.ts | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/src/languages.ts b/src/languages.ts index 683d06f..18c969a 100644 --- a/src/languages.ts +++ b/src/languages.ts @@ -10,7 +10,6 @@ export const languages: { open: string; close: string; }>; - inlineOpenTokens: Array; openTokens: Array; closeTokens: Array; neutralTokens: Array; @@ -29,11 +28,6 @@ export const languages: { close: '"' } ], - inlineOpenTokens: [ - // Allow stuff like return toto if tutu - "if", - "unless" - ], openTokens: [ "class", "module", @@ -58,16 +52,11 @@ export const languages: { open: "'", close: "'" }, - { - open: "--\\[\\[", - close: "--\\]\\]" - }, { open: "--", singleline: true } ], - inlineOpenTokens: [], openTokens: ["function", "if", "while", "for"], closeTokens: ["end"], neutralTokens: ["do", "then", "else", "elseif"] @@ -90,15 +79,12 @@ export const languages: { { open: "'", close: "'" - } - ], - listComprehensions: [ + }, { - open: "\\[", - close: "\\]" + open: "\\(", + close: "\\)" } ], - inlineOpenTokens: [], openTokens: [ "fn", "defmodule", @@ -145,7 +131,12 @@ export const languages: { close: "'" } ], - inlineOpenTokens: [], + listComprehensions: [ + { + open: "\\[", + close: "\\]" + } + ], openTokens: [ "if", "struct", @@ -176,7 +167,6 @@ export const languages: { close: "'" } ], - inlineOpenTokens: [], openTokens: ["for", "if", "while", "until"], closeTokens: ["fi", "done"], neutralTokens: ["do", "in", "then", "else"] @@ -193,7 +183,6 @@ export const languages: { singleline: true } ], - inlineOpenTokens: [], openTokens: ["module", "case", "begin"], closeTokens: ["end", "endmodule", "endcase"], neutralTokens: [] @@ -206,7 +195,6 @@ export const languages: { singleline: true } ], - inlineOpenTokens: [], openTokens: ["entity", "component", "case", "begin"], closeTokens: ["end", "endcase"], neutralTokens: [] @@ -223,8 +211,6 @@ export const languages: { singleline: true } ], - - inlineOpenTokens: [], openTokens: [ "class", "module", @@ -245,7 +231,6 @@ export const languages: { }, COBOL: { caseSensitive: false, - inlineOpenTokens: [], openTokens: [ "program-id", "perform", From 172d965627f3fcb72089b68649699197b9ff3c40 Mon Sep 17 00:00:00 2001 From: Paulo Valente Date: Sat, 14 Dec 2019 22:40:34 -0300 Subject: [PATCH 11/13] feat: implement comprehension parsing using stacks --- src/parser.ts | 71 +++++++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index e968796..7f2492e 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -13,12 +13,12 @@ interface SubParserParams { decoration: vscode.DecorationOptions; options: vscode.DecorationOptions[][]; token: Token; - depth: number; + decorationDepth: number; } interface SubParserResult { options: vscode.DecorationOptions[][]; - depth: number; + decorationDepth: number; } export const deepDecorations = [ @@ -38,9 +38,9 @@ const IGNORE = 1; const COMPREHENSION = 2; export function parse({ activeEditor, options, tokens }: ParseParams) { - let depth = 0; - let comprehensionDepth = 0; + let decorationDepth = 0; let mode = DEFAULT; + let comprehensionDepthStack = []; for (let token of tokens) { let { pos, length, type } = token; @@ -50,14 +50,14 @@ export function parse({ activeEditor, options, tokens }: ParseParams) { continue; } else if (type === "OPEN COMPREHENSION") { mode = COMPREHENSION; - comprehensionDepth++; + comprehensionDepthStack.push(1); continue; } else if (type === "CLOSE IGNORE" || type === "CLOSE COMPREHENSION") { - comprehensionDepth--; - if (comprehensionDepth > 0) { + if (comprehensionDepthStack.length > 0) { + comprehensionDepthStack.pop(); continue; } - comprehensionDepth = 0; + comprehensionDepthStack = []; mode = DEFAULT; continue; } @@ -68,58 +68,61 @@ export function parse({ activeEditor, options, tokens }: ParseParams) { range: new vscode.Range(startPos, endPos) }; - let result = { depth, options }; - console.log(mode); + let result = { decorationDepth, options }; switch (mode) { // case IGNORE: /* A new parseInComment function could be implemented to allow for different highlighting instead of just ignoring */ - // result = parseInComment({ decoration, depth, options, token }); + // result = parseInComment({ decoration, decorationDepth, options, token }); // break; case COMPREHENSION: - console.log(token); - result = parseInComprehension({ decoration, depth, options, token }); + result = parseInComprehension({ + decoration, + decorationDepth, + options, + token + }); break; case DEFAULT: - console.log(token); - result = parseDefault({ decoration, depth, options, token }); + result = parseDefault({ decoration, decorationDepth, options, token }); break; default: - console.log("default: skip"); break; } - depth = result.depth; + decorationDepth = result.decorationDepth; options = result.options; } } function parseDefault(params: SubParserParams): SubParserResult { - let { decoration, token, depth, options } = params; + let { decoration, token, decorationDepth, options } = params; switch (token.type) { case "OPEN BLOCK": - // If beginning a new block, push new decoration and increment depth - options[depth % deepDecorations.length].push(decoration); - depth++; + // If beginning a new block, push new decoration and increment decorationDepth + options[decorationDepth % deepDecorations.length].push(decoration); + decorationDepth++; break; case "CLOSE BLOCK": - // If closing a block, decrement depth - depth = depth > 0 ? depth - 1 : 0; - options[depth % deepDecorations.length].push(decoration); + // If closing a block, decrement decorationDepth + decorationDepth = decorationDepth > 0 ? decorationDepth - 1 : 0; + options[decorationDepth % deepDecorations.length].push(decoration); break; default: - if (depth > 0) { - // As default, if the token is in non-zero depth, it is a continuation token and should keep the same color as the opening token - options[(depth - 1) % deepDecorations.length].push(decoration); + if (decorationDepth > 0) { + // As default, if the token is in non-zero decorationDepth, it is a continuation token and should keep the same color as the opening token + options[(decorationDepth - 1) % deepDecorations.length].push( + decoration + ); } break; } - return { depth, options }; + return { decorationDepth, options }; } function parseInComprehension(params: SubParserParams): SubParserResult { /* For simplicity, in comprehensions, - all open-block and close-block tokens will be highlighted with the same depth color + all open-block and close-block tokens will be highlighted with the same decorationDepth color The color is the next down from the previous block i.e.: @@ -129,16 +132,18 @@ function parseInComprehension(params: SubParserParams): SubParserResult { ] */ - let { decoration, token, depth, options } = params; - let comprehensionDepth = depth + 1; + let { decoration, token, decorationDepth, options } = params; + let comprehensionDecorationDepth = decorationDepth + 1; if ( token.type === "OPEN BLOCK" || token.type === "CLOSE BLOCK" || token.type === "NEUTRAL" ) { - options[comprehensionDepth % deepDecorations.length].push(decoration); + options[comprehensionDecorationDepth % deepDecorations.length].push( + decoration + ); } - return { depth, options }; + return { decorationDepth, options }; } From 26ff7fb3d94601abd362f65e72a7427bc1aae20e Mon Sep 17 00:00:00 2001 From: Paulo Valente Date: Sat, 14 Dec 2019 23:03:08 -0300 Subject: [PATCH 12/13] fix: correctly end comprehension when emptying stack --- src/extension.ts | 2 +- src/languages.ts | 38 +++++++++++++++++++++++--------------- src/parser.ts | 9 +++++++-- src/tokenizer.ts | 6 ++++-- 4 files changed, 35 insertions(+), 20 deletions(-) diff --git a/src/extension.ts b/src/extension.ts index 9a103b2..2e69cf1 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -13,7 +13,7 @@ export function activate(context: vscode.ExtensionContext) { let timeout: NodeJS.Timer | null = null; Object.keys(languages).forEach(language => { - regExps[language] = loadRegexes(language); + regExps[language] = loadRegexes(languages[language]); }); let activeEditor = vscode.window.activeTextEditor; diff --git a/src/languages.ts b/src/languages.ts index 18c969a..c45da03 100644 --- a/src/languages.ts +++ b/src/languages.ts @@ -1,19 +1,21 @@ +export interface LangParams { + caseSensitive: boolean; + ignoreBlocks?: Array<{ + open: string; + close?: string; + singleline?: boolean; + }>; + listComprehensions?: Array<{ + open: string; + close: string; + }>; + openTokens: Array; + closeTokens: Array; + neutralTokens: Array; +} + export const languages: { - [index: string]: { - caseSensitive: boolean; - ignoreBlocks?: Array<{ - open: string; - close?: string; - singleline?: boolean; - }>; - listComprehensions?: Array<{ - open: string; - close: string; - }>; - openTokens: Array; - closeTokens: Array; - neutralTokens: Array; - }; + [index: string]: LangParams; } = { ruby: { caseSensitive: true, @@ -63,6 +65,12 @@ export const languages: { }, elixir: { caseSensitive: true, + listComprehensions: [ + { + open: "\\[", + close: "\\]" + } + ], ignoreBlocks: [ { open: "#", diff --git a/src/parser.ts b/src/parser.ts index 7f2492e..602ac9a 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -42,7 +42,10 @@ export function parse({ activeEditor, options, tokens }: ParseParams) { let mode = DEFAULT; let comprehensionDepthStack = []; + console.log(tokens); + for (let token of tokens) { + console.log(token); let { pos, length, type } = token; /* Switch parsing modes if any of the mode delimiters has been reached */ if (type === "OPEN IGNORE") { @@ -53,14 +56,16 @@ export function parse({ activeEditor, options, tokens }: ParseParams) { comprehensionDepthStack.push(1); continue; } else if (type === "CLOSE IGNORE" || type === "CLOSE COMPREHENSION") { + comprehensionDepthStack.pop(); + if (comprehensionDepthStack.length > 0) { - comprehensionDepthStack.pop(); continue; } - comprehensionDepthStack = []; mode = DEFAULT; continue; } + console.log(mode); + console.log(comprehensionDepthStack); const startPos = activeEditor.document.positionAt(pos); const endPos = activeEditor.document.positionAt(pos + length); diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 7679b63..7f361eb 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1,4 +1,6 @@ "use strict"; + +import { LangParams } from "./languages"; export interface Token { pos: number; length: number; @@ -16,14 +18,14 @@ export interface TokenizeParams { closeListComprehensionRegExp: RegExp | null; } -export function loadRegexes(language: string) { +export function loadRegexes(langParams: LangParams) { const { ignoreBlocks, openTokens, closeTokens, neutralTokens, listComprehensions - } = languages[language]; + } = langParams; let ignoreTokens = null; let singleLineIgnoreTokens = null; From 28429678aa96e11a8bc084e7165791960ea9a7f3 Mon Sep 17 00:00:00 2001 From: Paulo Valente Date: Sat, 14 Dec 2019 23:03:35 -0300 Subject: [PATCH 13/13] chore: remove stray console.logs --- src/parser.ts | 6 ------ src/tokenizer.ts | 2 -- 2 files changed, 8 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 602ac9a..ff2db5f 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -42,10 +42,7 @@ export function parse({ activeEditor, options, tokens }: ParseParams) { let mode = DEFAULT; let comprehensionDepthStack = []; - console.log(tokens); - for (let token of tokens) { - console.log(token); let { pos, length, type } = token; /* Switch parsing modes if any of the mode delimiters has been reached */ if (type === "OPEN IGNORE") { @@ -64,9 +61,6 @@ export function parse({ activeEditor, options, tokens }: ParseParams) { mode = DEFAULT; continue; } - console.log(mode); - console.log(comprehensionDepthStack); - const startPos = activeEditor.document.positionAt(pos); const endPos = activeEditor.document.positionAt(pos + length); const decoration: vscode.DecorationOptions = { diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 7f361eb..580c046 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -43,7 +43,6 @@ export function loadRegexes(langParams: LangParams) { .map(({ open }) => `${open}`) .join("|"); singleLineIgnoreRegExp = RegExp(`(${singleLineIgnoreTokens}).*`, "g"); - console.log(singleLineIgnoreRegExp); } /* @@ -200,6 +199,5 @@ export function tokenize( return 0; }); - console.log(tokens); return tokens; }