Merge 810afb1 into 1fec29f

streetsidesoftware · Apr 19, 2022 · 48795e5 · 48795e5
2 parents 1fec29f + 810afb1
commit 48795e5
Show file tree

Hide file tree

Showing 24 changed files with 1,153 additions and 746 deletions.
diff --git a/cspell.code-workspace b/cspell.code-workspace
@@ -25,6 +25,9 @@
         {
             "path": "packages/cspell-glob"
         },
+        {
+            "path": "packages/cspell-grammar"
+        },
         {
             "path": "packages/cspell-json-reporter"
         },

diff --git a/packages/cspell-grammar/README.md b/packages/cspell-grammar/README.md
@@ -1,10 +1,10 @@
 # `cspell-grammar`
 
-CSpell Grammar is used define the parts of a document that is spell checked.
+CSpell Grammar is used to generate a parser. The parser is used to add context / scope to parts of a document, making it easier to define the parts to be spell checked.
 
-In addition to being the cause of performance issues by end users, the existing `ignoreRegExpList` and `includeRegExpList` is too limited.
+This is to address the issues and limitations related to `ignoreRegExpList` and `includeRegExpList`.
 
-The grammar is use to add `scope` to sections of a document. The `scope` can then be used to apply spell checking rules.
+The parser is used to add `scope` to sections of a document. The `scope` can then be used to apply spell checking rules.
 
 Example: Only check comments and strings
 

diff --git a/packages/cspell-grammar/bin.js b/packages/cspell-grammar/bin.js
@@ -1,3 +1,4 @@
+#!/usr/bin/env node
 'use strict';
 
 const app = require('./dist/app.js');

diff --git a/packages/cspell-grammar/package-lock.json b/packages/cspell-grammar/package-lock.json
diff --git a/packages/cspell-grammar/package.json b/packages/cspell-grammar/package.json
@@ -10,6 +10,9 @@
   "author": "Jason Dent <jason@streetsidesoftware.nl>",
   "homepage": "https://github.com/streetsidesoftware/cspell/tree/main/packages/cspell-gitignore#readme",
   "license": "MIT",
+  "bin": {
+    "cspell-grammar": "bin.js"
+  },
   "main": "dist/index.js",
   "directories": {
     "dist": "dist"
@@ -46,5 +49,8 @@
     "@types/node": "^17.0.25",
     "jest": "^27.5.1",
     "rimraf": "^3.0.2"
+  },
+  "dependencies": {
+    "@cspell/cspell-pipe": "^5.19.7"
   }
 }
diff --git a/packages/cspell-grammar/src/app.ts b/packages/cspell-grammar/src/app.ts
@@ -1,3 +1,42 @@
+import { TypeScript } from './grammars';
+import { NGrammar } from './parser/grammarNormalized';
+import { normalizeGrammar } from './parser/grammarNormalizer';
+import * as path from 'path';
+import { promises as fs } from 'fs';
+import { parseDocument } from './parser/parser';
+
+const grammars: Record<string, NGrammar | undefined> = {
+    '.ts': normalizeGrammar(TypeScript.grammar),
+};
+
+/**
+ * Parse the first non-option file named in `args` using the grammar registered for its file extension.
+ * @param args -- command line arguments; only entries from index 2 onward are searched for the filename
+ * @returns Promise that resolves once the file has been parsed or a usage problem has been logged
+ */
 export async function run(args: string[]): Promise<void> {
     console.log('args: %o', args);
+    // early out if there are not enough arguments (nothing beyond the first two entries)
+    if (args.length < 3) {
+        console.log('usage...');
+        return;
+    }
+
+    const filename = args.slice(2).filter((p) => !p.startsWith('-'))[0]; // first entry that does not start with a dash
+    if (!filename) {
+        console.log('filename missing');
+        return;
+    }
+
+    const ext = path.extname(filename);
+    const g = grammars[ext]; // undefined when the extension has no entry in the grammars table
+    if (!g) {
+        console.log(`No grammar for ${path.basename(filename)}`);
+        return;
+    }
+
+    console.log(`File: ${path.basename(filename)} Grammar: ${g.name || g.scopeName}`);
+    const content = await fs.readFile(filename, 'utf-8');
+
+    parseDocument(g, filename, content);
 }
diff --git a/packages/cspell-grammar/src/grammars/typescript.ts b/packages/cspell-grammar/src/grammars/typescript.ts
@@ -3,10 +3,33 @@ import { Grammar, Repository } from '..';
 const repository: Repository = {
     statements: {
         name: 'code.ts',
-        patterns: ['#string', '#comment', '#braces'],
+        patterns: [
+            '#keyword',
+            '#string',
+            '#comment',
+            '#braces',
+            '#punctuation',
+            '#space',
+            { name: 'identifier', match: /[^\s;,!|&:^%{}[\]()*/+=<>]+/ },
+        ],
+    },
+    keyword: {
+        patterns: ['#keywordBase', '#standardTypes', '#standardLib'],
+    },
+    keywordBase: {
+        name: 'keyword.typescript.ts',
+        match: /\b(?:any|as|async|await|bigint|boolean|break|case|catch|const|continue|do|else|enum|export|extends|false|finally|for|from|function|get|if|implements|in|instanceof|interface|import|let|map|module|new|new|null|number|of|package|private|public|require|return|set|static|string|super|switch|this|throw|true|try|type|typeof|unknown|undefined|var|void|while|yield)\b/,
+    },
+    standardTypes: {
+        name: 'keyword.type.ts',
+        match: /\b(?:Promise|Record|Omit|Extract|Exclude|BigInt|Array)\b/,
+    },
+    standardLib: {
+        name: 'keyword.lib.ts',
+        match: /\b(?:console|process|window)\b/,
     },
     string: {
-        patterns: [{ include: '#string_q_single' }, { include: '#string_q_double' }, { include: '#string_template' }],
+        patterns: ['#string_q_single', '#string_q_double', '#string_template'],
     },
     string_q_single: {
         name: 'string.quoted.single.ts',
@@ -50,24 +73,35 @@ const repository: Repository = {
                 end: ')',
                 captures: 'punctuation.meta.brace.ts',
                 patterns: ['#statements'],
-                contentName: 'meta.brace.ts',
+                name: 'meta.brace.ts',
+                contentName: 'code.ts',
             },
             {
                 begin: '{',
                 end: '}',
                 captures: 'punctuation.meta.brace.ts',
                 patterns: ['#statements'],
-                contentName: 'meta.brace.ts',
+                name: 'meta.brace.ts',
+                contentName: 'code.ts',
             },
             {
                 begin: '[',
                 end: ']',
                 captures: 'punctuation.meta.brace.ts',
                 patterns: ['#statements'],
-                contentName: 'meta.brace.ts',
+                name: 'meta.brace.ts',
+                contentName: 'code.ts',
             },
         ],
     },
+    punctuation: {
+        name: 'punctuation.ts',
+        match: /[-;:,!|&^%*/+=<>\n\r]/,
+    },
+    space: {
+        name: 'punctuation.space.ts',
+        match: /\s+/,
+    },
     comment: {
         patterns: [
             {
@@ -80,7 +114,7 @@ const repository: Repository = {
             {
                 name: 'comment.block.documentation.ts',
                 comment: 'DocBlock',
-                begin: /\*\*(?!\/)/,
+                begin: /\/\*\*(?!\/)/,
                 captures: 'punctuation.definition.comment.ts',
                 end: '*/',
             },

diff --git a/packages/cspell-grammar/src/mappers/types.ts b/packages/cspell-grammar/src/mappers/types.ts
@@ -0,0 +1,7 @@
+export interface MappedText {
+    text: string;
+    /**
+     * Offsets relating `text` to its source. NOTE(review): the mapRawString tests suggest a flat list of [sourceOffset, textOffset] pairs -- confirm.
+     */
+    map: number[];
+}
diff --git a/packages/cspell-grammar/src/mappers/typescript.test.ts b/packages/cspell-grammar/src/mappers/typescript.test.ts
@@ -0,0 +1,67 @@
+import { mapRawString } from './typescript';
+
+describe('mappers typescript', () => {
+    // cspell:ignore Fingerspitzengef Fingerspitzengefühl ˈfɪŋɐˌʃpɪtsənɡəˌfyːl
+    const sample = { // the same sentence as plain text, as hex/unicode escapes (hex), and as a mix of both (mixed)
+        text: 'Fingerspitzengefühl is a German term.\nIt’s pronounced as follows: [ˈfɪŋɐˌʃpɪtsənɡəˌfyːl]',
+        hex: '\\x46\\x69\\x6E\\x67\\x65\\x72\\x73\\x70\\x69\\x74\\x7A\\x65\\x6E\\x67\\x65\\x66\\xFC\\x68\\x6C\\x20\\x69\\x73\\x20\
+\\x61\\x20\\x47\\x65\\x72\\x6D\\x61\\x6E\\x20\\x74\\x65\\x72\\x6D\\x2E\n\\x49\\x74\\u2019\\x73\\x20\\x70\\x72\\x6F\\x6E\\x6F\\x75\\x6E\
+\\x63\\x65\\x64\\x20\\x61\\x73\\x20\\x66\\x6F\\x6C\\x6C\\x6F\\x77\\x73\\x3A\\x20\\x5B\\u02C8\\x66\\u026A\\u014B\\u0250\\u02CC\\u0283\
+\\x70\\u026A\\x74\\x73\\u0259\\x6E\\u0261\\u0259\\u02CC\\x66\\x79\\u02D0\\x6C\\x5D',
+        mixed: 'Fingerspitzengef\\xFChl is a German term.\nIt\\u2019s pronounced as follows: \
+[\\u02C8f\\u026A\\u014B\\u0250\\u02CC\\u0283p\\u026Ats\\u0259n\\u0261\\u0259\\u02CCfy\\u02D0l]',
+    };
+
+    const emojis = { // the same emoji string as literal characters, surrogate-pair escapes, and code-point escapes
+        text: 'Emojis 😁🥳🙈🙉🙊',
+        unicode: 'Emojis \\uD83D\\uDE01\\uD83E\\uDD73\\uD83D\\uDE48\\uD83D\\uDE49\\uD83D\\uDE4A',
+        codePoint: 'Emojis \\u{1F601}\\u{1F973}\\u{1F648}\\u{1F649}\\u{1F64A}',
+    };
+
+    test.each`
+        text                    | expected
+        ${''}                   | ${''}
+        ${'hello'}              | ${'hello'}
+        ${'hello\\x20there'}    | ${'hello\x20there'}
+        ${'hello\\u0020there'}  | ${'hello\u0020there'}
+        ${'hello\\u{020}there'} | ${'hello\u{020}there'}
+        ${'a\\tb'}              | ${'a\tb'}
+        ${'a\\rb'}              | ${'a\rb'}
+        ${'a\\nb'}              | ${'a\nb'}
+        ${'a\\dd'}              | ${'add'}
+        ${'a\\x'}               | ${'ax'}
+        ${'a\\xy'}              | ${'axy'}
+        ${'a\\x9h'}             | ${'ax9h'}
+        ${'a\\u9h'}             | ${'au9h'}
+        ${'a\\u{9h}'}           | ${'au{9h}'}
+        ${sample.text}          | ${sample.text}
+        ${sample.hex}           | ${sample.text}
+        ${sample.mixed}         | ${sample.text}
+        ${emojis.text}          | ${emojis.text}
+        ${emojis.unicode}       | ${emojis.text}
+        ${emojis.codePoint}     | ${emojis.text}
+    `('mapRawString $# [$text]', ({ text, expected }) => { // checks the mapped text only
+        const r = mapRawString(text);
+        expect(toCharCodes(r.text)).toBe(toCharCodes(expected)); // compare as hex char codes first, then as plain strings
+        expect(r.text).toBe(expected);
+    });
+
+    test.each`
+        text                    | expected
+        ${''}                   | ${[]}
+        ${'hello'}              | ${[]}
+        ${'hello\\x20there'}    | ${[5, 5, 9, 6, 14, 11]}
+        ${'hello\\u0020there'}  | ${[5, 5, 11, 6, 16, 11]}
+        ${'hello\\u{020}there'} | ${[5, 5, 12, 6, 17, 11]}
+    `('mapRawString map $# [$text]', ({ text, expected }) => { // checks the offset map only
+        const r = mapRawString(text);
+        expect(r.map).toEqual(expected);
+    });
+});
+
+function toCharCodes(s: string): string { // renders a string as a comma separated list of hex char codes
+    return s
+        .split('') // one entry per UTF-16 code unit
+        .map((a) => ('000' + a.charCodeAt(0).toString(16)).slice(-4)) // hex code, left padded with zeros to four digits
+        .join(', ');
+}