refactor(parser): reorganize for clarity and debuggability
Showing 27 changed files with 703 additions and 490 deletions.
@@ -0,0 +1,110 @@

// Tests for @tracespace/parser

import {describe, it, beforeEach, expect} from 'vitest'
import {ROOT, COMMENT, DONE, GERBER, Parser, createParser} from '..'

describe('@tracespace/parser', () => {
  let parser: Parser

  beforeEach(() => {
    parser = createParser()
  })

  it('should raise if no AST yet', () => {
    expect(parser.result).to.throw('File type not recognized')
  })

  it('should feed its input into the tree', () => {
    parser.feed('G04 hello world*')

    expect(parser.result()).to.eql({
      type: ROOT,
      filetype: GERBER,
      children: [
        {
          type: COMMENT,
          position: {
            start: {line: 1, column: 1, offset: 0},
            end: {line: 1, column: 16, offset: 15},
          },
          comment: 'hello world',
        },
      ],
    })
  })

  it('should be chainable', () => {
    const result = parser.feed('G04 hello world*').result()
    expect(result).to.eql({
      type: ROOT,
      filetype: GERBER,
      children: [
        {
          type: COMMENT,
          position: {
            start: {line: 1, column: 1, offset: 0},
            end: {line: 1, column: 16, offset: 15},
          },
          comment: 'hello world',
        },
      ],
    })
  })

  it('should handle multiple feedings', () => {
    parser.feed('G04 hello world*\n')
    parser.feed('M00*')

    expect(parser.result()).to.eql({
      type: ROOT,
      filetype: GERBER,
      children: [
        {
          type: COMMENT,
          position: {
            start: {line: 1, column: 1, offset: 0},
            end: {line: 1, column: 16, offset: 15},
          },
          comment: 'hello world',
        },
        {
          type: DONE,
          position: {
            start: {line: 2, column: 1, offset: 17},
            end: {line: 2, column: 4, offset: 20},
          },
        },
      ],
    })
  })

  it('should handle multiple feedings with unexpected splits for streaming support', () => {
    parser.feed('G0')
    parser.feed('4 hello ')
    parser.feed('world*\nM')
    parser.feed('00')
    parser.feed('*\n')

    expect(parser.result()).to.eql({
      type: ROOT,
      filetype: GERBER,
      children: [
        {
          type: COMMENT,
          position: {
            start: {line: 1, column: 1, offset: 0},
            end: {line: 1, column: 16, offset: 15},
          },
          comment: 'hello world',
        },
        {
          type: DONE,
          position: {
            start: {line: 2, column: 1, offset: 17},
            end: {line: 2, column: 4, offset: 20},
          },
        },
      ],
    })
  })
})
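The last test above pins down the streaming contract: token boundaries may fall anywhere between feed() calls, and the resulting AST is identical. A minimal sketch of what that enables, feeding a source file to the parser chunk by chunk as it is read from disk (the file name is invented for illustration; createParser, feed, and result are the API defined in this commit):

import {createReadStream} from 'node:fs'
import {createParser} from '@tracespace/parser'

async function parseGerberFile(path: string) {
  const parser = createParser()

  // feed() accepts arbitrary partial chunks, so raw stream chunks
  // can be passed through without any re-buffering by the caller
  for await (const chunk of createReadStream(path, 'utf8')) {
    parser.feed(chunk as string)
  }

  return parser.result()
}

// usage: parseGerberFile('fixture.gbr').then(tree => console.log(tree.filetype))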
@@ -1,92 +1,109 @@
 // Tests for @tracespace/parser
+import {describe, it, beforeEach, afterEach, expect, vi} from 'vitest'
+import * as td from 'testdouble'
 
-import {describe, it, beforeEach, expect} from 'vitest'
-import {ROOT, COMMENT, DONE, GERBER, Parser, createParser} from '..'
+import {Token, Lexer, LexerState, createLexer} from '../lexer'
+import {matchSyntax} from '../syntax'
+import {GerberNode} from '../tree'
+import {createParser} from '..'
 
-describe('@tracespace/parser', () => {
-  let parser: Parser
+vi.mock('../lexer', () => td.object<unknown>())
+vi.mock('../syntax', () => td.object<unknown>())
 
-  beforeEach(() => {
-    parser = createParser()
+describe('parser', () => {
+  let lexer: Lexer
+
+  beforeEach(async () => {
+    lexer = td.object<Lexer>()
+    td.when(createLexer()).thenReturn(lexer)
   })
 
-  it('should raise if no AST yet', () => {
-    expect(parser.results).to.throw('File type not recognized')
+  afterEach(() => {
+    td.reset()
   })
 
-  it('should feed its input into the tree', () => {
-    parser.feed('G04 hello world*')
-
-    expect(parser.results()).to.eql({
-      type: ROOT,
-      filetype: GERBER,
-      children: [
-        {
-          type: COMMENT,
-          position: {
-            start: {line: 1, column: 1, offset: 0},
-            end: {line: 1, column: 16, offset: 15},
-          },
-          comment: 'hello world',
-        },
-      ],
+  it('should tokenize the input and match the tokens', () => {
+    const token1 = {type: 'WHITESPACE'} as Token
+    const token2 = {type: 'NEWLINE'} as Token
+    const lexerState1 = {offset: 1} as LexerState
+    const lexerState2 = {offset: 2} as LexerState
+
+    td.when(lexer.feed('abc123', null)).thenReturn([
+      [token1, lexerState1] as [Token, LexerState],
+      [token2, lexerState2] as [Token, LexerState],
+    ])
+
+    td.when(
+      matchSyntax(
+        [
+          [token1, lexerState1],
+          [token2, lexerState2],
+        ],
+        null
+      )
+    ).thenReturn({
+      filetype: 'gerber',
+      nodes: [{type: 'comment'} as GerberNode],
+      unmatched: '',
    })
-  })
 
-  it('should handle multiple feedings', () => {
-    parser.feed('G04 hello world*\n')
-    parser.feed('M00*')
-
-    expect(parser.results()).to.eql({
-      type: ROOT,
-      filetype: GERBER,
-      children: [
-        {
-          type: COMMENT,
-          position: {
-            start: {line: 1, column: 1, offset: 0},
-            end: {line: 1, column: 16, offset: 15},
-          },
-          comment: 'hello world',
-        },
-        {
-          type: DONE,
-          position: {
-            start: {line: 2, column: 1, offset: 17},
-            end: {line: 2, column: 4, offset: 20},
-          },
-        },
-      ],
+    const subject = createParser()
+    const result = subject.feed('abc123').result()
+
+    expect(result).to.eql({
+      type: 'root',
+      filetype: 'gerber',
+      children: [{type: 'comment'}],
     })
   })
 
-  it('should handle multiple feedings with unexpected splits for streaming support', () => {
-    parser.feed('G0')
-    parser.feed('4 hello ')
-    parser.feed('world*\nM')
-    parser.feed('00')
-    parser.feed('*\n')
-
-    expect(parser.results()).to.eql({
-      type: ROOT,
-      filetype: GERBER,
-      children: [
-        {
-          type: COMMENT,
-          position: {
-            start: {line: 1, column: 1, offset: 0},
-            end: {line: 1, column: 16, offset: 15},
-          },
-          comment: 'hello world',
-        },
-        {
-          type: DONE,
-          position: {
-            start: {line: 2, column: 1, offset: 17},
-            end: {line: 2, column: 4, offset: 20},
-          },
-        },
-      ],
+  it('should preserve state across feedings', () => {
+    const token1 = {type: 'WHITESPACE'} as Token
+    const token2 = {type: 'NEWLINE'} as Token
+    const token3 = {type: 'CATCHALL'} as Token
+    const lexerState1 = {offset: 1} as LexerState
+    const lexerState2 = {offset: 2} as LexerState
+    const lexerState3 = {offset: 3} as LexerState
+
+    td.when(lexer.feed('abc123', null)).thenReturn([
+      [token1, lexerState1] as [Token, LexerState],
+    ])
+
+    td.when(lexer.feed('123def456', lexerState1)).thenReturn([
+      [token2, lexerState2] as [Token, LexerState],
+    ])
+
+    td.when(lexer.feed('456ghi789', lexerState1)).thenReturn([
+      [token3, lexerState3] as [Token, LexerState],
+    ])
+
+    td.when(matchSyntax([[token1, lexerState1]], null)).thenReturn({
+      filetype: 'gerber',
+      nodes: [{type: 'comment'} as GerberNode],
+      unmatched: '123',
+      lexerState: lexerState1,
+    })
+
+    td.when(matchSyntax([[token2, lexerState2]], 'gerber')).thenReturn({
+      filetype: null,
+      nodes: [{type: 'unimplemented'} as GerberNode],
+      unmatched: '456',
+      lexerState: null,
+    })
+
+    td.when(matchSyntax([[token3, lexerState3]], 'gerber')).thenReturn({
+      filetype: null,
+      nodes: [{type: 'done'}],
+      unmatched: '',
+      lexerState: null,
+    })
+
+    const subject = createParser()
+    const result = subject.feed('abc123').feed('def456').feed('ghi789').result()
+
+    expect(result).to.eql({
+      type: 'root',
+      filetype: 'gerber',
+      children: [{type: 'comment'}, {type: 'unimplemented'}, {type: 'done'}],
     })
   })
 })
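The rewritten tests combine Vitest module mocking with testdouble stubs: vi.mock() replaces each dependency module with a testdouble proxy object at import time, and td.when(...).thenReturn(...) then programs the stubbed functions per test, so the parser is exercised in isolation from the real lexer and syntax matcher. A minimal, self-contained sketch of that same pattern, using a hypothetical ./adder module (not part of this repository):

import {describe, it, afterEach, expect, vi} from 'vitest'
import * as td from 'testdouble'

import {add} from './adder' // hypothetical dependency under mock

// hoisted by Vitest: every export of './adder' becomes a testdouble stub
vi.mock('./adder', () => td.object<unknown>())

describe('a consumer of adder', () => {
  afterEach(() => {
    td.reset() // clear all stubbed behavior between tests
  })

  it('returns whatever the stub was told to return', () => {
    // rehearse the call: only the exact invocation add(1, 2) is stubbed
    td.when(add(1, 2)).thenReturn(42)

    expect(add(1, 2)).to.equal(42)
  })
})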
@@ -1,5 +1,73 @@

import {Lexer, LexerState, createLexer} from './lexer'
import {matchSyntax} from './syntax'
import {ROOT, GerberTree, GerberNode} from './tree'
import {Filetype} from './types'

export * from './constants'
export * from './lexer'
export * from './parser'
export * from './tree'
export * from './types'

/**
 * Gerber and NC drill file parser.
 *
 * @category Parser
 */
export interface Parser {
  /** Parser's {@linkcode Lexer} instance */
  lexer: Lexer
  /** Feed the parser with all or part of the source file */
  feed(chunk: string): this
  /** Get the resulting AST when you are done feeding the parser */
  result(): GerberTree
}

/**
 * {@linkcode Parser} factory and the primary export of the library.
 *
 * @example
 * ```ts
 * import {createParser} from '@tracespace/parser'
 *
 * // create a parser to parse a single file
 * const parser = createParser()
 *
 * // feed the parser the source file contents
 * parser.feed('G04 gerber file contents*\nM02*\n')
 *
 * // get the resulting AST
 * const tree = parser.result()
 * ```
 *
 * @category Parser
 */
export function createParser(): Parser {
  const lexer = createLexer()
  const children: GerberNode[] = []
  let filetype: Filetype | null = null
  let lexerState: LexerState | null = null
  let unmatched = ''

  const parser = {lexer, feed, result}
  return parser

  function feed(chunk: string): Parser {
    const tokens = lexer.feed(`${unmatched}${chunk}`, lexerState)
    const result = matchSyntax(tokens, filetype)

    filetype = filetype ?? result.filetype
    unmatched = result.unmatched
    lexerState = result.lexerState ?? lexerState
    children.push(...result.nodes)

    return parser
  }

  function result(): GerberTree {
    if (filetype === null) {
      throw new Error('File type not recognized')
    }

    return {type: ROOT, filetype, children}
  }
}
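Since result() throws until a filetype has been recognized, callers that may receive arbitrary input should guard the call. A small usage sketch against the API above (the input string is invented for illustration):

import {createParser} from '@tracespace/parser'

const parser = createParser()
parser.feed('not a gerber or drill file')

try {
  const tree = parser.result()
  console.log(`parsed ${tree.children.length} node(s) from a ${tree.filetype} file`)
} catch (error) {
  // result() throws 'File type not recognized' when no filetype was matched
  console.warn('could not parse input:', error)
}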