refactor(parser): reorganize for clarity and debuggability
Showing 27 changed files with 703 additions and 490 deletions.
@@ -0,0 +1,110 @@

// Tests for @tracespace/parser

import {describe, it, beforeEach, expect} from 'vitest'
import {ROOT, COMMENT, DONE, GERBER, Parser, createParser} from '..'

describe('@tracespace/parser', () => {
  let parser: Parser

  beforeEach(() => {
    parser = createParser()
  })

  it('should raise if no AST yet', () => {
    expect(parser.result).to.throw('File type not recognized')
  })

  it('should feed its input into the tree', () => {
    parser.feed('G04 hello world*')

    expect(parser.result()).to.eql({
      type: ROOT,
      filetype: GERBER,
      children: [
        {
          type: COMMENT,
          position: {
            start: {line: 1, column: 1, offset: 0},
            end: {line: 1, column: 16, offset: 15},
          },
          comment: 'hello world',
        },
      ],
    })
  })

  it('should be chainable', () => {
    const result = parser.feed('G04 hello world*').result()
    expect(result).to.eql({
      type: ROOT,
      filetype: GERBER,
      children: [
        {
          type: COMMENT,
          position: {
            start: {line: 1, column: 1, offset: 0},
            end: {line: 1, column: 16, offset: 15},
          },
          comment: 'hello world',
        },
      ],
    })
  })

  it('should handle multiple feedings', () => {
    parser.feed('G04 hello world*\n')
    parser.feed('M00*')

    expect(parser.result()).to.eql({
      type: ROOT,
      filetype: GERBER,
      children: [
        {
          type: COMMENT,
          position: {
            start: {line: 1, column: 1, offset: 0},
            end: {line: 1, column: 16, offset: 15},
          },
          comment: 'hello world',
        },
        {
          type: DONE,
          position: {
            start: {line: 2, column: 1, offset: 17},
            end: {line: 2, column: 4, offset: 20},
          },
        },
      ],
    })
  })

  it('should handle multiple feedings with unexpected splits for streaming support', () => {
    parser.feed('G0')
    parser.feed('4 hello ')
    parser.feed('world*\nM')
    parser.feed('00')
    parser.feed('*\n')

    expect(parser.result()).to.eql({
      type: ROOT,
      filetype: GERBER,
      children: [
        {
          type: COMMENT,
          position: {
            start: {line: 1, column: 1, offset: 0},
            end: {line: 1, column: 16, offset: 15},
          },
          comment: 'hello world',
        },
        {
          type: DONE,
          position: {
            start: {line: 2, column: 1, offset: 17},
            end: {line: 2, column: 4, offset: 20},
          },
        },
      ],
    })
  })
})
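The last test above pins down the streaming contract: token boundaries may fall anywhere between feed() calls, and the resulting AST is identical. A minimal sketch of what that enables, feeding a source file to the parser chunk by chunk as it is read from disk (the file name is invented for illustration; createParser, feed, and result are the API defined in this commit):

import {createReadStream} from 'node:fs'
import {createParser} from '@tracespace/parser'

async function parseGerberFile(path: string) {
  const parser = createParser()

  // feed() accepts arbitrary partial chunks, so raw stream chunks
  // can be passed through without any re-buffering by the caller
  for await (const chunk of createReadStream(path, 'utf8')) {
    parser.feed(chunk as string)
  }

  return parser.result()
}

// usage: parseGerberFile('fixture.gbr').then(tree => console.log(tree.filetype))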
@@ -1,92 +1,109 @@
 // Tests for @tracespace/parser
+import {describe, it, beforeEach, afterEach, expect, vi} from 'vitest'
+import * as td from 'testdouble'
 
-import {describe, it, beforeEach, expect} from 'vitest'
-import {ROOT, COMMENT, DONE, GERBER, Parser, createParser} from '..'
+import {Token, Lexer, LexerState, createLexer} from '../lexer'
+import {matchSyntax} from '../syntax'
+import {GerberNode} from '../tree'
+import {createParser} from '..'
 
-describe('@tracespace/parser', () => {
-  let parser: Parser
+vi.mock('../lexer', () => td.object<unknown>())
+vi.mock('../syntax', () => td.object<unknown>())
 
-  beforeEach(() => {
-    parser = createParser()
+describe('parser', () => {
+  let lexer: Lexer
+
+  beforeEach(async () => {
+    lexer = td.object<Lexer>()
+    td.when(createLexer()).thenReturn(lexer)
   })
 
-  it('should raise if no AST yet', () => {
-    expect(parser.results).to.throw('File type not recognized')
+  afterEach(() => {
+    td.reset()
   })
 
-  it('should feed its input into the tree', () => {
-    parser.feed('G04 hello world*')
-
-    expect(parser.results()).to.eql({
-      type: ROOT,
-      filetype: GERBER,
-      children: [
-        {
-          type: COMMENT,
-          position: {
-            start: {line: 1, column: 1, offset: 0},
-            end: {line: 1, column: 16, offset: 15},
-          },
-          comment: 'hello world',
-        },
-      ],
+  it('should tokenize the input and match the tokens', () => {
+    const token1 = {type: 'WHITESPACE'} as Token
+    const token2 = {type: 'NEWLINE'} as Token
+    const lexerState1 = {offset: 1} as LexerState
+    const lexerState2 = {offset: 2} as LexerState
+
+    td.when(lexer.feed('abc123', null)).thenReturn([
+      [token1, lexerState1] as [Token, LexerState],
+      [token2, lexerState2] as [Token, LexerState],
+    ])
+
+    td.when(
+      matchSyntax(
+        [
+          [token1, lexerState1],
+          [token2, lexerState2],
+        ],
+        null
+      )
+    ).thenReturn({
+      filetype: 'gerber',
+      nodes: [{type: 'comment'} as GerberNode],
+      unmatched: '',
    })
-  })
 
-  it('should handle multiple feedings', () => {
-    parser.feed('G04 hello world*\n')
-    parser.feed('M00*')
-
-    expect(parser.results()).to.eql({
-      type: ROOT,
-      filetype: GERBER,
-      children: [
-        {
-          type: COMMENT,
-          position: {
-            start: {line: 1, column: 1, offset: 0},
-            end: {line: 1, column: 16, offset: 15},
-          },
-          comment: 'hello world',
-        },
-        {
-          type: DONE,
-          position: {
-            start: {line: 2, column: 1, offset: 17},
-            end: {line: 2, column: 4, offset: 20},
-          },
-        },
-      ],
+    const subject = createParser()
+    const result = subject.feed('abc123').result()
+
+    expect(result).to.eql({
+      type: 'root',
+      filetype: 'gerber',
+      children: [{type: 'comment'}],
     })
   })
 
-  it('should handle multiple feedings with unexpected splits for streaming support', () => {
-    parser.feed('G0')
-    parser.feed('4 hello ')
-    parser.feed('world*\nM')
-    parser.feed('00')
-    parser.feed('*\n')
-
-    expect(parser.results()).to.eql({
-      type: ROOT,
-      filetype: GERBER,
-      children: [
-        {
-          type: COMMENT,
-          position: {
-            start: {line: 1, column: 1, offset: 0},
-            end: {line: 1, column: 16, offset: 15},
-          },
-          comment: 'hello world',
-        },
-        {
-          type: DONE,
-          position: {
-            start: {line: 2, column: 1, offset: 17},
-            end: {line: 2, column: 4, offset: 20},
-          },
-        },
-      ],
+  it('should preserve state across feedings', () => {
+    const token1 = {type: 'WHITESPACE'} as Token
+    const token2 = {type: 'NEWLINE'} as Token
+    const token3 = {type: 'CATCHALL'} as Token
+    const lexerState1 = {offset: 1} as LexerState
+    const lexerState2 = {offset: 2} as LexerState
+    const lexerState3 = {offset: 3} as LexerState
+
+    td.when(lexer.feed('abc123', null)).thenReturn([
+      [token1, lexerState1] as [Token, LexerState],
+    ])
+
+    td.when(lexer.feed('123def456', lexerState1)).thenReturn([
+      [token2, lexerState2] as [Token, LexerState],
+    ])
+
+    td.when(lexer.feed('456ghi789', lexerState1)).thenReturn([
+      [token3, lexerState3] as [Token, LexerState],
+    ])
+
+    td.when(matchSyntax([[token1, lexerState1]], null)).thenReturn({
+      filetype: 'gerber',
+      nodes: [{type: 'comment'} as GerberNode],
+      unmatched: '123',
+      lexerState: lexerState1,
+    })
+
+    td.when(matchSyntax([[token2, lexerState2]], 'gerber')).thenReturn({
+      filetype: null,
+      nodes: [{type: 'unimplemented'} as GerberNode],
+      unmatched: '456',
+      lexerState: null,
+    })
+
+    td.when(matchSyntax([[token3, lexerState3]], 'gerber')).thenReturn({
+      filetype: null,
+      nodes: [{type: 'done'}],
+      unmatched: '',
+      lexerState: null,
+    })
+
+    const subject = createParser()
+    const result = subject.feed('abc123').feed('def456').feed('ghi789').result()
+
+    expect(result).to.eql({
+      type: 'root',
+      filetype: 'gerber',
+      children: [{type: 'comment'}, {type: 'unimplemented'}, {type: 'done'}],
     })
   })
 })
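The rewritten tests combine Vitest module mocking with testdouble stubs: vi.mock() replaces each dependency module with a testdouble proxy object at import time, and td.when(...).thenReturn(...) then programs the stubbed functions per test, so the parser is exercised in isolation from the real lexer and syntax matcher. A minimal, self-contained sketch of that same pattern, using a hypothetical ./adder module (not part of this repository):

import {describe, it, afterEach, expect, vi} from 'vitest'
import * as td from 'testdouble'

import {add} from './adder' // hypothetical dependency under mock

// hoisted by Vitest: every export of './adder' becomes a testdouble stub
vi.mock('./adder', () => td.object<unknown>())

describe('a consumer of adder', () => {
  afterEach(() => {
    td.reset() // clear all stubbed behavior between tests
  })

  it('returns whatever the stub was told to return', () => {
    // rehearse the call: only the exact invocation add(1, 2) is stubbed
    td.when(add(1, 2)).thenReturn(42)

    expect(add(1, 2)).to.equal(42)
  })
})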
@@ -1,5 +1,73 @@

import {Lexer, LexerState, createLexer} from './lexer'
import {matchSyntax} from './syntax'
import {ROOT, GerberTree, GerberNode} from './tree'
import {Filetype} from './types'

export * from './constants'
export * from './lexer'
export * from './parser'
export * from './tree'
export * from './types'

/**
 * Gerber and NC drill file parser.
 *
 * @category Parser
 */
export interface Parser {
  /** Parser's {@linkcode Lexer} instance */
  lexer: Lexer
  /** Feed the parser with all or part of the source file */
  feed(chunk: string): this
  /** Get the resulting AST when you are done feeding the parser */
  result(): GerberTree
}

/**
 * {@linkcode Parser} factory and the primary export of the library.
 *
 * @example
 * ```ts
 * import {createParser} from '@tracespace/parser'
 *
 * // create a parser to parse a single file
 * const parser = createParser()
 *
 * // feed the parser the source file contents
 * parser.feed('G04 gerber file contents*\nM02*\n')
 *
 * // get the resulting AST
 * const tree = parser.result()
 * ```
 *
 * @category Parser
 */
export function createParser(): Parser {
  const lexer = createLexer()
  const children: GerberNode[] = []
  let filetype: Filetype | null = null
  let lexerState: LexerState | null = null
  let unmatched = ''

  const parser = {lexer, feed, result}
  return parser

  function feed(chunk: string): Parser {
    const tokens = lexer.feed(`${unmatched}${chunk}`, lexerState)
    const result = matchSyntax(tokens, filetype)

    filetype = filetype ?? result.filetype
    unmatched = result.unmatched
    lexerState = result.lexerState ?? lexerState
    children.push(...result.nodes)

    return parser
  }

  function result(): GerberTree {
    if (filetype === null) {
      throw new Error('File type not recognized')
    }

    return {type: ROOT, filetype, children}
  }
}
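Since result() throws until a filetype has been recognized, callers that may receive arbitrary input should guard the call. A small usage sketch against the API above (the input string is invented for illustration):

import {createParser} from '@tracespace/parser'

const parser = createParser()
parser.feed('not a gerber or drill file')

try {
  const tree = parser.result()
  console.log(`parsed ${tree.children.length} node(s) from a ${tree.filetype} file`)
} catch (error) {
  // result() throws 'File type not recognized' when no filetype was matched
  console.warn('could not parse input:', error)
}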