lexer.go.tmpl
// generated by speak; DO NOT EDIT.

// Package lexer implements lexical analysis of the source language.
package lexer

import (
	"io"
	"io/ioutil"
	"regexp"

	"{{ .ImportPath }}"

	"github.com/pkg/errors"
)

// regstr specifies a regular expression for identifying the tokens of the
// input grammar.
const regstr = `{{ .Regexp }}`
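
// For instance (hypothetical example, not actual generated output): a grammar
// with an integer token and an identifier token could produce
//
//	regstr = `(([0-9]+)|([a-zA-Z_][a-zA-Z0-9_]*))`
//
// one capture group per token type, wrapped in a single outer group.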

// reg is a compiled version of regstr with leftmost-longest matching enabled.
var reg *regexp.Regexp

func init() {
	// Compile regexp for identifying tokens and enforce leftmost-longest
	// matching.
	reg = regexp.MustCompile(regstr)
	reg.Longest()
}

// A Lexer lexes the source input into a slice of tokens.
type Lexer struct {
	// Source input.
	input []byte
	// Current position in the source input.
	pos int
}

// New returns a new scanner lexing from r.
func New(r io.Reader) (*Lexer, error) {
	input, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	return NewFromBytes(input), nil
}

// Open returns a new scanner lexing from path.
func Open(path string) (*Lexer, error) {
	input, err := ioutil.ReadFile(path)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	return NewFromBytes(input), nil
}

// NewFromString returns a new scanner lexing from input.
func NewFromString(input string) *Lexer {
	return NewFromBytes([]byte(input))
}

// NewFromBytes returns a new scanner lexing from input.
func NewFromBytes(input []byte) *Lexer {
	return &Lexer{input: input}
}

// Scan lexes and returns the next token of the source input.
func (l *Lexer) Scan() (*token.Token, error) {
	// Handle EOF.
	if l.pos >= len(l.input) {
		return nil, errors.WithStack(io.EOF)
	}
	input := l.input[l.pos:]
	// Identify token locations matching start of input.
	loc, err := tokenLocs(input)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	n, id, err := locateTokens(input, loc)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	lit := input[:n]
	tok := &token.Token{
		Pos: l.pos,
		ID:  id,
		Lit: lit,
	}
	l.pos += n
	return tok, nil
}
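
// A typical driver loop (sketch; tok.ID and tok.Lit are fields of the
// generated token.Token, and Scan reports end of input as io.EOF):
//
//	l := lexer.NewFromString("input")
//	for {
//		tok, err := l.Scan()
//		if err != nil {
//			if errors.Cause(err) == io.EOF {
//				break
//			}
//			log.Fatalf("%+v", err)
//		}
//		fmt.Printf("%v %q\n", tok.ID, tok.Lit)
//	}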

// locateTokens searches for the longest token that matches the start of the
// input.
func locateTokens(input []byte, loc []int) (n int, id token.ID, err error) {
	n = -1
	for i := 0; i < token.NTokens; i++ {
		start := loc[2*i]
		if start == -1 {
			continue
		}
		if start != 0 {
			return 0, 0, errors.Errorf("invalid start index of token; expected 0, got %d", start)
		}
		end := loc[2*i+1]
		if n != -1 {
			return 0, 0, errors.Errorf("ambiguity detected; input matches both token %q and token %q", input[:n], input[:end])
		}
		n = end
		id = token.ID(i + 1)
	}
	if n == -1 {
		// No matching token located.
		return 0, 0, errors.Errorf("unable to identify valid token at %q", input)
	}
	return n, id, nil
}
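
// For example (sketch of the case this check guards against): if the
// submatch pairs in loc reported that two token types both matched the
// two-byte prefix "if",
//
//	loc = [0, 2, 0, 2]
//
// locateTokens would return the "ambiguity detected" error rather than
// silently preferring one token type over the other.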

// tokenLocs returns the start and end location of each token type that
// matches the start of the input.
func tokenLocs(input []byte) ([]int, error) {
	loc := reg.FindSubmatchIndex(input)
	if loc == nil {
		// No submatch located.
		return nil, errors.Errorf("unable to identify valid token at %q", input)
	}
	// Validate submatch indices length; expecting two indices - start and end -
	// per submatch, and in total 2 + (number of tokens) submatches.
	got := len(loc)
	want := 2 * (2 + token.NTokens)
	if got != want {
		return nil, errors.Errorf("invalid number of submatches; expected %d, got %d", want, got)
	}
	// Skip the first two submatches as they do not identify specific tokens.
	loc = loc[2*2:]
	return loc, nil
}
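
// Illustration (sketch, assuming regstr wraps one capture group per token
// type in a single outer group): for token.NTokens = 2 and input "ab" where
// token 1 matches the prefix "a",
//
//	reg.FindSubmatchIndex(input) = [0, 1, 0, 1, 0, 1, -1, -1]
//	                                whole match, outer group, token 1, token 2
//
// tokenLocs validates the length (2 * (2 + 2) = 8), drops the first four
// indices, and returns one (start, end) pair per token type.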