cmd/genlex: Start first valid token ID at 1. Let zero represent the ID NONE.

mewmew committed Feb 16, 2017
1 parent 144c207 commit 9b8ffae
Showing 6 changed files with 78 additions and 59 deletions.
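
The change in one picture: valid token IDs now start at 1, and ID 0 is reserved for a NONE sentinel, so the zero value of the ID type safely means "no token". A minimal toy sketch of the scheme follows; the type, constants, and table mirror the shape of the generated token package shown further down, but the grammar is invented and this is not the real package.

package main

import "fmt"

// Toy copy of the ID scheme this commit adopts.
type ID int

// None represents a non-existent token ID; it is the zero value of ID.
const None ID = 0

// NTokens is the number of valid token IDs in this toy grammar (IDs 1..NTokens).
const NTokens = 3

// IDs holds the string form of each ID, with index 0 reserved for NONE.
var IDs = [...]string{
	"NONE(0)",
	"name(1, `ident`)",
	"token(2, `+`)",
	"skip(3, `whitespace`)",
}

func main() {
	var id ID                      // zero value, i.e. None: "no token yet"
	fmt.Println(id == None)        // true
	fmt.Println(len(IDs), NTokens) // 4 3: the table has one extra slot for NONE(0)
	fmt.Println(IDs[2])            // token(2, `+`)
}
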
6 changes: 3 additions & 3 deletions cmd/genlex/lexer.go.tmpl
@@ -92,7 +92,7 @@ func (l *Lexer) Scan() (*token.Token, error) {
// input.
func locateTokens(input []byte, loc []int) (n int, id token.ID, err error) {
n = -1
- for i := 0; i < len(token.IDs); i++ {
+ for i := 0; i < token.NTokens; i++ {
start := loc[2*i]
if start == -1 {
continue
@@ -105,7 +105,7 @@ func locateTokens(input []byte, loc []int) (n int, id token.ID, err error) {
return 0, 0, errors.Errorf("ambiguity detected; input matches both token %q and token %q", input[:n], input[:end])
}
n = end
- id = token.ID(i)
+ id = token.ID(i+1)
}
if n == -1 {
// no matching token located.
@@ -125,7 +125,7 @@ func tokenLocs(input []byte) ([]int, error) {
// Validate submatch indices length; expecting two indices - start and end -
// per submatch, and in total 2 + (number of tokens) submatches.
got := len(loc)
- want := 2 * (2 + len(token.IDs))
+ want := 2 * (2 + token.NTokens)
if got != want {
return nil, errors.Errorf("invalid number of submatches; expected %d, got %d", want, got)
}
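
For context on the id = token.ID(i+1) change: the generated lexer matches every token kind with its own capture group in one combined regular expression (see tokenLocs above), and capture group i now corresponds to token ID i+1 because ID 0 is reserved for None. A standalone sketch of that mapping follows; the token set and regexp are invented for illustration, and only the i+1 offset mirrors the generated locateTokens (the submatch-index bookkeeping here is simplified).

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Toy token set in ID order: ident, int_lit, "+". One capture group per
	// token kind; group i (0-based) maps to token ID i+1, since ID 0 is None.
	re := regexp.MustCompile(`^(?:([A-Za-z_][A-Za-z0-9_]*)|([0-9]+)|(\+))`)
	input := "foo+1"
	loc := re.FindSubmatchIndex([]byte(input))
	if loc == nil {
		return // no token kind matches at the start of the input
	}
	for i := 0; i < 3; i++ {
		start := loc[2*(i+1)] // skip the whole-match pair at loc[0], loc[1]
		if start == -1 {
			continue // this token kind did not match
		}
		end := loc[2*(i+1)+1]
		fmt.Printf("group %d matched %q -> token ID %d\n", i, input[start:end], i+1)
	}
}

Running it prints a single line, group 0 matched "foo" -> token ID 1; that is, the first capture group yields the first valid token ID rather than ID 0.
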
31 changes: 16 additions & 15 deletions cmd/genlex/main.go
@@ -134,51 +134,52 @@ func parseJSON(jsonPath string) (tokenData map[string]interface{}, regs []string
}
var ids []string
tokenData = make(map[string]interface{})
- minName := -1
- maxName := -1
- minToken := -1
- maxToken := -1
- minSkip := -1
- maxSkip := -1
+ minName := 0
+ maxName := 0
+ minToken := 0
+ maxToken := 0
+ minSkip := 0
+ maxSkip := 0
if len(terms.Names) > 0 {
- minName = len(ids)
+ minName = len(ids) + 1
}
for _, term := range terms.Names {
id := fmt.Sprintf("name(%d, `%s`)", len(ids), term.ID)
id := fmt.Sprintf("name(%d, `%s`)", len(ids)+1, term.ID)
ids = append(ids, id)
regs = append(regs, term.Reg)
}
if len(terms.Names) > 0 {
- maxName = len(ids) - 1
+ maxName = len(ids)
}
if len(terms.Tokens) > 0 {
- minToken = len(ids)
+ minToken = len(ids) + 1
}
for _, term := range terms.Tokens {
id := fmt.Sprintf("token(%d, `%s`)", len(ids), term.ID)
id := fmt.Sprintf("token(%d, `%s`)", len(ids)+1, term.ID)
ids = append(ids, id)
regs = append(regs, term.Reg)
}
if len(terms.Tokens) > 0 {
- maxToken = len(ids) - 1
+ maxToken = len(ids)
}
if len(terms.Skip) > 0 {
- minSkip = len(ids)
+ minSkip = len(ids) + 1
}
for _, term := range terms.Skip {
id := fmt.Sprintf("skip(%d, `%s`)", len(ids), term.ID)
id := fmt.Sprintf("skip(%d, `%s`)", len(ids)+1, term.ID)
ids = append(ids, id)
regs = append(regs, term.Reg)
}
if len(terms.Skip) > 0 {
- maxSkip = len(ids) - 1
+ maxSkip = len(ids)
}
tokenData["MinName"] = minName
tokenData["MaxName"] = maxName
tokenData["MinToken"] = minToken
tokenData["MaxToken"] = maxToken
tokenData["MinSkip"] = minSkip
tokenData["MaxSkip"] = maxSkip
tokenData["NumTokens"] = len(ids)
tokenData["IDs"] = ids
return tokenData, regs, nil
}
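
Worked through for the uc example grammar (3 names, 26 tokens, 2 skip rules), the new numbering yields minName = 1, maxName = 3, minToken = 4, maxToken = 29, minSkip = 30, maxSkip = 31, and NumTokens = len(ids) = 31, which matches the regenerated examples/uc/token/token.go further down.
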
8 changes: 8 additions & 0 deletions cmd/genlex/token.go.tmpl
@@ -24,6 +24,9 @@ func (tok *Token) String() string {
// ID is the set of lexical tokens of the source language.
type ID int

+ // None represents a non-existent token ID.
+ const None ID = 0
+
// Minimum and maximum token ID for each category of tokens, as specified by the
// language grammar.
const (
@@ -61,8 +64,13 @@ func (id ID) IsSkip() bool {
return minSkip <= id && id <= maxSkip
}

+ // NTokens specifies the number of unique token IDs recognized by the language
+ // grammar.
+ const NTokens = {{ .NumTokens }}
+
// IDs specifies the string representation of each token ID.
var IDs = [...]string{
"NONE(0)",
{{- range .IDs }}
"{{ . }}",
{{- end }}
4 changes: 3 additions & 1 deletion examples/uc/Makefile
@@ -9,7 +9,9 @@ lexer: uc.json
genlex $<

clean:
- rm -rf token lexer
+ rm -f token/token.go lexer/lexer.go
+ -rmdir --ignore-fail-on-non-empty token
+ -rmdir --ignore-fail-on-non-empty lexer
rm -f uc.json

.PHONY: all clean
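
A note on the intent of the new clean rule (an inference; the commit message does not spell it out): rm -rf token lexer removed the whole directories, including any hand-written files kept next to the generated ones, whereas the new rule deletes only the generated token/token.go and lexer/lexer.go and then removes each directory only if it ends up empty (rmdir --ignore-fail-on-non-empty leaves non-empty directories in place without failing the target).
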
6 changes: 3 additions & 3 deletions examples/uc/lexer/lexer.go
@@ -92,7 +92,7 @@ func (l *Lexer) Scan() (*token.Token, error) {
// input.
func locateTokens(input []byte, loc []int) (n int, id token.ID, err error) {
n = -1
- for i := 0; i < len(token.IDs); i++ {
+ for i := 0; i < token.NTokens; i++ {
start := loc[2*i]
if start == -1 {
continue
@@ -105,7 +105,7 @@ func locateTokens(input []byte, loc []int) (n int, id token.ID, err error) {
return 0, 0, errors.Errorf("ambiguity detected; input matches both token %q and token %q", input[:n], input[:end])
}
n = end
- id = token.ID(i)
+ id = token.ID(i+1)
}
if n == -1 {
// no matching token located.
@@ -125,7 +125,7 @@ func tokenLocs(input []byte) ([]int, error) {
// Validate submatch indices length; expecting two indices - start and end -
// per submatch, and in total 2 + (number of tokens) submatches.
got := len(loc)
- want := 2 * (2 + len(token.IDs))
+ want := 2 * (2 + token.NTokens)
if got != want {
return nil, errors.Errorf("invalid number of submatches; expected %d, got %d", want, got)
}
82 changes: 45 additions & 37 deletions examples/uc/token/token.go
@@ -24,15 +24,18 @@ func (tok *Token) String() string {
// ID is the set of lexical tokens of the source language.
type ID int

+ // None represents a non-existent token ID.
+ const None ID = 0
+
// Minimum and maximum token ID for each category of tokens, as specified by the
// language grammar.
const (
- minName ID = 0
- maxName ID = 2
- minToken ID = 3
- maxToken ID = 28
- minSkip ID = 29
- maxSkip ID = 30
+ minName ID = 1
+ maxName ID = 3
+ minToken ID = 4
+ maxToken ID = 29
+ minSkip ID = 30
+ maxSkip ID = 31
)

// String returns the string representation of the token ID.
@@ -61,37 +64,42 @@ func (id ID) IsSkip() bool {
return minSkip <= id && id <= maxSkip
}

+ // NTokens specifies the number of unique token IDs recognized by the language
+ // grammar.
+ const NTokens = 31
+
// IDs specifies the string representation of each token ID.
var IDs = [...]string{
"name(0, `char_lit`)",
"name(1, `ident`)",
"name(2, `int_lit`)",
"token(3, `!`)",
"token(4, `!=`)",
"token(5, `&&`)",
"token(6, `(`)",
"token(7, `)`)",
"token(8, `*`)",
"token(9, `+`)",
"token(10, `,`)",
"token(11, `-`)",
"token(12, `/`)",
"token(13, `;`)",
"token(14, `<`)",
"token(15, `<=`)",
"token(16, `=`)",
"token(17, `==`)",
"token(18, `>`)",
"token(19, `>=`)",
"token(20, `[`)",
"token(21, `]`)",
"token(22, `else`)",
"token(23, `if`)",
"token(24, `return`)",
"token(25, `typedef`)",
"token(26, `while`)",
"token(27, `{`)",
"token(28, `}`)",
"skip(29, `comment`)",
"skip(30, `whitespace`)",
"NONE(0)",
"name(1, `char_lit`)",
"name(2, `ident`)",
"name(3, `int_lit`)",
"token(4, `!`)",
"token(5, `!=`)",
"token(6, `&&`)",
"token(7, `(`)",
"token(8, `)`)",
"token(9, `*`)",
"token(10, `+`)",
"token(11, `,`)",
"token(12, `-`)",
"token(13, `/`)",
"token(14, `;`)",
"token(15, `<`)",
"token(16, `<=`)",
"token(17, `=`)",
"token(18, `==`)",
"token(19, `>`)",
"token(20, `>=`)",
"token(21, `[`)",
"token(22, `]`)",
"token(23, `else`)",
"token(24, `if`)",
"token(25, `return`)",
"token(26, `typedef`)",
"token(27, `while`)",
"token(28, `{`)",
"token(29, `}`)",
"skip(30, `comment`)",
"skip(31, `whitespace`)",
}
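
A small usage sketch against the regenerated package. The import path is a placeholder, since it depends on where the generated package lives in your module; the expected results follow directly from the constants and table above.

package main

import (
	"fmt"

	"path/to/examples/uc/token" // placeholder: import path of the generated package
)

func main() {
	// Valid uc token IDs are now 1..31; 0 is the NONE sentinel.
	fmt.Println(int(token.None), token.NTokens) // 0 31
	fmt.Println(len(token.IDs))                 // 32: 31 valid IDs plus the NONE(0) entry

	// Whitespace moved from skip ID 30 to skip ID 31.
	ws := token.ID(31)
	fmt.Println(ws.IsSkip())   // true
	fmt.Println(token.IDs[ws]) // skip(31, `whitespace`)
}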
