From 9b8ffae1c456d6d9c4bbad9425aee18153c25113 Mon Sep 17 00:00:00 2001 From: mewmew Date: Thu, 16 Feb 2017 02:35:25 +0100 Subject: [PATCH] cmd/genlex: Start first valid token ID at 1. Let zero represent the ID NONE. --- cmd/genlex/lexer.go.tmpl | 6 +-- cmd/genlex/main.go | 31 +++++++------- cmd/genlex/token.go.tmpl | 8 ++++ examples/uc/Makefile | 4 +- examples/uc/lexer/lexer.go | 6 +-- examples/uc/token/token.go | 82 +++++++++++++++++++++----------------- 6 files changed, 78 insertions(+), 59 deletions(-) diff --git a/cmd/genlex/lexer.go.tmpl b/cmd/genlex/lexer.go.tmpl index 8a814f5..3ca6167 100644 --- a/cmd/genlex/lexer.go.tmpl +++ b/cmd/genlex/lexer.go.tmpl @@ -92,7 +92,7 @@ func (l *Lexer) Scan() (*token.Token, error) { // input. func locateTokens(input []byte, loc []int) (n int, id token.ID, err error) { n = -1 - for i := 0; i < len(token.IDs); i++ { + for i := 0; i < token.NTokens; i++ { start := loc[2*i] if start == -1 { continue @@ -105,7 +105,7 @@ func locateTokens(input []byte, loc []int) (n int, id token.ID, err error) { return 0, 0, errors.Errorf("ambiguity detected; input matches both token %q and token %q", input[:n], input[:end]) } n = end - id = token.ID(i) + id = token.ID(i+1) } if n == -1 { // no matching token located. @@ -125,7 +125,7 @@ func tokenLocs(input []byte) ([]int, error) { // Validate submatch indices length; expecting two indices - start and end - // per submatch, and in total 2 + (number of tokens) submatches. got := len(loc) - want := 2 * (2 + len(token.IDs)) + want := 2 * (2 + token.NTokens) if got != want { return nil, errors.Errorf("invalid number of submatches; expected %d, got %d", want, got) } diff --git a/cmd/genlex/main.go b/cmd/genlex/main.go index 40eefb0..a99e498 100644 --- a/cmd/genlex/main.go +++ b/cmd/genlex/main.go @@ -134,44 +134,44 @@ func parseJSON(jsonPath string) (tokenData map[string]interface{}, regs []string } var ids []string tokenData = make(map[string]interface{}) - minName := -1 - maxName := -1 - minToken := -1 - maxToken := -1 - minSkip := -1 - maxSkip := -1 + minName := 0 + maxName := 0 + minToken := 0 + maxToken := 0 + minSkip := 0 + maxSkip := 0 if len(terms.Names) > 0 { - minName = len(ids) + minName = len(ids) + 1 } for _, term := range terms.Names { - id := fmt.Sprintf("name(%d, `%s`)", len(ids), term.ID) + id := fmt.Sprintf("name(%d, `%s`)", len(ids)+1, term.ID) ids = append(ids, id) regs = append(regs, term.Reg) } if len(terms.Names) > 0 { - maxName = len(ids) - 1 + maxName = len(ids) } if len(terms.Tokens) > 0 { - minToken = len(ids) + minToken = len(ids) + 1 } for _, term := range terms.Tokens { - id := fmt.Sprintf("token(%d, `%s`)", len(ids), term.ID) + id := fmt.Sprintf("token(%d, `%s`)", len(ids)+1, term.ID) ids = append(ids, id) regs = append(regs, term.Reg) } if len(terms.Tokens) > 0 { - maxToken = len(ids) - 1 + maxToken = len(ids) } if len(terms.Skip) > 0 { - minSkip = len(ids) + minSkip = len(ids) + 1 } for _, term := range terms.Skip { - id := fmt.Sprintf("skip(%d, `%s`)", len(ids), term.ID) + id := fmt.Sprintf("skip(%d, `%s`)", len(ids)+1, term.ID) ids = append(ids, id) regs = append(regs, term.Reg) } if len(terms.Skip) > 0 { - maxSkip = len(ids) - 1 + maxSkip = len(ids) } tokenData["MinName"] = minName tokenData["MaxName"] = maxName @@ -179,6 +179,7 @@ func parseJSON(jsonPath string) (tokenData map[string]interface{}, regs []string tokenData["MaxToken"] = maxToken tokenData["MinSkip"] = minSkip tokenData["MaxSkip"] = maxSkip + tokenData["NumTokens"] = len(ids) tokenData["IDs"] = ids return tokenData, regs, nil 
 }
diff --git a/cmd/genlex/token.go.tmpl b/cmd/genlex/token.go.tmpl
index 61edbe0..6552884 100644
--- a/cmd/genlex/token.go.tmpl
+++ b/cmd/genlex/token.go.tmpl
@@ -24,6 +24,9 @@ func (tok *Token) String() string {
 // ID is the set of lexical tokens of the source language.
 type ID int
 
+// None represents a non-existent token ID.
+const None ID = 0
+
 // Minimum and maximum token ID for each category of tokens, as specified by the
 // language grammar.
 const (
@@ -61,8 +64,13 @@ func (id ID) IsSkip() bool {
 	return minSkip <= id && id <= maxSkip
 }
 
+// NTokens specifies the number of unique token IDs recognized by the language
+// grammar.
+const NTokens = {{ .NumTokens }}
+
 // IDs specifies the string representation of each token ID.
 var IDs = [...]string{
+	"NONE(0)",
 {{- range .IDs }}
 	"{{ . }}",
 {{- end }}
diff --git a/examples/uc/Makefile b/examples/uc/Makefile
index cad02fa..f3cc825 100644
--- a/examples/uc/Makefile
+++ b/examples/uc/Makefile
@@ -9,7 +9,9 @@ lexer: uc.json
 	genlex $<
 
 clean:
-	rm -rf token lexer
+	rm -f token/token.go lexer/lexer.go
+	-rmdir --ignore-fail-on-non-empty token
+	-rmdir --ignore-fail-on-non-empty lexer
 	rm -f uc.json
 
 .PHONY: all clean
diff --git a/examples/uc/lexer/lexer.go b/examples/uc/lexer/lexer.go
index 43a7944..e252697 100644
--- a/examples/uc/lexer/lexer.go
+++ b/examples/uc/lexer/lexer.go
@@ -92,7 +92,7 @@ func (l *Lexer) Scan() (*token.Token, error) {
 // input.
 func locateTokens(input []byte, loc []int) (n int, id token.ID, err error) {
 	n = -1
-	for i := 0; i < len(token.IDs); i++ {
+	for i := 0; i < token.NTokens; i++ {
 		start := loc[2*i]
 		if start == -1 {
 			continue
@@ -105,7 +105,7 @@ func locateTokens(input []byte, loc []int) (n int, id token.ID, err error) {
 			return 0, 0, errors.Errorf("ambiguity detected; input matches both token %q and token %q", input[:n], input[:end])
 		}
 		n = end
-		id = token.ID(i)
+		id = token.ID(i+1)
 	}
 	if n == -1 {
 		// no matching token located.
@@ -125,7 +125,7 @@ func tokenLocs(input []byte) ([]int, error) {
 	// Validate submatch indices length; expecting two indices - start and end -
 	// per submatch, and in total 2 + (number of tokens) submatches.
 	got := len(loc)
-	want := 2 * (2 + len(token.IDs))
+	want := 2 * (2 + token.NTokens)
 	if got != want {
 		return nil, errors.Errorf("invalid number of submatches; expected %d, got %d", want, got)
 	}
diff --git a/examples/uc/token/token.go b/examples/uc/token/token.go
index de0cb10..a6bc17c 100644
--- a/examples/uc/token/token.go
+++ b/examples/uc/token/token.go
@@ -24,15 +24,18 @@ func (tok *Token) String() string {
 // ID is the set of lexical tokens of the source language.
 type ID int
 
+// None represents a non-existent token ID.
+const None ID = 0
+
 // Minimum and maximum token ID for each category of tokens, as specified by the
 // language grammar.
 const (
-	minName  ID = 0
-	maxName  ID = 2
-	minToken ID = 3
-	maxToken ID = 28
-	minSkip  ID = 29
-	maxSkip  ID = 30
+	minName  ID = 1
+	maxName  ID = 3
+	minToken ID = 4
+	maxToken ID = 29
+	minSkip  ID = 30
+	maxSkip  ID = 31
 )
 
 // String returns the string representation of the token ID.
@@ -61,37 +64,42 @@ func (id ID) IsSkip() bool {
 	return minSkip <= id && id <= maxSkip
 }
 
+// NTokens specifies the number of unique token IDs recognized by the language
+// grammar.
+const NTokens = 31
+
 // IDs specifies the string representation of each token ID.
var IDs = [...]string{ - "name(0, `char_lit`)", - "name(1, `ident`)", - "name(2, `int_lit`)", - "token(3, `!`)", - "token(4, `!=`)", - "token(5, `&&`)", - "token(6, `(`)", - "token(7, `)`)", - "token(8, `*`)", - "token(9, `+`)", - "token(10, `,`)", - "token(11, `-`)", - "token(12, `/`)", - "token(13, `;`)", - "token(14, `<`)", - "token(15, `<=`)", - "token(16, `=`)", - "token(17, `==`)", - "token(18, `>`)", - "token(19, `>=`)", - "token(20, `[`)", - "token(21, `]`)", - "token(22, `else`)", - "token(23, `if`)", - "token(24, `return`)", - "token(25, `typedef`)", - "token(26, `while`)", - "token(27, `{`)", - "token(28, `}`)", - "skip(29, `comment`)", - "skip(30, `whitespace`)", + "NONE(0)", + "name(1, `char_lit`)", + "name(2, `ident`)", + "name(3, `int_lit`)", + "token(4, `!`)", + "token(5, `!=`)", + "token(6, `&&`)", + "token(7, `(`)", + "token(8, `)`)", + "token(9, `*`)", + "token(10, `+`)", + "token(11, `,`)", + "token(12, `-`)", + "token(13, `/`)", + "token(14, `;`)", + "token(15, `<`)", + "token(16, `<=`)", + "token(17, `=`)", + "token(18, `==`)", + "token(19, `>`)", + "token(20, `>=`)", + "token(21, `[`)", + "token(22, `]`)", + "token(23, `else`)", + "token(24, `if`)", + "token(25, `return`)", + "token(26, `typedef`)", + "token(27, `while`)", + "token(28, `{`)", + "token(29, `}`)", + "skip(30, `comment`)", + "skip(31, `whitespace`)", }
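
The renumbering follows a simple scheme: IDs are handed out from 1 upward,
zero is reserved for NONE, and each category of terminals (names, tokens,
skip rules) occupies a contiguous [min, max] range, with [0, 0] marking an
empty category. The following standalone sketch restates the bookkeeping
that parseJSON now performs (illustrative only, not code from the patch):

	package main

	import "fmt"

	func main() {
		// Hand out IDs from a running counter, starting at 1 so that 0
		// stays reserved for NONE. Each category occupies a contiguous
		// [first, last] range; an empty category reports [0, 0].
		n := 0
		assign := func(count int) (first, last int) {
			if count == 0 {
				return 0, 0
			}
			first = n + 1
			n += count
			return first, n
		}

		minName, maxName := assign(3)    // char_lit, ident, int_lit
		minToken, maxToken := assign(26) // `!` through `}`
		minSkip, maxSkip := assign(2)    // comment, whitespace

		fmt.Println(minName, maxName)   // 1 3
		fmt.Println(minToken, maxToken) // 4 29
		fmt.Println(minSkip, maxSkip)   // 30 31
		fmt.Println("NTokens:", n)      // NTokens: 31
	}

For the uC grammar (3 names, 26 tokens, 2 skip rules) this reproduces the
constants in the regenerated examples/uc/token/token.go: names occupy 1-3,
tokens 4-29, skips 30-31, and NTokens = 31.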
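
Downstream code can now rely on the zero value of token.ID meaning "no
token". A small usage sketch against the generated uC token package (the
import path is an assumption; adjust it to the actual module path):

	package main

	import (
		"fmt"

		// NOTE: assumed import path.
		"github.com/mewmew/speak/examples/uc/token"
	)

	func main() {
		// The zero value of token.ID is None, so an unset ID is
		// distinguishable from the first valid token ID (which is 1).
		var id token.ID
		fmt.Println(id == token.None) // true

		// Valid IDs occupy [1, NTokens] and index directly into IDs,
		// which carries a leading "NONE(0)" entry.
		fmt.Println(len(token.IDs) == token.NTokens+1) // true
		for id := token.ID(1); id <= token.NTokens; id++ {
			if id.IsSkip() {
				fmt.Println(token.IDs[id])
			}
		}
	}

Reserving the zero value for an invalid sentinel mirrors a common Go
convention (cf. ILLEGAL in go/token), and keeping the leading "NONE(0)"
entry in IDs means a valid ID indexes into IDs without any off-by-one
adjustment.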