cmd/genlex: Start first valid token ID at 1. Let zero represent the ID NONE.

mewmew committed Feb 16, 2017
1 parent 144c207 commit 9b8ffae
Showing 6 changed files with 78 additions and 59 deletions.
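
The change in one picture: valid token IDs now start at 1, and ID 0 is reserved for a NONE sentinel, so the zero value of the ID type safely means "no token". A minimal toy sketch of the scheme follows; the type, constants, and table mirror the shape of the generated token package shown further down, but the grammar is invented and this is not the real package.

package main

import "fmt"

// Toy copy of the ID scheme this commit adopts.
type ID int

// None represents a non-existent token ID; it is the zero value of ID.
const None ID = 0

// NTokens is the number of valid token IDs in this toy grammar (IDs 1..NTokens).
const NTokens = 3

// IDs holds the string form of each ID, with index 0 reserved for NONE.
var IDs = [...]string{
	"NONE(0)",
	"name(1, `ident`)",
	"token(2, `+`)",
	"skip(3, `whitespace`)",
}

func main() {
	var id ID                      // zero value, i.e. None: "no token yet"
	fmt.Println(id == None)        // true
	fmt.Println(len(IDs), NTokens) // 4 3: the table has one extra slot for NONE(0)
	fmt.Println(IDs[2])            // token(2, `+`)
}
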
6 changes: 3 additions & 3 deletions cmd/genlex/lexer.go.tmpl
@@ -92,7 +92,7 @@ func (l *Lexer) Scan() (*token.Token, error) {
// input.
func locateTokens(input []byte, loc []int) (n int, id token.ID, err error) {
n = -1
- for i := 0; i < len(token.IDs); i++ {
+ for i := 0; i < token.NTokens; i++ {
start := loc[2*i]
if start == -1 {
continue
@@ -105,7 +105,7 @@ func locateTokens(input []byte, loc []int) (n int, id token.ID, err error) {
return 0, 0, errors.Errorf("ambiguity detected; input matches both token %q and token %q", input[:n], input[:end])
}
n = end
- id = token.ID(i)
+ id = token.ID(i+1)
}
if n == -1 {
// no matching token located.
@@ -125,7 +125,7 @@ func tokenLocs(input []byte) ([]int, error) {
// Validate submatch indices length; expecting two indices - start and end -
// per submatch, and in total 2 + (number of tokens) submatches.
got := len(loc)
- want := 2 * (2 + len(token.IDs))
+ want := 2 * (2 + token.NTokens)
if got != want {
return nil, errors.Errorf("invalid number of submatches; expected %d, got %d", want, got)
}
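
For context on the id = token.ID(i+1) change: the generated lexer matches every token kind with its own capture group in one combined regular expression (see tokenLocs above), and capture group i now corresponds to token ID i+1 because ID 0 is reserved for None. A standalone sketch of that mapping follows; the token set and regexp are invented for illustration, and only the i+1 offset mirrors the generated locateTokens (the submatch-index bookkeeping here is simplified).

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Toy token set in ID order: ident, int_lit, "+". One capture group per
	// token kind; group i (0-based) maps to token ID i+1, since ID 0 is None.
	re := regexp.MustCompile(`^(?:([A-Za-z_][A-Za-z0-9_]*)|([0-9]+)|(\+))`)
	input := "foo+1"
	loc := re.FindSubmatchIndex([]byte(input))
	if loc == nil {
		return // no token kind matches at the start of the input
	}
	for i := 0; i < 3; i++ {
		start := loc[2*(i+1)] // skip the whole-match pair at loc[0], loc[1]
		if start == -1 {
			continue // this token kind did not match
		}
		end := loc[2*(i+1)+1]
		fmt.Printf("group %d matched %q -> token ID %d\n", i, input[start:end], i+1)
	}
}

Running it prints a single line, group 0 matched "foo" -> token ID 1; that is, the first capture group yields the first valid token ID rather than ID 0.
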
31 changes: 16 additions & 15 deletions cmd/genlex/main.go
@@ -134,51 +134,52 @@ func parseJSON(jsonPath string) (tokenData map[string]interface{}, regs []string
}
var ids []string
tokenData = make(map[string]interface{})
- minName := -1
- maxName := -1
- minToken := -1
- maxToken := -1
- minSkip := -1
- maxSkip := -1
+ minName := 0
+ maxName := 0
+ minToken := 0
+ maxToken := 0
+ minSkip := 0
+ maxSkip := 0
if len(terms.Names) > 0 {
- minName = len(ids)
+ minName = len(ids) + 1
}
for _, term := range terms.Names {
id := fmt.Sprintf("name(%d, `%s`)", len(ids), term.ID)
id := fmt.Sprintf("name(%d, `%s`)", len(ids)+1, term.ID)
ids = append(ids, id)
regs = append(regs, term.Reg)
}
if len(terms.Names) > 0 {
- maxName = len(ids) - 1
+ maxName = len(ids)
}
if len(terms.Tokens) > 0 {
- minToken = len(ids)
+ minToken = len(ids) + 1
}
for _, term := range terms.Tokens {
id := fmt.Sprintf("token(%d, `%s`)", len(ids), term.ID)
id := fmt.Sprintf("token(%d, `%s`)", len(ids)+1, term.ID)
ids = append(ids, id)
regs = append(regs, term.Reg)
}
if len(terms.Tokens) > 0 {
- maxToken = len(ids) - 1
+ maxToken = len(ids)
}
if len(terms.Skip) > 0 {
- minSkip = len(ids)
+ minSkip = len(ids) + 1
}
for _, term := range terms.Skip {
id := fmt.Sprintf("skip(%d, `%s`)", len(ids), term.ID)
id := fmt.Sprintf("skip(%d, `%s`)", len(ids)+1, term.ID)
ids = append(ids, id)
regs = append(regs, term.Reg)
}
if len(terms.Skip) > 0 {
- maxSkip = len(ids) - 1
+ maxSkip = len(ids)
}
tokenData["MinName"] = minName
tokenData["MaxName"] = maxName
tokenData["MinToken"] = minToken
tokenData["MaxToken"] = maxToken
tokenData["MinSkip"] = minSkip
tokenData["MaxSkip"] = maxSkip
tokenData["NumTokens"] = len(ids)
tokenData["IDs"] = ids
return tokenData, regs, nil
}
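
Worked through for the uc example grammar (3 names, 26 tokens, 2 skip rules), the new numbering yields minName = 1, maxName = 3, minToken = 4, maxToken = 29, minSkip = 30, maxSkip = 31, and NumTokens = len(ids) = 31, which matches the regenerated examples/uc/token/token.go further down.
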
8 changes: 8 additions & 0 deletions cmd/genlex/token.go.tmpl
@@ -24,6 +24,9 @@ func (tok *Token) String() string {
// ID is the set of lexical tokens of the source language.
type ID int

+ // None represents a non-existent token ID.
+ const None ID = 0
+
// Minimum and maximum token ID for each category of tokens, as specified by the
// language grammar.
const (
@@ -61,8 +64,13 @@ func (id ID) IsSkip() bool {
return minSkip <= id && id <= maxSkip
}

+ // NTokens specifies the number of unique token IDs recognized by the language
+ // grammar.
+ const NTokens = {{ .NumTokens }}
+
// IDs specifies the string representation of each token ID.
var IDs = [...]string{
"NONE(0)",
{{- range .IDs }}
"{{ . }}",
{{- end }}
4 changes: 3 additions & 1 deletion examples/uc/Makefile
@@ -9,7 +9,9 @@ lexer: uc.json
genlex $<

clean:
- rm -rf token lexer
+ rm -f token/token.go lexer/lexer.go
+ -rmdir --ignore-fail-on-non-empty token
+ -rmdir --ignore-fail-on-non-empty lexer
rm -f uc.json

.PHONY: all clean
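
A note on the intent of the new clean rule (an inference; the commit message does not spell it out): rm -rf token lexer removed the whole directories, including any hand-written files kept next to the generated ones, whereas the new rule deletes only the generated token/token.go and lexer/lexer.go and then removes each directory only if it ends up empty (rmdir --ignore-fail-on-non-empty leaves non-empty directories in place without failing the target).
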
6 changes: 3 additions & 3 deletions examples/uc/lexer/lexer.go
@@ -92,7 +92,7 @@ func (l *Lexer) Scan() (*token.Token, error) {
// input.
func locateTokens(input []byte, loc []int) (n int, id token.ID, err error) {
n = -1
- for i := 0; i < len(token.IDs); i++ {
+ for i := 0; i < token.NTokens; i++ {
start := loc[2*i]
if start == -1 {
continue
@@ -105,7 +105,7 @@ func locateTokens(input []byte, loc []int) (n int, id token.ID, err error) {
return 0, 0, errors.Errorf("ambiguity detected; input matches both token %q and token %q", input[:n], input[:end])
}
n = end
- id = token.ID(i)
+ id = token.ID(i+1)
}
if n == -1 {
// no matching token located.
@@ -125,7 +125,7 @@ func tokenLocs(input []byte) ([]int, error) {
// Validate submatch indices length; expecting two indices - start and end -
// per submatch, and in total 2 + (number of tokens) submatches.
got := len(loc)
- want := 2 * (2 + len(token.IDs))
+ want := 2 * (2 + token.NTokens)
if got != want {
return nil, errors.Errorf("invalid number of submatches; expected %d, got %d", want, got)
}
82 changes: 45 additions & 37 deletions examples/uc/token/token.go
@@ -24,15 +24,18 @@ func (tok *Token) String() string {
// ID is the set of lexical tokens of the source language.
type ID int

+ // None represents a non-existent token ID.
+ const None ID = 0
+
// Minimum and maximum token ID for each category of tokens, as specified by the
// language grammar.
const (
- minName ID = 0
- maxName ID = 2
- minToken ID = 3
- maxToken ID = 28
- minSkip ID = 29
- maxSkip ID = 30
+ minName ID = 1
+ maxName ID = 3
+ minToken ID = 4
+ maxToken ID = 29
+ minSkip ID = 30
+ maxSkip ID = 31
)

// String returns the string representation of the token ID.
@@ -61,37 +64,42 @@ func (id ID) IsSkip() bool {
return minSkip <= id && id <= maxSkip
}

+ // NTokens specifies the number of unique token IDs recognized by the language
+ // grammar.
+ const NTokens = 31
+
// IDs specifies the string representation of each token ID.
var IDs = [...]string{
"name(0, `char_lit`)",
"name(1, `ident`)",
"name(2, `int_lit`)",
"token(3, `!`)",
"token(4, `!=`)",
"token(5, `&&`)",
"token(6, `(`)",
"token(7, `)`)",
"token(8, `*`)",
"token(9, `+`)",
"token(10, `,`)",
"token(11, `-`)",
"token(12, `/`)",
"token(13, `;`)",
"token(14, `<`)",
"token(15, `<=`)",
"token(16, `=`)",
"token(17, `==`)",
"token(18, `>`)",
"token(19, `>=`)",
"token(20, `[`)",
"token(21, `]`)",
"token(22, `else`)",
"token(23, `if`)",
"token(24, `return`)",
"token(25, `typedef`)",
"token(26, `while`)",
"token(27, `{`)",
"token(28, `}`)",
"skip(29, `comment`)",
"skip(30, `whitespace`)",
"NONE(0)",
"name(1, `char_lit`)",
"name(2, `ident`)",
"name(3, `int_lit`)",
"token(4, `!`)",
"token(5, `!=`)",
"token(6, `&&`)",
"token(7, `(`)",
"token(8, `)`)",
"token(9, `*`)",
"token(10, `+`)",
"token(11, `,`)",
"token(12, `-`)",
"token(13, `/`)",
"token(14, `;`)",
"token(15, `<`)",
"token(16, `<=`)",
"token(17, `=`)",
"token(18, `==`)",
"token(19, `>`)",
"token(20, `>=`)",
"token(21, `[`)",
"token(22, `]`)",
"token(23, `else`)",
"token(24, `if`)",
"token(25, `return`)",
"token(26, `typedef`)",
"token(27, `while`)",
"token(28, `{`)",
"token(29, `}`)",
"skip(30, `comment`)",
"skip(31, `whitespace`)",
}
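
A small usage sketch against the regenerated package. The import path is a placeholder, since it depends on where the generated package lives in your module; the expected results follow directly from the constants and table above.

package main

import (
	"fmt"

	"path/to/examples/uc/token" // placeholder: import path of the generated package
)

func main() {
	// Valid uc token IDs are now 1..31; 0 is the NONE sentinel.
	fmt.Println(int(token.None), token.NTokens) // 0 31
	fmt.Println(len(token.IDs))                 // 32: 31 valid IDs plus the NONE(0) entry

	// Whitespace moved from skip ID 30 to skip ID 31.
	ws := token.ID(31)
	fmt.Println(ws.IsSkip())   // true
	fmt.Println(token.IDs[ws]) // skip(31, `whitespace`)
}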
