/
lexer.go
121 lines (104 loc) · 2.84 KB
/
lexer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
package main
import (
"fmt"
"io"
"os"
"regexp"
"strings"
"github.com/macrat/simplexer"
)
// Position is a location in a source file: the embedded simplexer.Position
// supplies Line and Column, and Filename names the file they refer to.
type Position struct {
simplexer.Position
// Filename is the name printed in front of the line and column by String.
Filename string
}
// String renders the position as "filename:line:column".
//
// Line and Column are each printed with 1 added (presumably because
// simplexer counts them from zero; confirm against the simplexer docs).
func (p Position) String() string {
	line := p.Line + 1
	column := p.Column + 1
	return fmt.Sprintf("%s:%d:%d", p.Filename, line, column)
}
// Lexer adapts a simplexer.Lexer to the Lex/Error method pair used by the
// generated parser (the yySymType parameter of Lex suggests goyacc).
type Lexer struct {
// lexer is the underlying scanner configured by NewLexer.
lexer *simplexer.Lexer
// result is not touched by the methods defined in this file; presumably the
// parser stores the parsed expression here (TODO confirm in the grammar).
result Expression
// lastToken is the most recent token returned by Lex. It stays nil until
// Lex has produced a token, and Error reads it to place the "^" marker.
lastToken *simplexer.Token
// lastPosition is the position of lastToken, including Filename.
lastPosition Position
// Filename is copied into the Position of every token produced by Lex.
// NewLexer leaves it empty.
Filename string
}
// NewLexer returns a Lexer that tokenizes the source read from reader.
//
// Spaces and tabs are treated as whitespace. The order of the token types is
// significant (assuming simplexer tries them in slice order; confirm against
// the simplexer documentation): COMPARE_OPERATOR precedes DEFINE_OPERATOR so
// that "==" is not read as two "=" tokens, and the final "." type catches any
// single character not matched above. Lex reports such a character using its
// own byte value as the token ID.
//
// The returned Lexer has an empty Filename; callers that want file names in
// token positions must set it themselves.
func NewLexer(reader io.Reader) *Lexer {
	l := simplexer.NewLexer(reader)

	l.Whitespace = simplexer.NewPatternTokenType(-1, []string{" ", "\t"})

	l.TokenTypes = []simplexer.TokenType{
		simplexer.NewRegexpTokenType(NEWLINE, `[\n\r]+`),
		simplexer.NewRegexpTokenType(NUMBER, `[0-9]+`),
		simplexer.NewRegexpTokenType(COMPARE_OPERATOR, `(?:[=!]=|>=?|<=?)`),
		simplexer.NewPatternTokenType(DEFINE_OPERATOR, []string{":=", "="}),
		simplexer.NewPatternTokenType(CALCULATE_DEFINE_OPERATOR, []string{"+=", "-=", "*=", "/="}),
		simplexer.NewPatternTokenType(FUNCTION_SEP, []string{"){"}),
		// Keywords need a trailing word boundary. A plain prefix match would
		// split identifiers such as "iffy" or "elsewhere" into a keyword
		// followed by a leftover identifier.
		simplexer.NewRegexpTokenType(IF, `if\b`),
		simplexer.NewRegexpTokenType(ELSE, `else\b`),
		simplexer.NewPatternTokenType(ELLIPSIS, []string{"..."}),
		simplexer.NewRegexpTokenType(STRING, `"((?:\\\\|\\"|[^"])*)"|'((?:\\\\|\\'|[^'])*)'`),
		simplexer.NewRegexpTokenType(IDENTIFIER, `[a-zA-Z_][a-zA-Z0-9_]*|:[^ \t\n\r]:|[^ \t\n\r]:`),
		simplexer.NewRegexpTokenType(0, `.`),
	}

	return &Lexer{
		lexer: l,
	}
}
// stringEscapePattern matches the backslash escapes that Lex decodes inside
// string literals: \n, \r, \t, \\, \" and \'. It is compiled once here rather
// than on every string token.
var stringEscapePattern = regexp.MustCompile(`\\[nrt\\"']`)

// Lex scans the next token, stores it in lval.token and returns its ID.
//
// Return value:
//   - the ID of the matched token type;
//   - for the catch-all type (ID 0), the first byte of the literal;
//   - -1 when the scanner yields no token (presumably end of input).
//
// Literal adjustments:
//   - CALCULATE_DEFINE_OPERATOR: the literal is reduced to the operator
//     without its trailing "=" (e.g. "+=" becomes "+").
//   - STRING: the literal is the text between the quotes with the escapes
//     matched by stringEscapePattern decoded.
//
// On a scan error the problem is printed to standard error together with the
// offending line and a "^" marker, and the process exits with status 1.
func (l *Lexer) Lex(lval *yySymType) int {
	token, err := l.lexer.Scan()
	if err != nil {
		if e, ok := err.(simplexer.UnknownTokenError); ok {
			fmt.Fprintln(os.Stderr, e.Error()+":")
			fmt.Fprintln(os.Stderr, l.lexer.GetLastLine())
			fmt.Fprintln(os.Stderr, strings.Repeat(" ", e.Position.Column)+strings.Repeat("^", len(e.Literal)))
		} else {
			l.Error(err.Error())
		}
		os.Exit(1)
	}
	if token == nil {
		return -1
	}

	tokenID := int(token.Type.GetID())
	if tokenID == 0 {
		// Catch-all token: the character itself serves as the token ID.
		tokenID = int(token.Literal[0])
	}

	pos := Position{
		Position: token.Position,
		Filename: l.Filename,
	}
	lval.token = Token{
		Token:   tokenID,
		Literal: token.Literal,
		Pos:     pos,
	}

	switch tokenID {
	case CALCULATE_DEFINE_OPERATOR:
		// This token type is declared as a pattern type in NewLexer, and
		// pattern types are not expected to carry submatches, so indexing
		// Submatches unconditionally can panic. Prefer the submatch when one
		// exists; otherwise strip the trailing "=" from the literal.
		if len(token.Submatches) > 0 {
			lval.token.Literal = token.Submatches[0]
		} else {
			lval.token.Literal = strings.TrimSuffix(token.Literal, "=")
		}
	case STRING:
		// Only one of the two capture groups (double- or single-quoted body)
		// participates in a match; the other is empty, so concatenating them
		// yields the body whichever quote style was used.
		body := token.Submatches[0] + token.Submatches[1]
		lval.token.Literal = stringEscapePattern.ReplaceAllStringFunc(body, func(s string) string {
			switch s[1] {
			case 'n':
				return "\n"
			case 'r':
				return "\r"
			case 't':
				return "\t"
			case '\\':
				return "\\"
			case '"':
				return "\""
			case '\'':
				return "'"
			}
			return ""
		})
	}

	l.lastToken = token
	l.lastPosition = pos

	return tokenID
}
// Error reports the message e to standard error and terminates the process.
//
// It prints e followed by ":", then the line returned by the underlying
// lexer's GetLastLine, then a row of "^" characters placed under the last
// token returned by Lex. It never returns: it always calls os.Exit(1).
func (l *Lexer) Error(e string) {
	fmt.Fprintln(os.Stderr, e+":")
	fmt.Fprintln(os.Stderr, l.lexer.GetLastLine())

	// lastToken is nil until Lex has returned at least one token, for example
	// on empty input or when the very first scan fails. Skip the marker line
	// in that case instead of dereferencing a nil pointer.
	if tok := l.lastToken; tok != nil {
		fmt.Fprintln(os.Stderr, strings.Repeat(" ", tok.Position.Column)+strings.Repeat("^", len(tok.Literal)))
	}

	os.Exit(1)
}