-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexer.go
156 lines (136 loc) · 3.39 KB
/
lexer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
// Package scheme implements a lexical analyzer for Scheme source code.
// The lexer returns each token from the source and classifies its type.
package scheme
import (
"fmt"
"regexp"
"strings"
"text/scanner"
)
// Lexer tokenizes Scheme source code. It embeds text/scanner's Scanner
// and layers Scheme-specific token recognition on top (identifiers,
// numbers, booleans, strings, quote characters).
type Lexer struct {
	scanner.Scanner
	results []Object // NOTE(review): not referenced in this file — presumably filled by the parser; confirm at call sites.
}
// Token type codes. EOF follows text/scanner's convention of negative
// token values; the remaining codes count further downward (-2, -3, …).
// NOTE(review): these named codes are not referenced in this file —
// TokenType returns the goyacc-generated IDENTIFIER/NUMBER/BOOLEAN/STRING
// constants instead; confirm whether this block is still used elsewhere.
const (
	EOF = -(iota + 1)
	IdentifierToken
	IntToken
	BooleanToken
	StringToken
)
var identifierChars = "a-zA-Z?!*/<=>:$%^&_~"
var numberChars = "0-9+-."
var identifierExp = fmt.Sprintf("[%s][%s%s]*", identifierChars, identifierChars, numberChars)
// NewLexer builds a Lexer that reads tokens from the given source string.
func NewLexer(source string) *Lexer {
	l := &Lexer{}
	l.Init(strings.NewReader(source))
	// Disable char-literal scanning: single quotes are handled manually
	// in nextToken so "'" becomes its own token.
	l.Mode &^= scanner.ScanChars
	return l
}
// Lex implements the interface expected by the goyacc-generated parser:
// it reports the next token's type code and stores the token text in lval.
func (l *Lexer) Lex(lval *yySymType) int {
	// Peek the type BEFORE consuming: TokenType is non-destructive,
	// NextToken advances the read position.
	tokenType := int(l.TokenType())
	lval.token = l.NextToken()
	return tokenType
}
// Error satisfies the parser's error-reporting interface. The message is
// raised as a panic; ensureAvailability (deferred from NextToken)
// recovers it so scanning does not crash the process.
func (l *Lexer) Error(e string) {
	panic(e)
}
// TokenType reports the upcoming token's type without consuming it (the
// value receiver works on a copy of the scanner, so peeking here is
// non-destructive). It returns one of the parser token codes, EOF for
// blank/empty input, or — for punctuation such as parentheses — the
// token's first unicode character.
func (l Lexer) TokenType() rune {
	token := l.PeekToken()
	switch {
	case l.matchRegexp(token, "^[ ]*$"):
		return EOF
	case l.matchRegexp(token, fmt.Sprintf("^(%s|\\+|-)$", identifierExp)):
		return IDENTIFIER
	case l.matchRegexp(token, "^-?[0-9]+$"):
		return NUMBER
	case l.matchRegexp(token, "^#(f|t)$"):
		return BOOLEAN
	case l.matchRegexp(token, "\"[^\"]*\""):
		return STRING
	}
	// Not a recognized token class: hand back the leading character
	// (safe: the empty string was already caught by the EOF case).
	return []rune(token)[0]
}
// PeekToken returns the next token without consuming it. The value
// receiver means nextToken runs against a copy of the scanner, leaving
// the real read position untouched.
func (l Lexer) PeekToken() string {
	token := l.nextToken()
	return token
}
// NextToken consumes and returns the next token, advancing the read
// position. A panic raised while scanning (see Error) is swallowed by
// the deferred recover, in which case the zero string is returned.
func (l *Lexer) NextToken() (token string) {
	defer l.ensureAvailability()
	token = l.nextToken()
	return token
}
// IndentLevel returns how many parentheses remain open at the end of the
// source: "(" count minus ")" count. The value receiver scans a copy of
// the lexer, so the read position is unaffected.
func (l Lexer) IndentLevel() int {
	depth := 0
	for _, token := range l.AllTokens() {
		switch token {
		case "(":
			depth++
		case ")":
			depth--
		}
	}
	return depth
}
// AllTokens scans the remaining source and returns every token in order.
// Reading stops at the first empty token (end of input). The value
// receiver means the caller's lexer position is left unchanged.
func (l Lexer) AllTokens() []string {
	collected := []string{}
	for token := l.NextToken(); token != ""; token = l.NextToken() {
		collected = append(collected, token)
	}
	return collected
}
// nextToken scans and returns the next raw token string, patching up
// several text/scanner behaviors so Scheme tokens come back whole.
// It is destructive: the scanner's read position advances.
func (l *Lexer) nextToken() string {
	// text/scanner scans text which starts with "'" in one token, so
	// consume just the quote character and return it as its own token.
	if l.Peek() == '\'' {
		l.Next()
		return "'"
	}
	l.Scan()
	if l.TokenText() == "#" {
		// text/scanner scans '#t' as '#' and 't': re-join the two
		// pieces into one boolean token.
		l.Scan()
		switch l.TokenText() {
		case "t", "f":
			return fmt.Sprintf("#%s", l.TokenText())
		default:
			runtimeError("Tokens which start from '#' are not implemented except #f, #t.")
		}
	} else if l.matchRegexp(l.TokenText(), fmt.Sprintf("^%s$", identifierExp)) {
		// text/scanner scans some signs as splitted token from alphabet token,
		// so keep gluing adjacent identifier characters (e.g. "set" + "!").
		text := l.TokenText()
		for l.isIdentifierChar(l.Peek()) {
			l.Scan()
			text = fmt.Sprintf("%s%s", text, l.TokenText())
		}
		return text
	} else if l.TokenText() == "-" && l.matchRegexp(fmt.Sprintf("%c", l.Peek()), "[0-9]") {
		// A "-" immediately followed by a digit is a negative number:
		// attach the sign to the digits that follow.
		text := l.TokenText()
		l.Scan()
		text = text + l.TokenText()
		return text
	}
	return l.TokenText()
}
// isIdentifierChar reports whether char may appear inside an identifier
// (any identifier character, digit, sign, or dot).
func (l Lexer) isIdentifierChar(char rune) bool {
	pattern := fmt.Sprintf("^[%s%s]$", identifierChars, numberChars)
	return l.matchRegexp(fmt.Sprintf("%c", char), pattern)
}
// compiledRegexps caches compiled patterns: matchRegexp is called with a
// small fixed set of patterns on every token, and recompiling each time
// is wasted work. NOTE(review): assumes the lexer runs in a single
// goroutine (consistent with the lack of locking elsewhere in this
// file) — confirm before sharing lexers across goroutines.
var compiledRegexps = map[string]*regexp.Regexp{}

// matchRegexp reports whether matchString matches the regular expression
// given by expression. An invalid pattern is reported via runtimeError
// (expected to panic, as before; if it returned, the nil regexp would
// panic below — matching the original behavior).
func (l *Lexer) matchRegexp(matchString string, expression string) bool {
	re, cached := compiledRegexps[expression]
	if !cached {
		var err error
		re, err = regexp.Compile(expression)
		if err != nil {
			runtimeError(err.Error())
		}
		compiledRegexps[expression] = re
	}
	return re.MatchString(matchString)
}
// ensureAvailability swallows a panic raised during scanning (see Error)
// so NextToken returns normally instead of crashing. recover only takes
// effect because this method is invoked via defer from NextToken.
func (l *Lexer) ensureAvailability() {
	// Error message will be printed by interpreter
	recover()
}