-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexer.go
145 lines (137 loc) · 3.26 KB
/
lexer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
package gorillang
// Lexer holds the scanning state used to turn source text into tokens, one
// rune at a time.
type Lexer struct {
	// input is the source text as a rune slice, so each index addresses one
	// whole character rather than one byte.
	input []rune

	// position is the index of the rune currently held in ch; readPosition is
	// the index of the rune that the next read will load (the one after ch).
	position, readPosition int

	// ch is the rune currently being examined. It is 0 once reading has run
	// past the end of input.
	ch rune

	// isAmongWhiteSpace reports whether the lexer is part-way through a
	// whitespace-delimited run: NextToken sets it after emitting a PREFIX
	// token and clears it on whitespace and at end of input. While it is
	// set, the PREFIX spelling is lexed as X0 instead.
	isAmongWhiteSpace bool
}
// NewLexer builds a Lexer over input and loads the first rune, so the
// returned lexer is ready for NextToken. For an empty input the current rune
// is 0.
func NewLexer(input string) *Lexer {
	lex := new(Lexer)
	// Decode once up front so later indexing is per character, not per byte.
	lex.input = []rune(input)
	lex.readChar()
	return lex
}
// NextToken returns the token that begins at the current rune and advances
// the lexer just past it.
//
// Recognised spellings:
//
//	" "     WHITESPACE
//	ウホ    PREFIX (first occurrence since start/whitespace), X0 afterwards
//	ウウ    X1
//	ホホ    X2
//	ゥホ    X3
//	ホゥ    X4
//	ホッ    X5
//	ウォ    X6
//	うほ    X7
//	?       X8
//	オホ    X9
//	ゥゥ    XA
//	ッッ    XB
//	ッ!     XC
//	ォ!     XD
//	!!      XE
//	ウッホ  XF
//
// Anything else produces an ILLEGAL token. When a known first rune is
// followed by an unexpected one, only the first rune is consumed, so the
// second is lexed again on the next call. The exception is ウッ not followed
// by ホ, where both runes are consumed and reported together. A rune that
// cannot start any spelling is likewise reported as a one-rune ILLEGAL token.
//
// When the current rune is 0, which is what readChar stores once the input
// is exhausted, an EOF token is returned; further calls keep returning EOF.
func (l *Lexer) NextToken() Token {
	// pair consumes the current rune plus the one after it as a single
	// two-rune token of the given type.
	pair := func(tt TokenType) Token {
		first := l.ch
		l.readChar()
		return Token{Type: tt, Literal: []rune{first, l.ch}}
	}
	// illegal reports just the current rune as unrecognised.
	illegal := func() Token {
		return Token{Type: ILLEGAL, Literal: []rune{l.ch}}
	}

	var tok Token
	switch l.ch {
	case ' ':
		tok = Token{Type: WHITESPACE, Literal: []rune{l.ch}}
		l.isAmongWhiteSpace = false
	case 'ウ':
		switch l.peekChar() {
		case 'ホ':
			// The same spelling is PREFIX the first time it appears in a
			// whitespace-delimited run and X0 on every later appearance.
			if l.isAmongWhiteSpace {
				tok = pair(X0)
			} else {
				tok = pair(PREFIX)
				l.isAmongWhiteSpace = true
			}
		case 'ウ':
			tok = pair(X1)
		case 'ォ':
			tok = pair(X6)
		case 'ッ':
			// The only three-rune spelling: ウッホ.
			first := l.ch
			l.readChar()
			second := l.ch
			if l.peekChar() == 'ホ' {
				l.readChar()
				tok = Token{Type: XF, Literal: []rune{first, second, l.ch}}
			} else {
				tok = Token{Type: ILLEGAL, Literal: []rune{first, second}}
			}
		default:
			tok = illegal()
		}
	case 'ホ':
		switch l.peekChar() {
		case 'ホ':
			tok = pair(X2)
		case 'ゥ':
			tok = pair(X4)
		case 'ッ':
			tok = pair(X5)
		default:
			tok = illegal()
		}
	case 'ゥ':
		switch l.peekChar() {
		case 'ホ':
			tok = pair(X3)
		case 'ゥ':
			tok = pair(XA)
		default:
			tok = illegal()
		}
	case 'う':
		switch l.peekChar() {
		case 'ほ':
			tok = pair(X7)
		default:
			tok = illegal()
		}
	case 'オ':
		switch l.peekChar() {
		case 'ホ':
			tok = pair(X9)
		default:
			tok = illegal()
		}
	case 'ッ':
		switch l.peekChar() {
		case 'ッ':
			tok = pair(XB)
		case '!':
			tok = pair(XC)
		default:
			tok = illegal()
		}
	case 'ォ':
		switch l.peekChar() {
		case '!':
			tok = pair(XD)
		default:
			tok = illegal()
		}
	case '!':
		switch l.peekChar() {
		case '!':
			tok = pair(XE)
		default:
			tok = illegal()
		}
	case '?':
		tok = Token{Type: X8, Literal: []rune{l.ch}}
	case 0:
		tok = Token{Type: EOF, Literal: []rune{l.ch}}
		l.isAmongWhiteSpace = false
	default:
		// Without this branch an unknown rune would be swallowed and handed
		// back as a zero-value Token, which callers cannot tell apart from
		// a real token type.
		tok = illegal()
	}

	// Step past the last rune of the token just produced.
	l.readChar()
	return tok
}
// readChar loads the rune at readPosition into ch and advances both
// position and readPosition by one. Once readPosition is at or beyond the
// end of input, ch is set to 0; the positions still advance.
func (l *Lexer) readChar() {
	next := l.readPosition

	l.ch = 0
	if next < len(l.input) {
		l.ch = l.input[next]
	}

	l.position = next
	l.readPosition = next + 1
}
// peekChar returns the rune at readPosition without advancing the lexer, or
// 0 when readPosition is at or beyond the end of input.
func (l *Lexer) peekChar() rune {
	if idx := l.readPosition; idx < len(l.input) {
		return l.input[idx]
	}
	return 0
}