/
regexp.rb
executable file
·320 lines (281 loc) · 6.33 KB
/
regexp.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
#!/usr/bin/env ruby
require 'ytljit.rb'
include YTLJit
# One node of the automaton built by YTLRegexp.
#
# Outgoing edges are kept in @transfer, a Hash mapping an edge label to an
# Array of target states. The labels used in this file are:
#   nil     - epsilon edge (taken without consuming input)
#   true    - wildcard edge (created for '.')
#   String  - a single-character edge
#
# The owning registry object (regobj) must respond to curid, add_state and
# newstate.
class State
  attr_reader :id
  attr_accessor :transfer
  attr_accessor :isend

  # Takes the next id from regobj and registers the new state with it.
  def initialize(regobj)
    @regobj = regobj
    @id = regobj.curid
    regobj.add_state self
    reset
    @isend = false
  end

  # Drops every outgoing edge, leaving empty epsilon and wildcard lists.
  # @isend is left unchanged.
  def reset
    @transfer = { nil => [], true => [] }
  end

  # Returns a newly registered state (with its own id) carrying a copy of
  # this state's edges and accepting flag.
  #
  # Each target list is duplicated so that adding an edge to the copy never
  # shows up on the original (a plain Hash#clone would share the arrays).
  def clone
    copy = @regobj.newstate
    edges = {}
    @transfer.each { |label, targets| edges[label] = targets.dup }
    copy.transfer = edges
    copy.isend = @isend
    copy
  end

  # Adds an edge labelled c to state +to+.
  #
  # When c is non-nil and this state already has a target list for c (the
  # wildcard list always exists), the edge is not added here: a fresh state
  # is created, reached from this one by an epsilon edge, and the new edge
  # is attached to that state instead.
  def add_edge(c, to)
    trans = @transfer[c]
    if c && trans
      ns = @regobj.newstate
      @transfer[nil].push ns
      trans = ns.transfer[c] = []
    elsif trans.nil?
      trans = @transfer[c] = []
    end
    trans.push to unless trans.include?(to)
  end

  # Appends to res this state and every state reachable from it through
  # epsilon edges, and returns res. A state already present in res is not
  # expanded again, which terminates cycles.
  def epsilon_nodes(res)
    return res if res.include?(self)

    res.push self
    @transfer[nil].each do |st|
      res = st.epsilon_nodes(res)
      # NOTE(review): after the recursive call st is always in res, so this
      # always appends a second copy of st. collect_edge then sees the edges
      # of st twice, which makes its `size > 1` merge fire even for a single
      # target. Kept as is because the generated automaton currently depends
      # on it - confirm the intent before changing it to `unless`.
      res.push st if res.include?(st)
    end
    res
  end

  # Builds a merged edge table for the given list of states.
  #
  # For every non-nil label the target lists of all nodes are concatenated.
  # When a label ends up with more than one target, the targets (followed by
  # the current wildcard targets) are chained pairwise with epsilon edges
  # and only the first target is kept for that label. The returned table
  # always has an empty epsilon list.
  def collect_edge(nodes)
    trans = {}
    trans[true] = []
    nodes.each do |st|
      st.transfer.each do |c, st2|
        next unless c

        trans[c] ||= []
        trans[c] += st2
      end
    end
    # Only existing keys are reassigned inside the loop, so iterating the
    # hash while updating it is safe. The wildcard entry was inserted first
    # and is therefore processed before the character labels.
    trans.each do |c, targets|
      next unless targets.size > 1

      (targets + trans[true]).each_cons(2) do |e0, e1|
        e0.add_edge(nil, e1)
      end
      trans[c] = [targets[0]]
    end
    trans[nil] = []
    trans
  end

  # Replaces this state's edges with the merged table of its epsilon
  # closure, and marks it accepting when any state in the closure is.
  def translate_dfa
    enodes = epsilon_nodes([])
    @isend = enodes.any? { |e| e.isend }
    @transfer = collect_edge(enodes)
  end

  # Text dump: the id on the first line, then one line per non-epsilon edge.
  def inspect
    res = "#{@id}\n"
    @transfer.each do |c, targets|
      next if c.nil?

      targets.each do |ele|
        res << " #{c} -> #{ele.id} #{" END" if @isend}\n"
      end
    end
    res
  end
end
# Emits machine code for the states of a YTLRegexp through the ytljit
# assembler and runs it against a string.
#
# Register map
#
#   eax  work
#   esi  work
#   edi  pointer to current char
class StateCompiler
  # Allocates one CodeSpace per state (indexed by state id) plus the four
  # fixed code spaces used for the entry sequence and the failure exit.
  def initialize(regobj)
    @regobj = regobj
    @state_codespace = []
    regobj.states.each { |state| @state_codespace[state.id] = CodeSpace.new }
    @csstart = CodeSpace.new
    @csstart2 = CodeSpace.new
    @csmain = CodeSpace.new
    @failend = CodeSpace.new
  end

  # Emits the code for a single state into that state's code space.
  def compile_1state(st)
    asm = Assembler.new(@state_codespace[st.id])
    if st.isend
      emit_accept(asm)
    else
      emit_step(asm, st)
    end
  end

  # Emits the entry sequence, the failure exit and then every state.
  def compile
    entry_asm = Assembler.new(@csstart)
    second_stage = @csstart2
    entry_asm.with_retry do
      entry_asm.mov(X86::ESI, X86::EAX)
      entry_asm.jmp(second_stage.var_base_address)
    end

    # NOTE(review): presumably emits into @csstart2 the code that fetches
    # the character pointer of the Ruby string held in ESI and then
    # continues in @csmain - confirm against ytljit.
    InternalRubyType.rstring_ptr(X86::ESI, @csstart2, @csmain)

    main_asm = Assembler.new(@csmain)
    spaces = @state_codespace
    main_asm.with_retry do
      main_asm.mov(X86::EDI, X86::EAX)
      # State 0 is where matching starts.
      main_asm.jmp(spaces[0].var_base_address)
    end

    fail_asm = Assembler.new(@failend)
    fail_asm.with_retry do
      fail_asm.mov(X86::EAX, OpImmidiate32.new(0))
      fail_asm.ret
    end

    @regobj.states.each { |state| compile_1state(state) }
  end

  # Calls the generated entry code with str as its argument.
  def exec(str)
    @csstart.call(@csstart.base_address, str)
  end

  private

  # Accepting state: load 2 into EAX and return.
  def emit_accept(asm)
    asm.with_retry do
      asm.mov(X86::EAX, OpImmidiate32.new(2))
      asm.ret
    end
  end

  # Non-accepting state: read one byte through EDI, advance EDI, then
  # dispatch on the byte. Everything the block needs is bound to locals
  # first, so the block never touches instance state.
  def emit_step(asm, st)
    spaces = @state_codespace
    fail_space = @failend
    asm.with_retry do
      asm.mov(X86::AL, X86::INDIRECT_EDI)
      asm.add(X86::EDI, OpImmidiate32.new(1))
      # One compare-and-branch per single-character edge; only the first
      # target of each label is used.
      st.transfer.each do |label, targets|
        next unless label.is_a?(String)

        asm.cmp(X86::AL, OpImmidiate8.new(label.ord))
        asm.jz(spaces[targets[0].id].var_base_address)
      end
      # A zero byte jumps to the shared failure exit.
      asm.cmp(X86::AL, OpImmidiate8.new(0))
      asm.jz(fail_space.var_base_address)
      wildcard = st.transfer[true][0]
      if wildcard
        asm.jmp(spaces[wildcard.id].var_base_address)
      else
        asm.mov(X86::EAX, OpImmidiate32.new(0))
        asm.ret
      end
    end
  end
end
# Parses a small regular-expression dialect into a graph of State objects.
#
# Supported syntax: literal characters, '.' (wildcard), '\x' (literal x),
# '(...)' groups, postfix '*' and '|'. Note that '|' alternates between the
# single atoms on either side of it, not between whole sequences.
#
# The object also acts as the registry State relies on: it hands out ids
# (curid), records every state (add_state) and creates states (newstate).
class YTLRegexp
  attr_reader :states

  def initialize
    @states = []
    @numstate = 0
  end

  # Returns the next unused state id and advances the counter.
  def curid
    rc = @numstate
    @numstate += 1
    rc
  end

  # Records a state; called by State#initialize.
  def add_state(state)
    @states.push state
  end

  # Creates a new state registered with this object.
  def newstate
    State.new(self)
  end

  # Parses regstr and returns [start_state, end_state, index_after_parse].
  # Raises RuntimeError on an unmatched ')', an unclosed '(' or a trailing
  # backslash.
  def parse(regstr)
    parse_aux(regstr, 0, 0)
  end

  # Not called anywhere in this file. Returns a fresh state; the edge
  # creation it was meant to perform is commented out.
  def parse_letter(curstate, c)
    # curstate.add_edge(c, newstate)
    newstate
  end

  # Parses regstr from index cp up to the end of the string (nest == 0) or
  # up to the ')' closing the current group (nest > 0).
  #
  # Returns [start_state, end_state, cp]; for a group, cp is the index of
  # its closing ')'. Only a parse that runs to the end of the string marks
  # its end state as accepting.
  #
  # Working variables:
  #   s0    - state in front of the most recent atom
  #   s1    - state the next atom will be attached to
  #   s2    - state behind the most recent atom
  #   orxst - after '|', the state the next atom has to end in
  def parse_aux(regstr, cp, nest)
    start_state = newstate
    s0 = newstate
    start_state.add_edge(nil, s0)
    s1 = s0
    s2 = s1
    orxst = nil
    while cp < regstr.size
      c = regstr[cp]
      case c
      when '('
        before_group = s1
        group_start, group_end, cp = parse_aux(regstr, cp + 1, nest + 1)
        before_group.add_edge(nil, group_start)
        if orxst
          # The group is the right-hand side of '|': join its end with the
          # end of the left-hand atom.
          group_end.add_edge(nil, orxst)
          group_end = orxst
          orxst = nil
        end
        # Treat the group like any other atom: continue from its END state.
        # (Continuing from its start state would let the following atom
        # bypass the group.)
        s0 = before_group
        s1 = s2 = group_end
      when ')'
        return [start_state, s1, cp] if nest > 0

        raise "Illigal \')\'"
      when '*'
        # Move the edges of the state in front of the atom onto a copy, then
        # wire up the loop (s2 -> copy) and the skip (n1 -> n2).
        ns0 = s0.clone
        s0.reset
        n1 = s0
        s1 = ns0
        n2 = newstate
        n1.add_edge(nil, s1)
        s2.add_edge(nil, n2)
        s2.add_edge(nil, s1)
        n1.add_edge(nil, n2)
        s0 = s1
        s1 = n2
      when '|'
        # Go back in front of the previous atom and remember where it ended.
        s1 = s0
        orxst = s2
      else
        label = c
        if c == '\\'
          cp += 1
          # Without this check regstr[cp] would be nil and an epsilon edge
          # would be added silently.
          raise "Trailing '\\' at end of pattern" if cp >= regstr.size

          label = regstr[cp]
        elsif c == '.'
          label = true
        end
        s2 = orxst || newstate
        orxst = nil
        s1.add_edge(label, s2)
        s0 = s1
        s1 = s2
      end
      cp += 1
    end
    # Running off the end of the string inside a group means its '(' was
    # never closed.
    raise "Unclosed '('" if nest > 0

    s1.isend = true
    [start_state, s1, cp]
  end
end
# Driver: parse a pattern, convert every state, dump the states, compile
# them and run the generated matcher on a few sample strings.
regobj = YTLRegexp.new

# Other patterns kept for experimentation:
#   regobj.parse("cb*ab")
#   regobj.parse("(ab)(abc)*(ab)")
#   regobj.parse("c(abc)*ab")
#   regobj.parse("c(abc)*a|b|c")
regobj.parse(".*cabc.*a|b|c")

regobj.states.each(&:translate_dfa)
regobj.states.each { |state| p state }

sc = StateCompiler.new(regobj)
sc.compile

["foo", "cabcd", "cabcb", "cabcccaasssccccddswa"].each do |subject|
  p sc.exec(subject)
end