-
Notifications
You must be signed in to change notification settings - Fork 0
/
compiler.go
327 lines (309 loc) · 9.25 KB
/
compiler.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
//
// Copyright 2014-2015 Nathan Fiedler. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
package core
import (
"sort"
)
//
// byte code compiler borrowed from Eli Bendersky's bobscheme
// (https://github.com/eliben/bobscheme)
//
// Compiler constructs a CodeObject one piece at a time and assembles the
// final result on demand.
type Compiler interface {
// AddInstruction adds the given instruction to the accumulated results.
AddInstruction(code Opcode, arg interface{})
// FindOrAddConstant ensures the constants table holds the given value,
// returning the offset for that constant.
FindOrAddConstant(con interface{}) uint
// FindOrAddSymbol ensures the symbols table holds the given name,
// returning the offset for that symbol.
FindOrAddSymbol(sym Symbol) uint
// Compile produces the byte codes for the given expression
Compile(expr interface{}) LispError
// Assemble pulls the final result into a cohesive set of instructions
// in which all intermediate values have been stripped.
Assemble(name string, args interface{}) CodeObject
}
// compiler is the internal implementation of a Compiler.
type compiler struct {
codes []Opcode // sequence of opcodes for the code object under construction
arguments []interface{} // arguments, one for each opcode; may be nil
constants []interface{} // table of constant values (typically Atoms)
symbols []Symbol // table of symbol names
labels uint // number of labels so far, used to generate unique labels
lines []byteLinePair // byte offset and line number pairings
}
// NewCompiler constructs an instance of Compiler.
func NewCompiler() Compiler {
comp := new(compiler)
comp.codes = make([]Opcode, 0)
comp.arguments = make([]interface{}, 0)
comp.constants = make([]interface{}, 0)
comp.symbols = make([]Symbol, 0)
comp.lines = make([]byteLinePair, 0)
return comp
}
func (c *compiler) AddInstruction(code Opcode, arg interface{}) {
c.codes = append(c.codes, code)
c.arguments = append(c.arguments, arg)
}
func (c *compiler) FindOrAddConstant(con interface{}) uint {
con_a, con_is_atom := con.(Atom)
for idx, val := range c.constants {
if con_is_atom {
if val_a, val_is_atom := val.(Atom); val_is_atom {
if atomsEqual(con_a, val_a) {
return uint(idx)
}
}
}
if val == con {
return uint(idx)
}
}
idx := len(c.constants)
c.constants = append(c.constants, con)
return uint(idx)
}
func (c *compiler) FindOrAddSymbol(sym Symbol) uint {
for idx, val := range c.symbols {
if atomsEqual(val, sym) {
return uint(idx)
}
}
idx := len(c.symbols)
c.symbols = append(c.symbols, sym)
return uint(idx)
}
func (c *compiler) createLabel() Instruction {
// starting from zero, go up one each time
c.labels++
return NewInstruction(OP_LABEL, uint(c.labels))
}
// addLineNumber adds line number information to the pending CodeObject for
// the purpose of error reporting.
func (c *compiler) addLineNumber(expr interface{}) {
if pair, is_pair := expr.(Pair); is_pair && pair.Len() > 0 {
expr = pair.First()
}
if loco, is_loc := expr.(Locatable); is_loc {
offset := uint(len(c.codes))
line, _ := loco.Location()
c.lines = append(c.lines, byteLinePair{offset, line})
}
}
func (c *compiler) Compile(expr interface{}) LispError {
c.addLineNumber(expr)
// start with the primitive expressions (R7RS 4.1)
switch obj := expr.(type) {
case Symbol:
// symbol reference
c.AddInstruction(OP_LOADVAR, obj)
case Pair:
// most of the time they're pairs
length := obj.Len()
if length == 0 {
// empty list
c.AddInstruction(OP_CONST, obj)
break
}
first := obj.First()
// assume it is one of the primitives until we learn otherwise
application := false
if sym, issym := first.(Symbol); issym {
if atomsEqual(sym, quoteSym) {
// (quote exp)
c.AddInstruction(OP_CONST, obj.Second())
} else if atomsEqual(sym, ifSym) {
// (if test conseq alt)
labelElse := c.createLabel()
labelAfterElse := c.createLabel()
err := c.Compile(obj.Second())
if err != nil {
return err
}
c.AddInstruction(OP_FJUMP, labelElse)
err = c.Compile(obj.Third())
if err != nil {
return err
}
c.AddInstruction(OP_JUMP, labelAfterElse)
c.AddInstruction(OP_LABEL, labelElse.Argument())
err = c.Compile(Cxr("cadddr", obj))
if err != nil {
return err
}
c.AddInstruction(OP_LABEL, labelAfterElse.Argument())
} else if atomsEqual(sym, setSym) {
// (set! var exp)
err := c.Compile(obj.Third())
if err != nil {
return err
}
name := obj.Second()
if ns, ok := name.(Symbol); ok {
c.AddInstruction(OP_STOREVAR, ns)
} else {
return NewLispErrorf(EARGUMENT, "name %v not a symbol", name)
}
} else if atomsEqual(sym, defineSym) {
// (define name exp)
err := c.Compile(obj.Third())
if err != nil {
return err
}
name := obj.Second().(Symbol)
// If the value is a lambda, assign the name for error reporting.
if c.codes[len(c.codes)-1] == OP_FUNCTION {
co := c.arguments[len(c.arguments)-1].(CodeObject)
co.setName(name.String())
}
c.AddInstruction(OP_DEFVAR, name)
} else if atomsEqual(sym, lambdaSym) {
// (lambda (var*) exp)
vars := obj.Second()
co, err := CompileLambda(vars, obj.Third())
if err != nil {
return err
}
c.AddInstruction(OP_FUNCTION, co)
} else if atomsEqual(sym, beginSym) {
// (begin exp+)
iter := obj.Iterator()
iter.Next()
for iter.HasNext() {
err := c.Compile(iter.Next())
if err != nil {
return err
}
// last entry does _not_ get a POP
if iter.HasNext() {
c.AddInstruction(OP_POP, nil)
}
}
} else {
application = true
}
} else {
application = true
}
if application {
// compile all of the arguments in left-to-right order
iter := obj.Iterator()
iter.Next()
var arg_count uint = 0
for iter.HasNext() {
err := c.Compile(iter.Next())
if err != nil {
return err
}
arg_count++
}
c.Compile(obj.First())
c.AddInstruction(OP_CALL, arg_count)
}
default:
// must be an atom
c.AddInstruction(OP_CONST, expr)
}
return nil
}
func (c *compiler) Assemble(name string, args interface{}) CodeObject {
// Compute the label offsets, not counting the labels themselves toward
// the final result as they will be pruned.
offsets := make(map[uint]uint)
var offset uint = 0
for idx, code := range c.codes {
if code == OP_LABEL {
label := c.arguments[idx].(uint)
offsets[label] = offset
} else {
offset++
}
}
// codes will be a little oversized because the intermediate code has
// the label placeholders, while the final result will not
codes := make([]Instruction, 0, len(c.codes))
for idx, code := range c.codes {
arg_value := c.arguments[idx]
var arg_num uint = 0
switch code {
case OP_LABEL:
// labels do not contribute to the final result
continue
case OP_FJUMP, OP_JUMP:
instr := arg_value.(Instruction)
arg_num = offsets[instr.Argument()]
case OP_CONST, OP_FUNCTION:
if ws, ok := arg_value.(WireStripper); ok {
arg_num = c.FindOrAddConstant(ws.StripLocation())
} else {
arg_num = c.FindOrAddConstant(arg_value)
}
case OP_LOADVAR, OP_STOREVAR, OP_DEFVAR:
// strip the parser's location information
sym := NewSymbol(arg_value.(Symbol).String())
arg_num = c.FindOrAddSymbol(sym)
case OP_CALL:
arg_num = arg_value.(uint)
}
codes = append(codes, NewInstruction(code, arg_num))
}
// sort the byte-line pairings and remove duplicates
lines := make([]byteLinePair, 0)
if len(c.lines) > 0 {
sort.Sort(byteLinePairs(c.lines))
lines = append(lines, c.lines[0])
prev_pair := c.lines[0]
for _, line := range c.lines[1:] {
if line.line > prev_pair.line && line.offset > prev_pair.offset {
lines = append(lines, line)
prev_pair = line
}
}
}
return NewCodeObject(name, args, codes, c.constants, c.symbols, lines)
}
// Compile converts the parsed and expanded s-expression into a CodeObject
// which can be executed by the stack-based byte code interpreter.
func Compile(name string, expr interface{}) (CodeObject, LispError) {
compiler := NewCompiler()
err := compiler.Compile(expr)
if err != nil {
return nil, err
}
return compiler.Assemble(name, theEmptyList), nil
}
// CompileLambda compiles the given expression assuming that it is a
// lambda expression which accepts the given arguments.
func CompileLambda(args interface{}, expr interface{}) (CodeObject, LispError) {
compiler := NewCompiler()
err := compiler.Compile(expr)
if err != nil {
return nil, err
}
compiler.AddInstruction(OP_RETURN, nil)
return compiler.Assemble("lambda", args), nil
}
// CompileString parses, expands, and compiles the given program.
func CompileString(name string, prog string) (CodeObject, LispError) {
// Thus begins the "inject" operation in CESK, Byte Code Edition (TM).
var err LispError
parser := NewParser()
parsed, err := parser.Parse(prog)
if err != nil {
return nil, err
}
// By ensuring that the program is wrapped inside a (begin ...) we
// create what constitutes the "halt" continuation, as well as the
// "step" function.
expanded, err := parser.Expand(wrapWithBegin(parsed))
if err != nil {
return nil, err
}
return Compile(name, expanded)
}