diff --git a/Makefile b/Makefile index 660975ea7b..6252c6a6e6 100644 --- a/Makefile +++ b/Makefile @@ -187,9 +187,11 @@ tinygo-test: $(TINYGO) test encoding/ascii85 $(TINYGO) test encoding/base32 $(TINYGO) test encoding/hex + $(TINYGO) test hash/adler32 $(TINYGO) test hash/fnv $(TINYGO) test hash/crc64 $(TINYGO) test math + $(TINYGO) test math/cmplx $(TINYGO) test text/scanner $(TINYGO) test unicode/utf8 diff --git a/compiler/interface.go b/compiler/interface.go index fa08c5b0e5..030c67571b 100644 --- a/compiler/interface.go +++ b/compiler/interface.go @@ -465,9 +465,7 @@ func (c *compilerContext) getInterfaceInvokeWrapper(f *ir.Function) llvm.Value { paramTypes := append([]llvm.Type{c.i8ptrType}, fnType.ParamTypes()[len(expandedReceiverType):]...) wrapFnType := llvm.FunctionType(fnType.ReturnType(), paramTypes, false) wrapper = llvm.AddFunction(c.mod, wrapperName, wrapFnType) - if f.LLVMFn.LastParam().Name() == "parentHandle" { - wrapper.LastParam().SetName("parentHandle") - } + wrapper.LastParam().SetName("parentHandle") wrapper.SetLinkage(llvm.InternalLinkage) wrapper.SetUnnamedAddr(true) diff --git a/compiler/llvmutil/wordpack.go b/compiler/llvmutil/wordpack.go index 55666a43c9..230f5dda88 100644 --- a/compiler/llvmutil/wordpack.go +++ b/compiler/llvmutil/wordpack.go @@ -26,7 +26,6 @@ func EmitPointerPack(builder llvm.Builder, mod llvm.Module, config *compileopts. packedType := ctx.StructType(valueTypes, false) // Allocate memory for the packed data. - var packedAlloc, packedHeapAlloc llvm.Value size := targetData.TypeAllocSize(packedType) if size == 0 { return llvm.ConstPointerNull(i8ptrType) @@ -39,9 +38,39 @@ func EmitPointerPack(builder llvm.Builder, mod llvm.Module, config *compileopts. // Try to keep this cast in SSA form. return builder.CreateIntToPtr(values[0], i8ptrType, "pack.int") } + // Because packedType is a struct and we have to cast it to a *i8, store - // it in an alloca first for bitcasting (store+bitcast+load). 
- packedAlloc, _, _ = CreateTemporaryAlloca(builder, mod, packedType, "") + // it in a *i8 alloca first and load the *i8 value from there. This is + // effectively a bitcast. + packedAlloc, _, _ := CreateTemporaryAlloca(builder, mod, i8ptrType, "") + + if size < targetData.TypeAllocSize(i8ptrType) { + // The alloca is bigger than the value that will be stored in it. + // To avoid having some bits undefined, zero the alloca first. + // Hopefully this will get optimized away. + builder.CreateStore(llvm.ConstNull(i8ptrType), packedAlloc) + } + + // Store all values in the alloca. + packedAllocCast := builder.CreateBitCast(packedAlloc, llvm.PointerType(packedType, 0), "") + for i, value := range values { + indices := []llvm.Value{ + llvm.ConstInt(ctx.Int32Type(), 0, false), + llvm.ConstInt(ctx.Int32Type(), uint64(i), false), + } + gep := builder.CreateInBoundsGEP(packedAllocCast, indices, "") + builder.CreateStore(value, gep) + } + + // Load value (the *i8) from the alloca. + result := builder.CreateLoad(packedAlloc, "") + + // End the lifetime of the alloca, to help the optimizer. + packedPtr := builder.CreateBitCast(packedAlloc, i8ptrType, "") + packedSize := llvm.ConstInt(ctx.Int64Type(), targetData.TypeAllocSize(packedAlloc.Type()), false) + EmitLifetimeEnd(builder, mod, packedPtr, packedSize) + + return result } else { // Check if the values are all constants. constant := true @@ -67,7 +96,7 @@ func EmitPointerPack(builder llvm.Builder, mod llvm.Module, config *compileopts. // Packed data is bigger than a pointer, so allocate it on the heap. 
sizeValue := llvm.ConstInt(uintptrType, size, false) alloc := mod.NamedFunction("runtime.alloc") - packedHeapAlloc = builder.CreateCall(alloc, []llvm.Value{ + packedHeapAlloc := builder.CreateCall(alloc, []llvm.Value{ sizeValue, llvm.Undef(i8ptrType), // unused context parameter llvm.ConstPointerNull(i8ptrType), // coroutine handle @@ -80,28 +109,19 @@ func EmitPointerPack(builder llvm.Builder, mod llvm.Module, config *compileopts. llvm.ConstPointerNull(i8ptrType), // coroutine handle }, "") } - packedAlloc = builder.CreateBitCast(packedHeapAlloc, llvm.PointerType(packedType, 0), "") - } - // Store all values in the alloca or heap pointer. - for i, value := range values { - indices := []llvm.Value{ - llvm.ConstInt(ctx.Int32Type(), 0, false), - llvm.ConstInt(ctx.Int32Type(), uint64(i), false), + packedAlloc := builder.CreateBitCast(packedHeapAlloc, llvm.PointerType(packedType, 0), "") + + // Store all values in the heap pointer. + for i, value := range values { + indices := []llvm.Value{ + llvm.ConstInt(ctx.Int32Type(), 0, false), + llvm.ConstInt(ctx.Int32Type(), uint64(i), false), + } + gep := builder.CreateInBoundsGEP(packedAlloc, indices, "") + builder.CreateStore(value, gep) } - gep := builder.CreateInBoundsGEP(packedAlloc, indices, "") - builder.CreateStore(value, gep) - } - if packedHeapAlloc.IsNil() { - // Load value (as *i8) from the alloca. - packedAlloc = builder.CreateBitCast(packedAlloc, llvm.PointerType(i8ptrType, 0), "") - result := builder.CreateLoad(packedAlloc, "") - packedPtr := builder.CreateBitCast(packedAlloc, i8ptrType, "") - packedSize := llvm.ConstInt(ctx.Int64Type(), targetData.TypeAllocSize(packedAlloc.Type()), false) - EmitLifetimeEnd(builder, mod, packedPtr, packedSize) - return result - } else { - // Get the original heap allocation pointer, which already is an *i8. + // Return the original heap allocation pointer, which already is an *i8. 
return packedHeapAlloc } } diff --git a/interp/README.md b/interp/README.md index 1c8e96152c..5ada617c39 100644 --- a/interp/README.md +++ b/interp/README.md @@ -6,50 +6,81 @@ possible and only run unknown expressions (e.g. external calls) at runtime. This is in practice a partial evaluator of the `runtime.initAll` function, which calls each package initializer. -It works by directly interpreting LLVM IR: - - * Almost all operations work directly on constants, and are implemented using - the llvm.Const* set of functions that are evaluated directly. - * External function calls and some other operations (inline assembly, volatile - load, volatile store) are seen as having limited side effects. Limited in - the sense that it is known at compile time which globals it affects, which - then are marked 'dirty' (meaning, further operations on it must be done at - runtime). These operations are emitted directly in the `runtime.initAll` - function. Return values are also considered 'dirty'. - * Such 'dirty' objects and local values must be executed at runtime instead of - at compile time. This dirtyness propagates further through the IR, for - example storing a dirty local value to a global also makes the global dirty, - meaning that the global may not be read or written at compile time as it's - contents at that point during interpretation is unknown. - * There are some heuristics in place to avoid doing too much with dirty - values. For example, a branch based on a dirty local marks the whole - function itself as having side effect (as if it is an external function). - However, all globals it touches are still taken into account and when a call - is inserted in `runtime.initAll`, all globals it references are also marked - dirty. - * Heap allocation (`runtime.alloc`) is emulated by creating new objects. The - value in the allocation is the initializer of the global, the zero value is - the zero initializer. 
- * Stack allocation (`alloca`) is often emulated using a fake alloca object, - until the address of the alloca is taken in which case it is also created as - a real `alloca` in `runtime.initAll` and marked dirty. This may be necessary - when calling an external function with the given alloca as paramter. +This package is a rewrite of a previous partial evaluator that worked +directly on LLVM IR and used the module and LLVM constants as intermediate +values. This newer version instead uses a mostly Go intermediate form. It +compiles functions and extracts relevant data first (compiler.go), then +executes those functions (interpreter.go) in a memory space that can be +rolled back per function (memory.go). This means that it is not necessary to +scan functions to see whether they can be run at compile time, which was very +error prone. Instead it just tries to execute everything and if it hits +something it cannot interpret (such as a store to memory-mapped I/O) it rolls +back the execution of that function and runs the function at runtime instead. +All in all, this design provides several benefits: + + * Much better error handling. By being able to revert to runtime execution + without the need for scanning functions, this version is able to + automatically work around many bugs in the previous implementation. + * More correct memory model. This is not inherent to the new design, but the + new design also made the memory model easier to reason about. + * Faster execution of initialization code. While it is not much faster for + normal interpretation (maybe 25% or so) due to the compilation overhead, + it should be a whole lot faster for loops as it doesn't have to call into + LLVM (via CGo) for every operation. + +As mentioned, this partial evaluator comes in three parts: a compiler, an +interpreter, and a memory manager. 
+ +## Compiler + +The main task of the compiler is that it extracts all necessary data from +every instruction in a function so that when this instruction is interpreted, +no additional CGo calls are necessary. This is not currently done for all +instructions (`runtime.alloc` is a notable exception), but at least it does +so for the vast majority of instructions. + +## Interpreter + +The interpreter runs an instruction just like it would if it were executed +'for real'. The vast majority of instructions can be executed at compile +time. As indicated above, some instructions need to be executed at runtime +instead. + +## Memory + +Memory is represented as objects (the `object` type) that contains data that +will eventually be stored in a global and values (the `value` interface) that +can be worked with while running the interpreter. Values therefore are only +used locally and are always passed by value (just like most LLVM constants) +while objects represent the backing storage (like LLVM globals). Some values +are pointer values, and point to an object. + +Importantly, this partial evaluator can roll back the execution of a +function. This is implemented by creating a new memory view per function +activation, which makes sure that any change to a global (such as a store +instruction) is stored in the memory view. It creates a copy of the object +and stores that in the memory view to be modified. Once the function has +executed successfully, all these modified objects are then copied into the +parent function, up to the root function invocation which (on successful +execution) writes the values back into the LLVM module. This way, function +invocations can be rolled back without leaving a trace. + +Pointer values point to memory objects, but not to a particular memory +object. Every memory object is given an index, and pointers use that index to +look up the current active object for the pointer to load from or to copy +when storing to it. 
+ +Rolling back a function should roll back everything, including the few +instructions emitted at runtime. This is done by treating instructions much +like memory objects and removing the created instructions when necessary. ## Why is this necessary? A partial evaluator is hard to get right, so why go through all the trouble of writing one? -The main reason is that the previous attempt wasn't complete and wasn't sound. -It simply tried to evaluate Go SSA directly, which was good but more difficult -than necessary. An IR based interpreter needs to understand fewer instructions -as the LLVM IR simply has less (complex) instructions than Go SSA. Also, LLVM -provides some useful tools like easily getting all uses of a function or global, -which Go SSA does not provide. - -But why is it necessary at all? The answer is that globals with initializers are -much easier to optimize by LLVM than initialization code. Also, there are a few -other benefits: +The answer is that globals with initializers are much easier to optimize by +LLVM than initialization code. Also, there are a few other benefits: * Dead globals are trivial to optimize away. * Constant globals are easier to detect. Remember that Go does not have global @@ -60,5 +91,29 @@ other benefits: * Constants are much more efficent on microcontrollers, as they can be allocated in flash instead of RAM. +The Go SSA package does not create constant initializers for globals. +Instead, it emits initialization functions, so if you write the following: + +```go +var foo = []byte{1, 2, 3, 4} +``` + +It would generate something like this: + +```go +var foo []byte + +func init() { + foo = make([]byte, 4) + foo[0] = 1 + foo[1] = 2 + foo[2] = 3 + foo[3] = 4 +} +``` + +This is of course hugely wasteful, it's much better to create `foo` as a +global array instead of initializing it at runtime. + For more details, see [this section of the documentation](https://tinygo.org/compiler-internals/differences-from-go/). 
diff --git a/interp/compiler.go b/interp/compiler.go new file mode 100644 index 0000000000..e45df8c13b --- /dev/null +++ b/interp/compiler.go @@ -0,0 +1,410 @@ +package interp + +// This file compiles the LLVM IR to a form that's easy to efficiently +// interpret. + +import ( + "strings" + + "tinygo.org/x/go-llvm" +) + +// A function is a compiled LLVM function, which means that interpreting it +// avoids most CGo calls necessary. This is done in a separate step so the +// result can be cached. +// Functions are in SSA form, just like the LLVM version if it. The first block +// (blocks[0]) is the entry block. +type function struct { + llvmFn llvm.Value + name string // precalculated llvmFn.Name() + params []llvm.Value // precalculated llvmFn.Params() + blocks []*basicBlock + locals map[llvm.Value]int +} + +// basicBlock represents a LLVM basic block and contains a slice of +// instructions. The last instruction must be a terminator instruction. +type basicBlock struct { + instructions []instruction +} + +// instruction is a precompiled LLVM IR instruction. The operands can be either +// an already known value (such as literalValue or pointerValue) but can also be +// the special localValue, which means that the value is a function parameter or +// is produced by another instruction in the function. In that case, the +// interpreter will replace the operand with that local value. +type instruction struct { + opcode llvm.Opcode + localIndex int + operands []value + llvmInst llvm.Value + name string +} + +// String returns a nice human-readable version of this instruction. +func (inst *instruction) String() string { + operands := make([]string, len(inst.operands)) + for i, op := range inst.operands { + operands[i] = op.String() + } + + name := instructionNameMap[inst.opcode] + if name == "" { + name = "" + } + return name + " " + strings.Join(operands, " ") +} + +// compileFunction compiles a given LLVM function to an easier to interpret +// version of the function. 
As far as possible, all operands are preprocessed so +// that the interpreter doesn't have to call into LLVM. +func (r *runner) compileFunction(llvmFn llvm.Value) *function { + fn := &function{ + llvmFn: llvmFn, + name: llvmFn.Name(), + params: llvmFn.Params(), + locals: make(map[llvm.Value]int), + } + if llvmFn.IsDeclaration() { + // Nothing to do. + return fn + } + + for i, param := range fn.params { + fn.locals[param] = i + } + + // Make a map of all the blocks, to quickly find the block number for a + // given branch instruction. + blockIndices := make(map[llvm.Value]int) + for llvmBB := llvmFn.FirstBasicBlock(); !llvmBB.IsNil(); llvmBB = llvm.NextBasicBlock(llvmBB) { + index := len(blockIndices) + blockIndices[llvmBB.AsValue()] = index + } + + // Compile every block. + for llvmBB := llvmFn.FirstBasicBlock(); !llvmBB.IsNil(); llvmBB = llvm.NextBasicBlock(llvmBB) { + bb := &basicBlock{} + fn.blocks = append(fn.blocks, bb) + + // Compile every instruction in the block. + for llvmInst := llvmBB.FirstInstruction(); !llvmInst.IsNil(); llvmInst = llvm.NextInstruction(llvmInst) { + // Create instruction skeleton. + opcode := llvmInst.InstructionOpcode() + inst := instruction{ + opcode: opcode, + localIndex: len(fn.locals), + llvmInst: llvmInst, + } + fn.locals[llvmInst] = len(fn.locals) + + // Add operands specific for this instruction. + switch opcode { + case llvm.Ret: + // Return instruction, which can either be a `ret void` (no + // return value) or return a value. + numOperands := llvmInst.OperandsCount() + if numOperands != 0 { + inst.operands = []value{ + r.getValue(llvmInst.Operand(0)), + } + } + case llvm.Br: + // Branch instruction. Can be either a conditional branch (with + // 3 operands) or unconditional branch (with just one basic + // block operand). + numOperands := llvmInst.OperandsCount() + switch numOperands { + case 3: + // Conditional jump to one of two blocks. Comparable to an + // if/else in procedural languages. 
+ thenBB := llvmInst.Operand(2) + elseBB := llvmInst.Operand(1) + inst.operands = []value{ + r.getValue(llvmInst.Operand(0)), + literalValue{uint32(blockIndices[thenBB])}, + literalValue{uint32(blockIndices[elseBB])}, + } + case 1: + // Unconditional jump to a target basic block. Comparable to + // a jump in C and Go. + jumpBB := llvmInst.Operand(0) + inst.operands = []value{ + literalValue{uint32(blockIndices[jumpBB])}, + } + default: + panic("unknown number of operands") + } + case llvm.PHI: + inst.name = llvmInst.Name() + incomingCount := inst.llvmInst.IncomingCount() + for i := 0; i < incomingCount; i++ { + incomingBB := inst.llvmInst.IncomingBlock(i) + incomingValue := inst.llvmInst.IncomingValue(i) + inst.operands = append(inst.operands, + literalValue{uint32(blockIndices[incomingBB.AsValue()])}, + r.getValue(incomingValue), + ) + } + case llvm.Select: + // Select is a special instruction that is much like a ternary + // operator. It produces operand 1 or 2 based on the boolean + // that is operand 0. + inst.name = llvmInst.Name() + inst.operands = []value{ + r.getValue(llvmInst.Operand(0)), + r.getValue(llvmInst.Operand(1)), + r.getValue(llvmInst.Operand(2)), + } + case llvm.Call: + // Call is a regular function call but could also be a runtime + // intrinsic. Some runtime intrinsics are treated specially by + // the interpreter, such as runtime.alloc. We don't + // differentiate between them here because these calls may also + // need to be run at runtime, in which case they should all be + // created in the same way. + llvmCalledValue := llvmInst.CalledValue() + if !llvmCalledValue.IsAFunction().IsNil() { + name := llvmCalledValue.Name() + if name == "llvm.dbg.value" || strings.HasPrefix(name, "llvm.lifetime.") { + // These intrinsics should not be interpreted, they are not + // relevant to the execution of this function. 
+ continue + } + } + inst.name = llvmInst.Name() + numOperands := llvmInst.OperandsCount() + inst.operands = append(inst.operands, r.getValue(llvmCalledValue)) + for i := 0; i < numOperands-1; i++ { + inst.operands = append(inst.operands, r.getValue(llvmInst.Operand(i))) + } + case llvm.Load: + // Load instruction. The interpreter will load from the + // appropriate memory view. + // Also provide the memory size to be loaded, which is necessary + // with a lack of type information. + inst.name = llvmInst.Name() + inst.operands = []value{ + r.getValue(llvmInst.Operand(0)), + literalValue{r.targetData.TypeAllocSize(llvmInst.Type())}, + } + case llvm.Store: + // Store instruction. The interpreter will create a new object + // in the memory view of the function invocation and store to + // that, to make it possible to roll back this store. + inst.operands = []value{ + r.getValue(llvmInst.Operand(0)), + r.getValue(llvmInst.Operand(1)), + } + case llvm.Alloca: + // Alloca allocates stack space for local variables. + numElements := r.getValue(inst.llvmInst.Operand(0)).(literalValue).value.(uint32) + elementSize := r.targetData.TypeAllocSize(inst.llvmInst.Type().ElementType()) + inst.operands = []value{ + literalValue{elementSize * uint64(numElements)}, + } + case llvm.GetElementPtr: + // GetElementPtr does pointer arithmetic. + inst.name = llvmInst.Name() + ptr := llvmInst.Operand(0) + n := llvmInst.OperandsCount() + elementType := ptr.Type().ElementType() + // gep: [source ptr, dest value size, pairs of indices...] 
+ inst.operands = []value{ + r.getValue(ptr), + literalValue{r.targetData.TypeAllocSize(llvmInst.Type().ElementType())}, + r.getValue(llvmInst.Operand(1)), + literalValue{r.targetData.TypeAllocSize(elementType)}, + } + for i := 2; i < n; i++ { + operand := r.getValue(llvmInst.Operand(i)) + if elementType.TypeKind() == llvm.StructTypeKind { + index := operand.(literalValue).value.(uint32) + elementOffset := r.targetData.ElementOffset(elementType, int(index)) + // Encode operands in a special way. The elementOffset + // is just the offset in bytes. The elementSize is a + // negative number (when cast to an int64) by flipping + // all the bits. This allows the interpreter to detect + // this is a struct field and that it should not + // multiply it with the elementOffset to get the offset. + // It is important for the interpreter to know the + // struct field index for when the GEP must be done at + // runtime. + inst.operands = append(inst.operands, literalValue{elementOffset}, literalValue{^uint64(index)}) + elementType = elementType.StructElementTypes()[index] + } else { + elementType = elementType.ElementType() + elementSize := r.targetData.TypeAllocSize(elementType) + elementSizeOperand := literalValue{elementSize} + // Add operand * elementSizeOperand bytes to the pointer. + inst.operands = append(inst.operands, operand, elementSizeOperand) + } + } + case llvm.BitCast, llvm.IntToPtr, llvm.PtrToInt: + // Bitcasts are usually used to cast a pointer from one type to + // another leaving the pointer itself intact. 
+ inst.name = llvmInst.Name() + inst.operands = []value{ + r.getValue(llvmInst.Operand(0)), + } + case llvm.ExtractValue: + inst.name = llvmInst.Name() + agg := llvmInst.Operand(0) + var offset uint64 + indexingType := agg.Type() + for _, index := range inst.llvmInst.Indices() { + switch indexingType.TypeKind() { + case llvm.StructTypeKind: + offset += r.targetData.ElementOffset(indexingType, int(index)) + indexingType = indexingType.StructElementTypes()[index] + default: // ArrayTypeKind + indexingType = indexingType.ElementType() + elementSize := r.targetData.TypeAllocSize(indexingType) + offset += elementSize * uint64(index) + } + } + size := r.targetData.TypeAllocSize(inst.llvmInst.Type()) + // extractvalue [agg, byteOffset, byteSize] + inst.operands = []value{ + r.getValue(agg), + literalValue{offset}, + literalValue{size}, + } + case llvm.InsertValue: + inst.name = llvmInst.Name() + agg := llvmInst.Operand(0) + var offset uint64 + indexingType := agg.Type() + for _, index := range inst.llvmInst.Indices() { + switch indexingType.TypeKind() { + case llvm.StructTypeKind: + offset += r.targetData.ElementOffset(indexingType, int(index)) + indexingType = indexingType.StructElementTypes()[index] + default: // ArrayTypeKind + indexingType = indexingType.ElementType() + elementSize := r.targetData.TypeAllocSize(indexingType) + offset += elementSize * uint64(index) + } + } + // insertvalue [agg, elt, byteOffset] + inst.operands = []value{ + r.getValue(agg), + r.getValue(llvmInst.Operand(1)), + literalValue{offset}, + } + case llvm.ICmp: + inst.name = llvmInst.Name() + inst.operands = []value{ + r.getValue(llvmInst.Operand(0)), + r.getValue(llvmInst.Operand(1)), + literalValue{uint8(llvmInst.IntPredicate())}, + } + case llvm.FCmp: + inst.name = llvmInst.Name() + inst.operands = []value{ + r.getValue(llvmInst.Operand(0)), + r.getValue(llvmInst.Operand(1)), + literalValue{uint8(llvmInst.FloatPredicate())}, + } + case llvm.Add, llvm.Sub, llvm.Mul, llvm.UDiv, llvm.SDiv, 
llvm.URem, llvm.SRem, llvm.Shl, llvm.LShr, llvm.AShr, llvm.And, llvm.Or, llvm.Xor: + // Integer binary operations. + inst.name = llvmInst.Name() + inst.operands = []value{ + r.getValue(llvmInst.Operand(0)), + r.getValue(llvmInst.Operand(1)), + } + case llvm.SExt, llvm.ZExt, llvm.Trunc: + // Extend or shrink an integer size. + // No sign extension going on so easy to do. + // zext: [value, bitwidth] + // trunc: [value, bitwidth] + inst.name = llvmInst.Name() + inst.operands = []value{ + r.getValue(llvmInst.Operand(0)), + literalValue{uint64(llvmInst.Type().IntTypeWidth())}, + } + case llvm.SIToFP, llvm.UIToFP: + // Convert an integer to a floating point instruction. + // opcode: [value, bitwidth] + inst.name = llvmInst.Name() + inst.operands = []value{ + r.getValue(llvmInst.Operand(0)), + literalValue{uint64(r.targetData.TypeAllocSize(llvmInst.Type()) * 8)}, + } + default: + // Unknown instruction, which is already set in inst.opcode so + // is detectable. + // This error is handled when actually trying to interpret this + // instruction (to not trigger on code that won't be executed). + } + bb.instructions = append(bb.instructions, inst) + } + } + return fn +} + +// instructionNameMap maps from instruction opcodes to instruction names. This +// can be useful for debug logging. 
+var instructionNameMap = [...]string{ + llvm.Ret: "ret", + llvm.Br: "br", + llvm.Switch: "switch", + llvm.IndirectBr: "indirectbr", + llvm.Invoke: "invoke", + llvm.Unreachable: "unreachable", + + // Standard Binary Operators + llvm.Add: "add", + llvm.FAdd: "fadd", + llvm.Sub: "sub", + llvm.FSub: "fsub", + llvm.Mul: "mul", + llvm.FMul: "fmul", + llvm.UDiv: "udiv", + llvm.SDiv: "sdiv", + llvm.FDiv: "fdiv", + llvm.URem: "urem", + llvm.SRem: "srem", + llvm.FRem: "frem", + + // Logical Operators + llvm.Shl: "shl", + llvm.LShr: "lshr", + llvm.AShr: "ashr", + llvm.And: "and", + llvm.Or: "or", + llvm.Xor: "xor", + + // Memory Operators + llvm.Alloca: "alloca", + llvm.Load: "load", + llvm.Store: "store", + llvm.GetElementPtr: "getelementptr", + + // Cast Operators + llvm.Trunc: "trunc", + llvm.ZExt: "zext", + llvm.SExt: "sext", + llvm.FPToUI: "fptoui", + llvm.FPToSI: "fptosi", + llvm.UIToFP: "uitofp", + llvm.SIToFP: "sitofp", + llvm.FPTrunc: "fptrunc", + llvm.FPExt: "fpext", + llvm.PtrToInt: "ptrtoint", + llvm.IntToPtr: "inttoptr", + llvm.BitCast: "bitcast", + + // Other Operators + llvm.ICmp: "icmp", + llvm.FCmp: "fcmp", + llvm.PHI: "phi", + llvm.Call: "call", + llvm.Select: "select", + llvm.VAArg: "vaarg", + llvm.ExtractElement: "extractelement", + llvm.InsertElement: "insertelement", + llvm.ShuffleVector: "shufflevector", + llvm.ExtractValue: "extractvalue", + llvm.InsertValue: "insertvalue", +} diff --git a/interp/errors.go b/interp/errors.go index 87c99587a6..30007de846 100644 --- a/interp/errors.go +++ b/interp/errors.go @@ -11,15 +11,19 @@ import ( "tinygo.org/x/go-llvm" ) -// errUnreachable is returned when an unreachable instruction is executed. This -// error should not be visible outside of the interp package. 
-var errUnreachable = &Error{Err: errors.New("interp: unreachable executed")} +var errLiteralToPointer = errors.New("interp: trying to convert literal value to pointer") -// unsupportedInstructionError returns a new "unsupported instruction" error for -// the given instruction. It includes source location information, when -// available. -func (e *evalPackage) unsupportedInstructionError(inst llvm.Value) *Error { - return e.errorAt(inst, errors.New("interp: unsupported instruction")) +// These errors are expected during normal execution and can be recovered from +// by running the affected function at runtime instead of compile time. +var ( + errExpectedPointer = errors.New("interp: trying to use an integer as a pointer (memory-mapped I/O?)") + errUnsupportedInst = errors.New("interp: unsupported instruction") + errUnsupportedRuntimeInst = errors.New("interp: unsupported instruction (to be emitted at runtime)") + errMapAlreadyCreated = errors.New("interp: map already created") +) + +func isRecoverableError(err error) bool { + return err == errExpectedPointer || err == errUnsupportedInst || err == errUnsupportedRuntimeInst || err == errMapAlreadyCreated } // ErrorLine is one line in a traceback. The position may be missing. @@ -46,13 +50,13 @@ func (e *Error) Error() string { // errorAt returns an error value for the currently interpreted package at the // location of the instruction. The location information may not be complete as // it depends on debug information in the IR. 
-func (e *evalPackage) errorAt(inst llvm.Value, err error) *Error { - pos := getPosition(inst) +func (r *runner) errorAt(inst instruction, err error) *Error { + pos := getPosition(inst.llvmInst) return &Error{ - ImportPath: e.packagePath, + ImportPath: r.pkgName, Pos: pos, Err: err, - Traceback: []ErrorLine{{pos, inst}}, + Traceback: []ErrorLine{{pos, inst.llvmInst}}, } } diff --git a/interp/frame.go b/interp/frame.go deleted file mode 100644 index 16984f93f4..0000000000 --- a/interp/frame.go +++ /dev/null @@ -1,708 +0,0 @@ -package interp - -// This file implements the core interpretation routines, interpreting single -// functions. - -import ( - "errors" - "strings" - - "tinygo.org/x/go-llvm" -) - -type frame struct { - *evalPackage - fn llvm.Value - locals map[llvm.Value]Value -} - -// evalBasicBlock evaluates a single basic block, returning the return value (if -// ending with a ret instruction), a list of outgoing basic blocks (if not -// ending with a ret instruction), or an error on failure. -// Most of it works at compile time. Some calls get translated into calls to be -// executed at runtime: calls to functions with side effects, external calls, -// and operations on the result of such instructions. 
-func (fr *frame) evalBasicBlock(bb, incoming llvm.BasicBlock, indent string) (retval Value, outgoing []llvm.Value, err *Error) { - for inst := bb.FirstInstruction(); !inst.IsNil(); inst = llvm.NextInstruction(inst) { - if fr.Debug { - print(indent) - inst.Dump() - println() - } - switch { - case !inst.IsABinaryOperator().IsNil(): - lhs := fr.getLocal(inst.Operand(0)).(*LocalValue).Underlying - rhs := fr.getLocal(inst.Operand(1)).(*LocalValue).Underlying - - switch inst.InstructionOpcode() { - // Standard binary operators - case llvm.Add: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateAdd(lhs, rhs, "")} - case llvm.FAdd: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateFAdd(lhs, rhs, "")} - case llvm.Sub: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateSub(lhs, rhs, "")} - case llvm.FSub: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateFSub(lhs, rhs, "")} - case llvm.Mul: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateMul(lhs, rhs, "")} - case llvm.FMul: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateFMul(lhs, rhs, "")} - case llvm.UDiv: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateUDiv(lhs, rhs, "")} - case llvm.SDiv: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateSDiv(lhs, rhs, "")} - case llvm.FDiv: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateFDiv(lhs, rhs, "")} - case llvm.URem: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateURem(lhs, rhs, "")} - case llvm.SRem: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateSRem(lhs, rhs, "")} - case llvm.FRem: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateFRem(lhs, rhs, "")} - - // Logical operators - case llvm.Shl: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateShl(lhs, rhs, "")} - case llvm.LShr: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateLShr(lhs, rhs, "")} - case llvm.AShr: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateAShr(lhs, rhs, "")} - case 
llvm.And: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateAnd(lhs, rhs, "")} - case llvm.Or: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateOr(lhs, rhs, "")} - case llvm.Xor: - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateXor(lhs, rhs, "")} - - default: - return nil, nil, fr.unsupportedInstructionError(inst) - } - - // Memory operators - case !inst.IsAAllocaInst().IsNil(): - allocType := inst.Type().ElementType() - alloca := llvm.AddGlobal(fr.Mod, allocType, fr.packagePath+"$alloca") - alloca.SetInitializer(llvm.ConstNull(allocType)) - alloca.SetLinkage(llvm.InternalLinkage) - fr.locals[inst] = &LocalValue{ - Underlying: alloca, - Eval: fr.Eval, - } - case !inst.IsALoadInst().IsNil(): - operand := fr.getLocal(inst.Operand(0)).(*LocalValue) - var value llvm.Value - if !operand.IsConstant() || inst.IsVolatile() || (!operand.Underlying.IsAConstantExpr().IsNil() && operand.Underlying.Opcode() == llvm.BitCast) { - value = fr.builder.CreateLoad(operand.Value(), inst.Name()) - } else { - var err error - value, err = operand.Load() - if err != nil { - return nil, nil, fr.errorAt(inst, err) - } - } - if value.Type() != inst.Type() { - return nil, nil, fr.errorAt(inst, errors.New("interp: load: type does not match")) - } - fr.locals[inst] = fr.getValue(value) - case !inst.IsAStoreInst().IsNil(): - value := fr.getLocal(inst.Operand(0)) - ptr := fr.getLocal(inst.Operand(1)) - if inst.IsVolatile() { - fr.builder.CreateStore(value.Value(), ptr.Value()) - } else { - err := ptr.Store(value.Value()) - if err != nil { - return nil, nil, fr.errorAt(inst, err) - } - } - case !inst.IsAGetElementPtrInst().IsNil(): - value := fr.getLocal(inst.Operand(0)) - llvmIndices := make([]llvm.Value, inst.OperandsCount()-1) - for i := range llvmIndices { - llvmIndices[i] = inst.Operand(i + 1) - } - indices := make([]uint32, len(llvmIndices)) - for i, llvmIndex := range llvmIndices { - operand := fr.getLocal(llvmIndex) - if !operand.IsConstant() { - // Not a constant 
operation. - // This should be detected by the scanner, but isn't at the - // moment. - return nil, nil, fr.errorAt(inst, errors.New("todo: non-const gep")) - } - indices[i] = uint32(operand.Value().ZExtValue()) - } - result, err := value.GetElementPtr(indices) - if err != nil { - return nil, nil, fr.errorAt(inst, err) - } - if result.Type() != inst.Type() { - return nil, nil, fr.errorAt(inst, errors.New("interp: gep: type does not match")) - } - fr.locals[inst] = result - - // Cast operators - case !inst.IsATruncInst().IsNil(): - value := fr.getLocal(inst.Operand(0)) - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateTrunc(value.(*LocalValue).Value(), inst.Type(), "")} - case !inst.IsAZExtInst().IsNil(): - value := fr.getLocal(inst.Operand(0)) - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateZExt(value.(*LocalValue).Value(), inst.Type(), "")} - case !inst.IsASExtInst().IsNil(): - value := fr.getLocal(inst.Operand(0)) - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateSExt(value.(*LocalValue).Value(), inst.Type(), "")} - case !inst.IsAFPToUIInst().IsNil(): - value := fr.getLocal(inst.Operand(0)) - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateFPToUI(value.(*LocalValue).Value(), inst.Type(), "")} - case !inst.IsAFPToSIInst().IsNil(): - value := fr.getLocal(inst.Operand(0)) - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateFPToSI(value.(*LocalValue).Value(), inst.Type(), "")} - case !inst.IsAUIToFPInst().IsNil(): - value := fr.getLocal(inst.Operand(0)) - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateUIToFP(value.(*LocalValue).Value(), inst.Type(), "")} - case !inst.IsASIToFPInst().IsNil(): - value := fr.getLocal(inst.Operand(0)) - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateSIToFP(value.(*LocalValue).Value(), inst.Type(), "")} - case !inst.IsAFPTruncInst().IsNil(): - value := fr.getLocal(inst.Operand(0)) - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateFPTrunc(value.(*LocalValue).Value(), 
inst.Type(), "")} - case !inst.IsAFPExtInst().IsNil(): - value := fr.getLocal(inst.Operand(0)) - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateFPExt(value.(*LocalValue).Value(), inst.Type(), "")} - case !inst.IsAPtrToIntInst().IsNil(): - value := fr.getLocal(inst.Operand(0)) - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreatePtrToInt(value.Value(), inst.Type(), "")} - case !inst.IsABitCastInst().IsNil() && inst.Type().TypeKind() == llvm.PointerTypeKind: - operand := inst.Operand(0) - if !operand.IsACallInst().IsNil() { - fn := operand.CalledValue() - if !fn.IsAFunction().IsNil() && fn.Name() == "runtime.alloc" { - continue // special case: bitcast of alloc - } - } - if _, ok := fr.getLocal(operand).(*MapValue); ok { - // Special case for runtime.trackPointer calls. - // Note: this might not be entirely sound in some rare cases - // where the map is stored in a dirty global. - uses := getUses(inst) - if len(uses) == 1 { - use := uses[0] - if !use.IsACallInst().IsNil() && !use.CalledValue().IsAFunction().IsNil() && use.CalledValue().Name() == "runtime.trackPointer" { - continue - } - } - // It is not possible in Go to bitcast a map value to a pointer. - return nil, nil, fr.errorAt(inst, errors.New("unimplemented: bitcast of map")) - } - value := fr.getLocal(operand).(*LocalValue) - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateBitCast(value.Value(), inst.Type(), "")} - - // Other operators - case !inst.IsAICmpInst().IsNil(): - lhs := fr.getLocal(inst.Operand(0)).(*LocalValue).Underlying - rhs := fr.getLocal(inst.Operand(1)).(*LocalValue).Underlying - predicate := inst.IntPredicate() - if predicate == llvm.IntEQ { - var lhsZero, rhsZero bool - var ok1, ok2 bool - if lhs.Type().TypeKind() == llvm.PointerTypeKind { - // Unfortunately, the const propagation in the IR builder - // doesn't handle pointer compares of inttoptr values. So we - // implement it manually here. 
- lhsZero, ok1 = isPointerNil(lhs) - rhsZero, ok2 = isPointerNil(rhs) - } - if lhs.Type().TypeKind() == llvm.IntegerTypeKind { - lhsZero, ok1 = isZero(lhs) - rhsZero, ok2 = isZero(rhs) - } - if ok1 && ok2 { - if lhsZero && rhsZero { - // Both are zero, so this icmp is always evaluated to true. - fr.locals[inst] = &LocalValue{fr.Eval, llvm.ConstInt(fr.Mod.Context().Int1Type(), 1, false)} - continue - } - if lhsZero != rhsZero { - // Only one of them is zero, so this comparison must return false. - fr.locals[inst] = &LocalValue{fr.Eval, llvm.ConstInt(fr.Mod.Context().Int1Type(), 0, false)} - continue - } - } - } - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateICmp(predicate, lhs, rhs, "")} - case !inst.IsAFCmpInst().IsNil(): - lhs := fr.getLocal(inst.Operand(0)).(*LocalValue).Underlying - rhs := fr.getLocal(inst.Operand(1)).(*LocalValue).Underlying - predicate := inst.FloatPredicate() - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateFCmp(predicate, lhs, rhs, "")} - case !inst.IsAPHINode().IsNil(): - for i := 0; i < inst.IncomingCount(); i++ { - if inst.IncomingBlock(i) == incoming { - fr.locals[inst] = fr.getLocal(inst.IncomingValue(i)) - } - } - case !inst.IsACallInst().IsNil(): - callee := inst.CalledValue() - switch { - case callee.Name() == "runtime.alloc": - // heap allocation - users := getUses(inst) - var resultInst = inst - if len(users) == 1 && !users[0].IsABitCastInst().IsNil() { - // happens when allocating something other than i8* - resultInst = users[0] - } - size := fr.getLocal(inst.Operand(0)).(*LocalValue).Underlying.ZExtValue() - allocType := resultInst.Type().ElementType() - typeSize := fr.TargetData.TypeAllocSize(allocType) - elementCount := 1 - if size != typeSize { - // allocate an array - if size%typeSize != 0 { - return nil, nil, fr.unsupportedInstructionError(inst) - } - elementCount = int(size / typeSize) - allocType = llvm.ArrayType(allocType, elementCount) - } - alloc := llvm.AddGlobal(fr.Mod, allocType, 
fr.packagePath+"$alloc") - alloc.SetInitializer(llvm.ConstNull(allocType)) - alloc.SetLinkage(llvm.InternalLinkage) - result := &LocalValue{ - Underlying: alloc, - Eval: fr.Eval, - } - if elementCount == 1 { - fr.locals[resultInst] = result - } else { - result, err := result.GetElementPtr([]uint32{0, 0}) - if err != nil { - return nil, nil, fr.errorAt(inst, err) - } - fr.locals[resultInst] = result - } - case callee.Name() == "runtime.hashmapMake": - // create a map - keySize := inst.Operand(0).ZExtValue() - valueSize := inst.Operand(1).ZExtValue() - fr.locals[inst] = &MapValue{ - Eval: fr.Eval, - PkgName: fr.packagePath, - KeySize: int(keySize), - ValueSize: int(valueSize), - } - case callee.Name() == "runtime.hashmapStringSet": - // set a string key in the map - keyBuf := fr.getLocal(inst.Operand(1)).(*LocalValue) - keyLen := fr.getLocal(inst.Operand(2)).(*LocalValue) - valPtr := fr.getLocal(inst.Operand(3)).(*LocalValue) - m, ok := fr.getLocal(inst.Operand(0)).(*MapValue) - if !ok || !keyBuf.IsConstant() || !keyLen.IsConstant() || !valPtr.IsConstant() { - // The mapassign operation could not be done at compile - // time. Do it at runtime instead. - m := fr.getLocal(inst.Operand(0)).Value() - fr.markDirty(m) - llvmParams := []llvm.Value{ - m, // *runtime.hashmap - fr.getLocal(inst.Operand(1)).Value(), // key.ptr - fr.getLocal(inst.Operand(2)).Value(), // key.len - fr.getLocal(inst.Operand(3)).Value(), // value (unsafe.Pointer) - fr.getLocal(inst.Operand(4)).Value(), // context - fr.getLocal(inst.Operand(5)).Value(), // parentHandle - } - fr.builder.CreateCall(callee, llvmParams, "") - continue - } - // "key" is a Go string value, which in the TinyGo calling convention is split up - // into separate pointer and length parameters. - err := m.PutString(keyBuf, keyLen, valPtr) - if err != nil { - return nil, nil, fr.errorAt(inst, err) - } - case callee.Name() == "runtime.hashmapBinarySet": - // set a binary (int etc.) 
key in the map - keyBuf := fr.getLocal(inst.Operand(1)).(*LocalValue) - valPtr := fr.getLocal(inst.Operand(2)).(*LocalValue) - m, ok := fr.getLocal(inst.Operand(0)).(*MapValue) - if !ok || !keyBuf.IsConstant() || !valPtr.IsConstant() { - // The mapassign operation could not be done at compile - // time. Do it at runtime instead. - m := fr.getLocal(inst.Operand(0)).Value() - fr.markDirty(m) - llvmParams := []llvm.Value{ - m, // *runtime.hashmap - fr.getLocal(inst.Operand(1)).Value(), // key - fr.getLocal(inst.Operand(2)).Value(), // value - fr.getLocal(inst.Operand(3)).Value(), // context - fr.getLocal(inst.Operand(4)).Value(), // parentHandle - } - fr.builder.CreateCall(callee, llvmParams, "") - continue - } - err := m.PutBinary(keyBuf, valPtr) - if err != nil { - return nil, nil, fr.errorAt(inst, err) - } - case callee.Name() == "runtime.stringConcat": - // adding two strings together - buf1Ptr := fr.getLocal(inst.Operand(0)) - buf1Len := fr.getLocal(inst.Operand(1)) - buf2Ptr := fr.getLocal(inst.Operand(2)) - buf2Len := fr.getLocal(inst.Operand(3)) - buf1, err := getStringBytes(buf1Ptr, buf1Len.Value()) - if err != nil { - return nil, nil, fr.errorAt(inst, err) - } - buf2, err := getStringBytes(buf2Ptr, buf2Len.Value()) - if err != nil { - return nil, nil, fr.errorAt(inst, err) - } - result := []byte(string(buf1) + string(buf2)) - vals := make([]llvm.Value, len(result)) - for i := range vals { - vals[i] = llvm.ConstInt(fr.Mod.Context().Int8Type(), uint64(result[i]), false) - } - globalType := llvm.ArrayType(fr.Mod.Context().Int8Type(), len(result)) - globalValue := llvm.ConstArray(fr.Mod.Context().Int8Type(), vals) - global := llvm.AddGlobal(fr.Mod, globalType, fr.packagePath+"$stringconcat") - global.SetInitializer(globalValue) - global.SetLinkage(llvm.InternalLinkage) - global.SetGlobalConstant(true) - global.SetUnnamedAddr(true) - stringType := fr.Mod.GetTypeByName("runtime._string") - retPtr := llvm.ConstGEP(global, 
getLLVMIndices(fr.Mod.Context().Int32Type(), []uint32{0, 0})) - retLen := llvm.ConstInt(stringType.StructElementTypes()[1], uint64(len(result)), false) - ret := llvm.ConstNull(stringType) - ret = llvm.ConstInsertValue(ret, retPtr, []uint32{0}) - ret = llvm.ConstInsertValue(ret, retLen, []uint32{1}) - fr.locals[inst] = &LocalValue{fr.Eval, ret} - case callee.Name() == "runtime.sliceCopy": - elementSize := fr.getLocal(inst.Operand(4)).(*LocalValue).Value().ZExtValue() - dstArray := fr.getLocal(inst.Operand(0)).(*LocalValue).stripPointerCasts() - srcArray := fr.getLocal(inst.Operand(1)).(*LocalValue).stripPointerCasts() - dstLen := fr.getLocal(inst.Operand(2)).(*LocalValue) - srcLen := fr.getLocal(inst.Operand(3)).(*LocalValue) - if elementSize != 1 && dstArray.Type().ElementType().TypeKind() == llvm.ArrayTypeKind && srcArray.Type().ElementType().TypeKind() == llvm.ArrayTypeKind { - // Slice data pointers are created by adding a global array - // and getting the address of the first element using a GEP. - // However, before the compiler can pass it to - // runtime.sliceCopy, it has to perform a bitcast to a *i8, - // to make it a unsafe.Pointer. Now, when the IR builder - // sees a bitcast of a GEP with zero indices, it will make - // a bitcast of the original array instead of the GEP, - // which breaks our assumptions. - // Re-add this GEP, in the hope that it it is then of the correct type... 
- dstArrayValue, err := dstArray.GetElementPtr([]uint32{0, 0}) - if err != nil { - return nil, nil, fr.errorAt(inst, err) - } - dstArray = dstArrayValue.(*LocalValue) - srcArrayValue, err := srcArray.GetElementPtr([]uint32{0, 0}) - if err != nil { - return nil, nil, fr.errorAt(inst, err) - } - srcArray = srcArrayValue.(*LocalValue) - } - if fr.Eval.TargetData.TypeAllocSize(dstArray.Type().ElementType()) != elementSize { - return nil, nil, fr.errorAt(inst, errors.New("interp: slice dst element size does not match pointer type")) - } - if fr.Eval.TargetData.TypeAllocSize(srcArray.Type().ElementType()) != elementSize { - return nil, nil, fr.errorAt(inst, errors.New("interp: slice src element size does not match pointer type")) - } - if dstArray.Type() != srcArray.Type() { - return nil, nil, fr.errorAt(inst, errors.New("interp: slice element types don't match")) - } - length := dstLen.Value().SExtValue() - if srcLength := srcLen.Value().SExtValue(); srcLength < length { - length = srcLength - } - if length < 0 { - return nil, nil, fr.errorAt(inst, errors.New("interp: trying to copy a slice with negative length?")) - } - for i := int64(0); i < length; i++ { - // *dst = *src - val, err := srcArray.Load() - if err != nil { - return nil, nil, fr.errorAt(inst, err) - } - err = dstArray.Store(val) - if err != nil { - return nil, nil, fr.errorAt(inst, err) - } - // dst++ - dstArrayValue, err := dstArray.GetElementPtr([]uint32{1}) - if err != nil { - return nil, nil, fr.errorAt(inst, err) - } - dstArray = dstArrayValue.(*LocalValue) - // src++ - srcArrayValue, err := srcArray.GetElementPtr([]uint32{1}) - if err != nil { - return nil, nil, fr.errorAt(inst, err) - } - srcArray = srcArrayValue.(*LocalValue) - } - case callee.Name() == "runtime.stringToBytes": - // convert a string to a []byte - bufPtr := fr.getLocal(inst.Operand(0)) - bufLen := fr.getLocal(inst.Operand(1)) - result, err := getStringBytes(bufPtr, bufLen.Value()) - if err != nil { - return nil, nil, 
fr.errorAt(inst, err) - } - vals := make([]llvm.Value, len(result)) - for i := range vals { - vals[i] = llvm.ConstInt(fr.Mod.Context().Int8Type(), uint64(result[i]), false) - } - globalType := llvm.ArrayType(fr.Mod.Context().Int8Type(), len(result)) - globalValue := llvm.ConstArray(fr.Mod.Context().Int8Type(), vals) - global := llvm.AddGlobal(fr.Mod, globalType, fr.packagePath+"$bytes") - global.SetInitializer(globalValue) - global.SetLinkage(llvm.InternalLinkage) - global.SetGlobalConstant(true) - global.SetUnnamedAddr(true) - sliceType := inst.Type() - retPtr := llvm.ConstGEP(global, getLLVMIndices(fr.Mod.Context().Int32Type(), []uint32{0, 0})) - retLen := llvm.ConstInt(sliceType.StructElementTypes()[1], uint64(len(result)), false) - ret := llvm.ConstNull(sliceType) - ret = llvm.ConstInsertValue(ret, retPtr, []uint32{0}) // ptr - ret = llvm.ConstInsertValue(ret, retLen, []uint32{1}) // len - ret = llvm.ConstInsertValue(ret, retLen, []uint32{2}) // cap - fr.locals[inst] = &LocalValue{fr.Eval, ret} - case callee.Name() == "runtime.typeAssert": - actualTypeInt := fr.getLocal(inst.Operand(0)).(*LocalValue).Underlying - assertedType := fr.getLocal(inst.Operand(1)).(*LocalValue).Underlying - if actualTypeInt.IsAConstantExpr().IsNil() || actualTypeInt.Opcode() != llvm.PtrToInt { - return nil, nil, fr.errorAt(inst, errors.New("interp: expected typecode in runtime.typeAssert to be a ptrtoint")) - } - actualType := actualTypeInt.Operand(0) - if actualType.IsAConstant().IsNil() || assertedType.IsAConstant().IsNil() { - return nil, nil, fr.errorAt(inst, errors.New("interp: unimplemented: type assert with non-constant interface value")) - } - assertOk := uint64(0) - if llvm.ConstExtractValue(actualType.Initializer(), []uint32{0}) == assertedType { - assertOk = 1 - } - fr.locals[inst] = &LocalValue{fr.Eval, llvm.ConstInt(fr.Mod.Context().Int1Type(), assertOk, false)} - case callee.Name() == "runtime.interfaceImplements": - typecode := 
fr.getLocal(inst.Operand(0)).(*LocalValue).Underlying - interfaceMethodSet := fr.getLocal(inst.Operand(1)).(*LocalValue).Underlying - if typecode.IsAConstantExpr().IsNil() || typecode.Opcode() != llvm.PtrToInt { - return nil, nil, fr.errorAt(inst, errors.New("interp: expected typecode to be a ptrtoint")) - } - typecode = typecode.Operand(0) - if interfaceMethodSet.IsAConstantExpr().IsNil() || interfaceMethodSet.Opcode() != llvm.GetElementPtr { - return nil, nil, fr.errorAt(inst, errors.New("interp: expected method set in runtime.interfaceImplements to be a constant gep")) - } - interfaceMethodSet = interfaceMethodSet.Operand(0).Initializer() - methodSet := llvm.ConstExtractValue(typecode.Initializer(), []uint32{1}) - if methodSet.IsAConstantExpr().IsNil() || methodSet.Opcode() != llvm.GetElementPtr { - return nil, nil, fr.errorAt(inst, errors.New("interp: expected method set to be a constant gep")) - } - methodSet = methodSet.Operand(0).Initializer() - - // Make a set of all the methods on the concrete type, for - // easier checking in the next step. - definedMethods := map[string]struct{}{} - for i := 0; i < methodSet.Type().ArrayLength(); i++ { - methodInfo := llvm.ConstExtractValue(methodSet, []uint32{uint32(i)}) - name := llvm.ConstExtractValue(methodInfo, []uint32{0}).Name() - definedMethods[name] = struct{}{} - } - // Check whether all interface methods are also in the list - // of defined methods calculated above. - implements := uint64(1) // i1 true - for i := 0; i < interfaceMethodSet.Type().ArrayLength(); i++ { - name := llvm.ConstExtractValue(interfaceMethodSet, []uint32{uint32(i)}).Name() - if _, ok := definedMethods[name]; !ok { - // There is a method on the interface that is not - // implemented by the type. 
- implements = 0 // i1 false - break - } - } - fr.locals[inst] = &LocalValue{fr.Eval, llvm.ConstInt(fr.Mod.Context().Int1Type(), implements, false)} - case callee.Name() == "runtime.nanotime": - fr.locals[inst] = &LocalValue{fr.Eval, llvm.ConstInt(fr.Mod.Context().Int64Type(), 0, false)} - case callee.Name() == "llvm.dbg.value": - // do nothing - case strings.HasPrefix(callee.Name(), "llvm.lifetime."): - // do nothing - case callee.Name() == "runtime.trackPointer": - // do nothing - case strings.HasPrefix(callee.Name(), "runtime.print") || callee.Name() == "runtime._panic": - // This are all print instructions, which necessarily have side - // effects but no results. - // TODO: print an error when executing runtime._panic (with the - // exact error message it would print at runtime). - var params []llvm.Value - for i := 0; i < inst.OperandsCount()-1; i++ { - operand := fr.getLocal(inst.Operand(i)).Value() - fr.markDirty(operand) - params = append(params, operand) - } - // TODO: accurate debug info, including call chain - fr.builder.CreateCall(callee, params, inst.Name()) - case !callee.IsAFunction().IsNil() && callee.IsDeclaration(): - // external functions - var params []llvm.Value - for i := 0; i < inst.OperandsCount()-1; i++ { - operand := fr.getLocal(inst.Operand(i)).Value() - fr.markDirty(operand) - params = append(params, operand) - } - // TODO: accurate debug info, including call chain - result := fr.builder.CreateCall(callee, params, inst.Name()) - if inst.Type().TypeKind() != llvm.VoidTypeKind { - fr.markDirty(result) - fr.locals[inst] = &LocalValue{fr.Eval, result} - } - case !callee.IsAFunction().IsNil(): - // regular function - var params []Value - dirtyParams := false - for i := 0; i < inst.OperandsCount()-1; i++ { - local := fr.getLocal(inst.Operand(i)) - if !local.IsConstant() { - dirtyParams = true - } - params = append(params, local) - } - var ret Value - scanResult, err := fr.hasSideEffects(callee) - if err != nil { - return nil, nil, err - } - if 
scanResult.severity == sideEffectLimited || dirtyParams && scanResult.severity != sideEffectAll { - // Side effect is bounded. This means the operation invokes - // side effects (like calling an external function) but it - // is known at compile time which side effects it invokes. - // This means the function can be called at runtime and the - // affected globals can be marked dirty at compile time. - llvmParams := make([]llvm.Value, len(params)) - for i, param := range params { - llvmParams[i] = param.Value() - } - result := fr.builder.CreateCall(callee, llvmParams, inst.Name()) - ret = &LocalValue{fr.Eval, result} - // mark all mentioned globals as dirty - for global := range scanResult.mentionsGlobals { - fr.markDirty(global) - } - } else { - // Side effect is one of: - // * None: no side effects, can be fully interpreted at - // compile time. - // * Unbounded: cannot call at runtime so we'll try to - // interpret anyway and hope for the best. - ret, err = fr.function(callee, params, indent+" ") - if err != nil { - // Record this function call in the backtrace. - err.Traceback = append(err.Traceback, ErrorLine{ - Pos: getPosition(inst), - Inst: inst, - }) - return nil, nil, err - } - } - if inst.Type().TypeKind() != llvm.VoidTypeKind { - fr.locals[inst] = ret - } - default: - // function pointers, etc. 
- return nil, nil, fr.unsupportedInstructionError(inst) - } - case !inst.IsAExtractValueInst().IsNil(): - agg := fr.getLocal(inst.Operand(0)).(*LocalValue) // must be constant - indices := inst.Indices() - if agg.Underlying.IsConstant() { - newValue := llvm.ConstExtractValue(agg.Underlying, indices) - fr.locals[inst] = fr.getValue(newValue) - } else { - if len(indices) != 1 { - return nil, nil, fr.errorAt(inst, errors.New("interp: cannot handle extractvalue with not exactly 1 index")) - } - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateExtractValue(agg.Underlying, int(indices[0]), inst.Name())} - } - case !inst.IsAInsertValueInst().IsNil(): - agg := fr.getLocal(inst.Operand(0)).(*LocalValue) // must be constant - val := fr.getLocal(inst.Operand(1)) - indices := inst.Indices() - if agg.IsConstant() && val.IsConstant() { - newValue := llvm.ConstInsertValue(agg.Underlying, val.Value(), indices) - fr.locals[inst] = &LocalValue{fr.Eval, newValue} - } else { - if len(indices) != 1 { - return nil, nil, fr.errorAt(inst, errors.New("interp: cannot handle insertvalue with not exactly 1 index")) - } - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateInsertValue(agg.Underlying, val.Value(), int(indices[0]), inst.Name())} - } - case !inst.IsASelectInst().IsNil(): - // var result T - // if cond { - // result = x - // } else { - // result = y - // } - // return result - cond := fr.getLocal(inst.Operand(0)).(*LocalValue).Underlying - x := fr.getLocal(inst.Operand(1)).(*LocalValue).Underlying - y := fr.getLocal(inst.Operand(2)).(*LocalValue).Underlying - fr.locals[inst] = &LocalValue{fr.Eval, fr.builder.CreateSelect(cond, x, y, "")} - - case !inst.IsAReturnInst().IsNil() && inst.OperandsCount() == 0: - return nil, nil, nil // ret void - case !inst.IsAReturnInst().IsNil() && inst.OperandsCount() == 1: - return fr.getLocal(inst.Operand(0)), nil, nil - case !inst.IsABranchInst().IsNil() && inst.OperandsCount() == 3: - // conditional branch (if/then/else) - cond := 
fr.getLocal(inst.Operand(0)).Value() - if cond.Type() != fr.Mod.Context().Int1Type() { - return nil, nil, fr.errorAt(inst, errors.New("expected an i1 in a branch instruction")) - } - thenBB := inst.Operand(1) - elseBB := inst.Operand(2) - if !cond.IsAInstruction().IsNil() { - return nil, nil, fr.errorAt(inst, errors.New("interp: branch on a non-constant")) - } - if !cond.IsAConstantExpr().IsNil() { - // This may happen when the instruction builder could not - // const-fold some instructions. - return nil, nil, fr.errorAt(inst, errors.New("interp: branch on a non-const-propagated constant expression")) - } - switch cond { - case llvm.ConstInt(fr.Mod.Context().Int1Type(), 0, false): // false - return nil, []llvm.Value{thenBB}, nil // then - case llvm.ConstInt(fr.Mod.Context().Int1Type(), 1, false): // true - return nil, []llvm.Value{elseBB}, nil // else - default: - return nil, nil, fr.errorAt(inst, errors.New("branch was not true or false")) - } - case !inst.IsABranchInst().IsNil() && inst.OperandsCount() == 1: - // unconditional branch (goto) - return nil, []llvm.Value{inst.Operand(0)}, nil - case !inst.IsAUnreachableInst().IsNil(): - // Unreachable was reached (e.g. after a call to panic()). - // Report this as an error, as it is not supposed to happen. - // This is a sentinel error value. - return nil, nil, errUnreachable - - default: - return nil, nil, fr.unsupportedInstructionError(inst) - } - } - - panic("interp: reached end of basic block without terminator") -} - -// Get the Value for an operand, which is a constant value of some sort. -func (fr *frame) getLocal(v llvm.Value) Value { - if ret, ok := fr.locals[v]; ok { - return ret - } else if value := fr.getValue(v); value != nil { - return value - } else { - // This should not happen under normal circumstances. 
- panic("cannot find value") - } -} diff --git a/interp/interp.go b/interp/interp.go index d5ff396568..b58d7f966f 100644 --- a/interp/interp.go +++ b/interp/interp.go @@ -1,59 +1,73 @@ -// Package interp interprets Go package initializers as much as possible. This -// avoid running them at runtime, improving code size and making other -// optimizations possible. +// Package interp is a partial evaluator of code run at package init time. See +// the README in this package for details. package interp -// This file provides the overarching Eval object with associated (utility) -// methods. - import ( + "fmt" + "os" "strings" + "time" "tinygo.org/x/go-llvm" ) -type Eval struct { - Mod llvm.Module - TargetData llvm.TargetData - Debug bool - builder llvm.Builder - dirtyGlobals map[llvm.Value]struct{} - sideEffectFuncs map[llvm.Value]*sideEffectResult // cache of side effect scan results -} +// Enable extra checks, which should be disabled by default. +// This may help track down bugs by adding a few more sanity checks. +const checks = true -// evalPackage encapsulates the Eval type for just a single package. The Eval -// type keeps state across the whole program, the evalPackage type keeps extra -// state for the currently interpreted package. -type evalPackage struct { - *Eval - packagePath string +// runner contains all state related to one interp run. +type runner struct { + mod llvm.Module + targetData llvm.TargetData + builder llvm.Builder + pointerSize uint32 // cached pointer size from the TargetData + debug bool // log debug messages + pkgName string // package name of the currently executing package + functionCache map[llvm.Value]*function // cache of compiled functions + objects []object // slice of objects in memory + globals map[llvm.Value]int // map from global to index in objects slice + start time.Time + callsExecuted uint64 } -// Run evaluates the function with the given name and then eliminates all -// callers. 
+// Run evaluates runtime.initAll function as much as possible at compile time. +// Set debug to true if it should print output while running. func Run(mod llvm.Module, debug bool) error { - if debug { - println("\ncompile-time evaluation:") - } - - name := "runtime.initAll" - e := &Eval{ - Mod: mod, - TargetData: llvm.NewTargetData(mod.DataLayout()), - Debug: debug, - dirtyGlobals: map[llvm.Value]struct{}{}, + r := runner{ + mod: mod, + targetData: llvm.NewTargetData(mod.DataLayout()), + debug: debug, + functionCache: make(map[llvm.Value]*function), + objects: []object{{}}, + globals: make(map[llvm.Value]int), + start: time.Now(), } - e.builder = mod.Context().NewBuilder() + r.pointerSize = uint32(r.targetData.PointerSize()) - initAll := mod.NamedFunction(name) + initAll := mod.NamedFunction("runtime.initAll") bb := initAll.EntryBasicBlock() + + // Create a builder, to insert instructions that could not be evaluated at + // compile time. + r.builder = mod.Context().NewBuilder() + defer r.builder.Dispose() + // Create a dummy alloca in the entry block that we can set the insert point // to. This is necessary because otherwise we might be removing the // instruction (init call) that we are removing after successful // interpretation. - e.builder.SetInsertPointBefore(bb.FirstInstruction()) - dummy := e.builder.CreateAlloca(e.Mod.Context().Int8Type(), "dummy") - e.builder.SetInsertPointBefore(dummy) + r.builder.SetInsertPointBefore(bb.FirstInstruction()) + dummy := r.builder.CreateAlloca(r.mod.Context().Int8Type(), "dummy") + r.builder.SetInsertPointBefore(dummy) + defer dummy.EraseFromParentAsInstruction() + + // Get a list if init calls. A runtime.initAll might look something like this: + // func initAll() { + // unsafe.init() + // machine.init() + // runtime.init() + // } + // This function gets a list of these call instructions. 
var initCalls []llvm.Value for inst := bb.FirstInstruction(); !inst.IsNil(); inst = llvm.NextInstruction(inst) { if inst == dummy { @@ -63,99 +77,67 @@ func Run(mod llvm.Module, debug bool) error { break // ret void } if inst.IsACallInst().IsNil() || inst.CalledValue().IsAFunction().IsNil() { - return errorAt(inst, "interp: expected all instructions in "+name+" to be direct calls") + return errorAt(inst, "interp: expected all instructions in "+initAll.Name()+" to be direct calls") } initCalls = append(initCalls, inst) } - // Do this in a separate step to avoid corrupting the iterator above. - undefPtr := llvm.Undef(llvm.PointerType(mod.Context().Int8Type(), 0)) + // Run initializers for each package. Once the package initializer is + // finished, the call to the package initializer can be removed. for _, call := range initCalls { initName := call.CalledValue().Name() if !strings.HasSuffix(initName, ".init") { - return errorAt(call, "interp: expected all instructions in "+name+" to be *.init() calls") + return errorAt(call, "interp: expected all instructions in "+initAll.Name()+" to be *.init() calls") } - pkgName := initName[:len(initName)-5] + r.pkgName = initName[:len(initName)-len(".init")] fn := call.CalledValue() - call.EraseFromParentAsInstruction() - evalPkg := evalPackage{ - Eval: e, - packagePath: pkgName, + if r.debug { + fmt.Fprintln(os.Stderr, "call:", fn.Name()) } - _, err := evalPkg.function(fn, []Value{&LocalValue{e, undefPtr}, &LocalValue{e, undefPtr}}, "") - if err == errUnreachable { - break + _, mem, callErr := r.run(r.getFunction(fn), nil, nil, " ") + if callErr != nil { + if isRecoverableError(callErr.Err) { + if r.debug { + fmt.Fprintln(os.Stderr, "not interpretring", r.pkgName, "because of error:", callErr.Err) + } + mem.revert() + continue + } + return callErr } - if err != nil { - return err + call.EraseFromParentAsInstruction() + for index, obj := range mem.objects { + r.objects[index] = obj } } + r.pkgName = "" - return nil -} - -// 
function interprets the given function. The params are the function params -// and the indent is the string indentation to use when dumping all interpreted -// instructions. -func (e *evalPackage) function(fn llvm.Value, params []Value, indent string) (Value, *Error) { - fr := frame{ - evalPackage: e, - fn: fn, - locals: make(map[llvm.Value]Value), - } - for i, param := range fn.Params() { - fr.locals[param] = params[i] - } - - bb := fn.EntryBasicBlock() - var lastBB llvm.BasicBlock - for { - retval, outgoing, err := fr.evalBasicBlock(bb, lastBB, indent) - if outgoing == nil { - // returned something (a value or void, or an error) - return retval, err + // Update all global variables in the LLVM module. + mem := memoryView{r: &r} + for _, obj := range r.objects { + if obj.llvmGlobal.IsNil() { + continue } - if len(outgoing) > 1 { - panic("unimplemented: multiple outgoing blocks") + if obj.buffer == nil { + continue } - next := outgoing[0] - if next.IsABasicBlock().IsNil() { - panic("did not switch to a basic block") + initializer := obj.buffer.toLLVMValue(obj.llvmGlobal.Type().ElementType(), &mem) + if checks && initializer.Type() != obj.llvmGlobal.Type().ElementType() { + panic("initializer type mismatch") } - lastBB = bb - bb = next.AsBasicBlock() + obj.llvmGlobal.SetInitializer(initializer) } -} -// getValue determines what kind of LLVM value it gets and returns the -// appropriate Value type. -func (e *Eval) getValue(v llvm.Value) Value { - return &LocalValue{e, v} + return nil } -// markDirty marks the passed-in LLVM value dirty, recursively. For example, -// when it encounters a constant GEP on a global, it marks the global dirty. 
-func (e *Eval) markDirty(v llvm.Value) { - if !v.IsAGlobalVariable().IsNil() { - if v.IsGlobalConstant() { - return - } - if _, ok := e.dirtyGlobals[v]; !ok { - e.dirtyGlobals[v] = struct{}{} - e.sideEffectFuncs = nil // re-calculate all side effects - } - } else if v.IsConstant() { - if v.OperandsCount() >= 2 && !v.Operand(0).IsAGlobalVariable().IsNil() { - // looks like a constant getelementptr of a global. - // TODO: find a way to make sure it really is: v.Opcode() returns 0. - e.markDirty(v.Operand(0)) - return - } - return // nothing to mark - } else if !v.IsAGetElementPtrInst().IsNil() { - panic("interp: todo: GEP") - } else { - // Not constant and not a global or GEP so doesn't have to be marked - // non-constant. +// getFunction returns the compiled version of the given LLVM function. It +// compiles the function if necessary and caches the result. +func (r *runner) getFunction(llvmFn llvm.Value) *function { + if fn, ok := r.functionCache[llvmFn]; ok { + return fn } + fn := r.compileFunction(llvmFn) + r.functionCache[llvmFn] = fn + return fn } diff --git a/interp/interp_test.go b/interp/interp_test.go index dba7387187..29a405dc53 100644 --- a/interp/interp_test.go +++ b/interp/interp_test.go @@ -42,9 +42,29 @@ func runTest(t *testing.T, pathPrefix string) { // Perform the transform. err = Run(mod, false) if err != nil { + if err, match := err.(*Error); match { + println(err.Error()) + if !err.Inst.IsNil() { + err.Inst.Dump() + println() + } + if len(err.Traceback) > 0 { + println("\ntraceback:") + for _, line := range err.Traceback { + println(line.Pos.String() + ":") + line.Inst.Dump() + println() + } + } + } t.Fatal(err) } + // To be sure, verify that the module is still valid. + if llvm.VerifyModule(mod, llvm.PrintMessageAction) != nil { + t.FailNow() + } + // Run some cleanup passes to get easy-to-read outputs. 
pm := llvm.NewPassManager() defer pm.Dispose() diff --git a/interp/interpreter.go b/interp/interpreter.go new file mode 100644 index 0000000000..36a845d7c5 --- /dev/null +++ b/interp/interpreter.go @@ -0,0 +1,917 @@ +package interp + +import ( + "errors" + "fmt" + "math" + "os" + "strings" + "time" + + "tinygo.org/x/go-llvm" +) + +func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent string) (value, memoryView, *Error) { + mem := memoryView{r: r, parent: parentMem} + locals := make([]value, len(fn.locals)) + r.callsExecuted++ + + if time.Since(r.start) > time.Minute { + // Running for more than a minute. This should never happen. + return nil, mem, r.errorAt(fn.blocks[0].instructions[0], fmt.Errorf("interp: running for more than a minute, timing out (executed calls: %d)", r.callsExecuted)) + } + + // Parameters are considered a kind of local values. + for i, param := range params { + locals[i] = param + } + + // Start with the first basic block and the first instruction. + // Branch instructions may modify both bb and instIndex when branching. 
+ bb := fn.blocks[0] + currentBB := 0 + lastBB := -1 // last basic block is undefined, only defined after a branch + var operands []value + for instIndex := 0; instIndex < len(bb.instructions); instIndex++ { + inst := bb.instructions[instIndex] + operands = operands[:0] + isRuntimeInst := false + if inst.opcode != llvm.PHI { + for _, v := range inst.operands { + if v, ok := v.(localValue); ok { + if localVal := locals[fn.locals[v.value]]; localVal == nil { + return nil, mem, r.errorAt(inst, errors.New("interp: local not defined")) + } else { + operands = append(operands, localVal) + if _, ok := localVal.(localValue); ok { + isRuntimeInst = true + } + continue + } + } + operands = append(operands, v) + } + } + if isRuntimeInst { + err := r.runAtRuntime(fn, inst, locals, &mem, indent) + if err != nil { + return nil, mem, err + } + continue + } + switch inst.opcode { + case llvm.Ret: + if len(operands) != 0 { + if r.debug { + fmt.Fprintln(os.Stderr, indent+"ret", operands[0]) + } + // Return instruction has a value to return. + return operands[0], mem, nil + } + if r.debug { + fmt.Fprintln(os.Stderr, indent+"ret") + } + // Return instruction doesn't return anything, it's just 'ret void'. 
+ return nil, mem, nil + case llvm.Br: + switch len(operands) { + case 1: + // Unconditional branch: [nextBB] + lastBB = currentBB + currentBB = int(operands[0].(literalValue).value.(uint32)) + bb = fn.blocks[currentBB] + instIndex = -1 // start at 0 the next cycle + if r.debug { + fmt.Fprintln(os.Stderr, indent+"br", operands, "->", currentBB) + } + case 3: + // Conditional branch: [cond, thenBB, elseBB] + lastBB = currentBB + switch operands[0].Uint() { + case 1: // true -> thenBB + currentBB = int(operands[1].(literalValue).value.(uint32)) + case 0: // false -> elseBB + currentBB = int(operands[2].(literalValue).value.(uint32)) + default: + panic("bool should be 0 or 1") + } + if r.debug { + fmt.Fprintln(os.Stderr, indent+"br", operands, "->", currentBB) + } + bb = fn.blocks[currentBB] + instIndex = -1 // start at 0 the next cycle + default: + panic("unknown operands length") + } + break // continue with next block + case llvm.PHI: + var result value + for i := 0; i < len(inst.operands); i += 2 { + if int(inst.operands[i].(literalValue).value.(uint32)) == lastBB { + incoming := inst.operands[i+1] + if local, ok := incoming.(localValue); ok { + result = locals[fn.locals[local.value]] + } else { + result = incoming + } + break + } + } + if r.debug { + fmt.Fprintln(os.Stderr, indent+"phi", inst.operands, "->", result) + } + if result == nil { + panic("could not find PHI input") + } + locals[inst.localIndex] = result + case llvm.Select: + // Select is much like a ternary operator: it picks a result from + // the second and third operand based on the boolean first operand. + var result value + switch operands[0].Uint() { + case 1: + result = operands[1] + case 0: + result = operands[2] + default: + panic("boolean must be 0 or 1") + } + locals[inst.localIndex] = result + if r.debug { + fmt.Fprintln(os.Stderr, indent+"select", operands, "->", result) + } + case llvm.Call: + // A call instruction can either be a regular call or a runtime intrinsic. 
+ fnPtr, err := operands[0].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + callFn := r.getFunction(fnPtr.llvmValue(&mem)) + switch { + case callFn.name == "runtime.trackPointer": + // Allocas and such are created as globals, so don't need a + // runtime.trackPointer. + // Unless the object is allocated at runtime for example, in + // which case this call won't even get to this point but will + // already be emitted in initAll. + continue + case callFn.name == "(reflect.Type).Elem" || strings.HasPrefix(callFn.name, "runtime.print") || callFn.name == "runtime._panic" || callFn.name == "runtime.hashmapGet": + // These functions should be run at runtime. Specifically: + // * (reflect.Type).Elem is a special function. It should + // eventually be interpreted, but fall back to a runtime call + // for now. + // * Print and panic functions are best emitted directly without + // interpreting them, otherwise we get a ton of putchar (etc.) + // calls. + // * runtime.hashmapGet tries to access the map value directly. + // This is not possible as the map value is treated as a special + // kind of object in this package. + err := r.runAtRuntime(fn, inst, locals, &mem, indent) + if err != nil { + return nil, mem, err + } + case callFn.name == "runtime.nanotime" && r.pkgName == "time": + // The time package contains a call to runtime.nanotime. + // This appears to be to work around a limitation in Windows + // Server 2008: + // > Monotonic times are reported as offsets from startNano. + // > We initialize startNano to runtimeNano() - 1 so that on systems where + // > monotonic time resolution is fairly low (e.g. Windows 2008 + // > which appears to have a default resolution of 15ms), + // > we avoid ever reporting a monotonic time of 0. + // > (Callers may want to use 0 as "time not set".) + // Simply let runtime.nanotime return 0 in this case, which + // should be fine and avoids a call to runtime.nanotime. 
It + // means that monotonic time in the time package is counted from + // time.Time{}.Sub(1), which should be fine. + locals[inst.localIndex] = literalValue{uint64(0)} + case callFn.name == "runtime.alloc": + // Allocate heap memory. At compile time, this is instead done + // by creating a global variable. + + // Get the requested memory size to be allocated. + size := operands[1].Uint() + + // Create the object. + alloc := object{ + globalName: r.pkgName + "$alloc", + buffer: newRawValue(uint32(size)), + size: uint32(size), + } + index := len(r.objects) + r.objects = append(r.objects, alloc) + + // And create a pointer to this object, for working with it (so + // that stores to it copy it, etc). + ptr := newPointerValue(r, index, 0) + if r.debug { + fmt.Fprintln(os.Stderr, indent+"runtime.alloc:", size, "->", ptr) + } + locals[inst.localIndex] = ptr + case callFn.name == "runtime.sliceCopy": + // sliceCopy implements the built-in copy function for slices. + // It is implemented here so that it can be used even if the + // runtime implementation is not available. Doing it this way + // may also be faster. + // Code: + // func sliceCopy(dst, src unsafe.Pointer, dstLen, srcLen uintptr, elemSize uintptr) int { + // n := srcLen + // if n > dstLen { + // n = dstLen + // } + // memmove(dst, src, n*elemSize) + // return int(n) + // } + dstLen := operands[3].Uint() + srcLen := operands[4].Uint() + elemSize := operands[5].Uint() + n := srcLen + if n > dstLen { + n = dstLen + } + if r.debug { + fmt.Fprintln(os.Stderr, indent+"copy:", operands[1], operands[2], n) + } + if n != 0 { + // Only try to copy bytes when there are any bytes to copy. + // This is not just an optimization. If one of the slices + // (or both) are nil, the asPointer method call will fail + // even though copying a nil slice is allowed. 
+ dst, err := operands[1].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + src, err := operands[2].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + nBytes := uint32(n * elemSize) + dstObj := mem.getWritable(dst.index()) + dstBuf := dstObj.buffer.asRawValue(r) + srcBuf := mem.get(src.index()).buffer.asRawValue(r) + copy(dstBuf.buf[dst.offset():dst.offset()+nBytes], srcBuf.buf[src.offset():]) + dstObj.buffer = dstBuf + mem.put(dst.index(), dstObj) + } + switch inst.llvmInst.Type().IntTypeWidth() { + case 16: + locals[inst.localIndex] = literalValue{uint16(n)} + case 32: + locals[inst.localIndex] = literalValue{uint32(n)} + case 64: + locals[inst.localIndex] = literalValue{uint64(n)} + default: + panic("unknown integer type width") + } + case strings.HasPrefix(callFn.name, "llvm.memcpy.p0i8.p0i8.") || strings.HasPrefix(callFn.name, "llvm.memmove.p0i8.p0i8."): + // Copy a block of memory from one pointer to another. + dst, err := operands[1].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + src, err := operands[2].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + nBytes := uint32(operands[3].Uint()) + dstObj := mem.getWritable(dst.index()) + dstBuf := dstObj.buffer.asRawValue(r) + srcBuf := mem.get(src.index()).buffer.asRawValue(r) + copy(dstBuf.buf[dst.offset():dst.offset()+nBytes], srcBuf.buf[src.offset():]) + dstObj.buffer = dstBuf + mem.put(dst.index(), dstObj) + case callFn.name == "runtime.typeAssert": + // This function must be implemented manually as it is normally + // implemented by the interface lowering pass. 
+ if r.debug { + fmt.Fprintln(os.Stderr, indent+"typeassert:", operands[1:]) + } + typeInInterfacePtr, err := operands[1].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + actualType, err := mem.load(typeInInterfacePtr, r.pointerSize).asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + assertedType, err := operands[2].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + result := assertedType.asRawValue(r).equal(actualType.asRawValue(r)) + if result { + locals[inst.localIndex] = literalValue{uint8(1)} + } else { + locals[inst.localIndex] = literalValue{uint8(0)} + } + case callFn.name == "runtime.interfaceImplements": + if r.debug { + fmt.Fprintln(os.Stderr, indent+"interface assert:", operands[1:]) + } + + // Load various values for the interface implements check below. + typeInInterfacePtr, err := operands[1].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + methodSetPtr, err := mem.load(typeInInterfacePtr.addOffset(r.pointerSize), r.pointerSize).asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + methodSet := mem.get(methodSetPtr.index()).llvmGlobal.Initializer() + interfaceMethodSetPtr, err := operands[2].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + interfaceMethodSet := mem.get(interfaceMethodSetPtr.index()).llvmGlobal.Initializer() + + // Make a set of all the methods on the concrete type, for + // easier checking in the next step. + concreteTypeMethods := map[string]struct{}{} + for i := 0; i < methodSet.Type().ArrayLength(); i++ { + methodInfo := llvm.ConstExtractValue(methodSet, []uint32{uint32(i)}) + name := llvm.ConstExtractValue(methodInfo, []uint32{0}).Name() + concreteTypeMethods[name] = struct{}{} + } + + // Check whether all interface methods are also in the list + // of defined methods calculated above. This is the interface + // assert itself. 
+ assertOk := uint8(1) // i1 true + for i := 0; i < interfaceMethodSet.Type().ArrayLength(); i++ { + name := llvm.ConstExtractValue(interfaceMethodSet, []uint32{uint32(i)}).Name() + if _, ok := concreteTypeMethods[name]; !ok { + // There is a method on the interface that is not + // implemented by the type. The assertion will fail. + assertOk = 0 // i1 false + break + } + } + // If assertOk is still 1, the assertion succeeded. + locals[inst.localIndex] = literalValue{assertOk} + case callFn.name == "runtime.hashmapMake": + // Create a new map. + hashmapPointerType := inst.llvmInst.Type() + keySize := uint32(operands[1].Uint()) + valueSize := uint32(operands[2].Uint()) + m := newMapValue(r, hashmapPointerType, keySize, valueSize) + alloc := object{ + llvmType: hashmapPointerType, + globalName: r.pkgName + "$map", + buffer: m, + size: m.len(r), + } + index := len(r.objects) + r.objects = append(r.objects, alloc) + + // Create a pointer to this map. Maps are reference types, so + // are implemented as pointers. + ptr := newPointerValue(r, index, 0) + if r.debug { + fmt.Fprintln(os.Stderr, indent+"runtime.hashmapMake:", keySize, valueSize, "->", ptr) + } + locals[inst.localIndex] = ptr + case callFn.name == "runtime.hashmapBinarySet": + // Do a mapassign operation with a binary key (that is, without + // a string key). 
+ if r.debug { + fmt.Fprintln(os.Stderr, indent+"runtime.hashmapBinarySet:", operands[1:]) + } + mapPtr, err := operands[1].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + m := mem.getWritable(mapPtr.index()).buffer.(*mapValue) + keyPtr, err := operands[2].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + valuePtr, err := operands[3].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + err = m.putBinary(&mem, keyPtr, valuePtr) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + case callFn.name == "runtime.hashmapStringSet": + // Do a mapassign operation with a string key. + if r.debug { + fmt.Fprintln(os.Stderr, indent+"runtime.hashmapStringSet:", operands[1:]) + } + mapPtr, err := operands[1].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + m := mem.getWritable(mapPtr.index()).buffer.(*mapValue) + stringPtr, err := operands[2].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + stringLen := operands[3].Uint() + valuePtr, err := operands[4].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + err = m.putString(&mem, stringPtr, stringLen, valuePtr) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + default: + if len(callFn.blocks) == 0 { + // Call to a function declaration without a definition + // available. + err := r.runAtRuntime(fn, inst, locals, &mem, indent) + if err != nil { + return nil, mem, err + } + continue + } + // Call a function with a definition available. Run it as usual, + // possibly trying to recover from it if it failed to execute. 
+ if r.debug { + argStrings := make([]string, len(operands)-1) + for i := range argStrings { + argStrings[i] = operands[i+1].String() + } + fmt.Fprintln(os.Stderr, indent+"call:", callFn.name+"("+strings.Join(argStrings, ", ")+")") + } + retval, callMem, callErr := r.run(callFn, operands[1:], &mem, indent+" ") + if callErr != nil { + if isRecoverableError(callErr.Err) { + // This error can be recovered by doing the call at + // runtime instead of at compile time. But we need to + // revert any changes made by the call first. + if r.debug { + fmt.Fprintln(os.Stderr, indent+"!! revert because of error:", callErr.Err) + } + callMem.revert() + err := r.runAtRuntime(fn, inst, locals, &mem, indent) + if err != nil { + return nil, mem, err + } + continue + } + // Add to the traceback, so that error handling code can see + // how this function got called. + callErr.Traceback = append(callErr.Traceback, ErrorLine{ + Pos: getPosition(inst.llvmInst), + Inst: inst.llvmInst, + }) + return nil, mem, callErr + } + locals[inst.localIndex] = retval + mem.extend(callMem) + } + case llvm.Load: + // Load instruction, loading some data from the topmost memory view. + ptr, err := operands[0].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + size := operands[1].(literalValue).value.(uint64) + if mem.hasExternalStore(ptr) { + // If there could be an external store (for example, because a + // pointer to the object was passed to a function that could not + // be interpreted at compile time) then the load must be done at + // runtime. + err := r.runAtRuntime(fn, inst, locals, &mem, indent) + if err != nil { + return nil, mem, err + } + continue + } + result := mem.load(ptr, uint32(size)) + if r.debug { + fmt.Fprintln(os.Stderr, indent+"load:", ptr, "->", result) + } + locals[inst.localIndex] = result + case llvm.Store: + // Store instruction. Create a new object in the memory view and + // store to that, to make it possible to roll back this store. 
+ ptr, err := operands[1].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + if mem.hasExternalLoadOrStore(ptr) { + err := r.runAtRuntime(fn, inst, locals, &mem, indent) + if err != nil { + return nil, mem, err + } + continue + } + val := operands[0] + if r.debug { + fmt.Fprintln(os.Stderr, indent+"store:", val, ptr) + } + mem.store(val, ptr) + case llvm.Alloca: + // Alloca normally allocates some stack memory. In the interpreter, + // it allocates a global instead. + // This can likely be optimized, as all it really needs is an alloca + // in the initAll function and creating a global is wasteful for + // this purpose. + + // Create the new object. + size := operands[0].(literalValue).value.(uint64) + alloca := object{ + llvmType: inst.llvmInst.Type(), + globalName: r.pkgName + "$alloca", + buffer: newRawValue(uint32(size)), + size: uint32(size), + } + index := len(r.objects) + r.objects = append(r.objects, alloca) + + // Create a pointer to this object (an alloca produces a pointer). + ptr := newPointerValue(r, index, 0) + if r.debug { + fmt.Fprintln(os.Stderr, indent+"alloca:", operands, "->", ptr) + } + locals[inst.localIndex] = ptr + case llvm.GetElementPtr: + // GetElementPtr does pointer arithmetic, changing the offset of the + // pointer into the underlying object. + var offset uint64 + var gepOperands []uint64 + for i := 2; i < len(operands); i += 2 { + index := operands[i].Uint() + elementSize := operands[i+1].Uint() + if int64(elementSize) < 0 { + // This is a struct field. + // The field number is encoded by flipping all the bits. + gepOperands = append(gepOperands, ^elementSize) + offset += index + } else { + // This is a normal GEP, probably an array index. 
+ gepOperands = append(gepOperands, index) + offset += elementSize * index + } + } + ptr, err := operands[0].asPointer(r) + if err != nil { + return nil, mem, r.errorAt(inst, err) + } + ptr = ptr.addOffset(uint32(offset)) + locals[inst.localIndex] = ptr + if r.debug { + fmt.Fprintln(os.Stderr, indent+"gep:", operands, "->", ptr) + } + case llvm.BitCast, llvm.IntToPtr, llvm.PtrToInt: + // Various bitcast-like instructions that all keep the same bits + // while changing the LLVM type. + // Because interp doesn't preserve the type, these operations are + // identity operations. + if r.debug { + fmt.Fprintln(os.Stderr, indent+instructionNameMap[inst.opcode]+":", operands[0]) + } + locals[inst.localIndex] = operands[0] + case llvm.ExtractValue: + agg := operands[0].asRawValue(r) + offset := operands[1].(literalValue).value.(uint64) + size := operands[2].(literalValue).value.(uint64) + elt := rawValue{ + buf: agg.buf[offset : offset+size], + } + if r.debug { + fmt.Fprintln(os.Stderr, indent+"extractvalue:", operands, "->", elt) + } + locals[inst.localIndex] = elt + case llvm.InsertValue: + agg := operands[0].asRawValue(r) + elt := operands[1].asRawValue(r) + offset := int(operands[2].(literalValue).value.(uint64)) + newagg := newRawValue(uint32(len(agg.buf))) + copy(newagg.buf, agg.buf) + copy(newagg.buf[offset:], elt.buf) + if r.debug { + fmt.Fprintln(os.Stderr, indent+"insertvalue:", operands, "->", newagg) + } + locals[inst.localIndex] = newagg + case llvm.ICmp: + predicate := llvm.IntPredicate(operands[2].(literalValue).value.(uint8)) + var result bool + lhs := operands[0] + rhs := operands[1] + switch predicate { + case llvm.IntEQ, llvm.IntNE: + lhsPointer, lhsErr := lhs.asPointer(r) + rhsPointer, rhsErr := rhs.asPointer(r) + if (lhsErr == nil) != (rhsErr == nil) { + // Fast path: only one is a pointer, so they can't be equal. + result = false + } else if lhsErr == nil { + // Both must be nil, so both are pointers. + // Compare them directly. 
+ result = lhsPointer.equal(rhsPointer) + } else { + // Fall back to generic comparison. + result = lhs.asRawValue(r).equal(rhs.asRawValue(r)) + } + if predicate == llvm.IntNE { + result = !result + } + case llvm.IntUGT: + result = lhs.Uint() > rhs.Uint() + case llvm.IntUGE: + result = lhs.Uint() >= rhs.Uint() + case llvm.IntULT: + result = lhs.Uint() < rhs.Uint() + case llvm.IntULE: + result = lhs.Uint() <= rhs.Uint() + case llvm.IntSGT: + result = lhs.Int() > rhs.Int() + case llvm.IntSGE: + result = lhs.Int() >= rhs.Int() + case llvm.IntSLT: + result = lhs.Int() < rhs.Int() + case llvm.IntSLE: + result = lhs.Int() <= rhs.Int() + default: + return nil, mem, r.errorAt(inst, errors.New("interp: unsupported icmp")) + } + if result { + locals[inst.localIndex] = literalValue{uint8(1)} + } else { + locals[inst.localIndex] = literalValue{uint8(0)} + } + if r.debug { + fmt.Fprintln(os.Stderr, indent+"icmp:", operands[0], intPredicateString(predicate), operands[1], "->", result) + } + case llvm.FCmp: + predicate := llvm.FloatPredicate(operands[2].(literalValue).value.(uint8)) + var result bool + var lhs, rhs float64 + switch operands[0].len(r) { + case 8: + lhs = math.Float64frombits(operands[0].Uint()) + rhs = math.Float64frombits(operands[1].Uint()) + case 4: + lhs = float64(math.Float32frombits(uint32(operands[0].Uint()))) + rhs = float64(math.Float32frombits(uint32(operands[1].Uint()))) + default: + panic("unknown float type") + } + switch predicate { + case llvm.FloatOEQ: + result = lhs == rhs + case llvm.FloatUNE: + result = lhs != rhs + case llvm.FloatOGT: + result = lhs > rhs + case llvm.FloatOGE: + result = lhs >= rhs + case llvm.FloatOLT: + result = lhs < rhs + case llvm.FloatOLE: + result = lhs <= rhs + default: + return nil, mem, r.errorAt(inst, errors.New("interp: unsupported fcmp")) + } + if result { + locals[inst.localIndex] = literalValue{uint8(1)} + } else { + locals[inst.localIndex] = literalValue{uint8(0)} + } + if r.debug { + fmt.Fprintln(os.Stderr, 
indent+"fcmp:", operands[0], predicate, operands[1], "->", result) + } + case llvm.Add, llvm.Sub, llvm.Mul, llvm.UDiv, llvm.SDiv, llvm.URem, llvm.SRem, llvm.Shl, llvm.LShr, llvm.AShr, llvm.And, llvm.Or, llvm.Xor: + // Integer binary operations. + lhs := operands[0] + rhs := operands[1] + lhsPtr, err := lhs.asPointer(r) + if err == nil { + // The lhs is a pointer. This sometimes happens for particular + // pointer tricks. + switch inst.opcode { + case llvm.Add: + // This likely means this is part of a + // unsafe.Pointer(uintptr(ptr) + offset) pattern. + lhsPtr = lhsPtr.addOffset(uint32(rhs.Uint())) + locals[inst.localIndex] = lhsPtr + continue + case llvm.Xor: + if rhs.Uint() == 0 { + // Special workaround for strings.noescape, see + // src/strings/builder.go in the Go source tree. This is + // the identity operator, so we can return the input. + locals[inst.localIndex] = lhs + continue + } + default: + // Catch-all for weird operations that should just be done + // at runtime. + err := r.runAtRuntime(fn, inst, locals, &mem, indent) + if err != nil { + return nil, mem, err + } + continue + } + } + var result uint64 + switch inst.opcode { + case llvm.Add: + result = lhs.Uint() + rhs.Uint() + case llvm.Sub: + result = lhs.Uint() - rhs.Uint() + case llvm.Mul: + result = lhs.Uint() * rhs.Uint() + case llvm.UDiv: + result = lhs.Uint() / rhs.Uint() + case llvm.SDiv: + result = uint64(lhs.Int() / rhs.Int()) + case llvm.URem: + result = lhs.Uint() % rhs.Uint() + case llvm.SRem: + result = uint64(lhs.Int() % rhs.Int()) + case llvm.Shl: + result = lhs.Uint() << rhs.Uint() + case llvm.LShr: + result = lhs.Uint() >> rhs.Uint() + case llvm.AShr: + result = uint64(lhs.Int() >> rhs.Uint()) + case llvm.And: + result = lhs.Uint() & rhs.Uint() + case llvm.Or: + result = lhs.Uint() | rhs.Uint() + case llvm.Xor: + result = lhs.Uint() ^ rhs.Uint() + default: + panic("unreachable") + } + switch lhs.len(r) { + case 8: + locals[inst.localIndex] = literalValue{result} + case 4: + 
locals[inst.localIndex] = literalValue{uint32(result)} + case 2: + locals[inst.localIndex] = literalValue{uint16(result)} + case 1: + locals[inst.localIndex] = literalValue{uint8(result)} + default: + panic("unknown integer size") + } + if r.debug { + fmt.Fprintln(os.Stderr, indent+instructionNameMap[inst.opcode]+":", lhs, rhs, "->", result) + } + case llvm.SExt, llvm.ZExt, llvm.Trunc: + // Change the size of an integer to a larger or smaller bit width. + // We make use of the fact that the Uint() function already + // zero-extends the value and that Int() already sign-extends the + // value, so we only need to truncate it to the appropriate bit + // width. This means we can implement sext, zext and trunc in the + // same way, by first {zero,sign}extending all the way up to uint64 + // and then truncating it as necessary. + var value uint64 + if inst.opcode == llvm.SExt { + value = uint64(operands[0].Int()) + } else { + value = operands[0].Uint() + } + bitwidth := operands[1].Uint() + if r.debug { + fmt.Fprintln(os.Stderr, indent+instructionNameMap[inst.opcode]+":", value, bitwidth) + } + switch bitwidth { + case 64: + locals[inst.localIndex] = literalValue{value} + case 32: + locals[inst.localIndex] = literalValue{uint32(value)} + case 16: + locals[inst.localIndex] = literalValue{uint16(value)} + case 8: + locals[inst.localIndex] = literalValue{uint8(value)} + default: + panic("unknown integer size in sext/zext/trunc") + } + case llvm.SIToFP, llvm.UIToFP: + var value float64 + switch inst.opcode { + case llvm.SIToFP: + value = float64(operands[0].Int()) + case llvm.UIToFP: + value = float64(operands[0].Uint()) + } + bitwidth := operands[1].Uint() + if r.debug { + fmt.Fprintln(os.Stderr, indent+instructionNameMap[inst.opcode]+":", value, bitwidth) + } + switch bitwidth { + case 64: + locals[inst.localIndex] = literalValue{math.Float64bits(value)} + case 32: + locals[inst.localIndex] = literalValue{math.Float32bits(float32(value))} + default: + panic("unknown 
integer size in sitofp/uitofp") + } + default: + if r.debug { + fmt.Fprintln(os.Stderr, indent+inst.String()) + } + return nil, mem, r.errorAt(inst, errUnsupportedInst) + } + } + return nil, mem, r.errorAt(bb.instructions[len(bb.instructions)-1], errors.New("interp: reached end of basic block without terminator")) +} + +func (r *runner) runAtRuntime(fn *function, inst instruction, locals []value, mem *memoryView, indent string) *Error { + numOperands := inst.llvmInst.OperandsCount() + operands := make([]llvm.Value, numOperands) + for i := 0; i < numOperands; i++ { + operand := inst.llvmInst.Operand(i) + if !operand.IsAInstruction().IsNil() || !operand.IsAArgument().IsNil() { + operand = locals[fn.locals[operand]].toLLVMValue(operand.Type(), mem) + } + operands[i] = operand + } + if r.debug { + fmt.Fprintln(os.Stderr, indent+inst.String()) + } + var result llvm.Value + switch inst.opcode { + case llvm.Call: + llvmFn := operands[len(operands)-1] + args := operands[:len(operands)-1] + for _, arg := range args { + if arg.Type().TypeKind() == llvm.PointerTypeKind { + mem.markExternalStore(arg) + } + } + result = r.builder.CreateCall(llvmFn, args, inst.name) + case llvm.Load: + mem.markExternalLoad(operands[0]) + result = r.builder.CreateLoad(operands[0], inst.name) + if inst.llvmInst.IsVolatile() { + result.SetVolatile(true) + } + case llvm.Store: + mem.markExternalStore(operands[1]) + result = r.builder.CreateStore(operands[0], operands[1]) + if inst.llvmInst.IsVolatile() { + result.SetVolatile(true) + } + case llvm.BitCast: + result = r.builder.CreateBitCast(operands[0], inst.llvmInst.Type(), inst.name) + case llvm.ExtractValue: + indices := inst.llvmInst.Indices() + if len(indices) != 1 { + panic("expected exactly one index") + } + result = r.builder.CreateExtractValue(operands[0], int(indices[0]), inst.name) + case llvm.InsertValue: + indices := inst.llvmInst.Indices() + if len(indices) != 1 { + panic("expected exactly one index") + } + result = 
r.builder.CreateInsertValue(operands[0], operands[1], int(indices[0]), inst.name) + case llvm.Add: + result = r.builder.CreateAdd(operands[0], operands[1], inst.name) + case llvm.Sub: + result = r.builder.CreateSub(operands[0], operands[1], inst.name) + case llvm.Mul: + result = r.builder.CreateMul(operands[0], operands[1], inst.name) + case llvm.UDiv: + result = r.builder.CreateUDiv(operands[0], operands[1], inst.name) + case llvm.SDiv: + result = r.builder.CreateSDiv(operands[0], operands[1], inst.name) + case llvm.URem: + result = r.builder.CreateURem(operands[0], operands[1], inst.name) + case llvm.SRem: + result = r.builder.CreateSRem(operands[0], operands[1], inst.name) + case llvm.ZExt: + result = r.builder.CreateZExt(operands[0], inst.llvmInst.Type(), inst.name) + default: + return r.errorAt(inst, errUnsupportedRuntimeInst) + } + locals[inst.localIndex] = localValue{result} + mem.instructions = append(mem.instructions, result) + return nil +} + +func intPredicateString(predicate llvm.IntPredicate) string { + switch predicate { + case llvm.IntEQ: + return "eq" + case llvm.IntNE: + return "ne" + case llvm.IntUGT: + return "ugt" + case llvm.IntUGE: + return "uge" + case llvm.IntULT: + return "ult" + case llvm.IntULE: + return "ule" + case llvm.IntSGT: + return "sgt" + case llvm.IntSGE: + return "sge" + case llvm.IntSLT: + return "slt" + case llvm.IntSLE: + return "sle" + default: + return "cmp?" + } +} diff --git a/interp/memory.go b/interp/memory.go new file mode 100644 index 0000000000..5a2343c43c --- /dev/null +++ b/interp/memory.go @@ -0,0 +1,1430 @@ +package interp + +// This file implements memory as used by interp in a reversible way. +// Each new function call creates a new layer which is merged in the parent on +// successful return and is thrown away when the function couldn't complete (in +// which case the function call is done at runtime). +// Memory is not typed, except that there is a difference between pointer and +// non-pointer data. 
A pointer always points to an object. This implies: +// * Nil pointers are zero, and are not considered a pointer. +// * Pointers for memory-mapped I/O point to numeric pointer values, and are +// thus not considered pointers but regular values. Dereferencing them cannot be +// done in interp and results in a revert. +// +// Right now the memory is assumed to be little endian. This will need an update +// for big endian architectures, if TinyGo ever adds support for one. + +import ( + "encoding/binary" + "errors" + "math" + "strconv" + "strings" + + "tinygo.org/x/go-llvm" +) + +// An object is a memory buffer that may be an already existing global or a +// global created with runtime.alloc or the alloca instruction. If llvmGlobal is +// set, that's the global for this object, otherwise it needs to be created (if +// it is still reachable when the package initializer returns). +// +// Objects are copied in a memory view when they are stored to, to provide the +// ability to roll back interpreting a function. +type object struct { + llvmGlobal llvm.Value + llvmType llvm.Type // must match llvmGlobal.Type() if both are set, may be unset if llvmGlobal is set + globalName string // name, if not yet created (not guaranteed to be the final name) + buffer value // buffer with value as given by interp, nil if external + size uint32 // must match buffer.len(), if available + marked uint8 // 0 means unmarked, 1 means external read, 2 means external write +} + +// clone() returns a cloned version of this object, for when an object needs to +// be written to for example. +func (obj object) clone() object { + if obj.buffer != nil { + obj.buffer = obj.buffer.clone() + } + return obj +} + +// A memoryView is bound to a function activation. Loads are done from this view +// or a parent view (up to the *runner if it isn't included in a view). Stores +// copy the object to the current view. +// +// For details, see the README in the package. 
+type memoryView struct { + r *runner + parent *memoryView + objects map[uint32]object + + // These instructions were added to runtime.initAll while interpreting a + // function. They are stored here in a list so they can be removed if the + // execution of the function needs to be rolled back. + instructions []llvm.Value +} + +// extend integrates the changes done by the sub-memoryView into this memory +// view. This happens when a function is successfully interpreted and returns to +// the parent, in which case all changed objects should be included in this +// memory view. +func (mv *memoryView) extend(sub memoryView) { + if mv.objects == nil && len(sub.objects) != 0 { + mv.objects = make(map[uint32]object) + } + for key, value := range sub.objects { + mv.objects[key] = value + } + mv.instructions = append(mv.instructions, sub.instructions...) +} + +// revert undoes changes done in this memory view: it removes all instructions +// created in this memoryView. Do not reuse this memoryView. +func (mv *memoryView) revert() { + // Erase instructions in reverse order. + for i := len(mv.instructions) - 1; i >= 0; i-- { + llvmInst := mv.instructions[i] + if llvmInst.IsAInstruction().IsNil() { + // The IR builder will try to create constant versions of + // instructions whenever possible. If it does this, it's not an + // instruction and thus shouldn't be removed. + continue + } + llvmInst.EraseFromParentAsInstruction() + } +} + +// markExternalLoad marks the given LLVM value as having an external read. That +// means that the interpreter can still read from it, but cannot write to it as +// that would mean the external read (done at runtime) reads from a state that +// would not exist had the whole initialization been done at runtime. +func (mv *memoryView) markExternalLoad(llvmValue llvm.Value) { + mv.markExternal(llvmValue, 1) +} + +// markExternalStore marks the given LLVM value as having an external write. 
+// This means that the interpreter can no longer read from it or write to it, as +// that would happen in a different order than if all initialization were +// happening at runtime. +func (mv *memoryView) markExternalStore(llvmValue llvm.Value) { + mv.markExternal(llvmValue, 2) +} + +// markExternal is a helper for markExternalLoad and markExternalStore, and +// should not be called directly. +func (mv *memoryView) markExternal(llvmValue llvm.Value, mark uint8) { + if llvmValue.IsUndef() || llvmValue.IsNull() { + // Null and undef definitely don't contain (valid) pointers. + return + } + if !llvmValue.IsAInstruction().IsNil() || !llvmValue.IsAArgument().IsNil() { + // These are considered external by default, there is nothing to mark. + return + } + + if !llvmValue.IsAGlobalValue().IsNil() { + objectIndex := mv.r.getValue(llvmValue).(pointerValue).index() + obj := mv.get(objectIndex) + if obj.marked < mark { + obj = obj.clone() + obj.marked = mark + if mv.objects == nil { + mv.objects = make(map[uint32]object) + } + mv.objects[objectIndex] = obj + if !llvmValue.IsAGlobalVariable().IsNil() { + initializer := llvmValue.Initializer() + if !initializer.IsNil() { + // Using mark '2' (which means read/write access) because + // even from an object that is only read from, the resulting + // loaded pointer can be written to. + mv.markExternal(initializer, 2) + } + } else { + // This is a function. Go through all instructions and mark all + // objects in there. 
+ for bb := llvmValue.FirstBasicBlock(); !bb.IsNil(); bb = llvm.NextBasicBlock(bb) { + for inst := bb.FirstInstruction(); !inst.IsNil(); inst = llvm.NextInstruction(inst) { + opcode := inst.InstructionOpcode() + if opcode == llvm.Call { + calledValue := inst.CalledValue() + if !calledValue.IsAFunction().IsNil() { + functionName := calledValue.Name() + if functionName == "llvm.dbg.value" || strings.HasPrefix(functionName, "llvm.lifetime.") { + continue + } + } + } + if opcode == llvm.Br || opcode == llvm.Switch { + // These don't affect memory. Skipped here because + // they also have a label as operand. + continue + } + numOperands := inst.OperandsCount() + for i := 0; i < numOperands; i++ { + // Using mark '2' (which means read/write access) + // because this might be a store instruction. + mv.markExternal(inst.Operand(i), 2) + } + } + } + } + } + } else if !llvmValue.IsAConstantExpr().IsNil() { + switch llvmValue.Opcode() { + case llvm.IntToPtr, llvm.PtrToInt, llvm.BitCast, llvm.GetElementPtr: + mv.markExternal(llvmValue.Operand(0), mark) + default: + panic("interp: unknown constant expression") + } + } else { + llvmType := llvmValue.Type() + switch llvmType.TypeKind() { + case llvm.IntegerTypeKind, llvm.FloatTypeKind, llvm.DoubleTypeKind: + // Nothing to do here. Integers and floats aren't pointers so don't + // need any marking. + case llvm.StructTypeKind: + numElements := llvmType.StructElementTypesCount() + for i := 0; i < numElements; i++ { + element := llvm.ConstExtractValue(llvmValue, []uint32{uint32(i)}) + mv.markExternal(element, mark) + } + case llvm.ArrayTypeKind: + numElements := llvmType.ArrayLength() + for i := 0; i < numElements; i++ { + element := llvm.ConstExtractValue(llvmValue, []uint32{uint32(i)}) + mv.markExternal(element, mark) + } + default: + panic("interp: unknown type kind in markExternalValue") + } + } +} + +// hasExternalLoadOrStore returns true if this object has an external load or +// store. 
If this has happened, it is not possible for the interpreter to load +// from the object or store to it without affecting the behavior of the program. +func (mv *memoryView) hasExternalLoadOrStore(v pointerValue) bool { + obj := mv.get(v.index()) + return obj.marked >= 1 +} + +// hasExternalStore returns true if this object has an external store. If this +// is true, stores to this object are no longer allowed by the interpreter. +// It returns false if it only has an external load, in which case it is still +// possible for the interpreter to read from the object. +func (mv *memoryView) hasExternalStore(v pointerValue) bool { + obj := mv.get(v.index()) + return obj.marked >= 2 +} + +// get returns an object that can only be read from, as it may return an object +// of a parent view. +func (mv *memoryView) get(index uint32) object { + if obj, ok := mv.objects[index]; ok { + return obj + } + if mv.parent != nil { + return mv.parent.get(index) + } + return mv.r.objects[index] +} + +// getWritable returns an object that can be written to. +func (mv *memoryView) getWritable(index uint32) object { + if obj, ok := mv.objects[index]; ok { + // Object is already in the current memory view, so can be modified. + return obj + } + // Object is not currently in this view. Get it, and clone it for use. + obj := mv.get(index).clone() + mv.r.objects[index] = obj + return obj +} + +// Replace the object (indicated with index) with the given object. This put is +// only done at the current memory view, so that if this memory view is reverted +// the object is not changed. +func (mv *memoryView) put(index uint32, obj object) { + if mv.objects == nil { + mv.objects = make(map[uint32]object) + } + if checks && mv.get(index).buffer == nil { + panic("writing to external object") + } + if checks && mv.get(index).buffer.len(mv.r) != obj.buffer.len(mv.r) { + panic("put() with a differently-sized object") + } + mv.objects[index] = obj +} + +// Load the value behind the given pointer. 
+func (mv *memoryView) load(p pointerValue, size uint32) value { + if checks && mv.hasExternalStore(p) { + panic("interp: load from object with external store") + } + obj := mv.get(p.index()) + if p.offset() == 0 && size == obj.size { + return obj.buffer.clone() + } + if checks && p.offset()+size > obj.size { + panic("interp: load out of bounds") + } + v := obj.buffer.asRawValue(mv.r) + loadedValue := rawValue{ + buf: v.buf[p.offset() : p.offset()+size], + } + return loadedValue +} + +// Store to the value behind the given pointer. This overwrites the value in the +// memory view, so that the changed value is discarded when the memory view is +// reverted. +func (mv *memoryView) store(v value, p pointerValue) { + if checks && mv.hasExternalLoadOrStore(p) { + panic("interp: store to object with external load/store") + } + obj := mv.get(p.index()) + if checks && p.offset()+v.len(mv.r) > obj.size { + panic("interp: store out of bounds") + } + if p.offset() == 0 && v.len(mv.r) == obj.buffer.len(mv.r) { + obj.buffer = v + } else { + obj = obj.clone() + buffer := obj.buffer.asRawValue(mv.r) + obj.buffer = buffer + v := v.asRawValue(mv.r) + for i := uint32(0); i < v.len(mv.r); i++ { + buffer.buf[p.offset()+i] = v.buf[i] + } + } + mv.put(p.index(), obj) +} + +// value is some sort of value, comparable to a LLVM constant. It can be +// implemented in various ways for efficiency, but the fallback value (that all +// implementations can be converted to except for localValue) is rawValue. +type value interface { + // len returns the length in bytes. + len(r *runner) uint32 + clone() value + asPointer(*runner) (pointerValue, error) + asRawValue(*runner) rawValue + Uint() uint64 + Int() int64 + toLLVMValue(llvm.Type, *memoryView) llvm.Value + String() string +} + +// literalValue contains simple integer values that don't need to be stored in a +// buffer. 
+type literalValue struct { + value interface{} +} + +func (v literalValue) len(r *runner) uint32 { + switch v.value.(type) { + case uint64: + return 8 + case uint32: + return 4 + case uint16: + return 2 + case uint8: + return 1 + default: + panic("unknown value type") + } +} + +func (v literalValue) String() string { + return strconv.FormatInt(v.Int(), 10) +} + +func (v literalValue) clone() value { + return v +} + +func (v literalValue) asPointer(r *runner) (pointerValue, error) { + return pointerValue{}, errLiteralToPointer +} + +func (v literalValue) asRawValue(r *runner) rawValue { + var buf []byte + switch value := v.value.(type) { + case uint64: + buf = make([]byte, 8) + binary.LittleEndian.PutUint64(buf, value) + case uint32: + buf = make([]byte, 4) + binary.LittleEndian.PutUint32(buf, uint32(value)) + case uint16: + buf = make([]byte, 2) + binary.LittleEndian.PutUint16(buf, uint16(value)) + case uint8: + buf = []byte{uint8(value)} + default: + panic("unknown value type") + } + raw := newRawValue(uint32(len(buf))) + for i, b := range buf { + raw.buf[i] = uint64(b) + } + return raw +} + +func (v literalValue) Uint() uint64 { + switch value := v.value.(type) { + case uint64: + return value + case uint32: + return uint64(value) + case uint16: + return uint64(value) + case uint8: + return uint64(value) + default: + panic("inpterp: unknown literal type") + } +} + +func (v literalValue) Int() int64 { + switch value := v.value.(type) { + case uint64: + return int64(value) + case uint32: + return int64(int32(value)) + case uint16: + return int64(int16(value)) + case uint8: + return int64(int8(value)) + default: + panic("inpterp: unknown literal type") + } +} + +func (v literalValue) toLLVMValue(llvmType llvm.Type, mem *memoryView) llvm.Value { + switch llvmType.TypeKind() { + case llvm.IntegerTypeKind: + switch value := v.value.(type) { + case uint64: + return llvm.ConstInt(llvmType, value, false) + case uint32: + return llvm.ConstInt(llvmType, uint64(value), 
false) + case uint16: + return llvm.ConstInt(llvmType, uint64(value), false) + case uint8: + return llvm.ConstInt(llvmType, uint64(value), false) + default: + panic("inpterp: unknown literal type") + } + case llvm.DoubleTypeKind: + return llvm.ConstFloat(llvmType, math.Float64frombits(v.value.(uint64))) + case llvm.FloatTypeKind: + return llvm.ConstFloat(llvmType, float64(math.Float32frombits(v.value.(uint32)))) + default: + return v.asRawValue(mem.r).toLLVMValue(llvmType, mem) + } +} + +// pointerValue contains a single pointer, with an offset into the underlying +// object. +type pointerValue struct { + pointer uint64 // low 32 bits are offset, high 32 bits are index +} + +func newPointerValue(r *runner, index, offset int) pointerValue { + return pointerValue{ + pointer: uint64(index)<<32 | uint64(offset), + } +} + +func (v pointerValue) index() uint32 { + return uint32(v.pointer >> 32) +} + +func (v pointerValue) offset() uint32 { + return uint32(v.pointer) +} + +// addOffset essentially does a GEP operation (pointer arithmetic): it adds the +// offset to the pointer. It also checks that the offset doesn't overflow the +// maximum offset size (which is 4GB). 
+func (v pointerValue) addOffset(offset uint32) pointerValue { + result := pointerValue{v.pointer + uint64(offset)} + if checks && v.index() != result.index() { + panic("interp: offset out of range") + } + return result +} + +func (v pointerValue) len(r *runner) uint32 { + return r.pointerSize +} + +func (v pointerValue) String() string { + name := strconv.Itoa(int(v.index())) + if v.offset() == 0 { + return "<" + name + ">" + } + return "<" + name + "+" + strconv.Itoa(int(v.offset())) + ">" +} + +func (v pointerValue) clone() value { + return v +} + +func (v pointerValue) asPointer(r *runner) (pointerValue, error) { + return v, nil +} + +func (v pointerValue) asRawValue(r *runner) rawValue { + rv := newRawValue(r.pointerSize) + for i := range rv.buf { + rv.buf[i] = v.pointer + } + return rv +} + +func (v pointerValue) Uint() uint64 { + panic("cannot convert pointer to integer") +} + +func (v pointerValue) Int() int64 { + panic("cannot convert pointer to integer") +} + +func (v pointerValue) equal(rhs pointerValue) bool { + return v.pointer == rhs.pointer +} + +func (v pointerValue) llvmValue(mem *memoryView) llvm.Value { + return mem.get(v.index()).llvmGlobal +} + +// toLLVMValue returns the LLVM value for this pointer, which may be a GEP or +// bitcast. The llvm.Type parameter is optional, if omitted the pointer type may +// be different than expected. +func (v pointerValue) toLLVMValue(llvmType llvm.Type, mem *memoryView) llvm.Value { + // Obtain the llvmValue, creating it if it doesn't exist yet. + llvmValue := v.llvmValue(mem) + if llvmValue.IsNil() { + // The global does not yet exist. Probably this is the result of a + // runtime.alloc. + // First allocate a new global for this object. + obj := mem.get(v.index()) + if obj.llvmType.IsNil() { + // Create an initializer without knowing the global type. + // This is probably the result of a runtime.alloc call. 
+ initializer := obj.buffer.asRawValue(mem.r).rawLLVMValue(mem) + globalType := initializer.Type() + llvmValue = llvm.AddGlobal(mem.r.mod, globalType, obj.globalName) + llvmValue.SetInitializer(initializer) + obj.llvmGlobal = llvmValue + mem.put(v.index(), obj) + } else { + globalType := obj.llvmType.ElementType() + if checks && mem.r.targetData.TypeAllocSize(globalType) != uint64(obj.size) { + panic("size of the globalType isn't the same as the object size") + } + llvmValue = llvm.AddGlobal(mem.r.mod, globalType, obj.globalName) + obj.llvmGlobal = llvmValue + mem.put(v.index(), obj) + + // Set the initializer for the global. Do this after creation to avoid + // infinite recursion between creating the global and creating the + // contents of the global (if the global contains itself). + initializer := obj.buffer.toLLVMValue(globalType, mem) + if checks && initializer.Type() != globalType { + panic("allocated value does not match allocated type") + } + llvmValue.SetInitializer(initializer) + } + + // It should be included in r.globals because otherwise markExternal + // would consider it a new global (and would fail to mark this global as + // having an external load/store). + mem.r.globals[llvmValue] = int(v.index()) + llvmValue.SetLinkage(llvm.InternalLinkage) + } + + if llvmType.IsNil() { + return llvmValue + } + + if llvmType.TypeKind() != llvm.PointerTypeKind { + // The LLVM value has (or should have) the same bytes once compiled, but + // does not have the right LLVM type. This can happen for example when + // storing to a struct with a single pointer field: this pointer may + // then become the value even though the pointer should be wrapped in a + // struct. + // This can be worked around by simply converting to a raw value, + // rawValue knows how to create such structs. 
+ if v.offset() != 0 { + panic("offset set without known pointer type") + } + return v.asRawValue(mem.r).toLLVMValue(llvmType, mem) + } + + requestedType := llvmType + objectElementType := llvmValue.Type() + if requestedType == objectElementType { + if v.offset() != 0 { + // This should never happen, if offset is non-zero, the types + // shouldn't match. + panic("offset set while there is no way to convert the type") + } + return llvmValue + } + + if v.offset() == 0 { + // Offset is zero, so we can just bitcast to get a correct pointer. + return llvm.ConstBitCast(llvmValue, llvmType) + } + + // We need to make a constant GEP for pointer arithmetic. + int32Type := llvmType.Context().Int32Type() + indices := []llvm.Value{llvm.ConstInt(int32Type, 0, false)} + requestedType = requestedType.ElementType() + objectElementType = objectElementType.ElementType() + offset := int64(v.offset()) + for offset > 0 { + switch objectElementType.TypeKind() { + case llvm.ArrayTypeKind: + elementType := objectElementType.ElementType() + elementSize := mem.r.targetData.TypeAllocSize(elementType) + elementIndex := uint64(offset) / elementSize + indices = append(indices, llvm.ConstInt(int32Type, elementIndex, false)) + offset -= int64(elementIndex * elementSize) + objectElementType = elementType + case llvm.StructTypeKind: + element := mem.r.targetData.ElementContainingOffset(objectElementType, uint64(offset)) + indices = append(indices, llvm.ConstInt(int32Type, uint64(element), false)) + offset -= int64(mem.r.targetData.ElementOffset(objectElementType, element)) + objectElementType = objectElementType.StructElementTypes()[element] + default: + panic("pointer index with something other than a struct or array?") + } + } + if offset < 0 { + panic("offset has somehow gone negative, this should be impossible") + } + + // Finally do the gep, using the above computed indices. 
+ // If it still doesn't match the requested type, it's possible to bitcast (as + // the bits of the pointer are now correct, just not the type). + gep := llvm.ConstInBoundsGEP(llvmValue, indices) + if gep.Type() != llvmType { + return llvm.ConstBitCast(gep, llvmType) + } + return gep +} + +// mapValue implements a Go map which is created at compile time and stored as a +// global variable. +// The value itself is only used as part of an object (object.buffer). Maps are +// reference types aka pointers, so it can only be used as a pointerValue, not +// directly. +type mapValue struct { + r *runner + pkgName string + size uint32 // byte size of runtime.hashmap + hashmap llvm.Value + keyIsString bool + keys []interface{} // either rawValue (for binary key) or mapStringKey (for string key) + values []rawValue + keySize uint32 + valueSize uint32 +} + +type mapStringKey struct { + buf pointerValue + size uint64 + data []uint64 +} + +func newMapValue(r *runner, hashmapPointerType llvm.Type, keySize, valueSize uint32) *mapValue { + size := uint32(r.targetData.TypeAllocSize(hashmapPointerType.ElementType())) + return &mapValue{ + r: r, + pkgName: r.pkgName, + size: size, + keySize: keySize, + valueSize: valueSize, + } +} + +func (v *mapValue) len(r *runner) uint32 { + return v.size +} + +func (v *mapValue) clone() value { + // Return a copy of mapValue. + clone := *v + clone.keys = append([]interface{}{}, clone.keys...) + clone.values = append([]rawValue{}, clone.values...) + return &clone +} + +func (v *mapValue) asPointer(r *runner) (pointerValue, error) { + panic("interp: mapValue.asPointer") +} + +func (v *mapValue) asRawValue(r *runner) rawValue { + panic("interp: mapValue.asRawValue") +} + +func (v *mapValue) Uint() uint64 { + panic("interp: mapValue.Uint") +} + +func (v *mapValue) Int() int64 { + panic("interp: mapValue.Int") +} + +// Temporary struct to collect data before turning this into a hashmap bucket +// LLVM value. 
+type mapBucket struct { + m *mapValue + tophash [8]uint8 + keys []rawValue // can have up to 8 keys + values []rawValue // can have up to 8 values, len(keys) == len(values) +} + +// create returns a (pointer to a) buffer structurally equivalent to +// runtime.hashmapBucket. +func (b *mapBucket) create(ctx llvm.Context, nextBucket llvm.Value, mem *memoryView) llvm.Value { + // Create tophash array. + int8Type := ctx.Int8Type() + tophashValues := make([]llvm.Value, 8) + for i := range tophashValues { + tophashValues[i] = llvm.ConstInt(int8Type, uint64(b.tophash[i]), false) + } + tophash := llvm.ConstArray(int8Type, tophashValues) + + // Create next pointer (if not set). + if nextBucket.IsNil() { + nextBucket = llvm.ConstNull(llvm.PointerType(int8Type, 0)) + } + + // Create data for keys. + var keyValues []llvm.Value + for _, key := range b.keys { + keyValues = append(keyValues, key.rawLLVMValue(mem)) + } + if len(b.keys) < 8 { + keyValues = append(keyValues, llvm.ConstNull(llvm.ArrayType(int8Type, int(b.m.keySize)*(8-len(b.keys))))) + } + keyValue := ctx.ConstStruct(keyValues, false) + if checks && uint32(b.m.r.targetData.TypeAllocSize(keyValue.Type())) != b.m.keySize*8 { + panic("key size invalid") + } + + // Create data for values. + var valueValues []llvm.Value + for _, value := range b.values { + valueValues = append(valueValues, value.rawLLVMValue(mem)) + } + if len(b.values) < 8 { + valueValues = append(valueValues, llvm.ConstNull(llvm.ArrayType(int8Type, int(b.m.valueSize)*(8-len(b.values))))) + } + valueValue := ctx.ConstStruct(valueValues, false) + if checks && uint32(b.m.r.targetData.TypeAllocSize(valueValue.Type())) != b.m.valueSize*8 { + panic("value size invalid") + } + + // Create the bucket. 
+ bucketInitializer := ctx.ConstStruct([]llvm.Value{ + tophash, + nextBucket, + keyValue, + valueValue, + }, false) + bucket := llvm.AddGlobal(b.m.r.mod, bucketInitializer.Type(), b.m.pkgName+"$mapbucket") + bucket.SetInitializer(bucketInitializer) + bucket.SetLinkage(llvm.InternalLinkage) + bucket.SetUnnamedAddr(true) + return bucket +} + +func (v *mapValue) toLLVMValue(hashmapType llvm.Type, mem *memoryView) llvm.Value { + if !v.hashmap.IsNil() { + return v.hashmap + } + + // Create a slice of buckets with all the keys and values in the hashmap. + var buckets []*mapBucket + var bucket *mapBucket + for i, key := range v.keys { + var data []uint64 + var keyValue rawValue + switch key := key.(type) { + case mapStringKey: + data = key.data + keyValue = newRawValue(v.keySize) + // runtime._string is {ptr, length} + for i := uint32(0); i < v.keySize/2; i++ { + keyValue.buf[i] = key.buf.pointer + } + copy(keyValue.buf[v.keySize/2:], literalValue{key.size}.asRawValue(v.r).buf) + case rawValue: + if key.hasPointer() { + panic("todo: map key with pointer") + } + data = key.buf + keyValue = key + default: + panic("unknown map key type") + } + buf := make([]byte, len(data)) + for i, p := range data { + buf[i] = byte(p) + } + hash := v.hash(buf) + + if i%8 == 0 { + bucket = &mapBucket{m: v} + buckets = append(buckets, bucket) + } + bucket.tophash[i%8] = v.topHash(hash) + bucket.keys = append(bucket.keys, keyValue) + bucket.values = append(bucket.values, v.values[i]) + } + + // Convert these buckets into LLVM global variables. 
+ ctx := v.r.mod.Context() + i8ptrType := llvm.PointerType(ctx.Int8Type(), 0) + var nextBucket llvm.Value + for i := len(buckets) - 1; i >= 0; i-- { + bucket = buckets[i] + bucketValue := bucket.create(ctx, nextBucket, mem) + nextBucket = bucketValue + } + firstBucket := nextBucket + if firstBucket.IsNil() { + firstBucket = llvm.ConstNull(i8ptrType) + } else { + firstBucket = llvm.ConstBitCast(firstBucket, i8ptrType) + } + + // Create the hashmap itself, pointing to these buckets. + hashmapPointerType := llvm.PointerType(hashmapType, 0) + hashmap := llvm.ConstNamedStruct(hashmapType, []llvm.Value{ + llvm.ConstPointerNull(hashmapPointerType), // next + firstBucket, // buckets + llvm.ConstInt(hashmapType.StructElementTypes()[2], uint64(len(v.keys)), false), // count + llvm.ConstInt(ctx.Int8Type(), uint64(v.keySize), false), // keySize + llvm.ConstInt(ctx.Int8Type(), uint64(v.valueSize), false), // valueSize + llvm.ConstInt(ctx.Int8Type(), 0, false), // bucketBits + }) + + v.hashmap = hashmap + return v.hashmap +} + +// putString does a map assign operation, assuming that the map is of type +// map[string]T. +func (v *mapValue) putString(mem *memoryView, stringBuf pointerValue, stringLen uint64, valuePtr pointerValue) error { + if !v.hashmap.IsNil() { + return errMapAlreadyCreated + } + + value := mem.load(valuePtr, v.valueSize) + stringValue := mem.load(stringBuf, uint32(stringLen)).asRawValue(v.r) + if stringValue.hasPointer() { + panic("interp: string contains pointer") + } + + // TODO: avoid duplicate keys + v.keys = append(v.keys, mapStringKey{stringBuf, stringLen, stringValue.buf}) + v.values = append(v.values, value.asRawValue(v.r)) + v.keyIsString = true + + return nil +} + +// putBinary does a map assign operation for binary data (e.g. [3]int etc). The +// key must not contain pointer values. 
+func (v *mapValue) putBinary(mem *memoryView, keyPtr, valuePtr pointerValue) error { + if !v.hashmap.IsNil() { + return errMapAlreadyCreated + } + + key := mem.load(keyPtr, v.keySize) + value := mem.load(valuePtr, v.valueSize) + + // Sanity checks. + if v.keySize != key.len(mem.r) || v.valueSize != value.len(mem.r) { + // This is a bug (not unhandled input), so panic. + panic("interp: key or value size mismatch") + } + if v.keyIsString { + panic("cannot put binary keys in string map") + } + + // TODO: avoid duplicate keys + v.keys = append(v.keys, key.asRawValue(v.r)) + v.values = append(v.values, value.asRawValue(v.r)) + + return nil +} + +// Get FNV-1a hash of this string. +// +// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash +func (v *mapValue) hash(data []byte) uint32 { + var result uint32 = 2166136261 // FNV offset basis + for _, c := range data { + result ^= uint32(c) + result *= 16777619 // FNV prime + } + return result +} + +// Get the topmost 8 bits of the hash, without using a special value (like 0). +func (v *mapValue) topHash(hash uint32) uint8 { + tophash := uint8(hash >> 24) + if tophash < 1 { + // 0 means empty slot, so make it bigger. + tophash++ + } + return tophash +} + +func (v *mapValue) String() string { + return "" +} + +// rawValue is a raw memory buffer that can store either pointers or regular +// data. This is the fallback data for everything that isn't clearly a +// literalValue or pointerValue. +type rawValue struct { + // An integer in buf contains either pointers or bytes. + // If it is a byte, it is smaller than 256. + // If it is a pointer, the index is contained in the upper 32 bits and the + // offset is contained in the lower 32 bits. 
+ buf []uint64 +} + +func newRawValue(size uint32) rawValue { + return rawValue{make([]uint64, size)} +} + +func (v rawValue) len(r *runner) uint32 { + return uint32(len(v.buf)) +} + +func (v rawValue) String() string { + if len(v.buf) == 2 || len(v.buf) == 4 || len(v.buf) == 8 { + // Format as a pointer if the entire buf is this pointer. + if v.buf[0] > 255 { + isPointer := true + for _, p := range v.buf { + if p != v.buf[0] { + isPointer = false + break + } + } + if isPointer { + return pointerValue{v.buf[0]}.String() + } + } + // Format as number if none of the buf is a pointer. + if !v.hasPointer() { + return strconv.FormatInt(v.Int(), 10) + } + } + return "<[…" + strconv.Itoa(len(v.buf)) + "]>" +} + +func (v rawValue) clone() value { + newValue := v + newValue.buf = make([]uint64, len(v.buf)) + copy(newValue.buf, v.buf) + return newValue +} + +func (v rawValue) asPointer(r *runner) (pointerValue, error) { + if v.buf[0] <= 255 { + // Probably a null pointer or memory-mapped I/O. + return pointerValue{}, errExpectedPointer + } + return pointerValue{v.buf[0]}, nil +} + +func (v rawValue) asRawValue(r *runner) rawValue { + return v +} + +func (v rawValue) bytes() []byte { + buf := make([]byte, len(v.buf)) + for i, p := range v.buf { + if p > 255 { + panic("cannot convert pointer value to byte") + } + buf[i] = byte(p) + } + return buf +} + +func (v rawValue) Uint() uint64 { + buf := v.bytes() + + switch len(v.buf) { + case 1: + return uint64(buf[0]) + case 2: + return uint64(binary.LittleEndian.Uint16(buf)) + case 4: + return uint64(binary.LittleEndian.Uint32(buf)) + case 8: + return binary.LittleEndian.Uint64(buf) + default: + panic("unknown integer size") + } +} + +func (v rawValue) Int() int64 { + switch len(v.buf) { + case 1: + return int64(int8(v.Uint())) + case 2: + return int64(int16(v.Uint())) + case 4: + return int64(int32(v.Uint())) + case 8: + return int64(int64(v.Uint())) + default: + panic("unknown integer size") + } +} + +// equal returns true if (and 
only if) the value matches rhs. +func (v rawValue) equal(rhs rawValue) bool { + if len(v.buf) != len(rhs.buf) { + panic("comparing values of different size") + } + for i, p := range v.buf { + if rhs.buf[i] != p { + return false + } + } + return true +} + +// rawLLVMValue returns a llvm.Value for this rawValue, making up a type as it +// goes. The resulting value does not have a specified type, but it will be the +// same size and have the same bytes if it was created with a provided LLVM type +// (through toLLVMValue). +func (v rawValue) rawLLVMValue(mem *memoryView) llvm.Value { + var structFields []llvm.Value + ctx := mem.r.mod.Context() + int8Type := ctx.Int8Type() + + var bytesBuf []llvm.Value + // addBytes can be called after adding to bytesBuf to flush remaining bytes + // to a new array in structFields. + addBytes := func() { + if len(bytesBuf) == 0 { + return + } + if len(bytesBuf) == 1 { + structFields = append(structFields, bytesBuf[0]) + } else { + structFields = append(structFields, llvm.ConstArray(int8Type, bytesBuf)) + } + bytesBuf = nil + } + + // Create structFields, converting the rawValue to a LLVM value. + for i := uint32(0); i < uint32(len(v.buf)); { + if v.buf[i] > 255 { + addBytes() + field := pointerValue{v.buf[i]}.toLLVMValue(llvm.Type{}, mem) + elementType := field.Type().ElementType() + if elementType.TypeKind() == llvm.StructTypeKind { + // There are some special pointer types that should be used as a + // ptrtoint, so that they can be used in certain optimizations. + name := elementType.StructName() + if name == "runtime.typeInInterface" || name == "runtime.funcValueWithSignature" { + uintptrType := ctx.IntType(int(mem.r.pointerSize) * 8) + field = llvm.ConstPtrToInt(field, uintptrType) + } + } + structFields = append(structFields, field) + i += mem.r.pointerSize + continue + } + val := llvm.ConstInt(int8Type, uint64(v.buf[i]), false) + bytesBuf = append(bytesBuf, val) + i++ + } + addBytes() + + // Return the created data. 
+ if len(structFields) == 1 { + return structFields[0] + } + return ctx.ConstStruct(structFields, false) +} + +func (v rawValue) toLLVMValue(llvmType llvm.Type, mem *memoryView) llvm.Value { + isZero := true + for _, p := range v.buf { + if p != 0 { + isZero = false + break + } + } + if isZero { + return llvm.ConstNull(llvmType) + } + switch llvmType.TypeKind() { + case llvm.IntegerTypeKind: + if v.buf[0] > 255 { + ptr, err := v.asPointer(mem.r) + if err != nil { + panic(err) + } + return llvm.ConstPtrToInt(ptr.toLLVMValue(llvm.Type{}, mem), llvmType) + } + var n uint64 + switch llvmType.IntTypeWidth() { + case 64: + n = rawValue{v.buf[:8]}.Uint() + case 32: + n = rawValue{v.buf[:4]}.Uint() + case 16: + n = rawValue{v.buf[:2]}.Uint() + case 8: + n = uint64(v.buf[0]) + case 1: + n = uint64(v.buf[0]) + if n != 0 && n != 1 { + panic("bool must be 0 or 1") + } + default: + panic("unknown integer size") + } + return llvm.ConstInt(llvmType, n, false) + case llvm.StructTypeKind: + fieldTypes := llvmType.StructElementTypes() + fields := make([]llvm.Value, len(fieldTypes)) + for i, fieldType := range fieldTypes { + offset := mem.r.targetData.ElementOffset(llvmType, i) + field := rawValue{ + buf: v.buf[offset:], + } + fields[i] = field.toLLVMValue(fieldType, mem) + } + if llvmType.StructName() != "" { + return llvm.ConstNamedStruct(llvmType, fields) + } + return llvmType.Context().ConstStruct(fields, false) + case llvm.ArrayTypeKind: + numElements := llvmType.ArrayLength() + childType := llvmType.ElementType() + childTypeSize := mem.r.targetData.TypeAllocSize(childType) + fields := make([]llvm.Value, numElements) + for i := range fields { + offset := i * int(childTypeSize) + field := rawValue{ + buf: v.buf[offset:], + } + fields[i] = field.toLLVMValue(childType, mem) + if checks && fields[i].Type() != childType { + panic("child type doesn't match") + } + } + return llvm.ConstArray(childType, fields) + case llvm.PointerTypeKind: + if v.buf[0] > 255 { + // This is a regular 
pointer. + llvmValue := pointerValue{v.buf[0]}.toLLVMValue(llvm.Type{}, mem) + if llvmValue.Type() != llvmType { + llvmValue = llvm.ConstBitCast(llvmValue, llvmType) + } + return llvmValue + } + // This is either a null pointer or a raw pointer for memory-mapped I/O + // (such as 0xe000ed00). + ptr := rawValue{v.buf[:mem.r.pointerSize]}.Uint() + if ptr == 0 { + // Null pointer. + return llvm.ConstNull(llvmType) + } + var ptrValue llvm.Value // the underlying int + switch mem.r.pointerSize { + case 8: + ptrValue = llvm.ConstInt(llvmType.Context().Int64Type(), ptr, false) + case 4: + ptrValue = llvm.ConstInt(llvmType.Context().Int32Type(), ptr, false) + case 2: + ptrValue = llvm.ConstInt(llvmType.Context().Int16Type(), ptr, false) + default: + panic("unknown pointer size") + } + return llvm.ConstIntToPtr(ptrValue, llvmType) + case llvm.DoubleTypeKind: + b := rawValue{v.buf[:8]}.Uint() + f := math.Float64frombits(b) + return llvm.ConstFloat(llvmType, f) + case llvm.FloatTypeKind: + b := uint32(rawValue{v.buf[:4]}.Uint()) + f := math.Float32frombits(b) + return llvm.ConstFloat(llvmType, float64(f)) + default: + panic("todo: raw value to LLVM value: " + llvmType.String()) + } +} + +func (v *rawValue) set(llvmValue llvm.Value, r *runner) { + if llvmValue.IsNull() { + // A zero value is common so check that first. + return + } + if !llvmValue.IsAGlobalValue().IsNil() { + ptrSize := r.pointerSize + ptr, err := r.getValue(llvmValue).asPointer(r) + if err != nil { + panic(err) + } + for i := uint32(0); i < ptrSize; i++ { + v.buf[i] = ptr.pointer + } + } else if !llvmValue.IsAConstantExpr().IsNil() { + switch llvmValue.Opcode() { + case llvm.IntToPtr, llvm.PtrToInt, llvm.BitCast: + // All these instructions effectively just reinterprets the bits + // (like a bitcast) while no bits change and keeping the same + // length, so just read its contents. 
+ v.set(llvmValue.Operand(0), r) + case llvm.GetElementPtr: + ptr := llvmValue.Operand(0) + index := llvmValue.Operand(1) + if checks && index.IsAConstantInt().IsNil() || index.ZExtValue() != 0 { + panic("expected first index of const gep to be i32 0") + } + numOperands := llvmValue.OperandsCount() + elementType := ptr.Type().ElementType() + totalOffset := uint64(0) + for i := 2; i < numOperands; i++ { + indexValue := llvmValue.Operand(i) + if checks && indexValue.IsAConstantInt().IsNil() { + panic("expected const gep index to be a constant integer") + } + index := indexValue.ZExtValue() + switch elementType.TypeKind() { + case llvm.StructTypeKind: + // Indexing into a struct field. + offsetInBytes := r.targetData.ElementOffset(elementType, int(index)) + totalOffset += offsetInBytes + elementType = elementType.StructElementTypes()[index] + default: + // Indexing into an array. + elementType = elementType.ElementType() + elementSize := r.targetData.TypeAllocSize(elementType) + totalOffset += index * elementSize + } + } + ptrSize := r.pointerSize + ptrValue, err := r.getValue(ptr).asPointer(r) + if err != nil { + panic(err) + } + ptrValue.pointer += totalOffset + for i := uint32(0); i < ptrSize; i++ { + v.buf[i] = ptrValue.pointer + } + default: + llvmValue.Dump() + println() + panic("unknown constant expr") + } + } else if llvmValue.IsUndef() { + // Let undef be zero, by lack of an explicit 'undef' marker. 
+ } else { + if checks && llvmValue.IsAConstant().IsNil() { + panic("expected a constant") + } + llvmType := llvmValue.Type() + switch llvmType.TypeKind() { + case llvm.IntegerTypeKind: + n := llvmValue.ZExtValue() + switch llvmValue.Type().IntTypeWidth() { + case 64: + var buf [8]byte + binary.LittleEndian.PutUint64(buf[:], n) + for i, b := range buf { + v.buf[i] = uint64(b) + } + case 32: + var buf [4]byte + binary.LittleEndian.PutUint32(buf[:], uint32(n)) + for i, b := range buf { + v.buf[i] = uint64(b) + } + case 16: + var buf [2]byte + binary.LittleEndian.PutUint16(buf[:], uint16(n)) + for i, b := range buf { + v.buf[i] = uint64(b) + } + case 8, 1: + v.buf[0] = n + default: + panic("unknown integer size") + } + case llvm.StructTypeKind: + numElements := llvmType.StructElementTypesCount() + for i := 0; i < numElements; i++ { + offset := r.targetData.ElementOffset(llvmType, i) + field := rawValue{ + buf: v.buf[offset:], + } + field.set(llvm.ConstExtractValue(llvmValue, []uint32{uint32(i)}), r) + } + case llvm.ArrayTypeKind: + numElements := llvmType.ArrayLength() + childType := llvmType.ElementType() + childTypeSize := r.targetData.TypeAllocSize(childType) + for i := 0; i < numElements; i++ { + offset := i * int(childTypeSize) + field := rawValue{ + buf: v.buf[offset:], + } + field.set(llvm.ConstExtractValue(llvmValue, []uint32{uint32(i)}), r) + } + case llvm.DoubleTypeKind: + f, _ := llvmValue.DoubleValue() + var buf [8]byte + binary.LittleEndian.PutUint64(buf[:], math.Float64bits(f)) + for i, b := range buf { + v.buf[i] = uint64(b) + } + case llvm.FloatTypeKind: + f, _ := llvmValue.DoubleValue() + var buf [4]byte + binary.LittleEndian.PutUint32(buf[:], math.Float32bits(float32(f))) + for i, b := range buf { + v.buf[i] = uint64(b) + } + default: + llvmValue.Dump() + println() + panic("unknown constant") + } + } +} + +// hasPointer returns true if this raw value contains a pointer somewhere in the +// buffer. 
+func (v rawValue) hasPointer() bool { + for _, p := range v.buf { + if p > 255 { + return true + } + } + return false +} + +// localValue is a special implementation of the value interface. It is a +// placeholder for other values in instruction operands, and is replaced with +// one of the others before executing. +type localValue struct { + value llvm.Value +} + +func (v localValue) len(r *runner) uint32 { + panic("interp: localValue.len") +} + +func (v localValue) String() string { + return "" +} + +func (v localValue) clone() value { + panic("interp: localValue.clone()") +} + +func (v localValue) asPointer(r *runner) (pointerValue, error) { + return pointerValue{}, errors.New("interp: localValue.asPointer called") +} + +func (v localValue) asRawValue(r *runner) rawValue { + panic("interp: localValue.asRawValue") +} + +func (v localValue) Uint() uint64 { + panic("interp: localValue.Uint") +} + +func (v localValue) Int() int64 { + panic("interp: localValue.Int") +} + +func (v localValue) toLLVMValue(llvmType llvm.Type, mem *memoryView) llvm.Value { + return v.value +} + +func (r *runner) getValue(llvmValue llvm.Value) value { + if checks && llvmValue.IsNil() { + panic("nil llvmValue") + } + if !llvmValue.IsAGlobalValue().IsNil() { + index, ok := r.globals[llvmValue] + if !ok { + obj := object{ + llvmGlobal: llvmValue, + } + index = len(r.objects) + r.globals[llvmValue] = index + r.objects = append(r.objects, obj) + if !llvmValue.IsAGlobalVariable().IsNil() { + obj.size = uint32(r.targetData.TypeAllocSize(llvmValue.Type().ElementType())) + if initializer := llvmValue.Initializer(); !initializer.IsNil() { + obj.buffer = r.getValue(initializer) + } + } else if !llvmValue.IsAFunction().IsNil() { + // OK + } else { + panic("interp: unknown global value") + } + // Update the object after it has been created. This avoids an + // infinite recursion when using getValue on a global that contains + // a reference to itself. 
+ r.objects[index] = obj + } + return newPointerValue(r, index, 0) + } else if !llvmValue.IsAConstant().IsNil() { + if !llvmValue.IsAConstantInt().IsNil() { + n := llvmValue.ZExtValue() + switch llvmValue.Type().IntTypeWidth() { + case 64: + return literalValue{n} + case 32: + return literalValue{uint32(n)} + case 16: + return literalValue{uint16(n)} + case 8, 1: + return literalValue{uint8(n)} + default: + panic("unknown integer size") + } + } + size := r.targetData.TypeAllocSize(llvmValue.Type()) + v := newRawValue(uint32(size)) + v.set(llvmValue, r) + return v + } else if !llvmValue.IsAInstruction().IsNil() || !llvmValue.IsAArgument().IsNil() { + return localValue{llvmValue} + } else if !llvmValue.IsAInlineAsm().IsNil() { + return localValue{llvmValue} + } else { + llvmValue.Dump() + println() + panic("unknown value") + } +} diff --git a/interp/scan.go b/interp/scan.go deleted file mode 100644 index da06169ac8..0000000000 --- a/interp/scan.go +++ /dev/null @@ -1,259 +0,0 @@ -package interp - -import ( - "errors" - "strings" - - "tinygo.org/x/go-llvm" -) - -type sideEffectSeverity int - -func (severity sideEffectSeverity) String() string { - switch severity { - case sideEffectInProgress: - return "in progress" - case sideEffectNone: - return "none" - case sideEffectLimited: - return "limited" - case sideEffectAll: - return "all" - default: - return "unknown" - } -} - -const ( - sideEffectInProgress sideEffectSeverity = iota // computing side effects is in progress (for recursive functions) - sideEffectNone // no side effects at all (pure) - sideEffectLimited // has side effects, but the effects are known - sideEffectAll // has unknown side effects -) - -// sideEffectResult contains the scan results after scanning a function for side -// effects (recursively). -type sideEffectResult struct { - severity sideEffectSeverity - mentionsGlobals map[llvm.Value]struct{} -} - -// hasSideEffects scans this function and all descendants, recursively. 
It -// returns whether this function has side effects and if it does, which globals -// it mentions anywhere in this function or any called functions. -func (e *evalPackage) hasSideEffects(fn llvm.Value) (*sideEffectResult, *Error) { - name := fn.Name() - switch { - case name == "runtime.alloc": - // Cannot be scanned but can be interpreted. - return &sideEffectResult{severity: sideEffectNone}, nil - case name == "runtime.nanotime": - // Fixed value at compile time. - return &sideEffectResult{severity: sideEffectNone}, nil - case name == "runtime._panic": - return &sideEffectResult{severity: sideEffectLimited}, nil - case name == "runtime.typeAssert": - return &sideEffectResult{severity: sideEffectNone}, nil - case name == "runtime.interfaceImplements": - return &sideEffectResult{severity: sideEffectNone}, nil - case name == "runtime.sliceCopy": - return &sideEffectResult{severity: sideEffectNone}, nil - case name == "runtime.trackPointer": - return &sideEffectResult{severity: sideEffectNone}, nil - case name == "llvm.dbg.value": - return &sideEffectResult{severity: sideEffectNone}, nil - case name == "(*sync/atomic.Value).Load" || name == "(*sync/atomic.Value).Store": - // These functions do some unsafe pointer loading/storing but are - // otherwise safe. 
- return &sideEffectResult{severity: sideEffectLimited}, nil - case strings.HasPrefix(name, "llvm.lifetime."): - return &sideEffectResult{severity: sideEffectNone}, nil - } - if fn.IsDeclaration() { - return &sideEffectResult{severity: sideEffectLimited}, nil - } - if e.sideEffectFuncs == nil { - e.sideEffectFuncs = make(map[llvm.Value]*sideEffectResult) - } - if se, ok := e.sideEffectFuncs[fn]; ok { - return se, nil - } - result := &sideEffectResult{ - severity: sideEffectInProgress, - mentionsGlobals: map[llvm.Value]struct{}{}, - } - e.sideEffectFuncs[fn] = result - dirtyLocals := map[llvm.Value]struct{}{} - for bb := fn.EntryBasicBlock(); !bb.IsNil(); bb = llvm.NextBasicBlock(bb) { - for inst := bb.FirstInstruction(); !inst.IsNil(); inst = llvm.NextInstruction(inst) { - if inst.IsAInstruction().IsNil() { - // Should not happen in valid IR. - panic("not an instruction") - } - - // Check for any globals mentioned anywhere in the function. Assume - // any mentioned globals may be read from or written to when - // executed, thus must be marked dirty with a call. - for i := 0; i < inst.OperandsCount(); i++ { - operand := inst.Operand(i) - if !operand.IsAGlobalVariable().IsNil() { - result.mentionsGlobals[operand] = struct{}{} - } - } - - switch inst.InstructionOpcode() { - case llvm.IndirectBr, llvm.Invoke: - // Not emitted by the compiler. - return nil, e.errorAt(inst, errors.New("unknown instructions")) - case llvm.Call: - child := inst.CalledValue() - if !child.IsAInlineAsm().IsNil() { - // Inline assembly. This most likely has side effects. - // Assume they're only limited side effects, similar to - // external function calls. - result.updateSeverity(sideEffectLimited) - continue - } - if child.IsAFunction().IsNil() { - // Indirect call? - // In any case, we can't know anything here about what it - // affects exactly so mark this function as invoking all - // possible side effects. 
- result.updateSeverity(sideEffectAll) - continue - } - if child.IsDeclaration() { - // External function call. Assume only limited side effects - // (no affected globals, etc.). - switch child.Name() { - case "runtime.alloc": - continue - case "runtime.typeAssert": - continue // implemented in interp - case "runtime.interfaceImplements": - continue // implemented in interp - } - if e.hasLocalSideEffects(dirtyLocals, inst) { - result.updateSeverity(sideEffectLimited) - } - continue - } - childSideEffects, err := e.hasSideEffects(child) - if err != nil { - return nil, err - } - switch childSideEffects.severity { - case sideEffectInProgress, sideEffectNone: - // no side effects or recursive function - continue scanning - case sideEffectLimited: - // The return value may be problematic. - if e.hasLocalSideEffects(dirtyLocals, inst) { - result.updateSeverity(sideEffectLimited) - } - case sideEffectAll: - result.updateSeverity(sideEffectAll) - default: - panic("unreachable") - } - case llvm.Load: - if inst.IsVolatile() { - result.updateSeverity(sideEffectLimited) - } - if _, ok := e.dirtyGlobals[inst.Operand(0)]; ok { - if e.hasLocalSideEffects(dirtyLocals, inst) { - result.updateSeverity(sideEffectLimited) - } - } - case llvm.Store: - if inst.IsVolatile() { - result.updateSeverity(sideEffectLimited) - } - case llvm.IntToPtr: - // Pointer casts are not yet supported. - result.updateSeverity(sideEffectLimited) - default: - // Ignore most instructions. - // Check this list for completeness: - // https://godoc.org/github.com/llvm-mirror/llvm/bindings/go/llvm#Opcode - } - } - } - - if result.severity == sideEffectInProgress { - // No side effect was reported for this function. - result.severity = sideEffectNone - } - return result, nil -} - -// hasLocalSideEffects checks whether the given instruction flows into a branch -// or return instruction, in which case the whole function must be marked as -// having side effects and be called at runtime. 
-func (e *Eval) hasLocalSideEffects(dirtyLocals map[llvm.Value]struct{}, inst llvm.Value) bool { - if _, ok := dirtyLocals[inst]; ok { - // It is already known that this local is dirty. - return true - } - - for use := inst.FirstUse(); !use.IsNil(); use = use.NextUse() { - user := use.User() - if user.IsAInstruction().IsNil() { - // Should not happen in valid IR. - panic("user not an instruction") - } - switch user.InstructionOpcode() { - case llvm.Br, llvm.Switch: - // A branch on a dirty value makes this function dirty: it cannot be - // interpreted at compile time so has to be run at runtime. It is - // marked as having side effects for this reason. - return true - case llvm.Ret: - // This function returns a dirty value so it is itself marked as - // dirty to make sure it is called at runtime. - return true - case llvm.Store: - ptr := user.Operand(1) - if !ptr.IsAGlobalVariable().IsNil() { - // Store to a global variable. - // Already handled in (*Eval).hasSideEffects. - continue - } - // This store might affect all kinds of values. While it is - // certainly possible to traverse through all of them, the easiest - // option right now is to just assume the worst and say that this - // function has side effects. - // TODO: traverse through all stores and mark all relevant allocas / - // globals dirty. - return true - default: - // All instructions that take 0 or more operands (1 or more if it - // was a use) and produce a result. - // For a list: - // https://godoc.org/github.com/llvm-mirror/llvm/bindings/go/llvm#Opcode - dirtyLocals[user] = struct{}{} - if e.hasLocalSideEffects(dirtyLocals, user) { - return true - } - } - } - - // No side effects found. - return false -} - -// updateSeverity sets r.severity to the max of r.severity and severity, -// conservatively assuming the worst severity. 
-func (r *sideEffectResult) updateSeverity(severity sideEffectSeverity) { - if severity > r.severity { - r.severity = severity - } -} - -// updateSeverity updates the severity with the severity of the child severity, -// like in a function call. This means it also copies the mentioned globals. -func (r *sideEffectResult) update(child *sideEffectResult) { - r.updateSeverity(child.severity) - for global := range child.mentionsGlobals { - r.mentionsGlobals[global] = struct{}{} - } -} diff --git a/interp/scan_test.go b/interp/scan_test.go deleted file mode 100644 index 373cec3ecc..0000000000 --- a/interp/scan_test.go +++ /dev/null @@ -1,95 +0,0 @@ -package interp - -import ( - "os" - "sort" - "testing" - - "tinygo.org/x/go-llvm" -) - -var scanTestTable = []struct { - name string - severity sideEffectSeverity - mentionsGlobals []string -}{ - {"returnsConst", sideEffectNone, nil}, - {"returnsArg", sideEffectNone, nil}, - {"externalCallOnly", sideEffectNone, nil}, - {"externalCallAndReturn", sideEffectLimited, nil}, - {"externalCallBranch", sideEffectLimited, nil}, - {"readCleanGlobal", sideEffectNone, []string{"cleanGlobalInt"}}, - {"readDirtyGlobal", sideEffectLimited, []string{"dirtyGlobalInt"}}, - {"callFunctionPointer", sideEffectAll, []string{"functionPointer"}}, - {"getDirtyPointer", sideEffectLimited, nil}, - {"storeToPointer", sideEffectLimited, nil}, - {"callTypeAssert", sideEffectNone, nil}, - {"callInterfaceImplements", sideEffectNone, nil}, -} - -func TestScan(t *testing.T) { - t.Parallel() - - // Read the input IR. - path := "testdata/scan.ll" - ctx := llvm.NewContext() - buf, err := llvm.NewMemoryBufferFromFile(path) - os.Stat(path) // make sure this file is tracked by `go test` caching - if err != nil { - t.Fatalf("could not read file %s: %v", path, err) - } - mod, err := ctx.ParseIR(buf) - if err != nil { - t.Fatalf("could not load module:\n%v", err) - } - - // Check all to-be-tested functions. 
- for _, tc := range scanTestTable { - // Create an eval object, for testing. - e := &Eval{ - Mod: mod, - TargetData: llvm.NewTargetData(mod.DataLayout()), - dirtyGlobals: map[llvm.Value]struct{}{}, - } - - // Mark some globals dirty, for testing. - e.markDirty(mod.NamedGlobal("dirtyGlobalInt")) - - // Scan for side effects. - fn := mod.NamedFunction(tc.name) - if fn.IsNil() { - t.Errorf("scan test: could not find tested function %s in the IR", tc.name) - continue - } - evalPkg := &evalPackage{e, "testdata"} - result, err := evalPkg.hasSideEffects(fn) - if err != nil { - t.Errorf("scan test: failed to scan %s for side effects: %v", fn.Name(), err) - } - - // Check whether the result is what we expect. - if result.severity != tc.severity { - t.Errorf("scan test: function %s should have severity %s but it has %s", tc.name, tc.severity, result.severity) - } - - // Check whether the mentioned globals match with what we'd expect. - mentionsGlobalNames := make([]string, 0, len(result.mentionsGlobals)) - for global := range result.mentionsGlobals { - mentionsGlobalNames = append(mentionsGlobalNames, global.Name()) - } - sort.Strings(mentionsGlobalNames) - globalsMismatch := false - if len(result.mentionsGlobals) != len(tc.mentionsGlobals) { - globalsMismatch = true - } else { - for i, globalName := range mentionsGlobalNames { - if tc.mentionsGlobals[i] != globalName { - globalsMismatch = true - } - } - } - if globalsMismatch { - t.Errorf("scan test: expected %s to mention globals %v, but it mentions globals %v", tc.name, tc.mentionsGlobals, mentionsGlobalNames) - } - } -} diff --git a/interp/testdata/basic.ll b/interp/testdata/basic.ll index 8aebff312b..b223b8404c 100644 --- a/interp/testdata/basic.ll +++ b/interp/testdata/basic.ll @@ -4,6 +4,10 @@ target triple = "x86_64--linux" @main.v1 = internal global i64 0 @main.nonConst1 = global [4 x i64] zeroinitializer @main.nonConst2 = global i64 0 +@main.someArray = global [8 x {i16, i32}] zeroinitializer +@main.exportedValue 
= global [1 x i16*] [i16* @main.exposedValue1] +@main.exposedValue1 = global i16 0 +@main.exposedValue2 = global i16 0 declare void @runtime.printint64(i64) unnamed_addr @@ -47,6 +51,20 @@ entry: %value2 = load i64, i64* %gep2 store i64 %value2, i64* @main.nonConst2 + ; Test that the following GEP works: + ; var someArray + ; modifyExternal(&someArray[3].field1) + %gep3 = getelementptr [8 x {i16, i32}], [8 x {i16, i32}]* @main.someArray, i32 0, i32 3, i32 1 + call void @modifyExternal(i32* %gep3) + + ; Test that marking a value as external also marks all referenced values. + call void @modifyExternal(i32* bitcast ([1 x i16*]* @main.exportedValue to i32*)) + store i16 5, i16* @main.exposedValue1 + + ; Test that this even propagates through functions. + call void @modifyExternal(i32* bitcast (void ()* @willModifyGlobal to i32*)) + store i16 7, i16* @main.exposedValue2 + ret void } @@ -58,3 +76,14 @@ entry: } declare i64 @someValue() + +declare void @modifyExternal(i32*) + +; This function will modify an external value. By passing this function as a +; function pointer to an external function, @main.exposedValue2 should be +; marked as external. 
+define void @willModifyGlobal() { +entry: + store i16 8, i16* @main.exposedValue2 + ret void +} diff --git a/interp/testdata/basic.out.ll b/interp/testdata/basic.out.ll index 41f0aacbe5..9a16c85eb8 100644 --- a/interp/testdata/basic.out.ll +++ b/interp/testdata/basic.out.ll @@ -3,6 +3,10 @@ target triple = "x86_64--linux" @main.nonConst1 = local_unnamed_addr global [4 x i64] zeroinitializer @main.nonConst2 = local_unnamed_addr global i64 0 +@main.someArray = global [8 x { i16, i32 }] zeroinitializer +@main.exportedValue = global [1 x i16*] [i16* @main.exposedValue1] +@main.exposedValue1 = global i16 0 +@main.exposedValue2 = local_unnamed_addr global i16 0 declare void @runtime.printint64(i64) unnamed_addr @@ -16,6 +20,11 @@ entry: store i64 %value1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @main.nonConst1, i32 0, i32 0) %value2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @main.nonConst1, i32 0, i32 0) store i64 %value2, i64* @main.nonConst2 + call void @modifyExternal(i32* getelementptr inbounds ([8 x { i16, i32 }], [8 x { i16, i32 }]* @main.someArray, i32 0, i32 3, i32 1)) + call void @modifyExternal(i32* bitcast ([1 x i16*]* @main.exportedValue to i32*)) + store i16 5, i16* @main.exposedValue1 + call void @modifyExternal(i32* bitcast (void ()* @willModifyGlobal to i32*)) + store i16 7, i16* @main.exposedValue2 ret void } @@ -27,3 +36,11 @@ entry: } declare i64 @someValue() local_unnamed_addr + +declare void @modifyExternal(i32*) local_unnamed_addr + +define void @willModifyGlobal() { +entry: + store i16 8, i16* @main.exposedValue2 + ret void +} diff --git a/interp/testdata/map.ll b/interp/testdata/map.ll index 50f9503628..9afb533223 100644 --- a/interp/testdata/map.ll +++ b/interp/testdata/map.ll @@ -48,8 +48,7 @@ entry: define internal void @main.testNonConstantBinarySet() { %hashmap.key = alloca i8 %hashmap.value = alloca i8 - ; Create hashmap from global. 
This breaks the normal hashmapBinarySet - ; optimization, to test the fallback. + ; Create hashmap from global. %map.new = call %runtime.hashmap* @runtime.hashmapMake(i8 1, i8 1, i32 1, i8* undef, i8* null) store %runtime.hashmap* %map.new, %runtime.hashmap** @main.binaryMap %map = load %runtime.hashmap*, %runtime.hashmap** @main.binaryMap @@ -64,8 +63,7 @@ define internal void @main.testNonConstantBinarySet() { ; operations (with string keys). define internal void @main.testNonConstantStringSet() { %hashmap.value = alloca i8 - ; Create hashmap from global. This breaks the normal hashmapStringSet - ; optimization, to test the fallback. + ; Create hashmap from global. %map.new = call %runtime.hashmap* @runtime.hashmapMake(i8 8, i8 1, i32 1, i8* undef, i8* null) store %runtime.hashmap* %map.new, %runtime.hashmap** @main.stringMap %map = load %runtime.hashmap*, %runtime.hashmap** @main.stringMap diff --git a/interp/testdata/map.out.ll b/interp/testdata/map.out.ll index e891362bc6..a8acc49f3b 100644 --- a/interp/testdata/map.out.ll +++ b/interp/testdata/map.out.ll @@ -2,27 +2,19 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv6m-none-eabi" %runtime.hashmap = type { %runtime.hashmap*, i8*, i32, i8, i8, i8 } -%runtime._string = type { i8*, i32 } @main.m = local_unnamed_addr global %runtime.hashmap* @"main$map" -@main.binaryMap = local_unnamed_addr global %runtime.hashmap* @"main$map.4" -@main.stringMap = local_unnamed_addr global %runtime.hashmap* @"main$map.6" +@main.binaryMap = local_unnamed_addr global %runtime.hashmap* @"main$map.1" +@main.stringMap = local_unnamed_addr global %runtime.hashmap* @"main$map.3" @main.init.string = internal unnamed_addr constant [7 x i8] c"CONNECT" -@"main$mapbucket" = internal unnamed_addr global { [8 x i8], i8*, [8 x i8], [8 x %runtime._string] } { [8 x i8] c"\04\00\00\00\00\00\00\00", i8* null, [8 x i8] c"\01\00\00\00\00\00\00\00", [8 x %runtime._string] [%runtime._string { i8* 
getelementptr inbounds ([7 x i8], [7 x i8]* @main.init.string, i32 0, i32 0), i32 7 }, %runtime._string zeroinitializer, %runtime._string zeroinitializer, %runtime._string zeroinitializer, %runtime._string zeroinitializer, %runtime._string zeroinitializer, %runtime._string zeroinitializer, %runtime._string zeroinitializer] } -@"main$map" = internal unnamed_addr global %runtime.hashmap { %runtime.hashmap* null, i8* getelementptr inbounds ({ [8 x i8], i8*, [8 x i8], [8 x %runtime._string] }, { [8 x i8], i8*, [8 x i8], [8 x %runtime._string] }* @"main$mapbucket", i32 0, i32 0, i32 0), i32 1, i8 1, i8 8, i8 0 } -@"main$alloca.2" = internal global i8 1 -@"main$alloca.3" = internal global i8 2 -@"main$map.4" = internal unnamed_addr global %runtime.hashmap { %runtime.hashmap* null, i8* null, i32 0, i8 1, i8 1, i8 0 } -@"main$alloca.5" = internal global i8 2 -@"main$map.6" = internal unnamed_addr global %runtime.hashmap { %runtime.hashmap* null, i8* null, i32 0, i8 8, i8 1, i8 0 } - -declare void @runtime.hashmapBinarySet(%runtime.hashmap*, i8*, i8*, i8*, i8*) local_unnamed_addr - -declare void @runtime.hashmapStringSet(%runtime.hashmap*, i8*, i32, i8*, i8*, i8*) local_unnamed_addr +@"main$map" = internal global %runtime.hashmap { %runtime.hashmap* null, i8* getelementptr inbounds ({ [8 x i8], i8*, { i8, [7 x i8] }, { { [7 x i8]*, [4 x i8] }, [56 x i8] } }, { [8 x i8], i8*, { i8, [7 x i8] }, { { [7 x i8]*, [4 x i8] }, [56 x i8] } }* @"main$mapbucket", i32 0, i32 0, i32 0), i32 1, i8 1, i8 8, i8 0 } +@"main$mapbucket" = internal unnamed_addr global { [8 x i8], i8*, { i8, [7 x i8] }, { { [7 x i8]*, [4 x i8] }, [56 x i8] } } { [8 x i8] c"\04\00\00\00\00\00\00\00", i8* null, { i8, [7 x i8] } { i8 1, [7 x i8] zeroinitializer }, { { [7 x i8]*, [4 x i8] }, [56 x i8] } { { [7 x i8]*, [4 x i8] } { [7 x i8]* @main.init.string, [4 x i8] c"\07\00\00\00" }, [56 x i8] zeroinitializer } } +@"main$map.1" = internal global %runtime.hashmap { %runtime.hashmap* null, i8* getelementptr 
inbounds ({ [8 x i8], i8*, { i8, [7 x i8] }, { i8, [7 x i8] } }, { [8 x i8], i8*, { i8, [7 x i8] }, { i8, [7 x i8] } }* @"main$mapbucket.2", i32 0, i32 0, i32 0), i32 1, i8 1, i8 1, i8 0 } +@"main$mapbucket.2" = internal unnamed_addr global { [8 x i8], i8*, { i8, [7 x i8] }, { i8, [7 x i8] } } { [8 x i8] c"\04\00\00\00\00\00\00\00", i8* null, { i8, [7 x i8] } { i8 1, [7 x i8] zeroinitializer }, { i8, [7 x i8] } { i8 2, [7 x i8] zeroinitializer } } +@"main$map.3" = internal global %runtime.hashmap { %runtime.hashmap* null, i8* getelementptr inbounds ({ [8 x i8], i8*, { { [7 x i8]*, [4 x i8] }, [56 x i8] }, { i8, [7 x i8] } }, { [8 x i8], i8*, { { [7 x i8]*, [4 x i8] }, [56 x i8] }, { i8, [7 x i8] } }* @"main$mapbucket.4", i32 0, i32 0, i32 0), i32 1, i8 8, i8 1, i8 0 } +@"main$mapbucket.4" = internal unnamed_addr global { [8 x i8], i8*, { { [7 x i8]*, [4 x i8] }, [56 x i8] }, { i8, [7 x i8] } } { [8 x i8] c"x\00\00\00\00\00\00\00", i8* null, { { [7 x i8]*, [4 x i8] }, [56 x i8] } { { [7 x i8]*, [4 x i8] } { [7 x i8]* @main.init.string, [4 x i8] c"\07\00\00\00" }, [56 x i8] zeroinitializer }, { i8, [7 x i8] } { i8 2, [7 x i8] zeroinitializer } } define void @runtime.initAll() unnamed_addr { entry: - call void @runtime.hashmapBinarySet(%runtime.hashmap* @"main$map.4", i8* @"main$alloca.2", i8* @"main$alloca.3", i8* undef, i8* null) - call void @runtime.hashmapStringSet(%runtime.hashmap* @"main$map.6", i8* getelementptr inbounds ([7 x i8], [7 x i8]* @main.init.string, i32 0, i32 0), i32 7, i8* @"main$alloca.5", i8* undef, i8* null) ret void } diff --git a/interp/testdata/scan.ll b/interp/testdata/scan.ll deleted file mode 100644 index bf44750e1f..0000000000 --- a/interp/testdata/scan.ll +++ /dev/null @@ -1,78 +0,0 @@ -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64--linux" - -%runtime.typecodeID = type { %runtime.typecodeID*, i64 } - -declare i1 @runtime.typeAssert(i64, %runtime.typecodeID*, i8*, i8*) -declare i1 
@runtime.interfaceImplements(i64, i8**) - -define i64 @returnsConst() { - ret i64 0 -} - -define i64 @returnsArg(i64 %arg) { - ret i64 %arg -} - -declare i64 @externalCall() - -define i64 @externalCallOnly() { - %result = call i64 @externalCall() - ret i64 0 -} - -define i64 @externalCallAndReturn() { - %result = call i64 @externalCall() - ret i64 %result -} - -define i64 @externalCallBranch() { - %result = call i64 @externalCall() - %zero = icmp eq i64 %result, 0 - br i1 %zero, label %if.then, label %if.done - -if.then: - ret i64 2 - -if.done: - ret i64 4 -} - -@cleanGlobalInt = global i64 5 -define i64 @readCleanGlobal() { - %global = load i64, i64* @cleanGlobalInt - ret i64 %global -} - -@dirtyGlobalInt = global i64 5 -define i64 @readDirtyGlobal() { - %global = load i64, i64* @dirtyGlobalInt - ret i64 %global -} - -declare i64* @getDirtyPointer() - -define void @storeToPointer() { - %ptr = call i64* @getDirtyPointer() - store i64 3, i64* %ptr - ret void -} - -@functionPointer = global i64()* null -define i64 @callFunctionPointer() { - %fp = load i64()*, i64()** @functionPointer - %result = call i64 %fp() - ret i64 %result -} - -define i1 @callTypeAssert() { - ; Note: parameters are not realistic. - %ok = call i1 @runtime.typeAssert(i64 0, %runtime.typecodeID* null, i8* undef, i8* null) - ret i1 %ok -} - -define i1 @callInterfaceImplements() { - ; Note: parameters are not realistic. 
- %ok = call i1 @runtime.interfaceImplements(i64 0, i8** null) - ret i1 %ok -} diff --git a/interp/testdata/slice-copy.out.ll b/interp/testdata/slice-copy.out.ll index 2817564333..f3b15950fa 100644 --- a/interp/testdata/slice-copy.out.ll +++ b/interp/testdata/slice-copy.out.ll @@ -1,6 +1,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64--linux" +@"main$alloc.1" = internal unnamed_addr constant [6 x i8] c"\05\00{\00\00\04" + declare void @runtime.printuint8(i8) local_unnamed_addr declare void @runtime.printint16(i16) local_unnamed_addr @@ -15,6 +17,7 @@ entry: call void @runtime.printuint8(i8 3) call void @runtime.printuint8(i8 3) call void @runtime.printint16(i16 5) - call void @runtime.printint16(i16 5) + %int16SliceDst.val = load i16, i16* bitcast ([6 x i8]* @"main$alloc.1" to i16*) + call void @runtime.printint16(i16 %int16SliceDst.val) ret void } diff --git a/interp/utils.go b/interp/utils.go deleted file mode 100644 index c115477133..0000000000 --- a/interp/utils.go +++ /dev/null @@ -1,126 +0,0 @@ -package interp - -import ( - "errors" - - "tinygo.org/x/go-llvm" -) - -// Return a list of values (actually, instructions) where this value is used as -// an operand. -func getUses(value llvm.Value) []llvm.Value { - var uses []llvm.Value - use := value.FirstUse() - for !use.IsNil() { - uses = append(uses, use.User()) - use = use.NextUse() - } - return uses -} - -// getStringBytes loads the byte slice of a Go string represented as a -// {ptr, len} pair. 
-func getStringBytes(strPtr Value, strLen llvm.Value) ([]byte, error) { - if !strLen.IsConstant() { - return nil, errors.New("getStringBytes with a non-constant length") - } - buf := make([]byte, strLen.ZExtValue()) - for i := range buf { - gep, err := strPtr.GetElementPtr([]uint32{uint32(i)}) - if err != nil { - return nil, err - } - c, err := gep.Load() - if err != nil { - return nil, err - } - buf[i] = byte(c.ZExtValue()) - } - return buf, nil -} - -// getLLVMIndices converts an []uint32 into an []llvm.Value, for use in -// llvm.ConstGEP. -func getLLVMIndices(int32Type llvm.Type, indices []uint32) []llvm.Value { - llvmIndices := make([]llvm.Value, len(indices)) - for i, index := range indices { - llvmIndices[i] = llvm.ConstInt(int32Type, uint64(index), false) - } - return llvmIndices -} - -// Return true if this type is a scalar value (integer or floating point), false -// otherwise. -func isScalar(t llvm.Type) bool { - switch t.TypeKind() { - case llvm.IntegerTypeKind, llvm.FloatTypeKind, llvm.DoubleTypeKind: - return true - default: - return false - } -} - -// isPointerNil returns whether this is a nil pointer or not. The ok value -// indicates whether the result is certain: if it is false the result boolean is -// not valid. -func isPointerNil(v llvm.Value) (result bool, ok bool) { - if !v.IsAConstantExpr().IsNil() { - switch v.Opcode() { - case llvm.IntToPtr: - // Whether a constant inttoptr is nil is easy to - // determine. - result, ok = isZero(v.Operand(0)) - if ok { - return - } - case llvm.BitCast, llvm.GetElementPtr: - // These const instructions are just a kind of wrappers for the - // underlying pointer. - return isPointerNil(v.Operand(0)) - } - } - if !v.IsAConstantPointerNull().IsNil() { - // A constant pointer null is always null, of course. - return true, true - } - if !v.IsAGlobalValue().IsNil() { - // A global value is never null. 
- return false, true - } - return false, false // not valid -} - -// isZero returns whether the value in v is the integer zero, and whether that -// can be known right now. -func isZero(v llvm.Value) (result bool, ok bool) { - if !v.IsAConstantExpr().IsNil() { - switch v.Opcode() { - case llvm.PtrToInt: - return isPointerNil(v.Operand(0)) - } - } - if !v.IsAConstantInt().IsNil() { - val := v.ZExtValue() - return val == 0, true - } - return false, false // not valid -} - -// unwrap returns the underlying value, with GEPs removed. This can be useful to -// get the underlying global of a GEP pointer. -func unwrap(value llvm.Value) llvm.Value { - for { - if !value.IsAConstantExpr().IsNil() { - switch value.Opcode() { - case llvm.GetElementPtr: - value = value.Operand(0) - continue - } - } else if !value.IsAGetElementPtrInst().IsNil() { - value = value.Operand(0) - continue - } - break - } - return value -} diff --git a/interp/values.go b/interp/values.go deleted file mode 100644 index ae29bf99d2..0000000000 --- a/interp/values.go +++ /dev/null @@ -1,443 +0,0 @@ -package interp - -// This file provides a litte bit of abstraction around LLVM values. - -import ( - "errors" - "strconv" - - "tinygo.org/x/go-llvm" -) - -// A Value is a LLVM value with some extra methods attached for easier -// interpretation. -type Value interface { - Value() llvm.Value // returns a LLVM value - Type() llvm.Type // equal to Value().Type() - IsConstant() bool // returns true if this value is a constant value - Load() (llvm.Value, error) // dereference a pointer - Store(llvm.Value) error // store to a pointer - GetElementPtr([]uint32) (Value, error) // returns an interior pointer - String() string // string representation, for debugging -} - -// A type that simply wraps a LLVM constant value. -type LocalValue struct { - Eval *Eval - Underlying llvm.Value -} - -// Value implements Value by returning the constant value itself. 
-func (v *LocalValue) Value() llvm.Value { - return v.Underlying -} - -func (v *LocalValue) Type() llvm.Type { - return v.Underlying.Type() -} - -func (v *LocalValue) IsConstant() bool { - if _, ok := v.Eval.dirtyGlobals[unwrap(v.Underlying)]; ok { - return false - } - return v.Underlying.IsConstant() -} - -// Load loads a constant value if this is a constant pointer. -func (v *LocalValue) Load() (llvm.Value, error) { - if !v.Underlying.IsAGlobalVariable().IsNil() { - return v.Underlying.Initializer(), nil - } - switch v.Underlying.Opcode() { - case llvm.GetElementPtr: - indices := v.getConstGEPIndices() - if indices[0] != 0 { - return llvm.Value{}, errors.New("invalid GEP") - } - global := v.Eval.getValue(v.Underlying.Operand(0)) - agg, err := global.Load() - if err != nil { - return llvm.Value{}, err - } - return llvm.ConstExtractValue(agg, indices[1:]), nil - case llvm.BitCast: - return llvm.Value{}, errors.New("interp: load from a bitcast") - default: - return llvm.Value{}, errors.New("interp: load from a constant") - } -} - -// Store stores to the underlying value if the value type is a pointer type, -// otherwise it returns an error. 
-func (v *LocalValue) Store(value llvm.Value) error { - if !v.Underlying.IsAGlobalVariable().IsNil() { - if !value.IsConstant() { - v.MarkDirty() - v.Eval.builder.CreateStore(value, v.Underlying) - } else { - v.Underlying.SetInitializer(value) - } - return nil - } - if !value.IsConstant() { - v.MarkDirty() - v.Eval.builder.CreateStore(value, v.Underlying) - return nil - } - switch v.Underlying.Opcode() { - case llvm.GetElementPtr: - indices := v.getConstGEPIndices() - if indices[0] != 0 { - return errors.New("invalid GEP") - } - global := &LocalValue{v.Eval, v.Underlying.Operand(0)} - agg, err := global.Load() - if err != nil { - return err - } - agg = llvm.ConstInsertValue(agg, value, indices[1:]) - return global.Store(agg) - default: - return errors.New("interp: store on a constant") - } -} - -// GetElementPtr returns a GEP when the underlying value is of pointer type. -func (v *LocalValue) GetElementPtr(indices []uint32) (Value, error) { - if !v.Underlying.IsAGlobalVariable().IsNil() { - int32Type := v.Underlying.Type().Context().Int32Type() - gep := llvm.ConstGEP(v.Underlying, getLLVMIndices(int32Type, indices)) - return &LocalValue{v.Eval, gep}, nil - } - if !v.Underlying.IsAConstantExpr().IsNil() { - switch v.Underlying.Opcode() { - case llvm.GetElementPtr, llvm.IntToPtr, llvm.BitCast: - int32Type := v.Underlying.Type().Context().Int32Type() - llvmIndices := getLLVMIndices(int32Type, indices) - return &LocalValue{v.Eval, llvm.ConstGEP(v.Underlying, llvmIndices)}, nil - } - } - return nil, errors.New("interp: unknown GEP") -} - -// stripPointerCasts removes all const bitcasts from pointer values, if there -// are any. 
-func (v *LocalValue) stripPointerCasts() *LocalValue { - value := v.Underlying - for { - if !value.IsAConstantExpr().IsNil() { - switch value.Opcode() { - case llvm.BitCast: - value = value.Operand(0) - continue - } - } - return &LocalValue{ - Eval: v.Eval, - Underlying: value, - } - } -} - -func (v *LocalValue) String() string { - isConstant := "false" - if v.IsConstant() { - isConstant = "true" - } - return "&LocalValue{Type: " + v.Type().String() + ", IsConstant: " + isConstant + "}" -} - -// getConstGEPIndices returns indices of this constant GEP, if this is a GEP -// instruction. If it is not, the behavior is undefined. -func (v *LocalValue) getConstGEPIndices() []uint32 { - indices := make([]uint32, v.Underlying.OperandsCount()-1) - for i := range indices { - operand := v.Underlying.Operand(i + 1) - indices[i] = uint32(operand.ZExtValue()) - } - return indices -} - -// MarkDirty marks this global as dirty, meaning that every load from and store -// to this global (from now on) must be performed at runtime. -func (v *LocalValue) MarkDirty() { - underlying := unwrap(v.Underlying) - if underlying.IsAGlobalVariable().IsNil() { - panic("trying to mark a non-global as dirty") - } - if !v.IsConstant() { - return // already dirty - } - v.Eval.dirtyGlobals[underlying] = struct{}{} -} - -// MapValue implements a Go map which is created at compile time and stored as a -// global variable. 
-type MapValue struct { - Eval *Eval - PkgName string - Underlying llvm.Value - Keys []Value - Values []Value - KeySize int - ValueSize int - KeyType llvm.Type - ValueType llvm.Type -} - -func (v *MapValue) newBucket() llvm.Value { - ctx := v.Eval.Mod.Context() - i8ptrType := llvm.PointerType(ctx.Int8Type(), 0) - bucketType := ctx.StructType([]llvm.Type{ - llvm.ArrayType(ctx.Int8Type(), 8), // tophash - i8ptrType, // next bucket - llvm.ArrayType(v.KeyType, 8), // key type - llvm.ArrayType(v.ValueType, 8), // value type - }, false) - bucketValue := llvm.ConstNull(bucketType) - bucket := llvm.AddGlobal(v.Eval.Mod, bucketType, v.PkgName+"$mapbucket") - bucket.SetInitializer(bucketValue) - bucket.SetLinkage(llvm.InternalLinkage) - bucket.SetUnnamedAddr(true) - return bucket -} - -// Value returns a global variable which is a pointer to the actual hashmap. -func (v *MapValue) Value() llvm.Value { - if !v.Underlying.IsNil() { - return v.Underlying - } - - ctx := v.Eval.Mod.Context() - i8ptrType := llvm.PointerType(ctx.Int8Type(), 0) - - var firstBucketGlobal llvm.Value - if len(v.Keys) == 0 { - // there are no buckets - firstBucketGlobal = llvm.ConstPointerNull(i8ptrType) - } else { - // create initial bucket - firstBucketGlobal = v.newBucket() - } - - // Insert each key/value pair in the hashmap. 
- bucketGlobal := firstBucketGlobal - for i, key := range v.Keys { - var keyBuf []byte - llvmKey := key.Value() - llvmValue := v.Values[i].Value() - if key.Type().TypeKind() == llvm.StructTypeKind && key.Type().StructName() == "runtime._string" { - keyPtr := llvm.ConstExtractValue(llvmKey, []uint32{0}) - keyLen := llvm.ConstExtractValue(llvmKey, []uint32{1}) - keyPtrVal := v.Eval.getValue(keyPtr) - var err error - keyBuf, err = getStringBytes(keyPtrVal, keyLen) - if err != nil { - panic(err) // TODO - } - } else if key.Type().TypeKind() == llvm.IntegerTypeKind { - keyBuf = make([]byte, v.Eval.TargetData.TypeAllocSize(key.Type())) - n := key.Value().ZExtValue() - for i := range keyBuf { - keyBuf[i] = byte(n) - n >>= 8 - } - } else if key.Type().TypeKind() == llvm.ArrayTypeKind && - key.Type().ElementType().TypeKind() == llvm.IntegerTypeKind && - key.Type().ElementType().IntTypeWidth() == 8 { - keyBuf = make([]byte, v.Eval.TargetData.TypeAllocSize(key.Type())) - for i := range keyBuf { - keyBuf[i] = byte(llvm.ConstExtractValue(llvmKey, []uint32{uint32(i)}).ZExtValue()) - } - } else { - panic("interp: map key type not implemented: " + key.Type().String()) - } - hash := v.hash(keyBuf) - - if i%8 == 0 && i != 0 { - // Bucket is full, create a new one. 
- newBucketGlobal := v.newBucket() - zero := llvm.ConstInt(ctx.Int32Type(), 0, false) - newBucketPtr := llvm.ConstInBoundsGEP(newBucketGlobal, []llvm.Value{zero}) - newBucketPtrCast := llvm.ConstBitCast(newBucketPtr, i8ptrType) - // insert pointer into old bucket - bucket := bucketGlobal.Initializer() - bucket = llvm.ConstInsertValue(bucket, newBucketPtrCast, []uint32{1}) - bucketGlobal.SetInitializer(bucket) - // switch to next bucket - bucketGlobal = newBucketGlobal - } - - tophashValue := llvm.ConstInt(ctx.Int8Type(), uint64(v.topHash(hash)), false) - bucket := bucketGlobal.Initializer() - bucket = llvm.ConstInsertValue(bucket, tophashValue, []uint32{0, uint32(i % 8)}) - bucket = llvm.ConstInsertValue(bucket, llvmKey, []uint32{2, uint32(i % 8)}) - bucket = llvm.ConstInsertValue(bucket, llvmValue, []uint32{3, uint32(i % 8)}) - bucketGlobal.SetInitializer(bucket) - } - - // Create the hashmap itself. - zero := llvm.ConstInt(ctx.Int32Type(), 0, false) - bucketPtr := llvm.ConstInBoundsGEP(firstBucketGlobal, []llvm.Value{zero}) - hashmapType := v.Type() - hashmap := llvm.ConstNamedStruct(hashmapType, []llvm.Value{ - llvm.ConstPointerNull(llvm.PointerType(hashmapType, 0)), // next - llvm.ConstBitCast(bucketPtr, i8ptrType), // buckets - llvm.ConstInt(hashmapType.StructElementTypes()[2], uint64(len(v.Keys)), false), // count - llvm.ConstInt(ctx.Int8Type(), uint64(v.KeySize), false), // keySize - llvm.ConstInt(ctx.Int8Type(), uint64(v.ValueSize), false), // valueSize - llvm.ConstInt(ctx.Int8Type(), 0, false), // bucketBits - }) - - // Create a pointer to this hashmap. - hashmapPtr := llvm.AddGlobal(v.Eval.Mod, hashmap.Type(), v.PkgName+"$map") - hashmapPtr.SetInitializer(hashmap) - hashmapPtr.SetLinkage(llvm.InternalLinkage) - hashmapPtr.SetUnnamedAddr(true) - v.Underlying = llvm.ConstInBoundsGEP(hashmapPtr, []llvm.Value{zero}) - return v.Underlying -} - -// Type returns type runtime.hashmap, which is the actual hashmap type. 
-func (v *MapValue) Type() llvm.Type { - return v.Eval.Mod.GetTypeByName("runtime.hashmap") -} - -func (v *MapValue) IsConstant() bool { - return true // TODO: dirty maps -} - -// Load panics: maps are of reference type so cannot be dereferenced. -func (v *MapValue) Load() (llvm.Value, error) { - panic("interp: load from a map") -} - -// Store returns an error: maps are of reference type so cannot be stored to. -func (v *MapValue) Store(value llvm.Value) error { - // This must be a bug, but it might be helpful to indicate the location - // anyway. - return errors.New("interp: store on a map") -} - -// GetElementPtr panics: maps are of reference type so their (interior) -// addresses cannot be calculated. -func (v *MapValue) GetElementPtr(indices []uint32) (Value, error) { - return nil, errors.New("interp: GEP on a map") -} - -// PutString does a map assign operation, assuming that the map is of type -// map[string]T. -func (v *MapValue) PutString(keyBuf, keyLen, valPtr *LocalValue) error { - if !v.Underlying.IsNil() { - return errors.New("map already created") - } - - if valPtr.Underlying.Opcode() == llvm.BitCast { - valPtr = &LocalValue{v.Eval, valPtr.Underlying.Operand(0)} - } - value, err := valPtr.Load() - if err != nil { - return err - } - if v.ValueType.IsNil() { - v.ValueType = value.Type() - if int(v.Eval.TargetData.TypeAllocSize(v.ValueType)) != v.ValueSize { - return errors.New("interp: map store value type has the wrong size") - } - } else { - if value.Type() != v.ValueType { - return errors.New("interp: map store value type is inconsistent") - } - } - - keyType := v.Eval.Mod.GetTypeByName("runtime._string") - v.KeyType = keyType - key := llvm.ConstNull(keyType) - key = llvm.ConstInsertValue(key, keyBuf.Value(), []uint32{0}) - key = llvm.ConstInsertValue(key, keyLen.Value(), []uint32{1}) - - // TODO: avoid duplicate keys - v.Keys = append(v.Keys, &LocalValue{v.Eval, key}) - v.Values = append(v.Values, &LocalValue{v.Eval, value}) - - return nil -} - -// 
PutBinary does a map assign operation. -func (v *MapValue) PutBinary(keyPtr, valPtr *LocalValue) error { - if !v.Underlying.IsNil() { - return errors.New("map already created") - } - - if valPtr.Underlying.Opcode() == llvm.BitCast { - valPtr = &LocalValue{v.Eval, valPtr.Underlying.Operand(0)} - } - value, err := valPtr.Load() - if err != nil { - return err - } - if v.ValueType.IsNil() { - v.ValueType = value.Type() - if int(v.Eval.TargetData.TypeAllocSize(v.ValueType)) != v.ValueSize { - return errors.New("interp: map store value type has the wrong size") - } - } else { - if value.Type() != v.ValueType { - return errors.New("interp: map store value type is inconsistent") - } - } - - if !keyPtr.Underlying.IsAConstantExpr().IsNil() { - if keyPtr.Underlying.Opcode() == llvm.BitCast { - keyPtr = &LocalValue{v.Eval, keyPtr.Underlying.Operand(0)} - } else if keyPtr.Underlying.Opcode() == llvm.GetElementPtr { - keyPtr = &LocalValue{v.Eval, keyPtr.Underlying.Operand(0)} - } - } - key, err := keyPtr.Load() - if err != nil { - return err - } - if v.KeyType.IsNil() { - v.KeyType = key.Type() - if int(v.Eval.TargetData.TypeAllocSize(v.KeyType)) != v.KeySize { - return errors.New("interp: map store key type has the wrong size") - } - } else { - if key.Type() != v.KeyType { - return errors.New("interp: map store key type is inconsistent") - } - } - - // TODO: avoid duplicate keys - v.Keys = append(v.Keys, &LocalValue{v.Eval, key}) - v.Values = append(v.Values, &LocalValue{v.Eval, value}) - - return nil -} - -// Get FNV-1a hash of this string. -// -// https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash -func (v *MapValue) hash(data []byte) uint32 { - var result uint32 = 2166136261 // FNV offset basis - for _, c := range data { - result ^= uint32(c) - result *= 16777619 // FNV prime - } - return result -} - -// Get the topmost 8 bits of the hash, without using a special value (like 0). 
-func (v *MapValue) topHash(hash uint32) uint8 { - tophash := uint8(hash >> 24) - if tophash < 1 { - // 0 means empty slot, so make it bigger. - tophash += 1 - } - return tophash -} - -func (v *MapValue) String() string { - return "&MapValue{KeySize: " + strconv.Itoa(v.KeySize) + ", ValueSize: " + strconv.Itoa(v.ValueSize) + "}" -} diff --git a/src/runtime/interface.go b/src/runtime/interface.go index 9a327510ba..870ba6ab10 100644 --- a/src/runtime/interface.go +++ b/src/runtime/interface.go @@ -123,7 +123,7 @@ type structField struct { // than a function call. Also, by keeping the method set around it is easier to // implement interfaceImplements in the interp package. type typeInInterface struct { - typecode *typecodeID + typecode *typecodeID // element type, underlying type, or reference to struct fields methodSet *interfaceMethodInfo // nil or a GEP of an array }