2 changes: 2 additions & 0 deletions Makefile
@@ -187,9 +187,11 @@ tinygo-test:
$(TINYGO) test encoding/ascii85
$(TINYGO) test encoding/base32
$(TINYGO) test encoding/hex
$(TINYGO) test hash/adler32
$(TINYGO) test hash/fnv
$(TINYGO) test hash/crc64
$(TINYGO) test math
$(TINYGO) test math/cmplx
$(TINYGO) test text/scanner
$(TINYGO) test unicode/utf8

4 changes: 1 addition & 3 deletions compiler/interface.go
@@ -465,9 +465,7 @@ func (c *compilerContext) getInterfaceInvokeWrapper(f *ir.Function) llvm.Value {
paramTypes := append([]llvm.Type{c.i8ptrType}, fnType.ParamTypes()[len(expandedReceiverType):]...)
wrapFnType := llvm.FunctionType(fnType.ReturnType(), paramTypes, false)
wrapper = llvm.AddFunction(c.mod, wrapperName, wrapFnType)
if f.LLVMFn.LastParam().Name() == "parentHandle" {
wrapper.LastParam().SetName("parentHandle")
}
wrapper.LastParam().SetName("parentHandle")

wrapper.SetLinkage(llvm.InternalLinkage)
wrapper.SetUnnamedAddr(true)
68 changes: 44 additions & 24 deletions compiler/llvmutil/wordpack.go
@@ -26,7 +26,6 @@ func EmitPointerPack(builder llvm.Builder, mod llvm.Module, config *compileopts.
packedType := ctx.StructType(valueTypes, false)

// Allocate memory for the packed data.
var packedAlloc, packedHeapAlloc llvm.Value
size := targetData.TypeAllocSize(packedType)
if size == 0 {
return llvm.ConstPointerNull(i8ptrType)
@@ -39,9 +38,39 @@ func EmitPointerPack(builder llvm.Builder, mod llvm.Module, config *compileopts.
// Try to keep this cast in SSA form.
return builder.CreateIntToPtr(values[0], i8ptrType, "pack.int")
}

// Because packedType is a struct and we have to cast it to a *i8, store
// it in an alloca first for bitcasting (store+bitcast+load).
packedAlloc, _, _ = CreateTemporaryAlloca(builder, mod, packedType, "")
// it in a *i8 alloca first and load the *i8 value from there. This is
// effectively a bitcast.
packedAlloc, _, _ := CreateTemporaryAlloca(builder, mod, i8ptrType, "")

if size < targetData.TypeAllocSize(i8ptrType) {
// The alloca is bigger than the value that will be stored in it.
// To avoid having some bits undefined, zero the alloca first.
// Hopefully this will get optimized away.
builder.CreateStore(llvm.ConstNull(i8ptrType), packedAlloc)
}

// Store all values in the alloca.
packedAllocCast := builder.CreateBitCast(packedAlloc, llvm.PointerType(packedType, 0), "")
for i, value := range values {
indices := []llvm.Value{
llvm.ConstInt(ctx.Int32Type(), 0, false),
llvm.ConstInt(ctx.Int32Type(), uint64(i), false),
}
gep := builder.CreateInBoundsGEP(packedAllocCast, indices, "")
builder.CreateStore(value, gep)
}

// Load value (the *i8) from the alloca.
result := builder.CreateLoad(packedAlloc, "")

// End the lifetime of the alloca, to help the optimizer.
packedPtr := builder.CreateBitCast(packedAlloc, i8ptrType, "")
packedSize := llvm.ConstInt(ctx.Int64Type(), targetData.TypeAllocSize(packedAlloc.Type()), false)
EmitLifetimeEnd(builder, mod, packedPtr, packedSize)

return result
} else {
// Check if the values are all constants.
constant := true
@@ -67,7 +96,7 @@ func EmitPointerPack(builder llvm.Builder, mod llvm.Module, config *compileopts.
// Packed data is bigger than a pointer, so allocate it on the heap.
sizeValue := llvm.ConstInt(uintptrType, size, false)
alloc := mod.NamedFunction("runtime.alloc")
packedHeapAlloc = builder.CreateCall(alloc, []llvm.Value{
packedHeapAlloc := builder.CreateCall(alloc, []llvm.Value{
sizeValue,
llvm.Undef(i8ptrType), // unused context parameter
llvm.ConstPointerNull(i8ptrType), // coroutine handle
@@ -80,28 +109,19 @@ func EmitPointerPack(builder llvm.Builder, mod llvm.Module, config *compileopts.
llvm.ConstPointerNull(i8ptrType), // coroutine handle
}, "")
}
packedAlloc = builder.CreateBitCast(packedHeapAlloc, llvm.PointerType(packedType, 0), "")
}
// Store all values in the alloca or heap pointer.
for i, value := range values {
indices := []llvm.Value{
llvm.ConstInt(ctx.Int32Type(), 0, false),
llvm.ConstInt(ctx.Int32Type(), uint64(i), false),
packedAlloc := builder.CreateBitCast(packedHeapAlloc, llvm.PointerType(packedType, 0), "")

// Store all values in the heap pointer.
for i, value := range values {
indices := []llvm.Value{
llvm.ConstInt(ctx.Int32Type(), 0, false),
llvm.ConstInt(ctx.Int32Type(), uint64(i), false),
}
gep := builder.CreateInBoundsGEP(packedAlloc, indices, "")
builder.CreateStore(value, gep)
}
gep := builder.CreateInBoundsGEP(packedAlloc, indices, "")
builder.CreateStore(value, gep)
}

if packedHeapAlloc.IsNil() {
// Load value (as *i8) from the alloca.
packedAlloc = builder.CreateBitCast(packedAlloc, llvm.PointerType(i8ptrType, 0), "")
result := builder.CreateLoad(packedAlloc, "")
packedPtr := builder.CreateBitCast(packedAlloc, i8ptrType, "")
packedSize := llvm.ConstInt(ctx.Int64Type(), targetData.TypeAllocSize(packedAlloc.Type()), false)
EmitLifetimeEnd(builder, mod, packedPtr, packedSize)
return result
} else {
// Get the original heap allocation pointer, which already is an *i8.
// Return the original heap allocation pointer, which already is an *i8.
return packedHeapAlloc
}
}
131 changes: 93 additions & 38 deletions interp/README.md
@@ -6,50 +6,81 @@ possible and only run unknown expressions (e.g. external calls) at runtime. This
is in practice a partial evaluator of the `runtime.initAll` function, which
calls each package initializer.

It works by directly interpreting LLVM IR:

* Almost all operations work directly on constants, and are implemented using
the llvm.Const* set of functions that are evaluated directly.
* External function calls and some other operations (inline assembly, volatile
  load, volatile store) are seen as having limited side effects. Limited in
  the sense that it is known at compile time which globals they affect, which
  are then marked 'dirty' (meaning that further operations on them must be done
  at runtime). These operations are emitted directly in the `runtime.initAll`
  function. Return values are also considered 'dirty'.
* Operations on such 'dirty' objects and local values must be executed at
  runtime instead of at compile time. This dirtiness propagates further through
  the IR: for example, storing a dirty local value to a global also makes the
  global dirty, meaning that the global may not be read or written at compile
  time as its contents at that point during interpretation are unknown.
* There are some heuristics in place to avoid doing too much with dirty
  values. For example, a branch based on a dirty local marks the whole
  function itself as having side effects (as if it were an external function).
  However, all globals it touches are still taken into account, and when a call
  is inserted in `runtime.initAll`, all globals it references are also marked
  dirty.
* Heap allocation (`runtime.alloc`) is emulated by creating new objects. The
  value in the allocation is the initializer of the global; the zero value is
  the zero initializer.
* Stack allocation (`alloca`) is often emulated using a fake alloca object,
  until the address of the alloca is taken, in which case it is also created as
  a real `alloca` in `runtime.initAll` and marked dirty. This may be necessary
  when calling an external function with the given alloca as a parameter.
This package is a rewrite of a previous partial evaluator that worked
directly on LLVM IR and used the module and LLVM constants as intermediate
values. This newer version instead uses a mostly Go intermediate form. It
compiles functions and extracts relevant data first (compiler.go), then
executes those functions (interpreter.go) in a memory space that can be
rolled back per function (memory.go). This means that it is not necessary to
scan functions to see whether they can be run at compile time, which was very
error-prone. Instead, it simply tries to execute everything, and if it hits
something it cannot interpret (such as a store to memory-mapped I/O), it rolls
back the execution of that function and runs the function at runtime instead.
All in all, this design provides several benefits:

* Much better error handling. By being able to revert to runtime execution
without the need for scanning functions, this version is able to
automatically work around many bugs in the previous implementation.
* More correct memory model. This is not inherent to the new design, but the
new design also made the memory model easier to reason about.
* Faster execution of initialization code. While it is not much faster for
normal interpretation (maybe 25% or so) due to the compilation overhead,
it should be a whole lot faster for loops as it doesn't have to call into
LLVM (via CGo) for every operation.

As mentioned, this partial evaluator comes in three parts: a compiler, an
interpreter, and a memory manager.

## Compiler

The main task of the compiler is to extract all necessary data from every
instruction in a function, so that when that instruction is interpreted no
additional CGo calls are necessary. This is not currently done for all
instructions (`runtime.alloc` is a notable exception), but it does cover the
vast majority of instructions.
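
As a rough illustration of what such pre-extracted data could look like, here
is a minimal Go sketch. The `opcode` and `instruction` names and fields below
are invented for illustration; the actual types in this package differ.

```go
// Hypothetical pre-compiled instruction: everything the interpreter needs
// has already been pulled out of LLVM (one CGo round trip at compile time),
// so executing it later requires no further CGo calls.
type opcode uint8

const (
	opAdd opcode = iota
	opStore
	opCall
)

type instruction struct {
	op       opcode
	result   int    // index of the local value this instruction defines
	operands []int  // indices into the function's local value table
	callee   string // callee name, only meaningful for opCall
}
```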

## Interpreter

The interpreter runs an instruction just as it would be executed 'for real'.
The vast majority of instructions can be executed at compile time. As indicated
above, some instructions need to be executed at runtime instead.
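
A minimal sketch of such an interpreter loop, reusing the hypothetical
`instruction` type from the previous sketch (again, not the actual API of this
package):

```go
// run executes the pre-compiled instructions of one function; locals holds
// the local values defined so far. It reports whether the whole function
// could be interpreted: on false, the caller rolls everything back and the
// function is executed at runtime instead.
func run(insts []instruction, locals []uint64) (ok bool) {
	for _, inst := range insts {
		switch inst.op {
		case opAdd:
			// The operands were already extracted by the compiler step, so
			// this is a plain Go addition: no CGo call into LLVM is needed.
			locals[inst.result] = locals[inst.operands[0]] + locals[inst.operands[1]]
		case opStore:
			// Stores would go through the per-function memory view (see the
			// Memory section below) so they can be rolled back.
		default:
			// Unknown external call, volatile store, memory-mapped I/O, ...:
			// give up on this function and run it at runtime.
			return false
		}
	}
	return true
}
```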

## Memory

Memory is represented as objects (the `object` type) that contain data that
will eventually be stored in a global, and values (the `value` interface) that
can be worked with while running the interpreter. Values are therefore only
used locally and are always passed by value (just like most LLVM constants),
while objects represent the backing storage (like LLVM globals). Some values
are pointer values and point to an object.
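
A minimal sketch of this split, with hypothetical type shapes (the real
`object` and `value` types carry more information):

```go
// object is the backing storage that will eventually end up in an LLVM
// global; values are what the interpreter passes around by value.
type object struct {
	buf []byte // raw contents, written back into the module on success
}

type value interface{ isValue() }

// literalValue is a plain constant, e.g. an integer.
type literalValue struct{ v uint64 }

// pointerValue is a pointer, but it does not hold a Go pointer: it records
// the index of the object it points into, plus an offset within it.
type pointerValue struct {
	index  int
	offset int
}

func (literalValue) isValue() {}
func (pointerValue) isValue() {}
```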

Importantly, this partial evaluator can roll back the execution of a
function. This is implemented by creating a new memory view per function
activation, which makes sure that any change to a global (such as a store
instruction) is recorded in the memory view. It creates a copy of the object
and stores that in the memory view to be modified. Once the function has
executed successfully, all these modified objects are copied into the parent
view, up to the root function invocation, which (on successful execution)
writes the values back into the LLVM module. This way, function invocations
can be rolled back without leaving a trace.
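
A sketch of such a copy-on-write memory view, building on the hypothetical
`object` type above (method names are invented for illustration):

```go
// memoryView records the changes made during one function activation.
// Dropping the view without committing it is the rollback.
type memoryView struct {
	parent  *memoryView    // nil for the root view
	objects map[int]object // modified copies, keyed by object index
}

// put stores a modified copy of an object in this view only (copy-on-write).
func (v *memoryView) put(index int, obj object) {
	if v.objects == nil {
		v.objects = make(map[int]object)
	}
	v.objects[index] = obj
}

// commit folds all changes into the parent view once the function has
// executed successfully; at the root, the real package writes the objects
// back into the LLVM module instead.
func (v *memoryView) commit() {
	for index, obj := range v.objects {
		v.parent.put(index, obj)
	}
}
```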

Pointer values point to memory objects, but not to one particular copy of a
memory object. Every memory object is given an index, and pointers use that
index to look up the currently active version of the object to load from, or
to copy when storing to it.
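
Continuing the sketch above, resolving a pointer then means walking the view
chain to find the currently active version of the object it refers to:

```go
// get returns the currently active version of the object with this index,
// looking through parent views; an untouched object would come from the
// module itself in the real package.
func (v *memoryView) get(index int) object {
	for view := v; view != nil; view = view.parent {
		if obj, ok := view.objects[index]; ok {
			return obj
		}
	}
	return object{}
}

// loadByte reads one byte through a pointer value (illustration only).
func loadByte(v *memoryView, ptr pointerValue) byte {
	return v.get(ptr.index).buf[ptr.offset]
}
```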

Rolling back a function should roll back everything, including the few
instructions emitted for execution at runtime. This is done by treating
instructions much like memory objects and removing the created instructions
when necessary.

## Why is this necessary?

A partial evaluator is hard to get right, so why go through all the trouble of
writing one?

The main reason is that the previous attempt wasn't complete and wasn't sound.
It simply tried to evaluate Go SSA directly, which was good but more difficult
than necessary. An IR-based interpreter needs to understand fewer instructions,
as LLVM IR simply has fewer (and less complex) instructions than Go SSA. Also,
LLVM provides some useful tools, like easily getting all uses of a function or
global, which Go SSA does not provide.

But why is it necessary at all? The answer is that globals with initializers are
much easier to optimize by LLVM than initialization code. Also, there are a few
other benefits:
The answer is that globals with initializers are much easier for LLVM to
optimize than initialization code. Also, there are a few other benefits:

* Dead globals are trivial to optimize away.
* Constant globals are easier to detect. Remember that Go does not have global
@@ -60,5 +91,29 @@ other benefits:
* Constants are much more efficient on microcontrollers, as they can be
  allocated in flash instead of RAM.

The Go SSA package does not create constant initializers for globals.
Instead, it emits initialization functions, so if you write the following:

```go
var foo = []byte{1, 2, 3, 4}
```

It would generate something like this:

```go
var foo []byte

func init() {
foo = make([]byte, 4)
foo[0] = 1
foo[1] = 2
foo[2] = 3
foo[3] = 4
}
```

This is of course hugely wasteful; it's much better to create `foo` as a
global array instead of initializing it at runtime.

For more details, see [this section of the
documentation](https://tinygo.org/compiler-internals/differences-from-go/).