2 changes: 2 additions & 0 deletions Makefile
@@ -187,9 +187,11 @@ tinygo-test:
$(TINYGO) test encoding/ascii85
$(TINYGO) test encoding/base32
$(TINYGO) test encoding/hex
$(TINYGO) test hash/adler32
$(TINYGO) test hash/fnv
$(TINYGO) test hash/crc64
$(TINYGO) test math
$(TINYGO) test math/cmplx
$(TINYGO) test text/scanner
$(TINYGO) test unicode/utf8

4 changes: 1 addition & 3 deletions compiler/interface.go
@@ -465,9 +465,7 @@ func (c *compilerContext) getInterfaceInvokeWrapper(f *ir.Function) llvm.Value {
paramTypes := append([]llvm.Type{c.i8ptrType}, fnType.ParamTypes()[len(expandedReceiverType):]...)
wrapFnType := llvm.FunctionType(fnType.ReturnType(), paramTypes, false)
wrapper = llvm.AddFunction(c.mod, wrapperName, wrapFnType)
if f.LLVMFn.LastParam().Name() == "parentHandle" {
wrapper.LastParam().SetName("parentHandle")
}
wrapper.LastParam().SetName("parentHandle")

wrapper.SetLinkage(llvm.InternalLinkage)
wrapper.SetUnnamedAddr(true)
68 changes: 44 additions & 24 deletions compiler/llvmutil/wordpack.go
@@ -26,7 +26,6 @@ func EmitPointerPack(builder llvm.Builder, mod llvm.Module, config *compileopts.
packedType := ctx.StructType(valueTypes, false)

// Allocate memory for the packed data.
var packedAlloc, packedHeapAlloc llvm.Value
size := targetData.TypeAllocSize(packedType)
if size == 0 {
return llvm.ConstPointerNull(i8ptrType)
@@ -39,9 +38,39 @@ func EmitPointerPack(builder llvm.Builder, mod llvm.Module, config *compileopts.
// Try to keep this cast in SSA form.
return builder.CreateIntToPtr(values[0], i8ptrType, "pack.int")
}

// Because packedType is a struct and we have to cast it to a *i8, store
// it in an alloca first for bitcasting (store+bitcast+load).
packedAlloc, _, _ = CreateTemporaryAlloca(builder, mod, packedType, "")
// it in a *i8 alloca first and load the *i8 value from there. This is
// effectively a bitcast.
packedAlloc, _, _ := CreateTemporaryAlloca(builder, mod, i8ptrType, "")

if size < targetData.TypeAllocSize(i8ptrType) {
// The alloca is bigger than the value that will be stored in it.
// To avoid having some bits undefined, zero the alloca first.
// Hopefully this will get optimized away.
builder.CreateStore(llvm.ConstNull(i8ptrType), packedAlloc)
}

// Store all values in the alloca.
packedAllocCast := builder.CreateBitCast(packedAlloc, llvm.PointerType(packedType, 0), "")
for i, value := range values {
indices := []llvm.Value{
llvm.ConstInt(ctx.Int32Type(), 0, false),
llvm.ConstInt(ctx.Int32Type(), uint64(i), false),
}
gep := builder.CreateInBoundsGEP(packedAllocCast, indices, "")
builder.CreateStore(value, gep)
}

// Load value (the *i8) from the alloca.
result := builder.CreateLoad(packedAlloc, "")

// End the lifetime of the alloca, to help the optimizer.
packedPtr := builder.CreateBitCast(packedAlloc, i8ptrType, "")
packedSize := llvm.ConstInt(ctx.Int64Type(), targetData.TypeAllocSize(packedAlloc.Type()), false)
EmitLifetimeEnd(builder, mod, packedPtr, packedSize)

return result
} else {
// Check if the values are all constants.
constant := true
@@ -67,7 +96,7 @@ func EmitPointerPack(builder llvm.Builder, mod llvm.Module, config *compileopts.
// Packed data is bigger than a pointer, so allocate it on the heap.
sizeValue := llvm.ConstInt(uintptrType, size, false)
alloc := mod.NamedFunction("runtime.alloc")
packedHeapAlloc = builder.CreateCall(alloc, []llvm.Value{
packedHeapAlloc := builder.CreateCall(alloc, []llvm.Value{
sizeValue,
llvm.Undef(i8ptrType), // unused context parameter
llvm.ConstPointerNull(i8ptrType), // coroutine handle
@@ -80,28 +109,19 @@ func EmitPointerPack(builder llvm.Builder, mod llvm.Module, config *compileopts.
llvm.ConstPointerNull(i8ptrType), // coroutine handle
}, "")
}
packedAlloc = builder.CreateBitCast(packedHeapAlloc, llvm.PointerType(packedType, 0), "")
}
// Store all values in the alloca or heap pointer.
for i, value := range values {
indices := []llvm.Value{
llvm.ConstInt(ctx.Int32Type(), 0, false),
llvm.ConstInt(ctx.Int32Type(), uint64(i), false),
packedAlloc := builder.CreateBitCast(packedHeapAlloc, llvm.PointerType(packedType, 0), "")

// Store all values in the heap pointer.
for i, value := range values {
indices := []llvm.Value{
llvm.ConstInt(ctx.Int32Type(), 0, false),
llvm.ConstInt(ctx.Int32Type(), uint64(i), false),
}
gep := builder.CreateInBoundsGEP(packedAlloc, indices, "")
builder.CreateStore(value, gep)
}
gep := builder.CreateInBoundsGEP(packedAlloc, indices, "")
builder.CreateStore(value, gep)
}

if packedHeapAlloc.IsNil() {
// Load value (as *i8) from the alloca.
packedAlloc = builder.CreateBitCast(packedAlloc, llvm.PointerType(i8ptrType, 0), "")
result := builder.CreateLoad(packedAlloc, "")
packedPtr := builder.CreateBitCast(packedAlloc, i8ptrType, "")
packedSize := llvm.ConstInt(ctx.Int64Type(), targetData.TypeAllocSize(packedAlloc.Type()), false)
EmitLifetimeEnd(builder, mod, packedPtr, packedSize)
return result
} else {
// Get the original heap allocation pointer, which already is an *i8.
// Return the original heap allocation pointer, which already is an *i8.
return packedHeapAlloc
}
}
131 changes: 93 additions & 38 deletions interp/README.md
@@ -6,50 +6,81 @@ possible and only run unknown expressions (e.g. external calls) at runtime. This
is in practice a partial evaluator of the `runtime.initAll` function, which
calls each package initializer.

It works by directly interpreting LLVM IR:

* Almost all operations work directly on constants, and are implemented using
the llvm.Const* set of functions that are evaluated directly.
* External function calls and some other operations (inline assembly, volatile
  load, volatile store) are seen as having limited side effects. Limited in
  the sense that it is known at compile time which globals they affect, which
  are then marked 'dirty' (meaning that further operations on them must be done
  at runtime). These operations are emitted directly in the `runtime.initAll`
  function. Return values are also considered 'dirty'.
* Operations on such 'dirty' objects and local values must be executed at
  runtime instead of at compile time. This dirtiness propagates further through
  the IR: for example, storing a dirty local value to a global also makes the
  global dirty, meaning that the global may not be read or written at compile
  time as its contents at that point during interpretation are unknown.
* There are some heuristics in place to avoid doing too much with dirty
  values. For example, a branch based on a dirty local marks the whole
  function itself as having side effects (as if it were an external function).
  However, all globals it touches are still taken into account, and when a call
  is inserted in `runtime.initAll`, all globals it references are also marked
  dirty.
* Heap allocation (`runtime.alloc`) is emulated by creating new objects. The
  value in the allocation is the initializer of the global; the zero value is
  the zero initializer.
* Stack allocation (`alloca`) is often emulated using a fake alloca object,
  until the address of the alloca is taken, in which case it is also created as
  a real `alloca` in `runtime.initAll` and marked dirty. This may be necessary
  when calling an external function with the given alloca as a parameter.
This package is a rewrite of a previous partial evaluator that worked
directly on LLVM IR and used the module and LLVM constants as intermediate
values. This newer version instead uses a mostly Go intermediate form. It
compiles functions and extracts relevant data first (compiler.go), then
executes those functions (interpreter.go) in a memory space that can be
rolled back per function (memory.go). This means that it is not necessary to
scan functions to see whether they can be run at compile time, which was very
error-prone. Instead, it simply tries to execute everything, and if it hits
something it cannot interpret (such as a store to memory-mapped I/O), it rolls
back the execution of that function and runs the function at runtime instead.
All in all, this design provides several benefits:

* Much better error handling. By being able to revert to runtime execution
without the need for scanning functions, this version is able to
automatically work around many bugs in the previous implementation.
* More correct memory model. This is not inherent to the new design, but the
new design also made the memory model easier to reason about.
* Faster execution of initialization code. While it is not much faster for
normal interpretation (maybe 25% or so) due to the compilation overhead,
it should be a whole lot faster for loops as it doesn't have to call into
LLVM (via CGo) for every operation.

As mentioned, this partial evaluator comes in three parts: a compiler, an
interpreter, and a memory manager.

## Compiler

The main task of the compiler is to extract all necessary data from every
instruction in a function, so that when that instruction is interpreted no
additional CGo calls are necessary. This is not currently done for all
instructions (`runtime.alloc` is a notable exception), but it does cover the
vast majority of instructions.
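
As a rough illustration of what such pre-extracted data could look like, here
is a minimal Go sketch. The `opcode` and `instruction` names and fields below
are invented for illustration; the actual types in this package differ.

```go
// Hypothetical pre-compiled instruction: everything the interpreter needs
// has already been pulled out of LLVM (one CGo round trip at compile time),
// so executing it later requires no further CGo calls.
type opcode uint8

const (
	opAdd opcode = iota
	opStore
	opCall
)

type instruction struct {
	op       opcode
	result   int    // index of the local value this instruction defines
	operands []int  // indices into the function's local value table
	callee   string // callee name, only meaningful for opCall
}
```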

## Interpreter

The interpreter runs an instruction just as it would be executed 'for real'.
The vast majority of instructions can be executed at compile time. As indicated
above, some instructions need to be executed at runtime instead.
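
A minimal sketch of such an interpreter loop, reusing the hypothetical
`instruction` type from the previous sketch (again, not the actual API of this
package):

```go
// run executes the pre-compiled instructions of one function; locals holds
// the local values defined so far. It reports whether the whole function
// could be interpreted: on false, the caller rolls everything back and the
// function is executed at runtime instead.
func run(insts []instruction, locals []uint64) (ok bool) {
	for _, inst := range insts {
		switch inst.op {
		case opAdd:
			// The operands were already extracted by the compiler step, so
			// this is a plain Go addition: no CGo call into LLVM is needed.
			locals[inst.result] = locals[inst.operands[0]] + locals[inst.operands[1]]
		case opStore:
			// Stores would go through the per-function memory view (see the
			// Memory section below) so they can be rolled back.
		default:
			// Unknown external call, volatile store, memory-mapped I/O, ...:
			// give up on this function and run it at runtime.
			return false
		}
	}
	return true
}
```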

## Memory

Memory is represented as objects (the `object` type) that contain data that
will eventually be stored in a global, and values (the `value` interface) that
can be worked with while running the interpreter. Values are therefore only
used locally and are always passed by value (just like most LLVM constants),
while objects represent the backing storage (like LLVM globals). Some values
are pointer values and point to an object.
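
A minimal sketch of this split, with hypothetical type shapes (the real
`object` and `value` types carry more information):

```go
// object is the backing storage that will eventually end up in an LLVM
// global; values are what the interpreter passes around by value.
type object struct {
	buf []byte // raw contents, written back into the module on success
}

type value interface{ isValue() }

// literalValue is a plain constant, e.g. an integer.
type literalValue struct{ v uint64 }

// pointerValue is a pointer, but it does not hold a Go pointer: it records
// the index of the object it points into, plus an offset within it.
type pointerValue struct {
	index  int
	offset int
}

func (literalValue) isValue() {}
func (pointerValue) isValue() {}
```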

Importantly, this partial evaluator can roll back the execution of a
function. This is implemented by creating a new memory view per function
activation, which makes sure that any change to a global (such as a store
instruction) is recorded in the memory view. It creates a copy of the object
and stores that in the memory view to be modified. Once the function has
executed successfully, all these modified objects are copied into the parent
view, up to the root function invocation, which (on successful execution)
writes the values back into the LLVM module. This way, function invocations
can be rolled back without leaving a trace.
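
A sketch of such a copy-on-write memory view, building on the hypothetical
`object` type above (method names are invented for illustration):

```go
// memoryView records the changes made during one function activation.
// Dropping the view without committing it is the rollback.
type memoryView struct {
	parent  *memoryView    // nil for the root view
	objects map[int]object // modified copies, keyed by object index
}

// put stores a modified copy of an object in this view only (copy-on-write).
func (v *memoryView) put(index int, obj object) {
	if v.objects == nil {
		v.objects = make(map[int]object)
	}
	v.objects[index] = obj
}

// commit folds all changes into the parent view once the function has
// executed successfully; at the root, the real package writes the objects
// back into the LLVM module instead.
func (v *memoryView) commit() {
	for index, obj := range v.objects {
		v.parent.put(index, obj)
	}
}
```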

Pointer values point to memory objects, but not to one particular copy of a
memory object. Every memory object is given an index, and pointers use that
index to look up the currently active version of the object to load from, or
to copy when storing to it.
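
Continuing the sketch above, resolving a pointer then means walking the view
chain to find the currently active version of the object it refers to:

```go
// get returns the currently active version of the object with this index,
// looking through parent views; an untouched object would come from the
// module itself in the real package.
func (v *memoryView) get(index int) object {
	for view := v; view != nil; view = view.parent {
		if obj, ok := view.objects[index]; ok {
			return obj
		}
	}
	return object{}
}

// loadByte reads one byte through a pointer value (illustration only).
func loadByte(v *memoryView, ptr pointerValue) byte {
	return v.get(ptr.index).buf[ptr.offset]
}
```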

Rolling back a function should roll back everything, including the few
instructions emitted for execution at runtime. This is done by treating
instructions much like memory objects and removing the created instructions
when necessary.

## Why is this necessary?

A partial evaluator is hard to get right, so why go through all the trouble of
writing one?

The main reason is that the previous attempt wasn't complete and wasn't sound.
It simply tried to evaluate Go SSA directly, which was good but more difficult
than necessary. An IR-based interpreter needs to understand fewer instructions,
as LLVM IR simply has fewer (and less complex) instructions than Go SSA. Also,
LLVM provides some useful tools, like easily getting all uses of a function or
global, which Go SSA does not provide.

But why is it necessary at all? The answer is that globals with initializers are
much easier to optimize by LLVM than initialization code. Also, there are a few
other benefits:
The answer is that globals with initializers are much easier for LLVM to
optimize than initialization code. Also, there are a few other benefits:

* Dead globals are trivial to optimize away.
* Constant globals are easier to detect. Remember that Go does not have global
@@ -60,5 +91,29 @@ other benefits:
* Constants are much more efficient on microcontrollers, as they can be
  allocated in flash instead of RAM.

The Go SSA package does not create constant initializers for globals.
Instead, it emits initialization functions, so if you write the following:

```go
var foo = []byte{1, 2, 3, 4}
```

It would generate something like this:

```go
var foo []byte

func init() {
foo = make([]byte, 4)
foo[0] = 1
foo[1] = 2
foo[2] = 3
foo[3] = 4
}
```

This is of course hugely wasteful; it's much better to create `foo` as a
global array instead of initializing it at runtime.

For more details, see [this section of the
documentation](https://tinygo.org/compiler-internals/differences-from-go/).