Permalink
Browse files

New compilation engine for Mono

svn path=/trunk/mono/; revision=13205
  • Loading branch information...
1 parent 64655bb commit 5a710fa941e8d9c64a6228fe173cf50976eed05e @migueldeicaza migueldeicaza committed Apr 5, 2003
Showing with 18,825 additions and 0 deletions.
  1. +44 −0 docs/aot-compiler.txt
  2. +208 −0 docs/local-regalloc.txt
  3. +628 −0 docs/mini-doc.txt
  4. +113 −0 docs/opcode-decomp.txt
  5. +5 −0 mono/mini/.cvsignore
  6. +1 −0 mono/mini/README
  7. +13 −0 mono/mini/TODO
  8. +79 −0 mono/mini/TestDriver.cs
  9. +44 −0 mono/mini/aot-compiler.txt
  10. +555 −0 mono/mini/aot.c
  11. +131 −0 mono/mini/arrays.cs
  12. +426 −0 mono/mini/basic-float.cs
  13. +595 −0 mono/mini/basic-long.cs
  14. +622 −0 mono/mini/basic.cs
  15. +244 −0 mono/mini/bench.cs
  16. +216 −0 mono/mini/cfold.c
  17. +130 −0 mono/mini/cprop.c
  18. +611 −0 mono/mini/cpu-g4.md
  19. +619 −0 mono/mini/cpu-pentium.md
  20. +1,400 −0 mono/mini/debug-dwarf2.c
  21. +164 −0 mono/mini/debug-mini.c
  22. +111 −0 mono/mini/debug-private.h
  23. +231 −0 mono/mini/debug-stabs.c
  24. +1,138 −0 mono/mini/debug.c
  25. +163 −0 mono/mini/debug.h
  26. +28 −0 mono/mini/debugger-main.c
  27. +479 −0 mono/mini/dominators.c
  28. +733 −0 mono/mini/driver.c
  29. +86 −0 mono/mini/emullong.brg
  30. +953 −0 mono/mini/exceptions-ppc.c
  31. +972 −0 mono/mini/exceptions-x86.c
  32. +1,317 −0 mono/mini/exceptions.cs
  33. +236 −0 mono/mini/genmdesc.c
  34. +323 −0 mono/mini/graph.c
  35. +80 −0 mono/mini/helpers.c
  36. +94 −0 mono/mini/iltests.il
  37. +233 −0 mono/mini/inssel-float.brg
  38. +47 −0 mono/mini/inssel-long.brg
  39. +606 −0 mono/mini/inssel-long32.brg
  40. +497 −0 mono/mini/inssel-x86.brg
  41. +1,589 −0 mono/mini/inssel.brg
  42. +401 −0 mono/mini/jit-icalls.c
  43. +189 −0 mono/mini/linear-scan.c
  44. +202 −0 mono/mini/liveness.c
  45. +208 −0 mono/mini/local-regalloc.txt
  46. +8 −0 mono/mini/main.c
  47. +99 −0 mono/mini/makefile
  48. +12 −0 mono/mini/mini-arch.h
  49. +628 −0 mono/mini/mini-doc.txt
  50. +314 −0 mono/mini/mini-ops.h
Sorry, we could not display the entire diff because it was too big.
View
44 docs/aot-compiler.txt
@@ -0,0 +1,44 @@
+Mono Ahead Of Time Compiler
+===========================
+
+The new mono JIT has sophisticated optimization features. It uses SSA and has a
+pluggable architecture for further optimizations. This makes it possible and
+efficient to use the JIT also for AOT compilation.
+
+
+* file format: We use the native object format of the platform. That way it is
+ possible to reuse existing tools like objdump and the dynamic loader. All we
+ need is a working assembler, i.e. we write out a text file which is then
+ passed to gas (the gnu assembler) to generate the object file.
+
+* file names: we simply add ".so" to the generated file. For example:
+ basic.exe -> basic.exe.so
+ corlib.dll -> corlib.dll.so
+
+* starting the AOT compiler: mini --aot assembly_name
+
+The following things are saved in the object file:
+
+* version infos:
+
+* native code: this is labeled with method_XXXXXXXX: where XXXXXXXX is the
+ hexadecimal token number of the method.
+
+* additional informations needed by the runtime: For example we need to store
+ the code length and the exception tables. We also need a way to patch
+ constants only available at runtime (for example vtable and class
+  addresses). This is stored in a binary blob labeled method_info_XXXXXXXX:
+
+PROBLEMS:
+
+- all precompiled methods must be domain independent, or we add patch infos to
+  patch the target domain.
+
+- the main problem is how to patch runtime related addresses, for example:
+
+ - current application domain
+ - string objects loaded with LDSTR
+ - address of MonoClass data
+ - static field offsets
+   - method addresses
+ - virtual function and interface slots
View
208 docs/local-regalloc.txt
@@ -0,0 +1,208 @@
+
+* Proposal for the local register allocator
+
+ The local register allocator deals with allocating registers
+ for temporaries inside a single basic block, while the global
+ register allocator is concerned with method-wide allocation of
+ variables.
+  The global register allocator uses callee-saved registers for its
+  purpose so that there is no need to save and restore these registers
+ at call sites.
+
+ There are a number of issues the local allocator needs to deal with:
+ *) some instructions expect operands in specific registers (for example
+ the shl instruction on x86, or the call instruction with thiscall
+ convention, or the equivalent call instructions on other architectures,
+ such as the need to put output registers in %oX on sparc)
+ *) some instructions deliver results only in specific registers (for example
+    the div instruction on x86, or the call instructions on almost all
+ the architectures).
+ *) it needs to know what registers may be clobbered by an instruction
+ (such as in a method call)
+ *) it should avoid excessive reloads or stores to improve performance
+
+  While which specific instructions have limitations is architecture-dependent,
+  the problem should be solved in an arch-independent way to reduce code duplication.
+  The register allocator will be 'driven' by the arch-dependent code, but its
+  implementation should be arch-independent.
+
+ To improve the current local register allocator, we need to
+ keep more state in it than the current setup that only keeps busy/free info.
+
+ Possible state information is:
+
+  free: the register is free to use and it doesn't contain useful info
+ freeable: the register contains data loaded from a local (there is
+ also info about _which_ local it contains) as a result from previous
+ instructions (like, there was a store from the register to the local)
+ moveable: it contains live data that is needed in a following instruction, but
+ the contents may be moved to a different register
+ busy: the register contains live data and it is placed there because
+ the following instructions need it exactly in that register
+ allocated: the register is used by the global allocator
+
+ The local register allocator will have the following interfaces:
+
+ int get_register ();
+ Searches for a register in the free state. If it doesn't find it,
+ searches for a freeable register. Sets the status to moveable.
+ Looking for a 'free' register before a freeable one should allow for
+ removing a few redundant loads (though I'm still unsure if such
+ things should be delegated entirely to the peephole pass).
+
+ int get_register_force (int reg);
+ Returns 'reg' if it is free or freeable. If it is moveable, it moves it
+ to another free or freeable register.
+ Sets the status of 'reg' to busy.
+
+ void set_register_freeable (int reg);
+ Sets the status of 'reg' to freeable.
+
+ void set_register_free (int reg);
+ Sets the status of 'reg' to free.
+
+ void will_clobber (int reg);
+ Spills the register to the stack. Sets the status to freeable.
+ After the clobbering has occurred, set the status to free.
+
+ void register_unspill (int reg);
+ Un-spills register reg and sets the status to moveable.
+
+ FIXME: how is the 'local' information represented? Maybe a MonoInst* pointer.
+
+ Note: the register allocator will insert instructions in the basic block
+  during its operation.
+
+* Examples
+
+ Given the tree (on x86 the right argument to shl needs to be in ecx):
+
+ store (local1, shl (local1, call (some_arg)))
+
+ At the start of the basic block, the registers are set to the free state.
+ The sequence of instructions may be:
+ instruction register status -> [%eax %ecx %edx]
+ start free free free
+ eax = load local1 mov free free
+ /* call clobbers eax, ecx, edx */
+ spill eax free free free
+ call mov free free
+ /* now eax contains the right operand of the shl */
+ mov %eax -> %ecx free busy free
+ un-spill mov busy free
+ shl %cl, %eax mov free free
+
+ The resulting x86 code is:
+ mov $fffc(%ebp), %eax
+ mov %eax, $fff0(%ebp)
+ push some_arg
+ call func
+ mov %eax, %ecx
+ mov $fff0(%ebp), %eax
+ shl %cl, %eax
+
+ Note that since shl could operate directly on memory, we could have:
+
+ push some_arg
+ call func
+ mov %eax, %ecx
+ shl %cl, $fffc(%ebp)
+
+ The above example with loading the operand in a register is just to complicate
+ the example and show that the algorithm should be able to handle it.
+
+ Let's take another example with the this-call call convention (the first argument
+ is passed in %ecx).
+ In this case, will_clobber() will be called only on %eax and %edx, while %ecx
+ will be allocated with get_register_force ().
+ Note: when a register is allocated with get_register_force(), it should be set
+ to a different state as soon as possible.
+
+ store (local1, shl (local1, this-call (local1)))
+
+ instruction register status -> [%eax %ecx %edx]
+ start free free free
+ eax = load local1 mov free free
+ /* force load in %ecx */
+ ecx = load local1 mov busy free
+ spill eax free busy free
+ call mov free free
+ /* now eax contains the right operand of the shl */
+ mov %eax -> %ecx free busy free
+ un-spill mov busy free
+ shl %cl, %eax mov free free
+
+ What happens when a register that we need to allocate with get_register_force ()
+ contains an operand for the next instruction?
+
+ instruction register status -> [%eax %ecx %edx]
+ eax = load local0 mov free free
+ ecx = load local1 mov mov free
+ get_register_force (ecx) here.
+ We have two options:
+ mov %ecx, %edx
+ or:
+ spill %ecx
+ The first option is way better (and allows the peephole pass to
+ just load the value in %edx directly, instead of loading first to %ecx).
+ This doesn't work, though, if the instruction clobbers the %edx register
+ (like in a this-call). So, we first need to clobber the registers
+      (so the state of %ecx changes to freeable and there is no issue
+ with get_register_force ()).
+ What if an instruction both clobbers a register and requires it as
+    an operand? Let's take the x86 idiv instruction as an example: it
+ requires the dividend in edx:eax and returns the result in eax,
+ with the modulus in edx.
+
+ store (local1, div (local1, local2))
+
+ instruction register status -> [%eax %ecx %edx]
+ eax = load local0 mov free free
+ will_clobber eax, edx free mov free
+ force mov %ecx, %eax busy free free
+ set %edx busy free busy
+ idiv mov free free
+
+ Note: edx is set to free after idiv, because the modulus is not needed
+ (if it was a rem, eax would have been freed).
+ If we load the divisor before will_clobber(), we'll have to spill
+ eax and reload it later. If we load it just after the idiv, there is no issue.
+ In any case, the algorithm should give the correct results and allow the operation.
+
+  Working recursively on the instructions there shouldn't be huge issues
+ with this algorithm (though, of course, it's not optimal and it may
+ introduce excessive spills or register moves). The advantage over the current
+ local reg allocator is that:
+ 1) the number of spills/moves would be smaller anyway
+ 2) a separate peephole pass could be able to eliminate reg moves
+ 3) we'll be able to remove the 'forced' spills we currently do with
+ the return value of method calls
+
+* Issues
+
+ How to best integrate such a reg allocator with the burg stuff.
+
+  Think about a call on sparc with two arguments: they go into %o0 and %o1
+ and each of them sets the register as busy. But what if the values to put there
+ are themselves the result of a call? %o0 is no problem, but for all the
+ next argument n the above algorithm would spill all the 0...n-1 registers...
+
+* Papers
+
+ More complex solutions to the local register allocator problem:
+ http://dimacs.rutgers.edu/TechnicalReports/abstracts/1997/97-33.html
+
+ Combining register allocation and instruction scheduling:
+ http://citeseer.nj.nec.com/motwani95combining.html
+
+  More on LRA heuristics:
+ http://citeseer.nj.nec.com/liberatore97hardness.html
+
+ Linear-time optimal code scheduling for delayedload architectures
+ http://www.cs.wisc.edu/~fischer/cs701.f01/inst.sched.ps.gz
+
+ Precise Register Allocation for Irregular Architectures
+ http://citeseer.nj.nec.com/kong98precise.html
+
+ Allocate registers first to subtrees that need more of them.
+ http://www.upb.de/cs/ag-kastens/compii/folien/comment401-409.2.pdf
View
628 docs/mini-doc.txt
@@ -0,0 +1,628 @@
+
+ A new JIT compiler for the Mono Project
+
+ Miguel de Icaza (miguel@{ximian.com,gnome.org}),
+
+
+* Abstract
+
+ Mini is a new compilation engine for the Mono runtime. The
+ new engine is designed to bring new code generation
+ optimizations, portability and precompilation.
+
+ In this document we describe the design decisions and the
+ architecture of the new compilation engine.
+
+* Introduction
+
+ First we discuss the overall architecture of the Mono runtime,
+ and how code generation fits into it; Then we discuss the
+ development and basic architecture of our first JIT compiler
+ for the ECMA CIL framework. The next section covers the
+ objectives for the work on the new JIT compiler, then we
+ discuss the new features available in the new JIT compiler,
+ and finally a technical description of the new code generation
+ engine.
+
+* Architecture of the Mono Runtime
+
+ The Mono runtime is an implementation of the ECMA Common
+ Language Infrastructure (CLI), whose aim is to be a common
+ platform for executing code in multiple languages.
+
+ Languages that target the CLI generate images that contain
+ code in high-level intermediate representation called the
+ "Common Intermediate Language". This intermediate language is
+ rich enough to allow for programs and pre-compiled libraries
+ to be reflected. The execution environment allows for an
+ object oriented execution environment with single inheritance
+ and multiple interface implementations.
+
+ This runtime provides a number of services for programs that
+ are targeted to it: Just-in-Time compilation of CIL code into
+ native code, garbage collection, thread management, I/O
+ routines, single, double and decimal floating point,
+ asynchronous method invocation, application domains, and a
+ framework for building arbitrary RPC systems (remoting) and
+ integration with system libraries through the Platform Invoke
+ functionality.
+
+ The focus of this document is on the services provided by the
+ Mono runtime to transform CIL bytecodes into code that is
+ native to the underlying architecture.
+
+ The code generation interface is a set of macros that allow a
+ C programmer to generate code on the fly, this is done
+ through a set of macros found in the mono/jit/arch/ directory.
+ These macros are used by the JIT compiler to generate native
+ code.
+
+ The platform invocation code is interesting, as it generates
+ CIL code on the fly to marshal parameters, and then this
+	code is in turn processed by the JIT engine.
+
+* Previous Experiences
+
+ Mono has built a JIT engine, which has been used to bootstrap
+ Mono since January, 2002. This JIT engine has reasonable
+	performance, and uses a tree pattern matching instruction
+ selector based on the BURS technology. This JIT compiler was
+ designed by Dietmar Maurer, Paolo Molaro and Miguel de Icaza.
+
+ The existing JIT compiler has three phases:
+
+ * Re-creation of the semantic tree from CIL
+ byte-codes.
+
+ * Instruction selection, with a cost-driven
+ engine.
+
+ * Code generation and register allocation.
+
+ It is also hooked into the rest of the runtime to provide
+ services like marshaling, just-in-time compilation and
+ invocation of "internal calls".
+
+ This engine constructed a collection of trees, which we
+ referred to as the "forest of trees", this forest is created by
+ "hydrating" the CIL instruction stream.
+
+ The first step was to identify the basic blocks on the method,
+ and computing the control flow graph (cfg) for it. Once this
+ information was computed, a stack analysis on each basic block
+ was performed to create a forest of trees for each one of
+ them.
+
+ So for example, the following statement:
+
+ int a, b;
+ ...
+ b = a + 1;
+
+ Which would be represented in CIL as:
+
+ ldloc.0
+ ldc.i4.1
+ add
+ stloc.1
+
+ After the stack analysis would create the following tree:
+
+ (STIND_I4 ADDR_L[EBX|2] (
+ ADD (LDIND_I4 ADDR_L[ESI|1])
+ CONST_I4[1]))
+
+ This tree contains information from the stack analysis: for
+ instance, notice that the operations explicitly encode the
+ data types they are operating on, there is no longer an
+ ambiguity on the types, because this information has been
+ inferred.
+
+ At this point the JIT would pass the constructed forest of
+	trees to the architecture-dependent JIT compiler.
+
+ The architecture dependent code then performed register
+ allocation (optionally using linear scan allocation for
+ variables, based on life analysis).
+
+ Once variables had been assigned, a tree pattern matching with
+ dynamic programming is used (the tree pattern matcher is
+ custom build for each architecture, using a code
+ generator: monoburg). The instruction selector used cost
+ functions to select the best instruction patterns.
+
+ The instruction selector is able to produce instructions that
+ take advantage of the x86 instruction indexing instructions
+ for example.
+
+ One problem though is that the code emitter and the register
+ allocator did not have any visibility outside the current
+ tree, which meant that some redundant instructions were
+ generated. A peephole optimizer with this architecture was
+ hard to write, given the tree-based representation that is
+ used.
+
+ This JIT was functional, but it did not provide a good
+ architecture to base future optimizations on. Also the
+ line between architecture neutral and architecture
+ specific code and optimizations was hard to draw.
+
+ The JIT engine supported two code generation modes to support
+ the two optimization modes for applications that host multiple
+ application domains: generate code that will be shared across
+ application domains, or generate code that will not be shared
+ across application domains.
+
+* Objectives of the new JIT engine.
+
+ We wanted to support a number of features that were missing:
+
+ * Ahead-of-time compilation.
+
+ The idea is to allow developers to pre-compile their code
+ to native code to reduce startup time, and the working
+ set that is used at runtime in the just-in-time compiler.
+
+ Although in Mono this has not been a visible problem, we
+ wanted to pro-actively address this problem.
+
+ When an assembly (a Mono/.NET executable) is installed in
+ the system, it would then be possible to pre-compile the
+ code, and have the JIT compiler tune the generated code
+ to the particular CPU on which the software is
+ installed.
+
+ This is done in the Microsoft.NET world with a tool
+ called ngen.exe
+
+ * Have a good platform for doing code optimizations.
+
+ The design called for a good architecture that would
+ enable various levels of optimizations: some
+ optimizations are better performed on high-level
+ intermediate representations, some on medium-level and
+ some at low-level representations.
+
+ Also it should be possible to conditionally turn these on
+ or off. Some optimizations are too expensive to be used
+ in just-in-time compilation scenarios, but these
+ expensive optimizations can be turned on for
+ ahead-of-time compilations or when using profile-guided
+ optimizations on a subset of the executed methods.
+
+ * Reduce the effort required to port the Mono code
+ generator to new architectures.
+
+ For Mono to gain wide adoption in the Unix world, it is
+ necessary that the JIT engine works in most of today's
+ commercial hardware platforms.
+
+* Features of the new JIT engine.
+
+ The new JIT engine was architected by Dietmar Maurer and Paolo
+ Molaro, based on the new objectives.
+
+ Mono provides a number of services to applications running
+ with the new JIT compiler:
+
+ * Just-in-Time compilation of CLI code into native code.
+
+ * Ahead-of-Time compilation of CLI code, to reduce
+ startup time of applications.
+
+ A number of software development features are also available:
+
+ * Execution time profiling (--profile)
+
+ Generates a report of the times consumed by routines,
+ as well as the invocation times, as well as the
+ callers.
+
+ * Memory usage profiling (--profile)
+
+ Generates a report of the memory usage by a program
+	      that is run under the Mono JIT.
+
+ * Code coverage (--coverage)
+
+ * Execution tracing.
+
+ People who are interested in developing and improving the Mini
+ JIT compiler will also find a few useful routines:
+
+ * Compilation times
+
+ This is used to time the execution time for the JIT
+ when compiling a routine.
+
+ * Control Flow Graph and Dominator Tree drawing.
+
+ These are visual aids for the JIT developer: they
+ render representations of the Control Flow graph, and
+ for the more advanced optimizations, they draw the
+ dominator tree graph.
+
+	      This requires Dot (from the graphviz package) and Ghostview.
+
+ * Code generator regression tests.
+
+ The engine contains support for running regression
+ tests on the virtual machine, which is very helpful to
+ developers interested in improving the engine.
+
+ * Optimization benchmark framework.
+
+ The JIT engine will generate graphs that compare
+ various benchmarks embedded in an assembly, and run the
+ various tests with different optimization flags.
+
+ This requires Perl, GD::Graph.
+
+* Flexibility
+
+ This is probably the most important component of the new code
+ generation engine. The internals are relatively easy to
+ replace and update, even large passes can be replaced and
+ implemented differently.
+
+* New code generator
+
+ Compiling a method begins with the `mini_method_to_ir' routine
+ that converts the CIL representation into a medium
+ intermediate representation.
+
+ The mini_method_to_ir routine performs a number of operations:
+
+ * Flow analysis and control flow graph computation.
+
+ Unlike the previous version, stack analysis and control
+ flow graphs are computed in a single pass in the
+ mini_method_to_ir function, this is done for performance
+ reasons: although the complexity increases, the benefit
+ for a JIT compiler is that there is more time available
+ for performing other optimizations.
+
+ * Basic block computation.
+
+ mini_method_to_ir populates the MonoCompile structure
+ with an array of basic blocks each of which contains
+ forest of trees made up of MonoInst structures.
+
+ * Inlining
+
+ Inlining is no longer restricted to methods containing
+ one single basic block, instead it is possible to inline
+ arbitrary complex methods.
+
+ The heuristics to choose what to inline are likely going
+ to be tuned in the future.
+
+ * Method to opcode conversion.
+
+ Some method call invocations like `call Math.Sin' are
+ transformed into an opcode: this transforms the call
+ into a semantically rich node, which is later inline
+ into an FPU instruction.
+
+ Various Array methods invocations are turned into
+ opcodes as well (The Get, Set and Address methods)
+
+ * Tail recursion elimination
+
+ Basic blocks ****
+
+ The MonoInst structure holds the actual decoded instruction,
+ with the semantic information from the stack analysis.
+ MonoInst is interesting because initially it is part of a tree
+ structure, here is a sample of the same tree with the new JIT
+ engine:
+
+ (stind.i4 regoffset[0xffffffd4(%ebp)]
+ (add (ldind.i4 regoffset[0xffffffd8(%ebp)])
+ iconst[1]))
+
+ This is a medium-level intermediate representation (MIR).
+
+ Some complex opcodes are decomposed at this stage into a
+ collection of simpler opcodes. Not every complex opcode is
+ decomposed at this stage, as we need to preserve the semantic
+ information during various optimization phases.
+
+ For example a NEWARR opcode carries the length and the type of
+ the array that could be used later to avoid type checking or
+ array bounds check.
+
+ There are a number of operations supported on this
+ representation:
+
+ * Branch optimizations.
+
+ * Variable liveness.
+
+ * Loop optimizations: the dominator trees are
+ computed, loops are detected, and their nesting
+ level computed.
+
+ * Conversion of the method into static single assignment
+ form (SSA form).
+
+ * Dead code elimination.
+
+ * Constant propagation.
+
+ * Copy propagation.
+
+ * Constant folding.
+
+ Once the above optimizations are optionally performed, a
+ decomposition phase is used to turn some complex opcodes into
+ internal method calls. In the initial version of the JIT
+ engine, various operations on longs are emulated instead of
+ being inlined. Also the newarr invocation is turned into a
+ call to the runtime.
+
+ At this point, after computing variable liveness, it is
+ possible to use the linear scan algorithm for allocating
+ variables to registers. The linear scan pass uses the
+ information that was previously gathered by the loop nesting
+ and loop structure computation to favor variables in inner
+ loops.
+
+ Stack space is then reserved for the local variables and any
+ temporary variables generated during the various
+ optimizations.
+
+** Instruction selection
+
+ At this point, the BURS instruction selector is invoked to
+ transform the tree-based representation into a list of
+ instructions. This is done using a tree pattern matcher that
+ is generated for the architecture using the `monoburg' tool.
+
+ Monoburg takes as input a file that describes tree patterns,
+ which are matched against the trees that were produced by the
+ engine in the previous stages.
+
+ The pattern matching might have more than one match for a
+ particular tree. In this case, the match selected is the one
+ whose cost is the smallest. A cost can be attached to each
+ rule, and if no cost is provided, the implicit cost is one.
+ Smaller costs are selected over higher costs.
+
+ The cost function can be used to select particular blocks of
+	code for a given architecture, or by using a prohibitively high
+ number to avoid having the rule match.
+
+ The various rules that our JIT engine uses transform a tree of
+ MonoInsts into a list of monoinsts:
+
+ +-----------------------------------------------------------+
+ | Tree List |
+ | of ===> Instruction selection ===> of |
+ | MonoInst MonoInst. |
+ +-----------------------------------------------------------+
+
+ During this process various "types" of MonoInst kinds
+	disappear and are turned into lower-level representations.  The
+ JIT compiler just happens to reuse the same structure (this is
+ done to reduce memory usage and improve memory locality).
+
+ The instruction selection rules are split in a number of
+ files, each one with a particular purpose:
+
+ inssel.brg
+ Contains the generic instruction selection
+ patterns.
+
+ inssel-x86.brg
+ Contains x86 specific rules.
+
+ inssel-ppc.brg
+ Contains PowerPC specific rules.
+
+ inssel-long32.brg
+ burg file for 64bit instructions on 32bit architectures.
+
+ inssel-long.brg
+ burg file for 64bit architectures.
+
+ inssel-float.brg
+ burg file for floating point instructions
+
+ For a given build, a set of those files would be included.
+ For example, for the build of Mono on the x86, the following
+ set is used:
+
+ inssel.brg inssel-x86.brg inssel-long32.brg inssel-float.brg
+
+** Native method generation
+
+ The native method generation has a number of steps:
+
+ * Architecture specific register allocation.
+
+ The information about loop nesting that was
+ previously gathered is used here to hint the
+ register allocator.
+
+ * Generating the method prolog/epilog.
+
+ * Optionally generate code to introduce tracing facilities.
+
+ * Hooking into the debugger.
+
+ * Performing any pending fixups.
+
+ * Code generation.
+
+*** Code Generation
+
+ The actual code generation is contained in the architecture
+ specific portion of the compiler. The input to the code
+ generator is each one of the basic blocks with its list of
+ instructions that were produced in the instruction selection
+ phase.
+
+ During the instruction selection phase, virtual registers are
+ assigned. Just before the peephole optimization is performed,
+ physical registers are assigned.
+
+	A simple peephole and algebraic optimizer is run at this
+ stage.
+
+ The peephole optimizer removes some redundant operations at
+ this point. This is possible because the code generation at
+ this point has visibility into the basic block that spans the
+ original trees.
+
+ The algebraic optimizer performs some simple algebraic
+ optimizations that replace expensive operations with cheaper
+ operations if possible.
+
+ The rest of the code generation is fairly simple: a switch
+ statement is used to generate code for each of the MonoInsts
+
+ We always try to allocate code in sequence, instead of just using
+ malloc. This way we increase spatial locality which gives a massive
+ speedup on most architectures.
+
+*** Ahead of Time compilation
+
+ Ahead-of-Time compilation is a new feature of our new
+ compilation engine. The compilation engine is shared by the
+ Just-in-Time (JIT) compiler and the Ahead-of-Time compiler
+ (AOT).
+
+ The difference is on the set of optimizations that are turned
+ on for each mode: Just-in-Time compilation should be as fast
+ as possible, while Ahead-of-Time compilation can take as long
+	as required, because this is not done at a time-critical
+	moment.
+
+ With AOT compilation, we can afford to turn all of the
+ computationally expensive optimizations on.
+
+ After the code generation phase is done, the code and any
+ required fixup information is saved into a file that is
+ readable by "as" (the native assembler available on all
+ systems). This assembly file is then passed to the native
+ assembler, which generates a loadable module.
+
+ At execution time, when an assembly is loaded from the disk,
+	the runtime engine will probe for the existence of a
+ pre-compiled image. If the pre-compiled image exists, then it
+ is loaded, and the method invocations are resolved to the code
+ contained in the loaded module.
+
+ The code generated under the AOT scenario is slightly
+ different than the JIT scenario. It generates code that is
+ application-domain relative and that can be shared among
+	multiple threads.
+
+ This is the same code generation that is used when the runtime
+ is instructed to maximize code sharing on a multi-application
+ domain scenario.
+
+* SSA-based optimizations
+
+	SSA form simplifies many optimizations because each variable has exactly
+ one definition site. All uses of a variable are "dominated" by its
+	definition, which enables us to implement algorithms like:
+
+ * conditional constant propagation
+
+ * array bound check removal
+
+ * dead code elimination
+
+	And we can implement those algorithms in an efficient way using SSA.
+
+
+* Register allocation.
+
+ Global register allocation is performed on the medium
+ intermediate representation just before instruction selection
+ is performed on the method. Local register allocation is
+ later performed at the basic-block level on the
+
+ Global register allocation uses the following input:
+
+ 1) set of register-sized variables that can be allocated to a
+ register (this is an architecture specific setting, for x86
+ these registers are the callee saved register ESI, EDI and
+ EBX).
+
+ 2) liveness information for the variables
+
+ 3) (optionally) loop info to favour variables that are used in
+ inner loops.
+
+ During instruction selection phase, symbolic registers are
+ assigned to temporary values in expressions.
+
+ Local register allocation assigns hard registers to the
+ symbolic registers, and it is performed just before the code
+ is actually emitted and is performed at the basic block level.
+ A CPU description file describes the input registers, output
+ registers, fixed registers and clobbered registers by each
+ operation.
+
+
+----------
+* Bootstrap
+
+ The Mini bootstrap parses the arguments passed on the command
+ line, and initializes the JIT runtime. Each time the
+ mini_init() routine is invoked, a new Application Domain will
+ be returned.
+
+* Signal handlers
+
+ mono_runtime_install_handlers
+
+* BURG Code Generator Generator
+
+ monoburg was written by Dietmar Maurer. It is based on the
+ papers from Christopher W. Fraser, Robert R. Henry and Todd
+ A. Proebsting: "BURG - Fast Optimal Instruction Selection and
+ Tree Parsing" and "Engineering a Simple, Efficient Code
+ Generator Generator".
+
+ The original BURG implementation is unable to work on DAGs, instead only
+	trees are allowed. Our monoburg implementation is able to generate a tree
+ matcher which works on DAGs, and we use this feature in the new
+ JIT. This simplifies the code because we can directly pass DAGs and
+ don't need to convert them to trees.
+
+* Future
+
+ Profile-based optimization is something that we are very
+ interested in supporting. There are two possible usage
+ scenarios:
+
+ * Based on the profile information gathered during
+ the execution of a program, hot methods can be compiled
+ with the highest level of optimizations, while bootstrap
+ code and cold methods can be compiled with the least set
+ of optimizations and placed in a discardable list.
+
+ * Code reordering: this profile-based optimization would
+ only make sense for pre-compiled code. The profile
+ information is used to re-order the assembly code on disk
+ so that the code is placed on the disk in a way that
+ increments locality.
+
+ This is the same principle under which SGI's cord program
+ works.
+
+ The nature of the CIL allows the above optimizations to be
+ easy to implement and deploy. Since we live and define our
+ universe for these things, there are no interactions with
+ system tools required, nor upgrades on the underlying
+ infrastructure required.
+
+ Instruction scheduling is important for certain kinds of
+ processors, and some of the framework exists today in our
+ register allocator and the instruction selector to cope with
+ this, but has not been finished. The instruction selection
+ would happen at the same time as local register allocation.
View
113 docs/opcode-decomp.txt
@@ -0,0 +1,113 @@
+
+* How to handle complex IL opcodes in an arch-independent way
+
+ Many IL opcodes are very simple: add, ldind etc.
+ Such opcodes can be implemented with a single cpu instruction
+ in most architectures (on some, a group of IL instructions
+ can be converted to a single cpu op).
+ There are many IL opcodes, though, that are more complex, but
+ can be expressed as a series of trees or a single tree of
+ simple operations. Such simple operations are architecture-independent.
+ It makes sense to decompose such complex IL instructions in their
+ simpler equivalent so that we gain in several ways:
+ *) porting effort is easier, because only the simple instructions
+ need to be implemented in arch-specific code
+ *) we could apply BURG rules to the trees and do pattern matching
+ on them to optimize the expressions according to the host cpu
+
+ The issue is: where do we do such conversion from coarse opcodes to
+ simple expressions?
+
+* Doing the conversion in method_to_ir ()
+
+ Some of these conversions can certainly be done in method_to_ir (),
+ but it's not always easy to decide which are better done there and
+ which in a different pass.
+ For example, let's take ldlen: in the mono implementation, ldlen
+ can be simply implemented with a load from a fixed position in the
+ array object:
+
+ len = [reg + maxlen_offset]
+
+	However, ldlen also carries semantic information: the result is the
+ length of the array, and since in the CLR arrays are of fixed size,
+ this information can be useful to later do bounds check removal.
+	If we convert this opcode in method_to_ir () we lose some useful
+ information for further optimizations.
+
+ In some other ways, decomposing an opcode in method_to_ir() may
+ allow for better optimizations later on (need to come up with an
+ example here ...).
+
+* Doing the conversion in inssel.brg
+
+ Some conversion may be done inside the burg rules: this has the
+ disadvantage that the instruction selector is not run again on
+ the resulting expression tree and we could miss some optimization
+ (this is what effectively happens with the coarse opcodes in the old
+ jit). This may also interfere with an efficient local register allocator.
+ It may be possible to add an extension in monoburg that allows a rule
+ such as:
+
+ recheck: LDLEN (reg) {
+ create an expression tree representing LDLEN
+ and return it
+ }
+
+ When the monoburg label process gets back a recheck, it will run
+ the labeling again on the resulting expression tree.
+ If this is possible at all (and in an efficient way) is a
+ question for dietmar:-)
+ It should be noted, though, that this may not always work, since
+ some complex IL opcodes may require a series of expression trees
+ and handling such cases in monoburg could become quite hairy.
+ For example, think of opcode that need to do multiple actions on the
+ same object: this basically means a DUP...
+	On the other hand, if a complex opcode needs a DUP, monoburg doesn't
+ actually need to create trees if it emits the instructions in
+ the correct sequence and maintains the right values in the registers
+ (usually the values that need a DUP are not changed...). How
+ this integrates with the current register allocator is not clear, since
+ that assigns registers based on the rule, but the instructions emitted
+ by the rules may be different (this already happens with the current JIT
+ where a MULT is replaced with lea etc...).
+
+* Doing it in a separate pass.
+
+ Doing the conversion in a separate pass over the instructions
+ is another alternative. This can be done right after method_to_ir ()
+ or after the SSA pass (since the IR after the SSA pass should look
+ almost like the IR we get back from method_to_ir ()).
+
+ This has the following advantages:
+ *) monoburg will handle only the simple opcodes (makes porting easier)
+ *) the instruction selection will be run on all the additional trees
+ *) it's easier to support coarse opcodes that produce multiple expression
+ trees (and apply the monoburg selector on all of them)
+ *) the SSA optimizer will see the original opcodes and will be able to use
+ the semantic info associated with them
+
+ The disadvantage is that this is a separate pass on the code and
+ it takes time (how much has not been measured yet, though).
+
+ With this approach, we may also be able to have C implementations
+ of some of the opcodes: this pass would insert a function call to
+ the C implementation (for example in the cases when first porting
+	to a new arch and implementing some stuff may be too hard in asm).
+
+* Extended basic blocks
+
+ IL code needs a lot of checks, bounds checks, overflow checks,
+ type checks and so on. This potentially increases by a lot
+ the number of basic blocks in a control flow graph. However,
+ all such blocks end up with a throw opcode that gives control to the
+ exception handling mechanism.
+ After method_to_ir () a MonoBasicBlock can be considered a sort
+ of extended basic block where the additional exits don't point
+ to basic blocks in the same procedure (at least when the method
+ doesn't have exception tables).
+ We need to make sure the passes following method_to_ir () can cope
+ with such kinds of extended basic blocks (especially the passes
+ that we need to apply to all the methods: as a start, we could
+ skip SSA optimizations for methods with exception clauses...)
+
View
5 mono/mini/.cvsignore
@@ -0,0 +1,5 @@
+mini
+mono-debugger-mini-wrapper
+.libs
+*.o
+test.exe
View
1 mono/mini/README
@@ -0,0 +1 @@
+Mini is the new JIT compiler for Mono.
View
13 mono/mini/TODO
@@ -0,0 +1,13 @@
+* use a pool of MBState structures to speedup monoburg instead of using a
+ mempool.
+* the decode tables in the burg-generated could use short instead of int
+ (this should save about 1 KB)
+* track the use of ESP, so that we can avoid the x86_lea in the epilog
+
+
+Other Ideas:
+
+* the ORP people avoids optimizations inside catch handlers - just to save
+ memory (for example allocation of strings - instead they allocate strings when
+ the code is executed (like the --shared option)). But there are only a few
+ functions using catch handlers, so I consider this a minor issue.
View
79 mono/mini/TestDriver.cs
@@ -0,0 +1,79 @@
+using System;
+using System.Reflection;
+
+
+public class TestDriver {
+
+ static public int RunTests (Type type, string[] args) {
+ int failed = 0, ran = 0;
+ int result, expected, elen;
+ int i, j;
+ string name;
+ MethodInfo[] methods;
+ bool do_timings = false;
+ int tms = 0;
+ DateTime start, end = DateTime.Now;
+
+ if (args != null && args.Length > 0) {
+ for (j = 0; j < args.Length; j++) {
+ if (args [j] == "--time") {
+ do_timings = true;
+ string[] new_args = new string [args.Length - 1];
+ for (i = 0; i < j; ++i)
+ new_args [i] = args [i];
+ j++;
+ for (; j < args.Length; ++i, ++j)
+ new_args [i] = args [j];
+ args = new_args;
+ break;
+ }
+ }
+ }
+ methods = type.GetMethods (BindingFlags.Public|BindingFlags.NonPublic|BindingFlags.Static);
+ for (i = 0; i < methods.Length; ++i) {
+ name = methods [i].Name;
+ if (!name.StartsWith ("test_"))
+ continue;
+ if (args != null && args.Length > 0) {
+ bool found = false;
+ for (j = 0; j < args.Length; j++) {
+ if (name.EndsWith (args [j])) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ continue;
+ }
+ for (j = 5; j < name.Length; ++j)
+ if (!Char.IsDigit (name [j]))
+ break;
+ expected = Int32.Parse (name.Substring (5, j - 5));
+ start = DateTime.Now;
+ result = (int)methods [i].Invoke (null, null);
+ if (do_timings) {
+ end = DateTime.Now;
+ long tdiff = end.Ticks - start.Ticks;
+ int mdiff = (int)tdiff/10000;
+ tms += mdiff;
+ Console.WriteLine ("{0} took {1} ms", name, mdiff);
+ }
+ ran++;
+ if (result != expected) {
+ failed++;
+ Console.WriteLine ("{0} failed: got {1}, expected {2}", name, result, expected);
+ }
+ }
+
+ if (do_timings) {
+ Console.WriteLine ("Total ms: {0}", tms);
+ }
+ Console.WriteLine ("Regression tests: {0} ran, {1} failed in {2}", ran, failed, type);
+ //Console.WriteLine ("Regression tests: {0} ran, {1} failed in [{2}]{3}", ran, failed, type.Assembly.GetName().Name, type);
+ return failed;
+ }
+ static public int RunTests (Type type) {
+ return RunTests (type, null);
+ }
+}
+
View
44 mono/mini/aot-compiler.txt
@@ -0,0 +1,44 @@
+Mono Ahead Of Time Compiler
+===========================
+
+The new mono JIT has sophisticated optimization features. It uses SSA and has a
+pluggable architecture for further optimizations. This makes it possible and
+efficient to use the JIT also for AOT compilation.
+
+
+* file format: We use the native object format of the platform. That way it is
+ possible to reuse existing tools like objdump and the dynamic loader. All we
+ need is a working assembler, i.e. we write out a text file which is then
+ passed to gas (the gnu assembler) to generate the object file.
+
+* file names: we simply add ".so" to the generated file. For example:
+ basic.exe -> basic.exe.so
+ corlib.dll -> corlib.dll.so
+
+* starting the AOT compiler: mini --aot assembly_name
+
+The following things are saved in the object file:
+
+* version infos:
+
+* native code: this is labeled with method_XXXXXXXX: where XXXXXXXX is the
+ hexadecimal token number of the method.
+
+* additional information needed by the runtime: For example we need to store
+ the code length and the exception tables. We also need a way to patch
+ constants only available at runtime (for example vtable and class
+  addresses). This is stored in a binary blob labeled method_info_XXXXXXXX:
+
+PROBLEMS:
+
+- all precompiled methods must be domain independent, or we add patch infos to
+  patch the target domain.
+
+- the main problem is how to patch runtime related addresses, for example:
+
+ - current application domain
+ - string objects loaded with LDSTR
+ - address of MonoClass data
+ - static field offsets
+  - method addresses
+ - virtual function and interface slots
View
555 mono/mini/aot.c
@@ -0,0 +1,555 @@
+/*
+ * aot.c: mono Ahead of Time compiler
+ *
+ * Author:
+ * Dietmar Maurer (dietmar@ximian.com)
+ *
+ * (C) 2002 Ximian, Inc.
+ */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include <limits.h> /* for PAGESIZE */
+#ifndef PAGESIZE
+#define PAGESIZE 4096
+#endif
+
+#include <mono/metadata/tabledefs.h>
+#include <mono/metadata/class.h>
+#include <mono/metadata/object.h>
+#include <mono/metadata/tokentype.h>
+#include <mono/metadata/appdomain.h>
+#include <mono/metadata/debug-helpers.h>
+
+#include "mini.h"
+
+#define ENCODE_TYPE_POS(t,l) (((t) << 24) | (l))
+#define DECODE_TYPE(v) ((v) >> 24)
+#define DECODE_POS(v) ((v) & 0xffffff)
+
+gpointer
+mono_aot_get_method (MonoMethod *method)
+{
+ MonoClass *klass = method->klass;
+ MonoAssembly *ass = klass->image->assembly;
+ MonoJumpInfo *patch_info = NULL;
+ GModule *module = ass->aot_module;
+ char *method_label, *info_label;
+ guint8 *code = NULL;
+ gpointer *info;
+ guint code_len, used_int_regs;
+
+ if (!module)
+ return NULL;
+
+ if (!method->token)
+ return NULL;
+
+ g_assert (klass->inited);
+
+ method_label = g_strdup_printf ("method_%08X", method->token);
+
+ if (!g_module_symbol (module, method_label, (gpointer *)&code)) {
+ g_free (method_label);
+ return NULL;
+ }
+
+ info_label = g_strdup_printf ("%s_patch_info", method_label);
+ if (!g_module_symbol (module, info_label, (gpointer *)&info)) {
+ g_free (method_label);
+ g_free (info_label);
+ return NULL;
+ }
+
+ //printf ("FOUND AOT compiled code for %s %p %p\n", mono_method_full_name (method, TRUE), code, info);
+
+ code_len = GPOINTER_TO_UINT (*((gpointer **)info));
+ info++;
+ used_int_regs = GPOINTER_TO_UINT (*((gpointer **)info));
+ info++;
+
+ if (info) {
+ MonoMemPool *mp = mono_mempool_new ();
+ MonoImage *image;
+ guint8 *page_start;
+ gpointer *table;
+ int pages;
+ int i, err;
+
+ while (*info) {
+ MonoJumpInfo *ji = mono_mempool_alloc0 (mp, sizeof (MonoJumpInfo));
+ gpointer *data = *((gpointer **)info);
+ info++;
+ ji->type = DECODE_TYPE (GPOINTER_TO_UINT (*info));
+ ji->ip.i = DECODE_POS (GPOINTER_TO_UINT (*info));
+
+ switch (ji->type) {
+ case MONO_PATCH_INFO_CLASS:
+ image = mono_image_loaded_by_guid ((char *)data [1]);
+ g_assert (image);
+ ji->data.klass = mono_class_get (image, (guint32)data [0]);
+ g_assert (ji->data.klass);
+ mono_class_init (ji->data.klass);
+ break;
+ case MONO_PATCH_INFO_IMAGE:
+ ji->data.image = mono_image_loaded_by_guid ((char *)data);
+ g_assert (ji->data.image);
+ break;
+ case MONO_PATCH_INFO_METHOD:
+ case MONO_PATCH_INFO_METHODCONST:
+ image = mono_image_loaded_by_guid ((char *)data [1]);
+ g_assert (image);
+ ji->data.method = mono_get_method (image, (guint32)data [0], NULL);
+ g_assert (ji->data.method);
+ mono_class_init (ji->data.method->klass);
+ break;
+ case MONO_PATCH_INFO_FIELD:
+ image = mono_image_loaded_by_guid ((char *)data [1]);
+ g_assert (image);
+ ji->data.field = mono_field_from_token (image, (guint32)data [0], NULL);
+ mono_class_init (ji->data.field->parent);
+ g_assert (ji->data.field);
+ break;
+ case MONO_PATCH_INFO_INTERNAL_METHOD:
+ ji->data.name = (char *)data;
+ g_assert (ji->data.name);
+ break;
+ case MONO_PATCH_INFO_SWITCH:
+ ji->table_size = (int)data [0];
+ table = g_new (gpointer, ji->table_size);
+ ji->data.target = table;
+ for (i = 0; i < ji->table_size; i++) {
+ table [i] = data [i + 1];
+ }
+ break;
+ case MONO_PATCH_INFO_R4:
+ case MONO_PATCH_INFO_R8:
+ ji->data.target = data;
+ break;
+ default:
+ g_warning ("unhandled type %d", ji->type);
+ g_assert_not_reached ();
+ }
+
+ info++;
+ ji->next = patch_info;
+ patch_info = ji;
+ }
+
+ /* disable write protection */
+ page_start = (char *) (((int) (code)) & ~ (PAGESIZE - 1));
+ pages = (code + code_len - page_start + PAGESIZE - 1) / PAGESIZE;
+ err = mprotect (page_start, pages * PAGESIZE, PROT_READ | PROT_WRITE | PROT_EXEC);
+ g_assert (err == 0);
+
+ mono_arch_patch_code (method, mono_root_domain, code, patch_info);
+ mono_mempool_destroy (mp);
+ }
+
+ g_free (info_label);
+ g_free (method_label);
+
+ {
+ MonoJitInfo *info;
+ info = mono_mempool_alloc0 (mono_root_domain->mp, sizeof (MonoJitInfo));
+ info->code_size = code_len;
+ info->used_regs = used_int_regs;
+ info->method = method;
+ info->code_start = code;
+ mono_jit_info_table_add (mono_root_domain, info);
+ }
+ mono_jit_stats.methods_aot++;
+
+ return code;
+}
+
+#if 0
+static void
+write_data_symbol (FILE *fp, const char *name, guint8 *buf, int size, int align)
+{
+ int i;
+
+ fprintf (fp, ".globl %s\n", name);
+ fprintf (fp, ".data\n\t.align %d\n", align);
+ fprintf (fp, "\t.type %s,@object\n", name);
+ fprintf (fp, "\t.size %s,%d\n", name, size);
+ fprintf (fp, "%s:\n", name);
+ for (i = 0; i < size; i++) {
+ fprintf (fp, ".byte %d\n", buf [i]);
+ }
+
+}
+#endif
+
+static void
+write_string_symbol (FILE *fp, const char *name, char *value)
+{
+ fprintf (fp, ".globl %s\n", name);
+ fprintf (fp, ".data\n");
+ fprintf (fp, "%s:\n", name);
+ fprintf (fp, "\t.string \"%s\"\n", value);
+}
+
+static guint32
+mono_get_field_token (MonoClassField *field)
+{
+ MonoClass *klass = field->parent;
+ int i;
+
+ for (i = 0; i < klass->field.count; ++i) {
+ if (field == &klass->fields [i])
+ return MONO_TOKEN_FIELD_DEF | (klass->field.first + 1 + i);
+ }
+
+ g_assert_not_reached ();
+ return 0;
+}
+
+static char *
+cond_emit_image_label (FILE *tmpfp, GHashTable *image_hash, MonoImage *image)
+{
+ char *label;
+
+ if ((label = g_hash_table_lookup (image_hash, image)))
+ return label;
+
+ label = g_strdup_printf ("image_patch_info_%p", image);
+ fprintf (tmpfp, "%s:\n", label);
+ fprintf (tmpfp, "\t.string \"%s\"\n", image->guid);
+
+ g_hash_table_insert (image_hash, image, label);
+
+ return label;
+}
+
+static char *
+cond_emit_icall_label (FILE *tmpfp, GHashTable *hash, const char *icall_name)
+{
+ char *label;
+
+ if ((label = g_hash_table_lookup (hash, icall_name)))
+ return label;
+
+ label = g_strdup_printf ("icall_patch_info_%s", icall_name);
+ fprintf (tmpfp, "%s:\n", label);
+ fprintf (tmpfp, "\t.string \"%s\"\n", icall_name);
+
+ g_hash_table_insert (hash, (gpointer)icall_name, label);
+
+ return label;
+}
+
+static char *
+cond_emit_method_label (FILE *tmpfp, GHashTable *hash, MonoJumpInfo *patch_info)
+{
+ MonoMethod *method = patch_info->data.method;
+ char *l1, *l2;
+
+ if ((l1 = g_hash_table_lookup (hash, method)))
+ return l1;
+
+ l2 = cond_emit_image_label (tmpfp, hash, method->klass->image);
+ fprintf (tmpfp, "\t.align %d\n", sizeof (gpointer));
+ l1 = g_strdup_printf ("method_patch_info_%08x_%p", method->token, method);
+ fprintf (tmpfp, "%s:\n", l1);
+ fprintf (tmpfp, "\t.long 0x%08x\n", method->token);
+ g_assert (method->token);
+ fprintf (tmpfp, "\t.long %s\n", l2);
+
+ g_hash_table_insert (hash, method, l1);
+
+ return l1;
+}
+
+static char *
+cond_emit_klass_label (FILE *tmpfp, GHashTable *hash, MonoJumpInfo *patch_info)
+{
+ MonoClass *klass = patch_info->data.klass;
+ char *l1, *l2;
+
+ if ((l1 = g_hash_table_lookup (hash, klass)))
+ return l1;
+
+ l2 = cond_emit_image_label (tmpfp, hash, klass->image);
+ fprintf (tmpfp, "\t.align %d\n", sizeof (gpointer));
+ l1 = g_strdup_printf ("klass_patch_info_%08x_%p", klass->type_token, klass);
+ fprintf (tmpfp, "%s:\n", l1);
+ fprintf (tmpfp, "\t.long 0x%08x\n", klass->type_token);
+ g_assert (klass->type_token);
+ fprintf (tmpfp, "\t.long %s\n", l2);
+
+ g_hash_table_insert (hash, klass, l1);
+
+ return l1;
+}
+
+static char *
+cond_emit_field_label (FILE *tmpfp, GHashTable *hash, MonoJumpInfo *patch_info)
+{
+ MonoClassField *field = patch_info->data.field;
+ char *l1, *l2;
+ guint token;
+
+ if ((l1 = g_hash_table_lookup (hash, field)))
+ return l1;
+
+ l2 = cond_emit_image_label (tmpfp, hash, field->parent->image);
+ fprintf (tmpfp, "\t.align %d\n", sizeof (gpointer));
+ token = mono_get_field_token (field);
+ l1 = g_strdup_printf ("klass_patch_info_%08x_%p", token, field);
+ fprintf (tmpfp, "%s:\n", l1);
+ fprintf (tmpfp, "\t.long 0x%08x\n", token);
+ g_assert (token);
+ fprintf (tmpfp, "\t.long %s\n", l2);
+
+ g_hash_table_insert (hash, field, l1);
+
+ return l1;
+}
+
+int
+mono_compile_assembly (MonoAssembly *ass, guint32 opts)
+{
+ MonoCompile *cfg;
+ MonoImage *image = ass->image;
+ MonoMethod *method;
+ char *com, *tmpfname;
+ FILE *tmpfp;
+ int i, j;
+ guint8 *code, *mname;
+ int ccount = 0, mcount = 0, lmfcount = 0, ecount = 0, abscount = 0, wrappercount = 0, ocount = 0;
+ GHashTable *ref_hash;
+ int func_alignment = 16;
+
+ printf ("Mono AOT compiler - compiling assembly %s\n", image->name);
+
+ tmpfname = g_strdup_printf ("%s/mono_aot_%05d", P_tmpdir, getpid ());
+ tmpfp = fopen (tmpfname, "w+");
+ g_assert (tmpfp);
+
+ ref_hash = g_hash_table_new (NULL, NULL);
+
+ write_string_symbol (tmpfp, "mono_assembly_guid" , image->guid);
+
+ for (i = 0; i < image->tables [MONO_TABLE_METHOD].rows; ++i) {
+ MonoJumpInfo *patch_info;
+ gboolean skip;
+ guint32 token = MONO_TOKEN_METHOD_DEF | (i + 1);
+ method = mono_get_method (image, token, NULL);
+
+ /* fixme: maybe we can also precompile wrapper methods */
+ if ((method->iflags & METHOD_IMPL_ATTRIBUTE_INTERNAL_CALL) ||
+ (method->flags & METHOD_ATTRIBUTE_PINVOKE_IMPL) ||
+ (method->iflags & METHOD_IMPL_ATTRIBUTE_RUNTIME) ||
+ (method->flags & METHOD_ATTRIBUTE_ABSTRACT)) {
+ //printf ("Skip (impossible): %s\n", mono_method_full_name (method, TRUE));
+ continue;
+ }
+
+ mcount++;
+
+ /* fixme: we need to patch the IP for the LMF in that case */
+ if (method->save_lmf) {
+ //printf ("Skip (needs lmf): %s\n", mono_method_full_name (method, TRUE));
+ lmfcount++;
+ continue;
+ }
+
+ /* fixme: add methods with exception tables */
+ if (((MonoMethodNormal *)method)->header->num_clauses) {
+ //printf ("Skip (exceptions): %s\n", mono_method_full_name (method, TRUE));
+ ecount++;
+ continue;
+ }
+
+ //printf ("START: %s\n", mono_method_full_name (method, TRUE));
+ //mono_compile_method (method);
+
+ cfg = mini_method_compile (method, opts, mono_root_domain, 0);
+ g_assert (cfg);
+
+ if (cfg->disable_aot) {
+ ocount++;
+ continue;
+ }
+
+ skip = FALSE;
+ for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
+ if (patch_info->type == MONO_PATCH_INFO_ABS) {
+ /* unable to handle this */
+ //printf ("Skip (abs addr): %s %d\n", mono_method_full_name (method, TRUE), patch_info->type);
+ skip = TRUE;
+ break;
+ }
+ }
+
+ if (skip) {
+ abscount++;
+ continue;
+ }
+
+ skip = FALSE;
+ for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
+ if ((patch_info->type == MONO_PATCH_INFO_METHOD ||
+ patch_info->type == MONO_PATCH_INFO_METHODCONST) &&
+ patch_info->data.method->wrapper_type) {
+ /* unable to handle this */
+ //printf ("Skip (wrapper call): %s %d\n", mono_method_full_name (method, TRUE), patch_info->type);
+ skip = TRUE;
+ break;
+ }
+ }
+
+ if (skip) {
+ wrappercount++;
+ continue;
+ }
+
+ //printf ("Compile: %s\n", mono_method_full_name (method, TRUE));
+
+ code = cfg->native_code;
+
+ fprintf (tmpfp, ".text\n");
+ mname = g_strdup_printf ("method_%08X", token);
+ fprintf (tmpfp, "\t.align %d\n", func_alignment);
+ fprintf (tmpfp, ".globl %s\n", mname);
+ fprintf (tmpfp, "%s:\n", mname);
+
+ for (j = 0; j < cfg->code_len; j++)
+ fprintf (tmpfp, ".byte %d\n", (unsigned int) code [j]);
+
+ fprintf (tmpfp, ".data\n");
+
+ j = 0;
+ for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
+ switch (patch_info->type) {
+ case MONO_PATCH_INFO_LABEL:
+ case MONO_PATCH_INFO_BB:
+			/* relative jumps are no problem, there is no need to handle them here */
+ break;
+ case MONO_PATCH_INFO_SWITCH: {
+ gpointer *table = (gpointer *)patch_info->data.target;
+ int k;
+
+ fprintf (tmpfp, "\t.align %d\n", sizeof (gpointer));
+ fprintf (tmpfp, "%s_patch_info_%d:\n", mname, j);
+ fprintf (tmpfp, "\t.long %d\n", patch_info->table_size);
+
+ for (k = 0; k < patch_info->table_size; k++) {
+ fprintf (tmpfp, "\t.long %d\n", (guint8 *)table [k] - code);
+ }
+ j++;
+ break;
+ }
+ case MONO_PATCH_INFO_INTERNAL_METHOD:
+ patch_info->data.name = cond_emit_icall_label (tmpfp, ref_hash, patch_info->data.name);
+ j++;
+ break;
+ case MONO_PATCH_INFO_METHODCONST:
+ case MONO_PATCH_INFO_METHOD:
+ patch_info->data.name = cond_emit_method_label (tmpfp, ref_hash, patch_info);
+ j++;
+ break;
+ case MONO_PATCH_INFO_FIELD:
+ patch_info->data.name = cond_emit_field_label (tmpfp, ref_hash, patch_info);
+ j++;
+ break;
+ case MONO_PATCH_INFO_CLASS:
+ patch_info->data.name = cond_emit_klass_label (tmpfp, ref_hash, patch_info);
+ j++;
+ break;
+ case MONO_PATCH_INFO_IMAGE:
+ patch_info->data.name = cond_emit_image_label (tmpfp, ref_hash, patch_info->data.image);
+ j++;
+ break;
+ case MONO_PATCH_INFO_R4:
+ fprintf (tmpfp, "\t.align 8\n");
+ fprintf (tmpfp, "%s_patch_info_%d:\n", mname, j);
+ fprintf (tmpfp, "\t.long 0x%08x\n", *((guint32 *)patch_info->data.target));
+
+ j++;
+ break;
+ case MONO_PATCH_INFO_R8:
+ fprintf (tmpfp, "\t.align 8\n");
+ fprintf (tmpfp, "%s_patch_info_%d:\n", mname, j);
+ fprintf (tmpfp, "\t.long 0x%08x\n", *((guint32 *)patch_info->data.target));
+ fprintf (tmpfp, "\t.long 0x%08x\n", *((guint32 *)patch_info->data.target + 1));
+
+ j++;
+ break;
+ default:
+ g_warning ("unable to handle jump info %d", patch_info->type);
+ g_assert_not_reached ();
+ }
+ }
+
+ fprintf (tmpfp, ".globl %s_patch_info\n", mname);
+ fprintf (tmpfp, "\t.align %d\n", sizeof (gpointer));
+ fprintf (tmpfp, "%s_patch_info:\n", mname);
+
+ fprintf (tmpfp, "\t.long %d\n", cfg->code_len);
+ fprintf (tmpfp, "\t.long %d\n", cfg->used_int_regs);
+
+ if (j) {
+ j = 0;
+ for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
+ switch (patch_info->type) {
+ case MONO_PATCH_INFO_METHODCONST:
+ case MONO_PATCH_INFO_METHOD:
+ case MONO_PATCH_INFO_CLASS:
+ case MONO_PATCH_INFO_FIELD:
+ case MONO_PATCH_INFO_INTERNAL_METHOD:
+ case MONO_PATCH_INFO_IMAGE:
+ fprintf (tmpfp, "\t.long %s\n", patch_info->data.name);
+ fprintf (tmpfp, "\t.long 0x%08x\n", ENCODE_TYPE_POS (patch_info->type, patch_info->ip.i));
+ j++;
+ break;
+ case MONO_PATCH_INFO_SWITCH:
+ case MONO_PATCH_INFO_R4:
+ case MONO_PATCH_INFO_R8:
+ fprintf (tmpfp, "\t.long %s_patch_info_%d\n", mname, j);
+ fprintf (tmpfp, "\t.long 0x%08x\n", ENCODE_TYPE_POS (patch_info->type, patch_info->ip.i));
+ j++;
+ break;
+ case MONO_PATCH_INFO_LABEL:
+ case MONO_PATCH_INFO_BB:
+ break;
+ default:
+ g_warning ("unable to handle jump info %d", patch_info->type);
+ g_assert_not_reached ();
+ }
+
+ }
+ }
+ /* NULL terminated array */
+ fprintf (tmpfp, "\t.long 0\n");
+
+ /* fixme: save the rest of the required infos */
+
+ g_free (mname);
+ mono_destroy_compile (cfg);
+
+ ccount++;
+ }
+
+ fclose (tmpfp);
+
+ com = g_strdup_printf ("as %s -o %s.o;ld -shared -o %s.so %s.o;rm %s.o;strip --strip-unneeded --discard-all %s.so",
+ tmpfname, tmpfname, image->name, tmpfname, tmpfname, image->name);
+
+ printf ("Executing the native assembler now:\n%s\n", com);
+ system (com);
+
+ printf ("Compile %d out of %d methods (%d%%)\n", ccount, mcount, (ccount*100)/mcount);
+ printf ("%d methods contains exception tables (%d%%)\n", ecount, (ecount*100)/mcount);
+ printf ("%d methods contains absolute addresses (%d%%)\n", abscount, (abscount*100)/mcount);
+ printf ("%d methods contains wrapper references (%d%%)\n", wrappercount, (wrappercount*100)/mcount);
+ printf ("%d methods contains lmf pointers (%d%%)\n", lmfcount, (lmfcount*100)/mcount);
+ printf ("%d methods has other problems (%d%%)\n", ocount, (ocount*100)/mcount);
+ //unlink (tmpfname);
+
+ return 0;
+}
View
131 mono/mini/arrays.cs
@@ -0,0 +1,131 @@
+using System;
+using System.Reflection;
+
+/*
+ * Regression tests for the mono JIT.
+ *
+ * Each test needs to be of the form:
+ *
+ * static int test_<result>_<name> ();
+ *
+ * where <result> is an integer (the value that needs to be returned by
+ * the method to make it pass).
+ * <name> is a user-displayed name used to identify the test.
+ *
+ * The tests can be driven in two ways:
+ * *) running the program directly: Main() uses reflection to find and invoke
+ * the test methods (this is useful mostly to check that the tests are correct)
+ * *) with the --regression switch of the jit (this is the preferred way since
+ * all the tests will be run with optimizations on and off)
+ *
+ * The reflection logic could be moved to a .dll since we need at least another
+ * regression test file written in IL code to have better control on how
+ * the IL code looks.
+ */
+
+class Tests {
+
+ static int Main () {
+ return TestDriver.RunTests (typeof (Tests));
+ }
+
+ static int test_10_create () {
+ int[] a = new int [10];
+ return a.Length;
+ }
+
+ static int test_0_unset_value () {
+ int[] a = new int [10];
+ return a [5];
+ }
+
+ static int test_3_set_value () {
+ int[] a = new int [10];
+ a [5] = 3;
+ return a [5];
+ }
+
+ static int test_0_char_array_1 () {
+ int value = -30;
+ char[] tmp = new char [20];
+ char[] digitLowerTable = new char[16];
+ tmp[0] = digitLowerTable[-(value % 10)];
+ return 0;
+ }
+
+ static int test_0_char_array_2 () {
+ int value = 5;
+ char[] tmp = new char [20];
+ char[] digitLowerTable = new char[16];
+ tmp[0] = digitLowerTable[value % 10];
+ return 0;
+ }
+
+ static int test_0_char_array_3 () {
+ int value = -1;
+ char[] tmp = new char [20];
+ char[] digitLowerTable = new char[16];
+ tmp [0] = digitLowerTable[value & 15];
+ return 0;
+ }
+
+ unsafe static int test_0_byte_array () {
+ byte [] src = new byte [8];
+ double ret;
+ byte *dst = (byte *)&ret;
+ int start = 0;
+
+ dst[0] = src[4 + start];
+
+ return 0;
+ }
+
+ public static int test_0_set_after_shift () {
+ int [] n = new int [1];
+ int b = 16;
+
+ n [0] = 100 + (1 << (16 - b));
+
+ if (n [0] != 101)
+ return 1;
+
+ return 0;
+ }
+
+ private Int32[] m_array = new int [10];
+
+ void setBit (int bitIndex, bool value) {
+ int index = bitIndex/32;
+ int shift = bitIndex%32;
+
+ Int32 theBit = 1 << shift;
+ if (value)
+ m_array[index] |= theBit;
+ else
+ m_array[index] &= ~theBit;
+ }
+
+ bool getBit (int bitIndex) {
+ int index = bitIndex/32;
+ int shift = bitIndex%32;
+
+ Int32 theBit = m_array[index] & (1 << shift);
+ return (theBit == 0) ? false : true;
+
+ }
+
+ public static int test_1_bit_index () {
+ Tests t = new Tests ();
+ t.setBit (0, true);
+ t.setBit (3, true);
+ if (t.getBit (1))
+ return 4;
+ if (!t.getBit (0))
+ return 5;
+ if (!t.getBit (3))
+ return 6;
+ return 1;
+ }
+
+}
+
View
426 mono/mini/basic-float.cs
@@ -0,0 +1,426 @@
+using System;
+using System.Reflection;
+
+/*
+ * Regression tests for the mono JIT.
+ *
+ * Each test needs to be of the form:
+ *
+ * static int test_<result>_<name> ();
+ *
+ * where <result> is an integer (the value that needs to be returned by
+ * the method to make it pass).
+ * <name> is a user-displayed name used to identify the test.
+ *
+ * The tests can be driven in two ways:
+ * *) running the program directly: Main() uses reflection to find and invoke
+ * the test methods (this is useful mostly to check that the tests are correct)
+ * *) with the --regression switch of the jit (this is the preferred way since
+ * all the tests will be run with optimizations on and off)
+ *
+ * The reflection logic could be moved to a .dll since we need at least another
+ * regression test file written in IL code to have better control on how
+ * the IL code looks.
+ */
+
+class Tests {
+
+ static int Main () {
+ return TestDriver.RunTests (typeof (Tests));
+ }
+
+ static int test_0_beq () {
+ double a = 2.0;
+ if (a != 2.0)
+ return 1;
+ return 0;
+ }
+
+ static int test_0_bne_un () {
+ double a = 2.0;
+ if (a == 1.0)
+ return 1;
+ return 0;
+ }
+
+ static int test_0_conv_r8 () {
+ double a = 2;
+ if (a != 2.0)
+ return 1;
+ return 0;
+ }
+
+ static int test_0_conv_i4 () {
+ double a = 2.0;
+ int b = (int)a;
+ if (b != 2)
+ return 1;
+ return 0;
+ }
+
+ static int test_5_add () {
+ double a = 2.0;
+ double b = 3.0;
+ return (int)(a + b);
+ }
+
+ static int test_5_sub () {
+ double a = 8.0;
+ double b = 3.0;
+ return (int)(a - b);
+ }
+
+ static int test_24_mul () {
+ double a = 8.0;
+ double b = 3.0;
+ return (int)(a * b);
+ }
+
+ static int test_4_div () {
+ double a = 8.0;
+ double b = 2.0;
+ return (int)(a / b);
+ }
+
+ static int test_2_rem () {
+ double a = 8.0;
+ double b = 3.0;
+ return (int)(a % b);
+ }
+
+ static int test_2_neg () {
+ double a = -2.0;
+ return (int)(-a);
+ }
+
+ static int test_46_float_spill () {
+ // we overflow the FP stack
+ double a = 1;
+ double b = 2;
+ double c = 3;
+ double d = 4;
+ double e = 5;
+ double f = 6;
+ double g = 7;
+ double h = 8;
+ double i = 9;
+
+ return (int)(1.0 + (a + (b + (c + (d + (e + (f + (g + (h + i)))))))));
+ }
+
+ static int test_4_long_cast () {
+ long a = 1000;
+ double d = (double)a;
+ long b = (long)d;
+ if (b != 1000)
+ return 0;
+ return 4;
+ }
+
+
+ static int test_15_float_cmp () {
+ double a = 2.0;
+ double b = 1.0;
+ int result = 0;
+ bool val;
+
+ val = a == a;
+ if (!val)
+ return result;
+ result++;
+
+ val = a < a;
+ if (val)
+ return result;
+ result++;
+
+ val = a > a;
+ if (val)
+ return result;
+ result++;
+
+ val = a <= a;
+ if (!val)
+ return result;
+ result++;
+
+ val = a >= a;
+ if (!val)
+ return result;
+ result++;
+
+ val = b == a;
+ if (val)
+ return result;
+ result++;
+
+ val = b < a;
+ if (!val)
+ return result;
+ result++;
+
+ val = b > a;
+ if (val)
+ return result;
+ result++;
+
+ val = b <= a;
+ if (!val)
+ return result;
+ result++;
+
+ val = b >= a;
+ if (val)
+ return result;
+ result++;
+
+ val = a == b;
+ if (val)
+ return result;
+ result++;
+
+ val = a < b;
+ if (val)
+ return result;
+ result++;
+
+ val = a > b;
+ if (!val)
+ return result;
+ result++;
+
+ val = a <= b;
+ if (val)
+ return result;
+ result++;
+
+ val = a >= b;
+ if (!val)
+ return result;
+ result++;
+
+ return result;
+ }
+
+ static int test_15_float_cmp_un () {
+ double a = Double.NaN;
+ double b = 1.0;
+ int result = 0;
+ bool val;
+
+ val = a == a;
+ if (val)
+ return result;
+ result++;
+
+ val = a < a;
+ if (val)
+ return result;
+ result++;
+
+ val = a > a;
+ if (val)
+ return result;
+ result++;
+
+ val = a <= a;
+ if (val)
+ return result;
+ result++;
+
+ val = a >= a;
+ if (val)
+ return result;
+ result++;
+
+ val = b == a;
+ if (val)
+ return result;
+ result++;
+
+ val = b < a;
+ if (val)
+ return result;
+ result++;
+
+ val = b > a;
+ if (val)
+ return result;
+ result++;
+
+ val = b <= a;
+ if (val)
+ return result;
+ result++;
+
+ val = b >= a;
+ if (val)
+ return result;
+ result++;
+
+ val = a == b;
+ if (val)
+ return result;
+ result++;
+
+ val = a < b;
+ if (val)
+ return result;
+ result++;
+
+ val = a > b;
+ if (val)
+ return result;
+ result++;
+
+ val = a <= b;
+ if (val)
+ return result;
+ result++;
+
+ val = a >= b;
+ if (val)
+ return result;
+ result++;
+
+ return result;
+ }
+
+ static int test_15_float_branch () {
+ double a = 2.0;
+ double b = 1.0;
+ int result = 0;
+
+ if (!(a == a))
+ return result;
+ result++;
+
+ if (a < a)
+ return result;
+ result++;
+
+ if (a > a)
+ return result;
+ result++;
+
+ if (!(a <= a))
+ return result;
+ result++;
+
+ if (!(a >= a))
+ return result;
+ result++;
+
+ if (b == a)
+ return result;
+ result++;
+
+ if (!(b < a))
+ return result;
+ result++;
+
+ if (b > a)
+ return result;
+ result++;
+
+ if (!(b <= a))
+ return result;
+ result++;
+
+ if (b >= a)
+ return result;
+ result++;
+
+ if (a == b)
+ return result;
+ result++;
+
+ if (a < b)
+ return result;
+ result++;
+
+ if (!(a > b))
+ return result;
+ result++;
+
+ if (a <= b)
+ return result;
+ result++;
+
+ if (!(a >= b))
+ return result;
+ result++;
+
+ return result;
+ }
+
+ static int test_15_float_branch_un () {
+ double a = Double.NaN;
+ double b = 1.0;
+ int result = 0;
+
+ if (a == a)
+ return result;
+ result++;
+
+ if (a < a)
+ return result;
+ result++;
+
+ if (a > a)
+ return result;
+ result++;
+
+ if (a <= a)
+ return result;
+ result++;
+
+ if (a >= a)
+ return result;
+ result++;
+
+ if (b == a)
+ return result;
+ result++;
+
+ if (b < a)
+ return result;
+ result++;
+
+ if (b > a)
+ return result;
+ result++;
+
+ if (b <= a)
+ return result;
+ result++;
+
+ if (b >= a)
+ return result;
+ result++;
+
+ if (a == b)
+ return result;
+ result++;
+
+ if (a < b)
+ return result;
+ result++;
+
+ if (a > b)
+ return result;
+ result++;
+
+ if (a <= b)
+ return result;
+ result++;
+
+ if (a >= b)
+ return result;
+ result++;
+
+ return result;
+ }
+
+}
+
View
595 mono/mini/basic-long.cs
@@ -0,0 +1,595 @@
+using System;
+using System.Reflection;
+
+/*
+ * Regression tests for the mono JIT.
+ *
+ * Each test needs to be of the form:
+ *
+ * static int test_<result>_<name> ();
+ *
+ * where <result> is an integer (the value that needs to be returned by
+ * the method to make it pass).
+ * <name> is a user-displayed name used to identify the test.
+ *
+ * The tests can be driven in two ways:
+ * *) running the program directly: Main() uses reflection to find and invoke
+ * the test methods (this is useful mostly to check that the tests are correct)
+ * *) with the --regression switch of the jit (this is the preferred way since
+ * all the tests will be run with optimizations on and off)
+ *
+ * The reflection logic could be moved to a .dll since we need at least another
+ * regression test file written in IL code to have better control on how
+ * the IL code looks.
+ */
+
+class Tests {
+
+	// Entry point for standalone runs: per the file header, TestDriver
+	// uses reflection to find and invoke the test_* methods in this class.
+	static int Main () {
+		return TestDriver.RunTests (typeof (Tests));
+	}
+
+	// Long equality branch (named after IL beq) with a 40-bit constant,
+	// so the compare cannot be reduced to 32 bits; expects 0.
+	static int test_0_beq () {
+		long a = 0xffffffffff;
+		if (a != 0xffffffffff)
+			return 1;
+		return 0;
+	}
+
+	// Long inequality branch (named after IL bne.un): 40-bit values
+	// differing only in the low bit must compare unequal; expects 0.
+	static int test_0_bne_un () {
+		long a = 0xffffffffff;
+		if (a == 0xfffffffffe)
+			return 1;
+		return 0;
+	}
+
+	// Signed long <= branch (named after IL ble): a > a must be false.
+	static int test_0_ble () {
+		long a = 0xffffffffff;
+		if (a > 0xffffffffff)
+			return 1;
+		return 0;
+	}
+
+	// Unsigned variant of the previous test (named after IL ble.un),
+	// using ulong to force unsigned comparison; expects 0.
+	static int test_0_ble_un () {
+		ulong a = 0xffffffffff;
+		if (a > 0xffffffffff)
+			return 1;
+		return 0;
+	}
+
+	// Signed long >= branch (named after IL bge): a < a must be false.
+	static int test_0_bge () {
+		long a = 0xffffffffff;
+		if (a < 0xffffffffff)
+			return 1;
+		return 0;
+	}
+
+	// Unsigned >= branch (named after IL bge.un) on ulong; expects 0.
+	static int test_0_bge_un () {
+		ulong a = 0xffffffffff;
+		if (a < 0xffffffffff)
+			return 1;
+		return 0;
+	}
+
+	// Signed long < branch (named after IL blt): 0xfffffffffe >= 0xffffffffff
+	// must be false; expects 0.
+	static int test_0_blt () {
+		long a = 0xfffffffffe;
+		if (a >= 0xffffffffff)
+			return 1;
+		return 0;
+	}
+
+	// Unsigned < branch (named after IL blt.un) on ulong; expects 0.
+	static int test_0_blt_un () {
+		ulong a = 0xfffffffffe;
+		if (a >= 0xffffffffff)
+			return 1;
+		return 0;
+	}
+
+	// Signed long > branch (named after IL bgt): the larger 40-bit value
+	// must not compare <= the smaller one; expects 0.
+	static int test_0_bgt () {
+		long a = 0xffffffffff;
+		if (a <= 0xfffffffffe)
+			return 1;
+		return 0;
+	}
+
+	// Narrowing conversion long -> int (conv.i4): truncating 0L yields 0.
+	static int test_0_conv_to_i4 () {
+		long a = 0;
+
+		return (int)a;
+	}
+	// Widening conversion int -> long: compares the long both against an
+	// integer literal and against an int variable (implicit conversion
+	// at the comparison); expects 0.
+	static int test_0_conv_from_i4 () {
+		long a = 2;
+		if (a != 2)
+			return 1;
+
+		int b = 2;
+
+		if (a != b)
+			return 2;
+		return 0;
+	}
+
+	// Same as test_0_conv_from_i4 but with negative values, so the
+	// int -> long widening must sign-extend; expects 0.
+	static int test_0_conv_from_i4_negative () {
+		long a = -2;
+		if (a != -2)
+			return 1;
+
+		int b = -2;
+
+		if (a != b)
+			return 2;
+		return 0;
+	}
+
+	/*
+	 * NOTE(review): the r8/r4 -> i8 conversion tests below are disabled --
+	 * presumably unsupported by the JIT at the time of this commit; confirm
+	 * support before re-enabling.
+	static int test_0_conv_from_r8 () {
+ double b = 2.0;
+ long a = (long)b;
+
+ if (a != 2)
+ return 1;
+ return 0;
+ }
+
+ static int test_0_conv_from_r4 () {
+ float b = 2.0F;
+ long a = (long)b;
+
+ if (a != 2)
+ return 1;
+ return 0;
+ }
+ */
+
+	// 64-bit bitwise AND with a register operand: 0xffffffffff & 8 == 8.
+	static int test_8_and () {
+		long a = 0xffffffffff;
+		long b = 8;
+		return (int)(a & b);
+	}
+
+	// Same AND but with an immediate operand; expects 8.
+	static int test_8_and_imm () {
+		long a = 0xffffffffff;
+		return (int)(a & 8);
+	}
+
+	// 64-bit bitwise OR with a register operand: 8 | 2 == 10.
+	static int test_10_or () {
+		long a = 8;
+		long b = 2;
+		return (int)(a | b);
+	}
+
+	// Same OR but with an immediate operand; expects 10.
+	static int test_10_or_imm () {
+		long a = 8;
+		return (int)(a | 2);
+	}
+
+	// 64-bit bitwise XOR with a register operand: 7 ^ 2 == 5.
+	static int test_5_xor () {
+		long a = 7;
+		long b = 2;
+		return (int)(a ^ b);
+	}
+
+	// Same XOR but with an immediate operand; expects 5.
+	static int test_5_xor_imm () {
+		long a = 7;
+		return (int)(a ^ 2);
+	}
+
+	// 64-bit addition with a register operand: 2 + 3 == 5.
+	static int test_5_add () {
+		long a = 2;
+		long b = 3;
+		return (int)(a + b);
+	}
+
+	// Same addition but with an immediate operand; expects 5.
+	static int test_5_add_imm () {
+		long a = 2;
+		return (int)(a + 3);
+	}
+
+	// Guards against a miscompilation where `add x, imm` is rewritten as
+	// repeated increments: 0x1ffffffff + 2 crosses the low-32-bit carry
+	// boundary, so a + 2 must still equal (a + 1) + 1; expects 0.
+	static int test_0_add_imm_no_inc () {
+		// we can't blindly convert an add x, 1 to an inc x
+		long a = 0x1ffffffff;
+		long c;
+		c = a + 2;
+		if (c == ((a + 1) + 1))
+			return 0;
+		return 1;
+	}
+
+	// 64-bit subtraction with a register operand: 8 - 3 == 5.
+	static int test_5_sub () {
+		long a = 8;
+		long b = 3;
+		return (int)(a - b);
+	}
+
+	// Same subtraction but with an immediate operand; expects 5.
+	static int test_5_sub_imm () {
+		long a = 8;
+		return (int)(a - 3);
+	}
+
+	// 64-bit negation: -(-2) == 2.
+	static int test_2_neg () {
+		long a = -2;
+		return (int)(-a);
+	}
+
+	// NOTE(review): despite the name "shl", the body uses >> (shift
+	// right): 9 >> 1 == 4, so the test returns 0 as its name requires.
+	// The name is misleading but the behavior/expected value match.
+	static int test_0_shl () {
+		long a = 9;
+		int b = 1;
+
+		if ((a >> b) != 4)
+			return 1;
+
+
+		return 0;
+	}
+
+	// Arithmetic right shift of a negative long: -9 >> 1 must be -5
+	// (sign-preserving); passes by returning 1, fails with 0 -- note the
+	// success/failure values are inverted relative to the test_0_* tests.
+	// The initial `a = 9` is a dead store, immediately overwritten.
+	static int test_1_rshift ()
+	{
+		long a = 9;
+		int b = 1;
+		a = -9;
+		if ((a >> b) != -5)
+			return 0;
+		return 1;
+	}
+
+	// Exercises several 64-bit shift forms in sequence: right shift by a
+	// variable, right shift by the maximum count 63, left shift by an
+	// immediate and by a variable, and arithmetic right shift of a
+	// negative value.  count records how many checks passed; expects 5.
+	static int test_5_shift ()
+	{
+		long a = 9;
+		int b = 1;
+		int count = 0;
+
+		if ((a >> b) != 4)
+			return count;
+		count++;
+
+		if ((a >> 63) != 0)
+			return count;
+		count++;
+
+		if ((a << 1) != 18)
+			return count;
+		count++;
+
+		if ((a << b) != 18)
+			return count;
+		count++;
+
+		a = -9;
+		if ((a >> b) != -5)
+			return count;
+		count++;
+
+		return count;
+	}
+
+	// Negation of a positive long: -9 check; passes by returning 1
+	// (inverted success/failure values, as in test_1_rshift).
+	static int test_1_simple_neg () {
+		long a = 9;
+
+		if (-a != -9)
+			return 0;
+		return 1;
+	}
+
+	// Equality comparison of two long variables; equal values must not
+	// take the != branch; expects 2.
+	static int test_2_compare () {
+		long a = 1;
+		long b = 1;
+
+		if (a != b)
+			return 0;
+		return 2;
+	}
+
+ static int test_9_alu ()
+ {
+ long a = 9, b = 6;
+ int count = 0;
+
+ if ((a + b) != 15)
+ return count;
+ count++;
+
+ if ((a - b) != 3)
+ return count;
+ count++;
+
+ if ((a & 8) != 8)
+ return count;
+ count++;
+
+ if ((a | 2) != 11