/**
* \file
* x86 backend for the Mono code generator
*
* Authors:
* Paolo Molaro (lupus@ximian.com)
* Dietmar Maurer (dietmar@ximian.com)
* Patrik Torstensson
*
* Copyright 2003 Ximian, Inc.
* Copyright 2003-2011 Novell Inc.
* Copyright 2011 Xamarin Inc.
* Licensed under the MIT license. See LICENSE file in the project root for full license information.
*/
#include "mini.h"
#include <string.h>
#include <math.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <mono/metadata/abi-details.h>
#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/metadata/mono-debug.h>
#include <mono/metadata/gc-internals.h>
#include <mono/utils/mono-math.h>
#include <mono/utils/mono-counters.h>
#include <mono/utils/mono-mmap.h>
#include <mono/utils/mono-memory-model.h>
#include <mono/utils/mono-hwcap.h>
#include <mono/utils/mono-threads.h>
#include <mono/utils/unlocked.h>
#include "trace.h"
#include "mini-x86.h"
#include "cpu-x86.h"
#include "ir-emit.h"
#include "mini-gc.h"
#include "aot-runtime.h"
#include "mini-runtime.h"
#ifndef TARGET_WIN32
#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif
#endif
/* The single step trampoline */
static gpointer ss_trampoline;
/* The breakpoint trampoline */
static gpointer bp_trampoline;
/* This mutex protects architecture specific caches */
#define mono_mini_arch_lock() mono_os_mutex_lock (&mini_arch_mutex)
#define mono_mini_arch_unlock() mono_os_mutex_unlock (&mini_arch_mutex)
static mono_mutex_t mini_arch_mutex;
#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
#define ARGS_OFFSET 8
#ifdef TARGET_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((sig)->pinvoke && ((sig)->call_convention == MONO_CALL_STDCALL || (sig)->call_convention == MONO_CALL_DEFAULT || (sig)->call_convention == MONO_CALL_THISCALL))
#else
#define CALLCONV_IS_STDCALL(sig) ((sig)->pinvoke && ((sig)->call_convention == MONO_CALL_STDCALL || (sig)->call_convention == MONO_CALL_THISCALL))
#endif
#define X86_IS_CALLEE_SAVED_REG(reg) (((reg) == X86_EBX) || ((reg) == X86_EDI) || ((reg) == X86_ESI))
#define OP_SEQ_POINT_BP_OFFSET 7
static guint8*
emit_load_aotconst (guint8 *start, guint8 *code, MonoCompile *cfg, MonoJumpInfo **ji, int dreg, int tramp_type, gconstpointer target);
const char*
mono_arch_regname (int reg)
{
switch (reg) {
case X86_EAX: return "%eax";
case X86_EBX: return "%ebx";
case X86_ECX: return "%ecx";
case X86_EDX: return "%edx";
case X86_ESP: return "%esp";
case X86_EBP: return "%ebp";
case X86_EDI: return "%edi";
case X86_ESI: return "%esi";
}
return "unknown";
}
const char*
mono_arch_fregname (int reg)
{
switch (reg) {
case 0:
return "%fr0";
case 1:
return "%fr1";
case 2:
return "%fr2";
case 3:
return "%fr3";
case 4:
return "%fr4";
case 5:
return "%fr5";
case 6:
return "%fr6";
case 7:
return "%fr7";
default:
return "unknown";
}
}
const char *
mono_arch_xregname (int reg)
{
switch (reg) {
case 0:
return "%xmm0";
case 1:
return "%xmm1";
case 2:
return "%xmm2";
case 3:
return "%xmm3";
case 4:
return "%xmm4";
case 5:
return "%xmm5";
case 6:
return "%xmm6";
case 7:
return "%xmm7";
default:
return "unknown";
}
}
void
mono_x86_patch (unsigned char* code, gpointer target)
{
x86_patch (code, (unsigned char*)target);
}
#define FLOAT_PARAM_REGS 0
static const guint32 thiscall_param_regs [] = { X86_ECX, X86_NREG };
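/*
 * callconv_param_regs:
 *
 * Return the list of registers used for parameter passing by the calling
 * convention of SIG, terminated by X86_NREG, or NULL if all parameters are
 * passed on the stack.
 */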
static const guint32 *callconv_param_regs(MonoMethodSignature *sig)
{
if (!sig->pinvoke)
return NULL;
switch (sig->call_convention) {
case MONO_CALL_THISCALL:
return thiscall_param_regs;
default:
return NULL;
}
}
#if defined(TARGET_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
#define SMALL_STRUCTS_IN_REGS
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
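/*
 * add_general:
 *
 * Assign the next argument either to the next free register in PARAM_REGS
 * or to a stack slot, filling AINFO and updating GR/STACK_SIZE accordingly.
 */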
static void inline
add_general (guint32 *gr, const guint32 *param_regs, guint32 *stack_size, ArgInfo *ainfo)
{
ainfo->offset = *stack_size;
if (!param_regs || param_regs [*gr] == X86_NREG) {
ainfo->storage = ArgOnStack;
ainfo->nslots = 1;
(*stack_size) += sizeof (gpointer);
}
else {
ainfo->storage = ArgInIReg;
ainfo->reg = param_regs [*gr];
(*gr) ++;
}
}
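/*
 * add_general_pair:
 *
 * Assign a 64-bit integer argument to two consecutive stack slots; such
 * arguments are never passed in registers on x86.
 */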
static void inline
add_general_pair (guint32 *gr, const guint32 *param_regs , guint32 *stack_size, ArgInfo *ainfo)
{
ainfo->offset = *stack_size;
g_assert(!param_regs || param_regs[*gr] == X86_NREG);
ainfo->storage = ArgOnStack;
(*stack_size) += sizeof (gpointer) * 2;
ainfo->nslots = 2;
}
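/*
 * add_float:
 *
 * Assign a float/double argument to a stack slot. Since FLOAT_PARAM_REGS is
 * 0 on x86, floating point arguments are never passed in SSE registers.
 */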
static void inline
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
ainfo->offset = *stack_size;
if (*gr >= FLOAT_PARAM_REGS) {
ainfo->storage = ArgOnStack;
(*stack_size) += is_double ? 8 : 4;
ainfo->nslots = is_double ? 2 : 1;
}
else {
/* A double register */
if (is_double)
ainfo->storage = ArgInDoubleSSEReg;
else
ainfo->storage = ArgInFloatSSEReg;
ainfo->reg = *gr;
(*gr) += 1;
}
}
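/*
 * add_valuetype:
 *
 * Decide how a valuetype argument or return value of TYPE is passed: small
 * pinvoke return values go in registers or on the fp stack, thiscall
 * arguments can use a parameter register, everything else goes on the stack.
 */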
static void
add_valuetype (MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
gboolean is_return,
guint32 *gr, const guint32 *param_regs, guint32 *fr, guint32 *stack_size)
{
guint32 size;
MonoClass *klass;
klass = mono_class_from_mono_type (type);
size = mini_type_stack_size_full (&klass->byval_arg, NULL, sig->pinvoke);
#if defined(TARGET_WIN32)
/*
 * Standard C and C++ don't allow empty structs; an empty struct will always have a size of 1 byte.
 * GCC has an extension to allow empty structs, https://gcc.gnu.org/onlinedocs/gcc/Empty-Structures.html.
 * This causes a little dilemma since a runtime built with a non-GCC compiler will not be compatible with
 * GCC-built C libraries and the other way around. On platforms where empty structs have a size of 1 byte
 * they must be represented in the call and cannot be dropped.
 */
if (size == 0 && MONO_TYPE_ISSTRUCT (type) && sig->pinvoke) {
/* Empty structs (1 byte size) need to be represented in a stack slot */
ainfo->pass_empty_struct = TRUE;
size = 1;
}
#endif
#ifdef SMALL_STRUCTS_IN_REGS
if (sig->pinvoke && is_return) {
MonoMarshalType *info;
info = mono_marshal_load_type_info (klass);
g_assert (info);
ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
/* Ignore empty struct return value, if used. */
if (info->num_fields == 0 && ainfo->pass_empty_struct) {
ainfo->storage = ArgValuetypeInReg;
return;
}
/*
 * The Windows x86 ABI for returning structs of size 4 or 8 bytes (regardless of type) dictates that
 * values are returned in the EDX:EAX register pair, https://msdn.microsoft.com/en-us/library/984x0h58.aspx.
 * This is different from, for example, float or double return types (not in a struct), which are returned
 * in ST(0), https://msdn.microsoft.com/en-us/library/ha59cbfz.aspx.
 *
 * Apple's OSX x86 ABI for returning structs of size 4 or 8 bytes uses a slightly different approach.
 * If a struct includes only one scalar value, it is handled with the same rules as scalar values.
 * This means that structs with one float or double will be returned in ST(0). For more details, see
 * https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/LowLevelABI/130-IA-32_Function_Calling_Conventions/IA32.html.
 */
#if !defined(TARGET_WIN32)
/* Special case structs with only a float member */
if (info->num_fields == 1) {
int ftype = mini_get_underlying_type (info->fields [0].field->type)->type;
if ((info->native_size == 8) && (ftype == MONO_TYPE_R8)) {
ainfo->storage = ArgValuetypeInReg;
ainfo->pair_storage [0] = ArgOnDoubleFpStack;
return;
}
if ((info->native_size == 4) && (ftype == MONO_TYPE_R4)) {
ainfo->storage = ArgValuetypeInReg;
ainfo->pair_storage [0] = ArgOnFloatFpStack;
return;
}
}
#endif
if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
ainfo->storage = ArgValuetypeInReg;
ainfo->pair_storage [0] = ArgInIReg;
ainfo->pair_regs [0] = return_regs [0];
if (info->native_size > 4) {
ainfo->pair_storage [1] = ArgInIReg;
ainfo->pair_regs [1] = return_regs [1];
}
return;
}
}
#endif
if (param_regs && param_regs [*gr] != X86_NREG && !is_return) {
g_assert (size <= 4);
ainfo->storage = ArgValuetypeInReg;
ainfo->reg = param_regs [*gr];
(*gr)++;
return;
}
ainfo->offset = *stack_size;
ainfo->storage = ArgOnStack;
*stack_size += ALIGN_TO (size, sizeof (gpointer));
ainfo->nslots = ALIGN_TO (size, sizeof (gpointer)) / sizeof (gpointer);
}
/*
* get_call_info:
*
* Obtain information about a call according to the calling convention.
* For x86 ELF, see the "System V Application Binary Interface Intel386
* Architecture Processor Supplement, Fourth Edition" document for more
* information.
* For x86 win32, see https://msdn.microsoft.com/en-us/library/984x0h58.aspx.
*/
static CallInfo*
get_call_info_internal (CallInfo *cinfo, MonoMethodSignature *sig)
{
guint32 i, gr, fr, pstart;
const guint32 *param_regs;
MonoType *ret_type;
int n = sig->hasthis + sig->param_count;
guint32 stack_size = 0;
gboolean is_pinvoke = sig->pinvoke;
gr = 0;
fr = 0;
cinfo->nargs = n;
param_regs = callconv_param_regs(sig);
/* return value */
{
ret_type = mini_get_underlying_type (sig->ret);
switch (ret_type->type) {
case MONO_TYPE_I1:
case MONO_TYPE_U1:
case MONO_TYPE_I2:
case MONO_TYPE_U2:
case MONO_TYPE_I4:
case MONO_TYPE_U4:
case MONO_TYPE_I:
case MONO_TYPE_U:
case MONO_TYPE_PTR:
case MONO_TYPE_FNPTR:
case MONO_TYPE_OBJECT:
cinfo->ret.storage = ArgInIReg;
cinfo->ret.reg = X86_EAX;
break;
case MONO_TYPE_U8:
case MONO_TYPE_I8:
cinfo->ret.storage = ArgInIReg;
cinfo->ret.reg = X86_EAX;
cinfo->ret.is_pair = TRUE;
break;
case MONO_TYPE_R4:
cinfo->ret.storage = ArgOnFloatFpStack;
break;
case MONO_TYPE_R8:
cinfo->ret.storage = ArgOnDoubleFpStack;
break;
case MONO_TYPE_GENERICINST:
if (!mono_type_generic_inst_is_valuetype (ret_type)) {
cinfo->ret.storage = ArgInIReg;
cinfo->ret.reg = X86_EAX;
break;
}
if (mini_is_gsharedvt_type (ret_type)) {
cinfo->ret.storage = ArgOnStack;
cinfo->vtype_retaddr = TRUE;
break;
}
/* Fall through */
case MONO_TYPE_VALUETYPE:
case MONO_TYPE_TYPEDBYREF: {
guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;
add_valuetype (sig, &cinfo->ret, ret_type, TRUE, &tmp_gr, NULL, &tmp_fr, &tmp_stacksize);
if (cinfo->ret.storage == ArgOnStack) {
cinfo->vtype_retaddr = TRUE;
/* The caller passes the address where the value is stored */
}
break;
}
case MONO_TYPE_VAR:
case MONO_TYPE_MVAR:
g_assert (mini_is_gsharedvt_type (ret_type));
cinfo->ret.storage = ArgOnStack;
cinfo->vtype_retaddr = TRUE;
break;
case MONO_TYPE_VOID:
cinfo->ret.storage = ArgNone;
break;
default:
g_error ("Can't handle as return value 0x%x", ret_type->type);
}
}
pstart = 0;
/*
* To simplify get_this_arg_reg () and LLVM integration, emit the vret arg after
* the first argument, allowing 'this' to be always passed in the first arg reg.
* Also do this if the first argument is a reference type, since virtual calls
* are sometimes made using calli without sig->hasthis set, like in the delegate
* invoke wrappers.
*/
if (cinfo->vtype_retaddr && !is_pinvoke && (sig->hasthis || (sig->param_count > 0 && MONO_TYPE_IS_REFERENCE (mini_get_underlying_type (sig->params [0]))))) {
if (sig->hasthis) {
add_general (&gr, param_regs, &stack_size, cinfo->args + 0);
} else {
add_general (&gr, param_regs, &stack_size, &cinfo->args [sig->hasthis + 0]);
pstart = 1;
}
cinfo->vret_arg_offset = stack_size;
add_general (&gr, NULL, &stack_size, &cinfo->ret);
cinfo->vret_arg_index = 1;
} else {
/* this */
if (sig->hasthis)
add_general (&gr, param_regs, &stack_size, cinfo->args + 0);
if (cinfo->vtype_retaddr)
add_general (&gr, NULL, &stack_size, &cinfo->ret);
}
if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
fr = FLOAT_PARAM_REGS;
/* Emit the signature cookie just before the implicit arguments */
add_general (&gr, param_regs, &stack_size, &cinfo->sig_cookie);
}
for (i = pstart; i < sig->param_count; ++i) {
ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
MonoType *ptype;
if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
/* We always pass the sig cookie on the stack for simplicity */
/*
* Prevent implicit arguments + the sig cookie from being passed
* in registers.
*/
fr = FLOAT_PARAM_REGS;
/* Emit the signature cookie just before the implicit arguments */
add_general (&gr, param_regs, &stack_size, &cinfo->sig_cookie);
}
if (sig->params [i]->byref) {
add_general (&gr, param_regs, &stack_size, ainfo);
continue;
}
ptype = mini_get_underlying_type (sig->params [i]);
switch (ptype->type) {
case MONO_TYPE_I1:
case MONO_TYPE_U1:
add_general (&gr, param_regs, &stack_size, ainfo);
break;
case MONO_TYPE_I2:
case MONO_TYPE_U2:
add_general (&gr, param_regs, &stack_size, ainfo);
break;
case MONO_TYPE_I4:
case MONO_TYPE_U4:
add_general (&gr, param_regs, &stack_size, ainfo);
break;
case MONO_TYPE_I:
case MONO_TYPE_U:
case MONO_TYPE_PTR:
case MONO_TYPE_FNPTR:
case MONO_TYPE_OBJECT:
add_general (&gr, param_regs, &stack_size, ainfo);
break;
case MONO_TYPE_GENERICINST:
if (!mono_type_generic_inst_is_valuetype (ptype)) {
add_general (&gr, param_regs, &stack_size, ainfo);
break;
}
if (mini_is_gsharedvt_type (ptype)) {
/* gsharedvt arguments are passed by ref */
add_general (&gr, param_regs, &stack_size, ainfo);
g_assert (ainfo->storage == ArgOnStack);
ainfo->storage = ArgGSharedVt;
break;
}
/* Fall through */
case MONO_TYPE_VALUETYPE:
case MONO_TYPE_TYPEDBYREF:
add_valuetype (sig, ainfo, ptype, FALSE, &gr, param_regs, &fr, &stack_size);
break;
case MONO_TYPE_U8:
case MONO_TYPE_I8:
add_general_pair (&gr, param_regs, &stack_size, ainfo);
break;
case MONO_TYPE_R4:
add_float (&fr, &stack_size, ainfo, FALSE);
break;
case MONO_TYPE_R8:
add_float (&fr, &stack_size, ainfo, TRUE);
break;
case MONO_TYPE_VAR:
case MONO_TYPE_MVAR:
/* gsharedvt arguments are passed by ref */
g_assert (mini_is_gsharedvt_type (ptype));
add_general (&gr, param_regs, &stack_size, ainfo);
g_assert (ainfo->storage == ArgOnStack);
ainfo->storage = ArgGSharedVt;
break;
default:
g_error ("unexpected type 0x%x", ptype->type);
g_assert_not_reached ();
}
}
if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
fr = FLOAT_PARAM_REGS;
/* Emit the signature cookie just before the implicit arguments */
add_general (&gr, param_regs, &stack_size, &cinfo->sig_cookie);
}
if (cinfo->vtype_retaddr) {
/* if the function returns a struct on stack, the called method already does a ret $0x4 */
cinfo->callee_stack_pop = 4;
} else if (CALLCONV_IS_STDCALL (sig)) {
/* Have to compensate for the stack space popped by the native callee */
cinfo->callee_stack_pop = stack_size;
}
if (mono_do_x86_stack_align && (stack_size % MONO_ARCH_FRAME_ALIGNMENT) != 0) {
cinfo->need_stack_align = TRUE;
cinfo->stack_align_amount = MONO_ARCH_FRAME_ALIGNMENT - (stack_size % MONO_ARCH_FRAME_ALIGNMENT);
stack_size += cinfo->stack_align_amount;
}
cinfo->stack_usage = stack_size;
cinfo->reg_usage = gr;
cinfo->freg_usage = fr;
return cinfo;
}
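/* Allocate a CallInfo either from the mempool MP or with g_malloc0 () and fill it in */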
static CallInfo*
get_call_info (MonoMemPool *mp, MonoMethodSignature *sig)
{
int n = sig->hasthis + sig->param_count;
CallInfo *cinfo;
if (mp)
cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
else
cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));
return get_call_info_internal (cinfo, sig);
}
/*
* mono_arch_get_argument_info:
* @csig: a method signature
* @param_count: the number of parameters to consider
* @arg_info: an array to store the result infos
*
* Gathers information on parameters such as size, alignment and
* padding. arg_info should be large enough to hold param_count + 1 entries.
*
* Returns the size of the argument area on the stack.
* This should be signal safe, since it is called from
* mono_arch_unwind_frame ().
* FIXME: The metadata calls might not be signal safe.
*/
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
int len, k, args_size = 0;
int size, pad;
guint32 align;
int offset = 8;
CallInfo *cinfo;
/* Avoid g_malloc as it is not signal safe */
len = sizeof (CallInfo) + (sizeof (ArgInfo) * (csig->param_count + 1));
cinfo = (CallInfo*)g_newa (guint8*, len);
memset (cinfo, 0, len);
cinfo = get_call_info_internal (cinfo, csig);
arg_info [0].offset = offset;
if (cinfo->vtype_retaddr && cinfo->vret_arg_index == 0) {
args_size += sizeof (gpointer);
offset += 4;
}
if (csig->hasthis) {
args_size += sizeof (gpointer);
offset += 4;
}
if (cinfo->vtype_retaddr && cinfo->vret_arg_index == 1 && csig->hasthis) {
/* Emitted after this */
args_size += sizeof (gpointer);
offset += 4;
}
arg_info [0].size = args_size;
for (k = 0; k < param_count; k++) {
size = mini_type_stack_size_full (csig->params [k], &align, csig->pinvoke);
/* ignore alignment for now */
align = 1;
args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
arg_info [k].pad = pad;
args_size += size;
arg_info [k + 1].pad = 0;
arg_info [k + 1].size = size;
offset += pad;
arg_info [k + 1].offset = offset;
offset += size;
if (k == 0 && cinfo->vtype_retaddr && cinfo->vret_arg_index == 1 && !csig->hasthis) {
/* Emitted after the first arg */
args_size += sizeof (gpointer);
offset += 4;
}
}
if (mono_do_x86_stack_align && !CALLCONV_IS_STDCALL (csig))
align = MONO_ARCH_FRAME_ALIGNMENT;
else
align = 4;
args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
arg_info [k].pad = pad;
return args_size;
}
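/*
 * mono_arch_tail_call_supported:
 *
 * Return whether a tail call from a method with signature CALLER_SIG to one
 * with signature CALLEE_SIG is supported, i.e. the callee does not use more
 * argument stack space than the caller and does not return a vtype through a
 * hidden argument.
 */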
gboolean
mono_arch_tail_call_supported (MonoCompile *cfg, MonoMethodSignature *caller_sig, MonoMethodSignature *callee_sig)
{
MonoType *callee_ret;
CallInfo *c1, *c2;
gboolean res;
if (cfg->compile_aot && !cfg->full_aot)
/* OP_TAILCALL doesn't work with AOT */
return FALSE;
c1 = get_call_info (NULL, caller_sig);
c2 = get_call_info (NULL, callee_sig);
/*
* Tail calls with more callee stack usage than the caller cannot be supported, since
* the extra stack space would be left on the stack after the tail call.
*/
res = c1->stack_usage >= c2->stack_usage;
callee_ret = mini_get_underlying_type (callee_sig->ret);
if (callee_ret && MONO_TYPE_ISSTRUCT (callee_ret) && c2->ret.storage != ArgValuetypeInReg)
/* An address on the callee's stack is passed as the first argument */
res = FALSE;
g_free (c1);
g_free (c2);
return res;
}
/*
* Initialize the cpu to execute managed code.
*/
void
mono_arch_cpu_init (void)
{
/* spec compliance requires running with double precision */
#ifndef _MSC_VER
guint16 fpcw;
__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
fpcw &= ~X86_FPCW_PRECC_MASK;
fpcw |= X86_FPCW_PREC_DOUBLE;
__asm__ __volatile__ ("fldcw %0\n": : "m" (fpcw));
__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
_control87 (_PC_53, MCW_PC);
#endif
}
/*
* Initialize architecture specific code.
*/
void
mono_arch_init (void)
{
mono_os_mutex_init_recursive (&mini_arch_mutex);
if (!mono_aot_only)
bp_trampoline = mini_get_breakpoint_trampoline ();
mono_aot_register_jit_icall ("mono_x86_throw_exception", mono_x86_throw_exception);
mono_aot_register_jit_icall ("mono_x86_throw_corlib_exception", mono_x86_throw_corlib_exception);
#if defined(MONO_ARCH_GSHAREDVT_SUPPORTED)
mono_aot_register_jit_icall ("mono_x86_start_gsharedvt_call", mono_x86_start_gsharedvt_call);
#endif
}
/*
* Cleanup architecture specific code.
*/
void
mono_arch_cleanup (void)
{
mono_os_mutex_destroy (&mini_arch_mutex);
}
/*
* This function returns the optimizations supported on this cpu.
*/
guint32
mono_arch_cpu_optimizations (guint32 *exclude_mask)
{
guint32 opts = 0;
*exclude_mask = 0;
if (mono_hwcap_x86_has_cmov) {
opts |= MONO_OPT_CMOV;
if (mono_hwcap_x86_has_fcmov)
opts |= MONO_OPT_FCMOV;
else
*exclude_mask |= MONO_OPT_FCMOV;
} else {
*exclude_mask |= MONO_OPT_CMOV;
}
if (mono_hwcap_x86_has_sse2)
opts |= MONO_OPT_SSE2;
else
*exclude_mask |= MONO_OPT_SSE2;
#ifdef MONO_ARCH_SIMD_INTRINSICS
/*SIMD intrinsics require at least SSE2.*/
if (!mono_hwcap_x86_has_sse2)
*exclude_mask |= MONO_OPT_SIMD;
#endif
return opts;
}
/*
 * This function tests which SSE versions are supported.
 *
 * Returns a bitmask corresponding to all supported versions.
 */
guint32
mono_arch_cpu_enumerate_simd_versions (void)
{
guint32 sse_opts = 0;
if (mono_hwcap_x86_has_sse1)
sse_opts |= SIMD_VERSION_SSE1;
if (mono_hwcap_x86_has_sse2)
sse_opts |= SIMD_VERSION_SSE2;
if (mono_hwcap_x86_has_sse3)
sse_opts |= SIMD_VERSION_SSE3;
if (mono_hwcap_x86_has_ssse3)
sse_opts |= SIMD_VERSION_SSSE3;
if (mono_hwcap_x86_has_sse41)
sse_opts |= SIMD_VERSION_SSE41;
if (mono_hwcap_x86_has_sse42)
sse_opts |= SIMD_VERSION_SSE42;
if (mono_hwcap_x86_has_sse4a)
sse_opts |= SIMD_VERSION_SSE4a;
return sse_opts;
}
/*
* Determine whether the trap whose info is in SIGINFO was caused by
* integer overflow.
*/
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
MonoContext ctx;
guint8* ip;
mono_sigctx_to_monoctx (sigctx, &ctx);
ip = (guint8*)ctx.eip;
if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
gint32 reg;
/* idiv REG */
switch (x86_modrm_rm (ip [1])) {
case X86_EAX:
reg = ctx.eax;
break;
case X86_ECX:
reg = ctx.ecx;
break;
case X86_EDX:
reg = ctx.edx;
break;
case X86_EBX:
reg = ctx.ebx;
break;
case X86_ESI:
reg = ctx.esi;
break;
case X86_EDI:
reg = ctx.edi;
break;
default:
g_assert_not_reached ();
reg = -1;
}
if (reg == -1)
return TRUE;
}
return FALSE;
}
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
GList *vars = NULL;
int i;
for (i = 0; i < cfg->num_varinfo; i++) {
MonoInst *ins = cfg->varinfo [i];
MonoMethodVar *vmv = MONO_VARINFO (cfg, i);
/* unused vars */
if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
continue;
if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
(ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
continue;
/* we don't allocate I1 to registers because there is no simple way to sign extend
 * 8-bit quantities in caller-saved registers on x86 */
if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
g_assert (MONO_VARINFO (cfg, i)->reg == -1);
g_assert (i == vmv->idx);
vars = g_list_prepend (vars, vmv);
}
}
vars = mono_varlist_sort (cfg, vars, 0);
return vars;
}
GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
GList *regs = NULL;
/* we can use 3 registers for global allocation */
regs = g_list_prepend (regs, (gpointer)X86_EBX);
regs = g_list_prepend (regs, (gpointer)X86_ESI);
regs = g_list_prepend (regs, (gpointer)X86_EDI);
return regs;
}
/*
* mono_arch_regalloc_cost:
*
* Return the cost, in number of memory references, of the action of
* allocating the variable VMV into a register during global register
* allocation.
*/
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
MonoInst *ins = cfg->varinfo [vmv->idx];
if (cfg->method->save_lmf)
/* The register is already saved */
return (ins->opcode == OP_ARG) ? 1 : 0;
else
/* push+pop+possible load if it is an argument */
return (ins->opcode == OP_ARG) ? 3 : 2;
}
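/*
 * set_needs_stack_frame:
 *
 * Record whether the method needs a stack frame in cfg->arch and keep a
 * counter of the methods whose frame could be eliminated.
 */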
static void
set_needs_stack_frame (MonoCompile *cfg, gboolean flag)
{
static int inited = FALSE;
static int count = 0;
if (cfg->arch.need_stack_frame_inited) {
g_assert (cfg->arch.need_stack_frame == flag);
return;
}
cfg->arch.need_stack_frame = flag;
cfg->arch.need_stack_frame_inited = TRUE;
if (flag)
return;
if (!inited) {
mono_counters_register ("Could eliminate stack frame", MONO_COUNTER_INT|MONO_COUNTER_JIT, &count);
inited = TRUE;
}
++count;
//g_print ("will eliminate %s.%s.%s\n", cfg->method->klass->name_space, cfg->method->klass->name, cfg->method->name);
}
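/*
 * needs_stack_frame:
 *
 * Return whether the method needs an EBP based stack frame, caching the
 * result in cfg->arch.
 */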
static gboolean
needs_stack_frame (MonoCompile *cfg)
{
MonoMethodSignature *sig;
MonoMethodHeader *header;
gboolean result = FALSE;
#if defined(__APPLE__)
/* OSX requires stack frame code to have the correct alignment. */
return TRUE;
#endif
if (cfg->arch.need_stack_frame_inited)
return cfg->arch.need_stack_frame;
header = cfg->header;
sig = mono_method_signature (cfg->method);
if (cfg->disable_omit_fp)
result = TRUE;
else if (cfg->flags & MONO_CFG_HAS_ALLOCA)
result = TRUE;
else if (cfg->method->save_lmf)
result = TRUE;
else if (cfg->stack_offset)
result = TRUE;
else if (cfg->param_area)
result = TRUE;
else if (cfg->flags & (MONO_CFG_HAS_CALLS | MONO_CFG_HAS_ALLOCA | MONO_CFG_HAS_TAIL))
result = TRUE;
else if (header->num_clauses)
result = TRUE;
else if (sig->param_count + sig->hasthis)
result = TRUE;
else if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
result = TRUE;
else if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)))
result = TRUE;
set_needs_stack_frame (cfg, result);
return cfg->arch.need_stack_frame;
}
/*
* Set var information according to the calling convention. X86 version.
* The locals var stuff should most likely be split into another method.
*/
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
MonoMethodSignature *sig;
MonoMethodHeader *header;
MonoInst *inst;
guint32 locals_stack_size, locals_stack_align;
int i, offset;
gint32 *offsets;
CallInfo *cinfo;
header = cfg->header;
sig = mono_method_signature (cfg->method);
if (!cfg->arch.cinfo)
cfg->arch.cinfo = get_call_info (cfg->mempool, sig);
cinfo = (CallInfo *)cfg->arch.cinfo;
cfg->frame_reg = X86_EBP;
offset = 0;
if (cfg->has_atomic_add_i4 || cfg->has_atomic_exchange_i4) {
/* The opcode implementations use callee-saved regs as scratch regs by pushing and popping them, but that is not async safe */
cfg->used_int_regs |= (1 << X86_EBX) | (1 << X86_EDI) | (1 << X86_ESI);
}
/* Reserve space to save LMF and caller saved registers */
if (cfg->method->save_lmf) {
/* The LMF var is allocated normally */
} else {
if (cfg->used_int_regs & (1 << X86_EBX)) {
offset += 4;
}
if (cfg->used_int_regs & (1 << X86_EDI)) {
offset += 4;
}
if (cfg->used_int_regs & (1 << X86_ESI)) {
offset += 4;
}
}
switch (cinfo->ret.storage) {
case ArgValuetypeInReg:
/* Allocate a local to hold the result, the epilog will copy it to the correct place */
offset += 8;
cfg->ret->opcode = OP_REGOFFSET;
cfg->ret->inst_basereg = X86_EBP;
cfg->ret->inst_offset = - offset;
break;
default:
break;
}
/* Allocate locals */
offsets = mono_allocate_stack_slots (cfg, TRUE, &locals_stack_size, &locals_stack_align);
if (locals_stack_size > MONO_ARCH_MAX_FRAME_SIZE) {
char *mname = mono_method_full_name (cfg->method, TRUE);
mono_cfg_set_exception_invalid_program (cfg, g_strdup_printf ("Method %s stack is too big.", mname));
g_free (mname);
return;
}
if (locals_stack_align) {
int prev_offset = offset;
offset += (locals_stack_align - 1);
offset &= ~(locals_stack_align - 1);
while (prev_offset < offset) {
prev_offset += 4;
mini_gc_set_slot_type_from_fp (cfg, - prev_offset, SLOT_NOREF);
}
}
cfg->locals_min_stack_offset = - (offset + locals_stack_size);
cfg->locals_max_stack_offset = - offset;
/*
* EBP is at alignment 8 % MONO_ARCH_FRAME_ALIGNMENT, so if we
* have locals larger than 8 bytes we need to make sure that
* they have the appropriate offset.
*/
if (MONO_ARCH_FRAME_ALIGNMENT > 8 && locals_stack_align > 8) {
int extra_size = MONO_ARCH_FRAME_ALIGNMENT - sizeof (gpointer) * 2;
offset += extra_size;
locals_stack_size += extra_size;
}
for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
if (offsets [i] != -1) {
MonoInst *inst = cfg->varinfo [i];
inst->opcode = OP_REGOFFSET;
inst->inst_basereg = X86_EBP;
inst->inst_offset = - (offset + offsets [i]);
//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
}
}
offset += locals_stack_size;
/*
* Allocate arguments+return value
*/
switch (cinfo->ret.storage) {
case ArgOnStack:
if (cfg->vret_addr) {
/*
* In the new IR, the cfg->vret_addr variable represents the
* vtype return value.
*/
cfg->vret_addr->opcode = OP_REGOFFSET;
cfg->vret_addr->inst_basereg = cfg->frame_reg;
cfg->vret_addr->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
if (G_UNLIKELY (cfg->verbose_level > 1)) {
printf ("vret_addr =");
mono_print_ins (cfg->vret_addr);
}
} else {
cfg->ret->opcode = OP_REGOFFSET;
cfg->ret->inst_basereg = X86_EBP;
cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
}
break;
case ArgValuetypeInReg:
break;
case ArgInIReg:
cfg->ret->opcode = OP_REGVAR;
cfg->ret->inst_c0 = cinfo->ret.reg;
cfg->ret->dreg = cinfo->ret.reg;
break;
case ArgNone:
case ArgOnFloatFpStack:
case ArgOnDoubleFpStack:
break;
default:
g_assert_not_reached ();
}
if (sig->call_convention == MONO_CALL_VARARG) {
g_assert (cinfo->sig_cookie.storage == ArgOnStack);
cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
}
for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
ArgInfo *ainfo = &cinfo->args [i];
inst = cfg->args [i];
if (inst->opcode != OP_REGVAR) {
inst->opcode = OP_REGOFFSET;
inst->inst_basereg = X86_EBP;
inst->inst_offset = ainfo->offset + ARGS_OFFSET;
}
}
cfg->stack_offset = offset;
}
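/*
 * mono_arch_create_vars:
 *
 * Create the architecture specific variables needed by the method: the
 * vtype return address variable and the sequence point trampoline variables,
 * and enable LMF handling if the method saves the LMF.
 */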
void
mono_arch_create_vars (MonoCompile *cfg)
{
MonoType *sig_ret;
MonoMethodSignature *sig;
CallInfo *cinfo;
sig = mono_method_signature (cfg->method);
if (!cfg->arch.cinfo)
cfg->arch.cinfo = get_call_info (cfg->mempool, sig);
cinfo = (CallInfo *)cfg->arch.cinfo;
sig_ret = mini_get_underlying_type (sig->ret);
if (cinfo->ret.storage == ArgValuetypeInReg)
cfg->ret_var_is_local = TRUE;
if ((cinfo->ret.storage != ArgValuetypeInReg) && (MONO_TYPE_ISSTRUCT (sig_ret) || mini_is_gsharedvt_variable_type (sig_ret))) {
cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
}
if (cfg->gen_sdb_seq_points) {
MonoInst *ins;
ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
ins->flags |= MONO_INST_VOLATILE;
cfg->arch.ss_tramp_var = ins;
ins = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
ins->flags |= MONO_INST_VOLATILE;
cfg->arch.bp_tramp_var = ins;
}
if (cfg->method->save_lmf) {
cfg->create_lmf_var = TRUE;
cfg->lmf_ir = TRUE;
}
cfg->arch_eh_jit_info = 1;
}
/*
* It is expensive to adjust esp for each individual fp argument pushed on the stack
* so we try to do it just once when we have multiple fp arguments in a row.
* We don't use this mechanism generally because for int arguments the generated code
* is slightly bigger and new generation cpus optimize away the dependency chains
* created by push instructions on the esp value.
* fp_arg_setup is the first argument in the execution sequence where the esp register
* is modified.
*/
static G_GNUC_UNUSED int
collect_fp_stack_space (MonoMethodSignature *sig, int start_arg, int *fp_arg_setup)
{
int fp_space = 0;
MonoType *t;
for (; start_arg < sig->param_count; ++start_arg) {
t = mini_get_underlying_type (sig->params [start_arg]);
if (!t->byref && t->type == MONO_TYPE_R8) {
fp_space += sizeof (double);
*fp_arg_setup = start_arg;
} else {
break;
}
}
return fp_space;
}
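/*
 * emit_sig_cookie:
 *
 * Store the signature cookie used by vararg calls (a signature describing
 * only the trailing, implicit arguments) into its stack slot.
 */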
static void
emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
{
MonoMethodSignature *tmp_sig;
int sig_reg;
/*
* mono_ArgIterator_Setup assumes the signature cookie is
* passed first and all the arguments which were before it are
* passed on the stack after the signature. So compensate by
* passing a different signature.
*/
tmp_sig = mono_metadata_signature_dup (call->signature);
tmp_sig->param_count -= call->signature->sentinelpos;
tmp_sig->sentinelpos = 0;
memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));
if (cfg->compile_aot) {
sig_reg = mono_alloc_ireg (cfg);
MONO_EMIT_NEW_SIGNATURECONST (cfg, sig_reg, tmp_sig);
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, X86_ESP, cinfo->sig_cookie.offset, sig_reg);
} else {
MONO_EMIT_NEW_STORE_MEMBASE_IMM (cfg, OP_STORE_MEMBASE_IMM, X86_ESP, cinfo->sig_cookie.offset, tmp_sig);
}
}
#ifdef ENABLE_LLVM
LLVMCallInfo*
mono_arch_get_llvm_call_info (MonoCompile *cfg, MonoMethodSignature *sig)
{
int i, n;
CallInfo *cinfo;
ArgInfo *ainfo;
LLVMCallInfo *linfo;
MonoType *t, *sig_ret;
n = sig->param_count + sig->hasthis;
cinfo = get_call_info (cfg->mempool, sig);
sig_ret = sig->ret;
linfo = mono_mempool_alloc0 (cfg->mempool, sizeof (LLVMCallInfo) + (sizeof (LLVMArgInfo) * n));
/*
* LLVM always uses the native ABI while we use our own ABI; the
* only difference is the handling of vtypes:
* - we only pass/receive them in registers in some cases, and only
* in 1 or 2 integer registers.
*/
if (cinfo->ret.storage == ArgValuetypeInReg) {
if (sig->pinvoke) {
cfg->exception_message = g_strdup ("pinvoke + vtypes");
cfg->disable_llvm = TRUE;
return linfo;
}
cfg->exception_message = g_strdup ("vtype ret in call");
cfg->disable_llvm = TRUE;
/*
linfo->ret.storage = LLVMArgVtypeInReg;
for (j = 0; j < 2; ++j)
linfo->ret.pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, cinfo->ret.pair_storage [j]);
*/
}
if (mini_type_is_vtype (sig_ret) && cinfo->ret.storage == ArgInIReg) {
/* Vtype returned using a hidden argument */
linfo->ret.storage = LLVMArgVtypeRetAddr;
linfo->vret_arg_index = cinfo->vret_arg_index;
}
if (mini_type_is_vtype (sig_ret) && cinfo->ret.storage != ArgInIReg) {
// FIXME:
cfg->exception_message = g_strdup ("vtype ret in call");
cfg->disable_llvm = TRUE;
}
for (i = 0; i < n; ++i) {
ainfo = cinfo->args + i;
if (i >= sig->hasthis)
t = sig->params [i - sig->hasthis];
else
t = &mono_defaults.int_class->byval_arg;
linfo->args [i].storage = LLVMArgNone;
switch (ainfo->storage) {
case ArgInIReg:
linfo->args [i].storage = LLVMArgNormal;
break;
case ArgInDoubleSSEReg:
case ArgInFloatSSEReg:
linfo->args [i].storage = LLVMArgNormal;
break;
case ArgOnStack:
if (mini_type_is_vtype (t)) {
if (mono_class_value_size (mono_class_from_mono_type (t), NULL) == 0)
/* LLVM seems to allocate argument space for empty structures too */
linfo->args [i].storage = LLVMArgNone;
else
linfo->args [i].storage = LLVMArgVtypeByVal;
} else {
linfo->args [i].storage = LLVMArgNormal;
}
break;
case ArgValuetypeInReg:
if (sig->pinvoke) {
cfg->exception_message = g_strdup ("pinvoke + vtypes");
cfg->disable_llvm = TRUE;
return linfo;
}
cfg->exception_message = g_strdup ("vtype arg");
cfg->disable_llvm = TRUE;
/*
linfo->args [i].storage = LLVMArgVtypeInReg;
for (j = 0; j < 2; ++j)
linfo->args [i].pair_storage [j] = arg_storage_to_llvm_arg_storage (cfg, ainfo->pair_storage [j]);
*/
break;
case ArgGSharedVt:
linfo->args [i].storage = LLVMArgGSharedVt;
break;
default:
cfg->exception_message = g_strdup ("ainfo->storage");
cfg->disable_llvm = TRUE;
break;
}
}
return linfo;
}
#endif
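/*
 * emit_gc_param_slot_def:
 *
 * Record GC liveness information for the outgoing argument slot at SP_OFFSET
 * when precise GC maps are being computed.
 */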
static void
emit_gc_param_slot_def (MonoCompile *cfg, int sp_offset, MonoType *t)
{
if (cfg->compute_gc_maps) {
MonoInst *def;
/* Needs checking if the feature will be enabled again */
g_assert_not_reached ();
/* On x86, the offsets are from the sp value before the start of the call sequence */
if (t == NULL)
t = &mono_defaults.int_class->byval_arg;
EMIT_NEW_GC_PARAM_SLOT_LIVENESS_DEF (cfg, def, sp_offset, t);
}
}
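/*
 * mono_arch_emit_call:
 *
 * Emit the IR which sets up the arguments of CALL according to the x86
 * calling convention: arguments are stored into their stack slots or moved
 * into registers in reverse order, together with the vtype return address
 * and the vararg signature cookie when needed.
 */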
void
mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
{
MonoType *sig_ret;
MonoInst *arg, *in;
MonoMethodSignature *sig;
int i, j, n;
CallInfo *cinfo;
int sentinelpos = 0, sp_offset = 0;
sig = call->signature;
n = sig->param_count + sig->hasthis;
sig_ret = mini_get_underlying_type (sig->ret);
cinfo = get_call_info (cfg->mempool, sig);
call->call_info = cinfo;
if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
sentinelpos = sig->sentinelpos + (sig->hasthis ? 1 : 0);
if (sig_ret && MONO_TYPE_ISSTRUCT (sig_ret)) {
if (cinfo->ret.storage == ArgValuetypeInReg && cinfo->ret.pair_storage[0] != ArgNone ) {
/*
* Tell the JIT to use a more efficient calling convention: call using
* OP_CALL, compute the result location after the call, and save the
* result there.
*/
call->vret_in_reg = TRUE;
#if defined(__APPLE__)
if (cinfo->ret.pair_storage [0] == ArgOnDoubleFpStack || cinfo->ret.pair_storage [0] == ArgOnFloatFpStack)
call->vret_in_reg_fp = TRUE;
#endif
if (call->vret_var)
NULLIFY_INS (call->vret_var);
}
}
// FIXME: Emit EMIT_NEW_GC_PARAM_SLOT_LIVENESS_DEF everywhere
/* Handle the case where there are no implicit arguments */
if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
emit_sig_cookie (cfg, call, cinfo);
sp_offset = cinfo->sig_cookie.offset;
emit_gc_param_slot_def (cfg, sp_offset, NULL);
}
/* Arguments are pushed in the reverse order */
for (i = n - 1; i >= 0; i --) {
ArgInfo *ainfo = cinfo->args + i;
MonoType *orig_type, *t;
int argsize;
if (cinfo->vtype_retaddr && cinfo->vret_arg_index == 1 && i == 0) {
MonoInst *vtarg;
/* Push the vret arg before the first argument */
MONO_INST_NEW (cfg, vtarg, OP_STORE_MEMBASE_REG);
vtarg->type = STACK_MP;
vtarg->inst_destbasereg = X86_ESP;
vtarg->sreg1 = call->vret_var->dreg;
vtarg->inst_offset = cinfo->ret.offset;
MONO_ADD_INS (cfg->cbb, vtarg);
emit_gc_param_slot_def (cfg, cinfo->ret.offset, NULL);
}
if (i >= sig->hasthis)
t = sig->params [i - sig->hasthis];
else
t = &mono_defaults.int_class->byval_arg;
orig_type = t;
t = mini_get_underlying_type (t);
MONO_INST_NEW (cfg, arg, OP_X86_PUSH);
in = call->args [i];
arg->cil_code = in->cil_code;
arg->sreg1 = in->dreg;
arg->type = in->type;
g_assert (in->dreg != -1);
if (ainfo->storage == ArgGSharedVt) {
arg->opcode = OP_OUTARG_VT;
arg->sreg1 = in->dreg;
arg->klass = in->klass;
arg->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
memcpy (arg->inst_p1, ainfo, sizeof (ArgInfo));
sp_offset += 4;
MONO_ADD_INS (cfg->cbb, arg);
} else if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
guint32 align;
guint32 size;
g_assert (in->klass);
if (t->type == MONO_TYPE_TYPEDBYREF) {
size = sizeof (MonoTypedRef);
align = sizeof (gpointer);
}
else {
size = mini_type_stack_size_full (&in->klass->byval_arg, &align, sig->pinvoke);
}
if (size > 0 || ainfo->pass_empty_struct) {
arg->opcode = OP_OUTARG_VT;
arg->sreg1 = in->dreg;
arg->klass = in->klass;
arg->backend.size = size;
arg->inst_p0 = call;
arg->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
memcpy (arg->inst_p1, ainfo, sizeof (ArgInfo));
MONO_ADD_INS (cfg->cbb, arg);
if (ainfo->storage != ArgValuetypeInReg) {
emit_gc_param_slot_def (cfg, ainfo->offset, orig_type);
}
}
} else {
switch (ainfo->storage) {
case ArgOnStack:
if (!t->byref) {
if (t->type == MONO_TYPE_R4) {
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER4_MEMBASE_REG, X86_ESP, ainfo->offset, in->dreg);
argsize = 4;
} else if (t->type == MONO_TYPE_R8) {
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORER8_MEMBASE_REG, X86_ESP, ainfo->offset, in->dreg);
argsize = 8;
} else if (t->type == MONO_TYPE_I8 || t->type == MONO_TYPE_U8) {
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, X86_ESP, ainfo->offset + 4, MONO_LVREG_MS (in->dreg));
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, X86_ESP, ainfo->offset, MONO_LVREG_LS (in->dreg));
argsize = 4;
} else {
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, X86_ESP, ainfo->offset, in->dreg);
argsize = 4;
}
} else {
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, X86_ESP, ainfo->offset, in->dreg);
argsize = 4;
}
break;
case ArgInIReg:
arg->opcode = OP_MOVE;
arg->dreg = ainfo->reg;
MONO_ADD_INS (cfg->cbb, arg);
argsize = 0;
break;
default:
g_assert_not_reached ();
}
if (cfg->compute_gc_maps) {
if (argsize == 4) {
/* FIXME: The == STACK_OBJ check might be fragile ? */
if (sig->hasthis && i == 0 && call->args [i]->type == STACK_OBJ) {
/* this */
if (call->need_unbox_trampoline)
/* The unbox trampoline transforms this into a managed pointer */
emit_gc_param_slot_def (cfg, ainfo->offset, &mono_defaults.int_class->this_arg);
else
emit_gc_param_slot_def (cfg, ainfo->offset, &mono_defaults.object_class->byval_arg);
} else {
emit_gc_param_slot_def (cfg, ainfo->offset, orig_type);
}
} else {
/* i8/r8 */
for (j = 0; j < argsize; j += 4)
emit_gc_param_slot_def (cfg, ainfo->offset + j, NULL);
}
}
}
if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
/* Emit the signature cookie just before the implicit arguments */
emit_sig_cookie (cfg, call, cinfo);
emit_gc_param_slot_def (cfg, cinfo->sig_cookie.offset, NULL);
}
}
if (sig_ret && (MONO_TYPE_ISSTRUCT (sig_ret) || cinfo->vtype_retaddr)) {
MonoInst *vtarg;
if (cinfo->ret.storage == ArgValuetypeInReg) {
/* Already done */
}
else if (cinfo->ret.storage == ArgInIReg) {
NOT_IMPLEMENTED;
/* The return address is passed in a register */
MONO_INST_NEW (cfg, vtarg, OP_MOVE);
vtarg->sreg1 = call->inst.dreg;
vtarg->dreg = mono_alloc_ireg (cfg);
MONO_ADD_INS (cfg->cbb, vtarg);
mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
} else if (cinfo->vtype_retaddr && cinfo->vret_arg_index == 0) {
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, X86_ESP, cinfo->ret.offset, call->vret_var->dreg);
emit_gc_param_slot_def (cfg, cinfo->ret.offset, NULL);
}
}
call->stack_usage = cinfo->stack_usage;
call->stack_align_amount = cinfo->stack_align_amount;
}
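/*
 * mono_arch_emit_outarg_vt:
 *
 * Emit the IR which passes the valuetype in SRC as the outgoing argument
 * described by ins->inst_p1, either in a register or by copying it into its
 * stack slot.
 */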
void
mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
{
MonoCallInst *call = (MonoCallInst*)ins->inst_p0;
ArgInfo *ainfo = ins->inst_p1;
int size = ins->backend.size;
if (ainfo->storage == ArgValuetypeInReg) {
int dreg = mono_alloc_ireg (cfg);
switch (size) {
case 1:
MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADU1_MEMBASE, dreg, src->dreg, 0);
break;
case 2:
MONO_EMIT_NEW_LOAD_MEMBASE_OP (cfg, OP_LOADU2_MEMBASE, dreg, src->dreg, 0);
break;
case 4:
MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, 0);
break;
case 3: /* FIXME */
default:
g_assert_not_reached ();
}
mono_call_inst_add_outarg_reg (cfg, call, dreg, ainfo->reg, FALSE);
}
else {
if (cfg->gsharedvt && mini_is_gsharedvt_klass (ins->klass)) {
/* Pass by addr */
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, X86_ESP, ainfo->offset, src->dreg);
} else if (size <= 4) {
int dreg = mono_alloc_ireg (cfg);
if (ainfo->pass_empty_struct) {
// Pass empty struct value as 0 on platforms representing empty structs as 1 byte.
MONO_EMIT_NEW_ICONST (cfg, dreg, 0);
} else {
MONO_EMIT_NEW_LOAD_MEMBASE (cfg, dreg, src->dreg, 0);
}
MONO_EMIT_NEW_STORE_MEMBASE (cfg, OP_STORE_MEMBASE_REG, X86_ESP, ainfo->offset, dreg);
} else if (size <= 20) {
mini_emit_memcpy (cfg, X86_ESP, ainfo->offset, src->dreg, 0, size, 4);
} else {
// FIXME: Code growth
mini_emit_memcpy (cfg, X86_ESP, ainfo->offset, src->dreg, 0, size, 4);
}
}
}
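/*
 * mono_arch_emit_setret:
 *
 * Emit the IR which moves VAL into the return location of METHOD: EDX:EAX
 * for 64-bit integers, the return register for other integer types. Float
 * and double values are already on the fp stack, so nothing is emitted for
 * them (except an FMOVE when compiling with LLVM).
 */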
void
mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
{
MonoType *ret = mini_get_underlying_type (mono_method_signature (method)->ret);
if (!ret->byref) {
if (ret->type == MONO_TYPE_R4) {
if (COMPILE_LLVM (cfg))
MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
/* Nothing to do */
return;
} else if (ret->type == MONO_TYPE_R8) {
if (COMPILE_LLVM (cfg))
MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
/* Nothing to do */
return;
} else if (ret->type == MONO_TYPE_I8 || ret->type == MONO_TYPE_U8) {
if (COMPILE_LLVM (cfg))
MONO_EMIT_NEW_UNALU (cfg, OP_LMOVE, cfg->ret->dreg, val->dreg);
else {
MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EAX, MONO_LVREG_LS (val->dreg));
MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EDX, MONO_LVREG_MS (val->dreg));
}
return;
}
}
MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
}
/*
* Allow tracing to work with this interface (with an optional argument)
*/
void*
mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
guchar *code = p;
g_assert (MONO_ARCH_FRAME_ALIGNMENT >= 8);
x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 8);
/* if some args are passed in registers, we need to save them here */
x86_push_reg (code, X86_EBP);
if (cfg->compile_aot) {
x86_push_imm (code, cfg->method);
x86_mov_reg_imm (code, X86_EAX, func);
x86_call_reg (code, X86_EAX);
} else {
mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
x86_push_imm (code, cfg->method);
mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
x86_call_code (code, 0);
}
x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT);
return code;
}
enum {
SAVE_NONE,
SAVE_STRUCT,
SAVE_EAX,
SAVE_EAX_EDX,
SAVE_FP
};
void*
mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
guchar *code = p;
int arg_size = 0, stack_usage = 0, save_mode = SAVE_NONE;
MonoMethod *method = cfg->method;
MonoType *ret_type = mini_get_underlying_type (mono_method_signature (method)->ret);
switch (ret_type->type) {
case MONO_TYPE_VOID:
/* special case string .ctor icall */
if (strcmp (".ctor", method->name) && method->klass == mono_defaults.string_class) {
save_mode = SAVE_EAX;
stack_usage = enable_arguments ? 8 : 4;
} else
save_mode = SAVE_NONE;
break;
case MONO_TYPE_I8:
case MONO_TYPE_U8:
save_mode = SAVE_EAX_EDX;
stack_usage = enable_arguments ? 16 : 8;
break;
case MONO_TYPE_R4:
case MONO_TYPE_R8:
save_mode = SAVE_FP;
stack_usage = enable_arguments ? 16 : 8;
break;
case MONO_TYPE_GENERICINST:
if (!mono_type_generic_inst_is_valuetype (ret_type)) {
save_mode = SAVE_EAX;
stack_usage = enable_arguments ? 8 : 4;
break;
}
/* Fall through */
case MONO_TYPE_VALUETYPE:
// FIXME: Handle SMALL_STRUCT_IN_REG here for proper alignment on darwin-x86
save_mode = SAVE_STRUCT;
stack_usage = enable_arguments ? 4 : 0;
break;
default:
save_mode = SAVE_EAX;
stack_usage = enable_arguments ? 8 : 4;
break;
}
x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - stack_usage - 4);
switch (save_mode) {
case SAVE_EAX_EDX:
x86_push_reg (code, X86_EDX);
x86_push_reg (code, X86_EAX);
if (enable_arguments) {
x86_push_reg (code, X86_EDX);
x86_push_reg (code, X86_EAX);
arg_size = 8;
}
break;
case SAVE_EAX:
x86_push_reg (code, X86_EAX);
if (enable_arguments) {
x86_push_reg (code, X86_EAX);
arg_size = 4;
}
break;
case SAVE_FP:
x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
if (enable_arguments) {
x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
arg_size = 8;
}
break;
case SAVE_STRUCT:
if (enable_arguments) {
x86_push_membase (code, X86_EBP, 8);
arg_size = 4;
}
break;
case SAVE_NONE:
default:
break;
}
if (cfg->compile_aot) {
x86_push_imm (code, method);
x86_mov_reg_imm (code, X86_EAX, func);
x86_call_reg (code, X86_EAX);
} else {
mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
x86_push_imm (code, method);
mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
x86_call_code (code, 0);
}
x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);
switch (save_mode) {
case SAVE_EAX_EDX:
x86_pop_reg (code, X86_EAX);
x86_pop_reg (code, X86_EDX);
break;
case SAVE_EAX:
x86_pop_reg (code, X86_EAX);
break;
case SAVE_FP:
x86_fld_membase (code, X86_ESP, 0, TRUE);
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
break;
case SAVE_NONE:
default:
break;
}
x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - stack_usage);
return code;
}
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->inst_true_bb->native_offset) { \
x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
} else { \
mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
if ((cfg->opt & MONO_OPT_BRANCH) && \
x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
x86_branch8 (code, cond, 0, sign); \
else \
x86_branch32 (code, cond, 0, sign); \
}
/*
* Emit an exception if the condition fails and,
* if possible, branch directly to the target.
*/
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name) \
do { \
MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
if (tins == NULL) { \
mono_add_patch_info (cfg, code - cfg->native_code, \
MONO_PATCH_INFO_EXC, exc_name); \
x86_branch32 (code, cond, 0, signed); \
} else { \
EMIT_COND_BRANCH (tins, cond, signed); \
} \
} while (0);
#define EMIT_FPCOMPARE(code) do { \
x86_fcompp (code); \
x86_fnstsw (code); \
} while (0);
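/*
 * emit_call:
 *
 * Emit a call instruction annotated with patch info of type PATCH_TYPE/DATA,
 * padding the call site so that the patched displacement does not span
 * multiple cache lines.
 */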
static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
gboolean needs_paddings = TRUE;
guint32 pad_size;
MonoJumpInfo *jinfo = NULL;
if (cfg->abs_patches) {
jinfo = g_hash_table_lookup (cfg->abs_patches, data);
if (jinfo && jinfo->type == MONO_PATCH_INFO_JIT_ICALL_ADDR)
needs_paddings = FALSE;
}
if (cfg->compile_aot)
needs_paddings = FALSE;
/* The address must be 4-byte aligned to avoid spanning multiple cache lines.
 * This is required for code patching to be safe on SMP machines.
 */
pad_size = (guint32)(code + 1 - cfg->native_code) & 0x3;
if (needs_paddings && pad_size)
x86_padding (code, 4 - pad_size);
mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
x86_call_code (code, 0);
return code;
}
#define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_IADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_ISBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB_IMM)))
/*
* mono_peephole_pass_1:
*
* Perform peephole opts which should/can be performed before local regalloc
*/
void
mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
{
MonoInst *ins, *n;
MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
MonoInst *last_ins = mono_inst_prev (ins, FILTER_IL_SEQ_POINT);
switch (ins->opcode) {
case OP_IADD_IMM:
case OP_ADD_IMM:
if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
/*
* X86_LEA is like ADD, but doesn't have the
* sreg1==dreg restriction.
*/
ins->opcode = OP_X86_LEA_MEMBASE;
ins->inst_basereg = ins->sreg1;
} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
ins->opcode = OP_X86_INC_REG;
break;
case OP_SUB_IMM:
case OP_ISUB_IMM:
if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
ins->opcode = OP_X86_LEA_MEMBASE;
ins->inst_basereg = ins->sreg1;
ins->inst_imm = -ins->inst_imm;
} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
ins->opcode = OP_X86_DEC_REG;
break;
case OP_COMPARE_IMM:
case OP_ICOMPARE_IMM:
/* OP_COMPARE_IMM (reg, 0)
* -->
* OP_X86_TEST_NULL (reg)
*/
if (!ins->inst_imm)
ins->opcode = OP_X86_TEST_NULL;
break;
case OP_X86_COMPARE_MEMBASE_IMM:
/*
* OP_STORE_MEMBASE_REG reg, offset(basereg)
* OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
* -->
* OP_STORE_MEMBASE_REG reg, offset(basereg)
* OP_COMPARE_IMM reg, imm
*
* Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
*/
if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
ins->inst_basereg == last_ins->inst_destbasereg &&
ins->inst_offset == last_ins->inst_offset) {
ins->opcode = OP_COMPARE_IMM;
ins->sreg1 = last_ins->sreg1;
/* check if we can remove cmp reg,0 with test null */
if (!ins->inst_imm)
ins->opcode = OP_X86_TEST_NULL;
}
break;
case OP_X86_PUSH_MEMBASE:
if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
last_ins->opcode == OP_STORE_MEMBASE_REG) &&
ins->inst_basereg == last_ins->inst_destbasereg &&
ins->inst_offset == last_ins->inst_offset) {
ins->opcode = OP_X86_PUSH;
ins->sreg1 = last_ins->sreg1;
}
break;
}
mono_peephole_ins (bb, ins);
}
}
void
mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
{
MonoInst *ins, *n;
MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
switch (ins->opcode) {
case OP_ICONST:
/* reg = 0 -> XOR (reg, reg) */
/* XOR sets cflags on x86, so we can't do it always */
if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
MonoInst *ins2;
ins->opcode = OP_IXOR;
ins->sreg1 = ins->dreg;
ins->sreg2 = ins->dreg;
/*
* Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG
* since it takes 3 bytes instead of 7.
*/
for (ins2 = mono_inst_next (ins, FILTER_IL_SEQ_POINT); ins2; ins2 = ins2->next) {
if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
ins2->opcode = OP_STORE_MEMBASE_REG;
ins2->sreg1 = ins->dreg;
}
else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
ins2->opcode = OP_STOREI4_MEMBASE_REG;
ins2->sreg1 = ins->dreg;
}
else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
/* Continue iteration */
}
else
break;
}
}
break;
case OP_IADD_IMM:
case OP_ADD_IMM:
if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
ins->opcode = OP_X86_INC_REG;
break;
case OP_ISUB_IMM:
case OP_SUB_IMM:
if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
ins->opcode = OP_X86_DEC_REG;
break;
}
mono_peephole_ins (bb, ins);
}
}
#define NEW_INS(cfg,ins,dest,op) do { \
MONO_INST_NEW ((cfg), (dest), (op)); \
(dest)->cil_code = (ins)->cil_code; \
mono_bblock_insert_before_ins (bb, ins, (dest)); \
} while (0)
/*
* mono_arch_lowering_pass:
*
* Converts complex opcodes into simpler ones so that each IR instruction
* corresponds to one machine instruction.
*/
void
mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
MonoInst *ins, *next;
/*
* FIXME: Need to add more instructions, but the current machine
* description can't model some parts of the composite instructions like
* cdq.
*/
MONO_BB_FOR_EACH_INS_SAFE (bb, next, ins) {
switch (ins->opcode) {
case OP_IREM_IMM:
case OP_IDIV_IMM:
case OP_IDIV_UN_IMM:
case OP_IREM_UN_IMM:
/*
* Keep the cases where we can generate optimized code, otherwise convert
* to the non-imm variant.
*/
if ((ins->opcode == OP_IREM_IMM) && mono_is_power_of_two (ins->inst_imm) >= 0)
break;
mono_decompose_op_imm (cfg, bb, ins);
break;
#ifdef MONO_ARCH_SIMD_INTRINSICS
case OP_EXPAND_I1: {
MonoInst *temp;
int temp_reg1 = mono_alloc_ireg (cfg);
int temp_reg2 = mono_alloc_ireg (cfg);
int original_reg = ins->sreg1;
NEW_INS (cfg, ins, temp, OP_ICONV_TO_U1);
temp->sreg1 = original_reg;
temp->dreg = temp_reg1;
NEW_INS (cfg, ins, temp, OP_SHL_IMM);
temp->sreg1 = temp_reg1;
temp->dreg = temp_reg2;
temp->inst_imm = 8;
NEW_INS (cfg, ins, temp, OP_IOR);
temp->sreg1 = temp->dreg = temp_reg2;
temp->sreg2 = temp_reg1;
ins->opcode = OP_EXPAND_I2;
ins->sreg1 = temp_reg2;
}
break;
#endif
default:
break;
}
}
bb->max_vreg = cfg->next_vreg;
}
static const int
branch_cc_table [] = {
X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};
/* Maps CMP_... constants to X86_CC_... constants */
static const int
cc_table [] = {
X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
};
static const int
cc_signed_table [] = {
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
FALSE, FALSE, FALSE, FALSE
};
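/*
 * emit_float_to_int:
 *
 * Convert the value on top of the fp stack to an integer of SIZE bytes in
 * DREG, either with SSE2 cvttsd2si or by temporarily switching the x87
 * rounding mode to truncation.
 */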
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
#define XMM_TEMP_REG 0
/* This SSE2 optimization must not be done with OPT_SIMD in place as it clobbers xmm0. */
/* The xmm pass decomposes OP_FCONV_ ops anyway. */
if (cfg->opt & MONO_OPT_SSE2 && size < 8 && !(cfg->opt & MONO_OPT_SIMD)) {
/* optimize by assigning a local var for this use so we avoid
* the stack manipulations */
x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
x86_movsd_reg_membase (code, XMM_TEMP_REG, X86_ESP, 0);
x86_cvttsd2si (code, dreg, XMM_TEMP_REG);
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
if (size == 1)
x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
else if (size == 2)
x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
return code;
}
x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
x86_fnstcw_membase(code, X86_ESP, 0);
x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
x86_fldcw_membase (code, X86_ESP, 2);
if (size == 8) {
x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
x86_pop_reg (code, dreg);
/* FIXME: need the high register
* x86_pop_reg (code, dreg_high);
*/
} else {
x86_push_reg (code, X86_EAX); // SP = SP - 4
x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
x86_pop_reg (code, dreg);
}
x86_fldcw_membase (code, X86_ESP, 0);
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
if (size == 1)
x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
else if (size == 2)
x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
return code;
}
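/*
 * mono_emit_stack_alloc:
 *
 * Emit code to allocate tree->sreg1 bytes on the stack, touching each page
 * on platforms which require stack probes and zeroing the allocated area
 * when MONO_INST_INIT is set.
 */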
static unsigned char*
mono_emit_stack_alloc (MonoCompile *cfg, guchar *code, MonoInst* tree)
{
int sreg = tree->sreg1;
int need_touch = FALSE;
#if defined(TARGET_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
need_touch = TRUE;
#endif
if (need_touch) {
guint8* br[5];
/*
* Under Windows:
* If the requested stack size is larger than one page,
* perform a stack-touch operation.
*/
/*
* Generate stack probe code.
* Under Windows, it is necessary to allocate one page at a time,
* "touching" stack after each successful sub-allocation. This is
* because of the way stack growth is implemented - there is a
* guard page before the lowest stack page that is currently committed.
* Stack normally grows sequentially so OS traps access to the
* guard page and commits more pages when needed.
*/
x86_test_reg_imm (code, sreg, ~0xFFF);
br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
br[2] = code; /* loop */
x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
/*
* By the end of the loop, sreg is smaller than 0x1000, so the init routine
* that follows only initializes the last part of the area.
*/
/* Same as the init code below with size==0x1000 */
if (tree->flags & MONO_INST_INIT) {
x86_push_reg (code, X86_EAX);
x86_push_reg (code, X86_ECX);
x86_push_reg (code, X86_EDI);
x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
if (cfg->param_area)
x86_lea_membase (code, X86_EDI, X86_ESP, 12 + ALIGN_TO (cfg->param_area, MONO_ARCH_FRAME_ALIGNMENT));
else
x86_lea_membase (code, X86_EDI, X86_ESP, 12);
x86_cld (code);
x86_prefix (code, X86_REP_PREFIX);
x86_stosl (code);
x86_pop_reg (code, X86_EDI);
x86_pop_reg (code, X86_ECX);
x86_pop_reg (code, X86_EAX);
}
x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
x86_patch (br[3], br[2]);
x86_test_reg_reg (code, sreg, sreg);
br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
br[1] = code; x86_jump8 (code, 0);
x86_patch (br[0], code);
x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
x86_patch (br[1], code);
x86_patch (br[4], code);
}
else
x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);
if (tree->flags & MONO_INST_INIT) {
int offset = 0;
if (tree->dreg != X86_EAX && sreg != X86_EAX) {
x86_push_reg (code, X86_EAX);
offset += 4;
}
if (tree->dreg != X86_ECX && sreg != X86_ECX) {
x86_push_reg (code, X86_ECX);
offset += 4;
}
if (tree->dreg != X86_EDI && sreg != X86_EDI) {
x86_push_reg (code, X86_EDI);
offset += 4;
}
x86_shift_reg_imm (code, X86_SHR, sreg, 2);
if (sreg != X86_ECX)
x86_mov_reg_reg (code, X86_ECX, sreg, 4);
x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
if (cfg->param_area)
x86_lea_membase (code, X86_EDI, X86_ESP, offset + ALIGN_TO (cfg->param_area, MONO_ARCH_FRAME_ALIGNMENT));
else
x86_lea_membase (code, X86_EDI, X86_ESP, offset);
x86_cld (code);
x86_prefix (code, X86_REP_PREFIX);
x86_stosl (code);
if (tree->dreg != X86_EDI && sreg != X86_EDI)
x86_pop_reg (code, X86_EDI);
if (tree->dreg != X86_ECX && sreg != X86_ECX)
x86_pop_reg (code, X86_ECX);
if (tree->dreg != X86_EAX && sreg != X86_EAX)
x86_pop_reg (code, X86_EAX);
}
return code;
}
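/*
* emit_move_return_value:
*
* Emit code to move the integer return value from EAX into INS->dreg for the
* plain call opcodes; other return types are left in their ABI locations.
*/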
static guint8*
emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
{
/* Move return value to the target register */
switch (ins->opcode) {
case OP_CALL:
case OP_CALL_REG:
case OP_CALL_MEMBASE:
if (ins->dreg != X86_EAX)
x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
break;
default:
break;
}
return code;
}
#ifdef TARGET_MACH
static int tls_gs_offset;
#endif
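/*
* mono_arch_have_fast_tls:
*
* Return whether TLS values can be accessed with inline code on this platform.
* On Darwin this is detected by inspecting the machine code of pthread_getspecific (),
* which also yields the gs segment offset used by the fast path.
*/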
gboolean
mono_arch_have_fast_tls (void)
{
#ifdef TARGET_MACH
static gboolean have_fast_tls = FALSE;
static gboolean inited = FALSE;
guint32 *ins;
if (mini_get_debug_options ()->use_fallback_tls)
return FALSE;
if (inited)
return have_fast_tls;
ins = (guint32*)pthread_getspecific;
/*
* We're looking for these two instructions:
*
* mov 0x4(%esp),%eax
* mov %gs:[offset](,%eax,4),%eax
*/
have_fast_tls = ins [0] == 0x0424448b && ins [1] == 0x85048b65;
tls_gs_offset = ins [2];
inited = TRUE;
return have_fast_tls;
#elif defined(TARGET_ANDROID)
return FALSE;
#else
if (mini_get_debug_options ()->use_fallback_tls)
return FALSE;
return TRUE;
#endif
}
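/*
* mono_x86_emit_tls_get:
*
* Emit code to load the TLS slot TLS_OFFSET into DREG: a gs-relative load on
* Darwin/Linux, or a load from the fs-based TEB on Windows.
*/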
static guint8*
mono_x86_emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#if defined(TARGET_MACH)
x86_prefix (code, X86_GS_PREFIX);
x86_mov_reg_mem (code, dreg, tls_gs_offset + (tls_offset * 4), 4);
#elif defined(TARGET_WIN32)
/*
* See the Under the Hood article in the May 1996 issue of Microsoft Systems
* Journal and/or a disassembly of the TlsGet () function.
*/
x86_prefix (code, X86_FS_PREFIX);
x86_mov_reg_mem (code, dreg, 0x18, 4);
if (tls_offset < 64) {
x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
} else {
guint8 *buf [16];
g_assert (tls_offset < 0x440);
/* Load TEB->TlsExpansionSlots */
x86_mov_reg_membase (code, dreg, dreg, 0xf94, 4);
x86_test_reg_reg (code, dreg, dreg);
buf [0] = code;
x86_branch (code, X86_CC_EQ, code, TRUE);
x86_mov_reg_membase (code, dreg, dreg, (tls_offset * 4) - 0x100, 4);
x86_patch (buf [0], code);
}
#else
if (optimize_for_xen) {
x86_prefix (code, X86_GS_PREFIX);
x86_mov_reg_mem (code, dreg, 0, 4);
x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
} else {
x86_prefix (code, X86_GS_PREFIX);
x86_mov_reg_mem (code, dreg, tls_offset, 4);
}
#endif
return code;
}
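/*
* mono_x86_emit_tls_set:
*
* Emit code to store SREG into the TLS slot TLS_OFFSET. Not supported on Windows.
*/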
static guint8*
mono_x86_emit_tls_set (guint8* code, int sreg, int tls_offset)
{
#if defined(TARGET_MACH)
x86_prefix (code, X86_GS_PREFIX);
x86_mov_mem_reg (code, tls_gs_offset + (tls_offset * 4), sreg, 4);
#elif defined(TARGET_WIN32)
g_assert_not_reached ();
#else
x86_prefix (code, X86_GS_PREFIX);
x86_mov_mem_reg (code, tls_offset, sreg, 4);
#endif
return code;
}
/*
* emit_setup_lmf:
*
* Emit code to initialize an LMF structure at LMF_OFFSET.
*/
static guint8*
emit_setup_lmf (MonoCompile *cfg, guint8 *code, gint32 lmf_offset, int cfa_offset)
{
/* save all callee saved regs */
x86_mov_membase_reg (code, cfg->frame_reg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, ebx), X86_EBX, sizeof (mgreg_t));
mono_emit_unwind_op_offset (cfg, code, X86_EBX, - cfa_offset + lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, ebx));
x86_mov_membase_reg (code, cfg->frame_reg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, edi), X86_EDI, sizeof (mgreg_t));
mono_emit_unwind_op_offset (cfg, code, X86_EDI, - cfa_offset + lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, edi));
x86_mov_membase_reg (code, cfg->frame_reg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, esi), X86_ESI, sizeof (mgreg_t));
mono_emit_unwind_op_offset (cfg, code, X86_ESI, - cfa_offset + lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, esi));
x86_mov_membase_reg (code, cfg->frame_reg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, ebp), X86_EBP, sizeof (mgreg_t));
/* save the current IP */
if (cfg->compile_aot) {
/* This pushes the current ip */
x86_call_imm (code, 0);
x86_pop_reg (code, X86_EAX);
} else {
mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
x86_mov_reg_imm (code, X86_EAX, 0);
}
x86_mov_membase_reg (code, cfg->frame_reg, lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, eip), X86_EAX, sizeof (mgreg_t));
mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset + lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, eip), SLOT_NOREF);
mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset + lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, ebp), SLOT_NOREF);
mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset + lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, esi), SLOT_NOREF);
mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset + lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, edi), SLOT_NOREF);
mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset + lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, ebx), SLOT_NOREF);
mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset + lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, esp), SLOT_NOREF);
mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset + lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, method), SLOT_NOREF);
mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset + lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, lmf_addr), SLOT_NOREF);
mini_gc_set_slot_type_from_cfa (cfg, -cfa_offset + lmf_offset + MONO_STRUCT_OFFSET (MonoLMF, previous_lmf), SLOT_NOREF);
return code;
}
/* benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
#ifndef DISABLE_JIT
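/*
* mono_arch_output_basic_block:
*
* Emit native code for the instructions in BB, growing cfg->native_code as needed.
*/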
void
mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
{
MonoInst *ins;
MonoCallInst *call;
guint offset;
guint8 *code = cfg->native_code + cfg->code_len;
int max_len, cpos;
if (cfg->opt & MONO_OPT_LOOP) {
int pad, align = LOOP_ALIGNMENT;
/* set alignment depending on cpu */
if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
pad = align - pad;
/*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
x86_padding (code, pad);
cfg->code_len += pad;
bb->native_offset = cfg->code_len;
}
}
if (cfg->verbose_level > 2)
g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);
cpos = bb->max_offset;
offset = code - cfg->native_code;
mono_debug_open_block (cfg, bb, offset);
if (mono_break_at_bb_method && mono_method_desc_full_match (mono_break_at_bb_method, cfg->method) && bb->block_num == mono_break_at_bb_bb_num)
x86_breakpoint (code);
MONO_BB_FOR_EACH_INS (bb, ins) {
offset = code - cfg->native_code;
max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
#define EXTRA_CODE_SPACE (16)
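/* Grow the native code buffer when the next instruction might not fit. */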
if (G_UNLIKELY (offset > (cfg->code_size - max_len - EXTRA_CODE_SPACE))) {
cfg->code_size *= 2;
cfg->native_code = mono_realloc_native_code(cfg);
code = cfg->native_code + offset;
cfg->stat_code_reallocs++;
}
if (cfg->debug_info)
mono_debug_record_line_number (cfg, ins, offset);
switch (ins->opcode) {
case OP_BIGMUL:
x86_mul_reg (code, ins->sreg2, TRUE);
break;
case OP_BIGMUL_UN:
x86_mul_reg (code, ins->sreg2, FALSE);
break;
case OP_X86_SETEQ_MEMBASE:
case OP_X86_SETNE_MEMBASE:
x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
ins->inst_basereg, ins->inst_offset, TRUE);
break;
case OP_STOREI1_MEMBASE_IMM:
x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
break;
case OP_STOREI2_MEMBASE_IMM:
x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
break;
case OP_STORE_MEMBASE_IMM:
case OP_STOREI4_MEMBASE_IMM:
x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
break;
case OP_STOREI1_MEMBASE_REG:
x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
break;
case OP_STOREI2_MEMBASE_REG:
x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
break;
case OP_STORE_MEMBASE_REG:
case OP_STOREI4_MEMBASE_REG:
x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
break;
case OP_STORE_MEM_IMM:
x86_mov_mem_imm (code, ins->inst_p0, ins->inst_c0, 4);
break;
case OP_LOADU4_MEM:
x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
break;
case OP_LOAD_MEM:
case OP_LOADI4_MEM:
/* These are created by the cprop pass so they use inst_imm as the source */
x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
break;
case OP_LOADU1_MEM:
x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, FALSE);
break;
case OP_LOADU2_MEM:
x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, TRUE);
break;
case OP_LOAD_MEMBASE:
case OP_LOADI4_MEMBASE:
case OP_LOADU4_MEMBASE:
x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
break;
case OP_LOADU1_MEMBASE:
x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
break;
case OP_LOADI1_MEMBASE:
x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
break;
case OP_LOADU2_MEMBASE:
x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
break;
case OP_LOADI2_MEMBASE:
x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
break;
case OP_ICONV_TO_I1:
case OP_SEXT_I1:
x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
break;
case OP_ICONV_TO_I2:
case OP_SEXT_I2:
x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
break;
case OP_ICONV_TO_U1:
x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
break;
case OP_ICONV_TO_U2:
x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
break;
case OP_COMPARE:
case OP_ICOMPARE:
x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
break;
case OP_COMPARE_IMM:
case OP_ICOMPARE_IMM:
x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
break;
case OP_X86_COMPARE_MEMBASE_REG:
x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
break;
case OP_X86_COMPARE_MEMBASE_IMM:
x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
break;
case OP_X86_COMPARE_MEMBASE8_IMM:
x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
break;
case OP_X86_COMPARE_REG_MEMBASE:
x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
break;
case OP_X86_COMPARE_MEM_IMM:
x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
break;
case OP_X86_TEST_NULL:
x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
break;
case OP_X86_ADD_MEMBASE_IMM:
x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
break;
case OP_X86_ADD_REG_MEMBASE:
x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
break;
case OP_X86_SUB_MEMBASE_IMM:
x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
break;
case OP_X86_SUB_REG_MEMBASE:
x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
break;
case OP_X86_AND_MEMBASE_IMM:
x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
break;
case OP_X86_OR_MEMBASE_IMM:
x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
break;
case OP_X86_XOR_MEMBASE_IMM:
x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
break;
case OP_X86_ADD_MEMBASE_REG:
x86_alu_membase_reg (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2);
break;
case OP_X86_SUB_MEMBASE_REG:
x86_alu_membase_reg (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2);
break;
case OP_X86_AND_MEMBASE_REG:
x86_alu_membase_reg (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2);
break;
case OP_X86_OR_MEMBASE_REG:
x86_alu_membase_reg (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
break;
case OP_X86_XOR_MEMBASE_REG:
x86_alu_membase_reg (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
break;
case OP_X86_INC_MEMBASE:
x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
break;
case OP_X86_INC_REG:
x86_inc_reg (code, ins->dreg);
break;
case OP_X86_DEC_MEMBASE:
x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
break;
case OP_X86_DEC_REG:
x86_dec_reg (code, ins->dreg);
break;
case OP_X86_MUL_REG_MEMBASE:
x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
break;
case OP_X86_AND_REG_MEMBASE:
x86_alu_reg_membase (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset);
break;
case OP_X86_OR_REG_MEMBASE:
x86_alu_reg_membase (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset);
break;
case OP_X86_XOR_REG_MEMBASE:
x86_alu_reg_membase (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset);
break;
case OP_BREAK:
x86_breakpoint (code);
break;
case OP_RELAXED_NOP:
x86_prefix (code, X86_REP_PREFIX);
x86_nop (code);
break;
case OP_HARD_NOP:
x86_nop (code);
break;
case OP_NOP:
case OP_DUMMY_USE:
case OP_DUMMY_STORE:
case OP_DUMMY_ICONST:
case OP_DUMMY_R8CONST:
case OP_NOT_REACHED:
case OP_NOT_NULL:
break;
case OP_IL_SEQ_POINT:
mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
break;
case OP_SEQ_POINT: {
int i;
if (cfg->compile_aot)
NOT_IMPLEMENTED;
/* Have to use ecx as a temp reg since this can occur after OP_SETRET */
/*
* We do this _before_ the breakpoint, so single stepping after
* a breakpoint is hit will step to the next IL offset.
*/
if (ins->flags & MONO_INST_SINGLE_STEP_LOC) {
MonoInst *var = cfg->arch.ss_tramp_var;
guint8 *br [1];
g_assert (var);
g_assert (var->opcode == OP_REGOFFSET);
/* Load ss_tramp_var */
/* This is equal to &ss_trampoline */
x86_mov_reg_membase (code, X86_ECX, var->inst_basereg, var->inst_offset, sizeof (mgreg_t));
x86_mov_reg_membase (code, X86_ECX, X86_ECX, 0, sizeof (mgreg_t));
x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
br[0] = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
x86_call_reg (code, X86_ECX);
x86_patch (br [0], code);
}
/*
* Many parts of sdb depend on the ip after the single step trampoline call being equal to the seq point offset.
* This means we have to put the loading of bp_tramp_var after the offset.
*/
mono_add_seq_point (cfg, bb, ins, code - cfg->native_code);
MonoInst *var = cfg->arch.bp_tramp_var;
g_assert (var);
g_assert (var->opcode == OP_REGOFFSET);
/* Load the address of the bp trampoline */
/* This needs to be constant size */
guint8 *start = code;
x86_mov_reg_membase (code, X86_ECX, var->inst_basereg, var->inst_offset, 4);
if (code < start + OP_SEQ_POINT_BP_OFFSET) {
int size = start + OP_SEQ_POINT_BP_OFFSET - code;
x86_padding (code, size);
}
/*
* A placeholder for a possible breakpoint inserted by
* mono_arch_set_breakpoint ().
*/
for (i = 0; i < 2; ++i)
x86_nop (code);
/*
* Add an additional nop so skipping the bp doesn't cause the ip to point
* to another IL offset.
*/
x86_nop (code);
break;
}
case OP_ADDCC:
case OP_IADDCC:
case OP_IADD:
x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
break;
case OP_ADC:
case OP_IADC:
x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
break;
case OP_ADDCC_IMM:
case OP_ADD_IMM:
case OP_IADD_IMM:
x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
break;
case OP_ADC_IMM:
case OP_IADC_IMM:
x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
break;
case OP_SUBCC:
case OP_ISUBCC:
case OP_ISUB:
x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
break;
case OP_SBB:
case OP_ISBB:
x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
break;
case OP_SUBCC_IMM:
case OP_SUB_IMM:
case OP_ISUB_IMM:
x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
break;
case OP_SBB_IMM:
case OP_ISBB_IMM:
x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
break;
case OP_IAND:
x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
break;
case OP_AND_IMM:
case OP_IAND_IMM:
x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
break;
case OP_IDIV:
case OP_IREM:
/*
* The code is the same for div/rem, the allocator will allocate dreg
* to EAX/EDX as appropriate.
*/
if (ins->sreg2 == X86_EDX) {
/* cdq clobbers this */
x86_push_reg (code, ins->sreg2);
x86_cdq (code);
x86_div_membase (code, X86_ESP, 0, TRUE);
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
} else {
x86_cdq (code);
x86_div_reg (code, ins->sreg2, TRUE);
}
break;
case OP_IDIV_UN:
case OP_IREM_UN:
if (ins->sreg2 == X86_EDX) {
x86_push_reg (code, ins->sreg2);
x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
x86_div_membase (code, X86_ESP, 0, FALSE);
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
} else {
x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
x86_div_reg (code, ins->sreg2, FALSE);
}
break;
case OP_DIV_IMM:
x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
x86_cdq (code);
x86_div_reg (code, ins->sreg2, TRUE);
break;
case OP_IREM_IMM: {
int power = mono_is_power_of_two (ins->inst_imm);
g_assert (ins->sreg1 == X86_EAX);
g_assert (ins->dreg == X86_EAX);
g_assert (power >= 0);
if (power == 1) {
/* Based on http://compilers.iecc.com/comparch/article/93-04-079 */
x86_cdq (code);
x86_alu_reg_imm (code, X86_AND, X86_EAX, 1);
/*
* If the dividend is >= 0, this does nothing. If it is negative, it
* transforms %eax=0 into %eax=0, and %eax=1 into %eax=-1.
*/
x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EDX);
x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
} else if (power == 0) {
x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
} else {
/* Based on gcc code */
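/* After cdq + shr, EDX holds (2^power - 1) for a negative dividend and 0 otherwise;
* adding it before the mask and subtracting it afterwards yields a truncated (C-style) remainder. */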
/* Add compensation for negative dividends */
x86_cdq (code);
x86_shift_reg_imm (code, X86_SHR, X86_EDX, 32 - power);
x86_alu_reg_reg (code, X86_ADD, X86_EAX, X86_EDX);
/* Compute remainder */
x86_alu_reg_imm (code, X86_AND, X86_EAX, (1 << power) - 1);
/* Remove compensation */
x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
}
break;
}
case OP_IOR:
x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
break;
case OP_OR_IMM:
case OP_IOR_IMM:
x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
break;
case OP_IXOR:
x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
break;
case OP_XOR_IMM:
case OP_IXOR_IMM:
x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
break;
case OP_ISHL:
g_assert (ins->sreg2 == X86_ECX);
x86_shift_reg (code, X86_SHL, ins->dreg);
break;
case OP_ISHR:
g_assert (ins->sreg2 == X86_ECX);
x86_shift_reg (code, X86_SAR, ins->dreg);
break;
case OP_SHR_IMM:
case OP_ISHR_IMM:
x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
break;
case OP_SHR_UN_IMM:
case OP_ISHR_UN_IMM:
x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
break;
case OP_ISHR_UN:
g_assert (ins->sreg2 == X86_ECX);
x86_shift_reg (code, X86_SHR, ins->dreg);
break;
case OP_SHL_IMM:
case OP_ISHL_IMM:
x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
break;
case OP_LSHL: {
guint8 *jump_to_end;
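/* 64 bit shift: sreg1 holds the low word, backend.reg3 the high word, the count is in ECX;
* counts of 32 or more are fixed up after the shld/shl pair. */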
/* handle shifts below 32 bits */
x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
x86_shift_reg (code, X86_SHL, ins->sreg1);
x86_test_reg_imm (code, X86_ECX, 32);
jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
/* handle shifts over 31 bits */
x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
x86_clear_reg (code, ins->sreg1);
x86_patch (jump_to_end, code);
}
break;
case OP_LSHR: {
guint8 *jump_to_end;
/* handle shifts below 32 bits */
x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
x86_shift_reg (code, X86_SAR, ins->backend.reg3);
x86_test_reg_imm (code, X86_ECX, 32);
jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
/* handle shifts over 31 bits */
x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
x86_patch (jump_to_end, code);
}
break;
case OP_LSHR_UN: {
guint8 *jump_to_end;
/* handle shifts below 32 bits */
x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
x86_shift_reg (code, X86_SHR, ins->backend.reg3);
x86_test_reg_imm (code, X86_ECX, 32);
jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
/* handle shifts over 31 bits */
x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
x86_clear_reg (code, ins->backend.reg3);
x86_patch (jump_to_end, code);
}
break;
case OP_LSHL_IMM:
if (ins->inst_imm >= 32) {
x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
x86_clear_reg (code, ins->sreg1);
x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
} else {
x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
}
break;
case OP_LSHR_IMM:
if (ins->inst_imm >= 32) {
x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
} else {
x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
}
break;
case OP_LSHR_UN_IMM:
if (ins->inst_imm >= 32) {
x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
x86_clear_reg (code, ins->backend.reg3);
x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
} else {
x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
}
break;
case OP_INOT:
x86_not_reg (code, ins->sreg1);
break;
case OP_INEG:
x86_neg_reg (code, ins->sreg1);
break;
case OP_IMUL:
x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
break;
case OP_MUL_IMM:
case OP_IMUL_IMM:
switch (ins->inst_imm) {
case 2:
/* MOV r1, r2 */
/* ADD r1, r1 */
if (ins->dreg != ins->sreg1)
x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
break;
case 3:
/* LEA r1, [r2 + r2*2] */
x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
break;
case 5:
/* LEA r1, [r2 + r2*4] */
x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
break;
case 6:
/* LEA r1, [r2 + r2*2] */
/* ADD r1, r1 */
x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
break;
case 9:
/* LEA r1, [r2 + r2*8] */
x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
break;
case 10:
/* LEA r1, [r2 + r2*4] */
/* ADD r1, r1 */
x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
break;
case 12:
/* LEA r1, [r2 + r2*2] */
/* SHL r1, 2 */
x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
break;
case 25:
/* LEA r1, [r2 + r2*4] */
/* LEA r1, [r1 + r1*4] */
x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
break;
case 100:
/* LEA r1, [r2 + r2*4] */
/* SHL r1, 2 */
/* LEA r1, [r1 + r1*4] */
x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
break;
default:
x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
break;
}
break;
case OP_IMUL_OVF:
x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
break;
case OP_IMUL_OVF_UN: {
/* the mul operation and the exception check should most likely be split */
int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
/*g_assert (ins->sreg2 == X86_EAX);
g_assert (ins->dreg == X86_EAX);*/
if (ins->sreg2 == X86_EAX) {
non_eax_reg = ins->sreg1;
} else if (ins->sreg1 == X86_EAX) {
non_eax_reg = ins->sreg2;
} else {
/* no need to save since we're going to store to it anyway */
if (ins->dreg != X86_EAX) {
saved_eax = TRUE;
x86_push_reg (code, X86_EAX);
}
x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
non_eax_reg = ins->sreg2;
}
if (ins->dreg == X86_EDX) {
if (!saved_eax) {
saved_eax = TRUE;
x86_push_reg (code, X86_EAX);
}
} else {
saved_edx = TRUE;
x86_push_reg (code, X86_EDX);
}
x86_mul_reg (code, non_eax_reg, FALSE);
/* save before the check since pop and mov don't change the flags */
if (ins->dreg != X86_EAX)
x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
if (saved_edx)
x86_pop_reg (code, X86_EDX);
if (saved_eax)
x86_pop_reg (code, X86_EAX);
EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
break;
}
case OP_ICONST:
x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
break;
case OP_AOTCONST:
g_assert_not_reached ();
mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
x86_mov_reg_imm (code, ins->dreg, 0);
break;
case OP_JUMP_TABLE:
mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
x86_mov_reg_imm (code, ins->dreg, 0);
break;
case OP_LOAD_GOTADDR:
g_assert (ins->dreg == MONO_ARCH_GOT_REG);
code = mono_arch_emit_load_got_addr (cfg->native_code, code, cfg, NULL);
break;
case OP_GOT_ENTRY:
mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
break;
case OP_X86_PUSH_GOT_ENTRY:
mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
break;
case OP_MOVE:
if (ins->dreg != ins->sreg1)
x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
break;
case OP_TAILCALL: {
MonoCallInst *call = (MonoCallInst*)ins;
int pos = 0, i;
ins->flags |= MONO_INST_GC_CALLSITE;
ins->backend.pc_offset = code - cfg->native_code;
/* reset offset to make max_len work */
offset = code - cfg->native_code;
g_assert (!cfg->method->save_lmf);
/* restore callee saved registers */
for (i = 0; i < X86_NREG; ++i)
if (X86_IS_CALLEE_SAVED_REG (i) && cfg->used_int_regs & (1 << i))
pos -= 4;
if (cfg->used_int_regs & (1 << X86_ESI)) {
x86_mov_reg_membase (code, X86_ESI, X86_EBP, pos, 4);
pos += 4;
}
if (cfg->used_int_regs & (1 << X86_EDI)) {
x86_mov_reg_membase (code, X86_EDI, X86_EBP, pos, 4);
pos += 4;
}
if (cfg->used_int_regs & (1 << X86_EBX)) {
x86_mov_reg_membase (code, X86_EBX, X86_EBP, pos, 4);
pos += 4;
}
/* Copy arguments on the stack to our argument area */
for (i = 0; i < call->stack_usage - call->stack_align_amount; i += 4) {
x86_mov_reg_membase (code, X86_EAX, X86_ESP, i, 4);
x86_mov_membase_reg (code, X86_EBP, 8 + i, X86_EAX, 4);
}
/* restore ESP/EBP */
x86_leave (code);
offset = code - cfg->native_code;
mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, call->method);
x86_jump32 (code, 0);
ins->flags |= MONO_INST_GC_CALLSITE;
cfg->disable_aot = TRUE;
break;
}
case OP_CHECK_THIS:
/* ensure ins->sreg1 is not NULL
* note that cmp DWORD PTR [eax], eax is one byte shorter than
* cmp DWORD PTR [eax], 0
*/
x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
break;
case OP_ARGLIST: {
int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
x86_push_reg (code, hreg);
x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
x86_pop_reg (code, hreg);
break;
}
case OP_FCALL:
case OP_LCALL:
case OP_VCALL:
case OP_VCALL2:
case OP_VOIDCALL:
case OP_CALL:
case OP_FCALL_REG:
case OP_LCALL_REG:
case OP_VCALL_REG:
case OP_VCALL2_REG:
case OP_VOIDCALL_REG:
case OP_CALL_REG:
case OP_FCALL_MEMBASE:
case OP_LCALL_MEMBASE:
case OP_VCALL_MEMBASE:
case OP_VCALL2_MEMBASE:
case OP_VOIDCALL_MEMBASE:
case OP_CALL_MEMBASE: {
CallInfo *cinfo;
call = (MonoCallInst*)ins;
cinfo = (CallInfo*)call->call_info;
switch (ins->opcode) {
case OP_FCALL:
case OP_LCALL:
case OP_VCALL:
case OP_VCALL2:
case OP_VOIDCALL:
case OP_CALL:
if (ins->flags & MONO_INST_HAS_METHOD)
code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
else
code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
break;
case OP_FCALL_REG:
case OP_LCALL_REG:
case OP_VCALL_REG:
case OP_VCALL2_REG:
case OP_VOIDCALL_REG:
case OP_CALL_REG:
x86_call_reg (code, ins->sreg1);
break;
case OP_FCALL_MEMBASE:
case OP_LCALL_MEMBASE:
case OP_VCALL_MEMBASE:
case OP_VCALL2_MEMBASE:
case OP_VOIDCALL_MEMBASE:
case OP_CALL_MEMBASE:
x86_call_membase (code, ins->sreg1, ins->inst_offset);
break;
default:
g_assert_not_reached ();
break;
}
ins->flags |= MONO_INST_GC_CALLSITE;
ins->backend.pc_offset = code - cfg->native_code;
if (cinfo->callee_stack_pop) {
/* Have to compensate for the stack space popped by the callee */
x86_alu_reg_imm (code, X86_SUB, X86_ESP, cinfo->callee_stack_pop);
}
code = emit_move_return_value (cfg, ins, code);
break;
}
case OP_X86_LEA:
x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
break;
case OP_X86_LEA_MEMBASE:
x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
break;
case OP_X86_XCHG:
x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
break;
case OP_LOCALLOC:
/* keep alignment */
x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
code = mono_emit_stack_alloc (cfg, code, ins);
x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
if (cfg->param_area)
x86_alu_reg_imm (code, X86_ADD, ins->dreg, ALIGN_TO (cfg->param_area, MONO_ARCH_FRAME_ALIGNMENT));
break;
case OP_LOCALLOC_IMM: {
guint32 size = ins->inst_imm;
size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);
if (ins->flags & MONO_INST_INIT) {
/* FIXME: Optimize this */
x86_mov_reg_imm (code, ins->dreg, size);
ins->sreg1 = ins->dreg;
code = mono_emit_stack_alloc (cfg, code, ins);
x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
} else {
x86_alu_reg_imm (code, X86_SUB, X86_ESP, size);
x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
}
if (cfg->param_area)
x86_alu_reg_imm (code, X86_ADD, ins->dreg, ALIGN_TO (cfg->param_area, MONO_ARCH_FRAME_ALIGNMENT));
break;
}
case OP_THROW: {
x86_alu_reg_imm (