Skip to content

Commit

Permalink
tcg/i386: Adjust TCG_TARGET_HAS_MEMORY_BSWAP
Browse files Browse the repository at this point in the history
Always true when movbe is available, otherwise leave
this to generic code.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
  • Loading branch information
rth7680 committed Jan 7, 2021
1 parent 655a650 commit d2ef1b8
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 75 deletions.
119 changes: 45 additions & 74 deletions tcg/i386/tcg-target.c.inc
Expand Up @@ -154,13 +154,12 @@ bool have_bmi1;
bool have_popcnt;
bool have_avx1;
bool have_avx2;
bool have_movbe;

#ifdef CONFIG_CPUID_H
static bool have_movbe;
static bool have_bmi2;
static bool have_lzcnt;
#else
# define have_movbe 0
# define have_bmi2 0
# define have_lzcnt 0
#endif
Expand Down Expand Up @@ -1986,13 +1985,14 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg base, int index, intptr_t ofs,
int seg, bool is64, MemOp memop)
{
const MemOp real_bswap = memop & MO_BSWAP;
MemOp bswap = real_bswap;
bool use_movbe = false;
int rexw = is64 * P_REXW;
int movop = OPC_MOVL_GvEv;

if (have_movbe && real_bswap) {
bswap = 0;
/* Do big-endian loads with movbe. */
if (memop & MO_BSWAP) {
tcg_debug_assert(have_movbe);
use_movbe = true;
movop = OPC_MOVBE_GyMy;
}

Expand All @@ -2006,42 +2006,41 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
base, index, 0, ofs);
break;
case MO_UW:
tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
base, index, 0, ofs);
if (real_bswap) {
tcg_out_rolw_8(s, datalo);
}
break;
case MO_SW:
if (real_bswap) {
if (have_movbe) {
if (use_movbe) {
/* There is no extending movbe; only low 16-bits are modified. */
if (datalo != base && datalo != index) {
/* XOR breaks dependency chains. */
tgen_arithr(s, ARITH_XOR, datalo, datalo);
tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
datalo, base, index, 0, ofs);
} else {
tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
base, index, 0, ofs);
tcg_out_rolw_8(s, datalo);
tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
datalo, base, index, 0, ofs);
tcg_out_ext16u(s, datalo, datalo);
}
tcg_out_modrm(s, OPC_MOVSWL + rexw, datalo, datalo);
} else {
tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
base, index, 0, ofs);
}
break;
case MO_SW:
if (use_movbe) {
tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
datalo, base, index, 0, ofs);
tcg_out_ext16s(s, datalo, datalo, rexw);
} else {
tcg_out_modrm_sib_offset(s, OPC_MOVSWL + rexw + seg,
datalo, base, index, 0, ofs);
}
break;
case MO_UL:
tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
if (bswap) {
tcg_out_bswap32(s, datalo);
}
break;
#if TCG_TARGET_REG_BITS == 64
case MO_SL:
if (real_bswap) {
tcg_out_modrm_sib_offset(s, movop + seg, datalo,
if (use_movbe) {
tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + seg, datalo,
base, index, 0, ofs);
if (bswap) {
tcg_out_bswap32(s, datalo);
}
tcg_out_ext32s(s, datalo, datalo);
} else {
tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
Expand All @@ -2053,12 +2052,9 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
base, index, 0, ofs);
if (bswap) {
tcg_out_bswap64(s, datalo);
}
} else {
if (real_bswap) {
int t = datalo;
if (use_movbe) {
TCGReg t = datalo;
datalo = datahi;
datahi = t;
}
Expand All @@ -2073,14 +2069,10 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
tcg_out_modrm_sib_offset(s, movop + seg, datalo,
base, index, 0, ofs);
}
if (bswap) {
tcg_out_bswap32(s, datalo);
tcg_out_bswap32(s, datahi);
}
}
break;
default:
tcg_abort();
g_assert_not_reached();
}
}

Expand Down Expand Up @@ -2128,24 +2120,27 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg base, int index, intptr_t ofs,
int seg, MemOp memop)
{
/* ??? Ideally we wouldn't need a scratch register. For user-only,
we could perform the bswap twice to restore the original value
instead of moving to the scratch. But as it is, the L constraint
means that TCG_REG_L0 is definitely free here. */
const TCGReg scratch = TCG_REG_L0;
const MemOp real_bswap = memop & MO_BSWAP;
MemOp bswap = real_bswap;
bool use_movbe = false;
int movop = OPC_MOVL_EvGv;

if (have_movbe && real_bswap) {
bswap = 0;
/*
* Do big-endian stores with movbe or softmmu.
* User-only without movbe will have its swapping done generically.
*/
if (memop & MO_BSWAP) {
tcg_debug_assert(have_movbe);
use_movbe = true;
movop = OPC_MOVBE_MyGy;
}

switch (memop & MO_SIZE) {
case MO_8:
/* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
Use the scratch register if necessary. */
/*
* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
* TODO: Adjust constraints such that this is forced,
* then we won't need a scratch at all for user-only.
*/
if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
datalo = scratch;
Expand All @@ -2154,43 +2149,19 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
datalo, base, index, 0, ofs);
break;
case MO_16:
if (bswap) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
tcg_out_rolw_8(s, scratch);
datalo = scratch;
}
tcg_out_modrm_sib_offset(s, movop + P_DATA16 + seg, datalo,
base, index, 0, ofs);
break;
case MO_32:
if (bswap) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
tcg_out_bswap32(s, scratch);
datalo = scratch;
}
tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
break;
case MO_64:
if (TCG_TARGET_REG_BITS == 64) {
if (bswap) {
tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
tcg_out_bswap64(s, scratch);
datalo = scratch;
}
tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
base, index, 0, ofs);
} else if (bswap) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
tcg_out_bswap32(s, scratch);
tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch,
base, index, 0, ofs);
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
tcg_out_bswap32(s, scratch);
tcg_out_modrm_sib_offset(s, OPC_MOVL_EvGv + seg, scratch,
base, index, 0, ofs + 4);
} else {
if (real_bswap) {
int t = datalo;
if (use_movbe) {
TCGReg t = datalo;
datalo = datahi;
datahi = t;
}
Expand All @@ -2201,7 +2172,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
}
break;
default:
tcg_abort();
g_assert_not_reached();
}
}

Expand Down
3 changes: 2 additions & 1 deletion tcg/i386/tcg-target.h
Expand Up @@ -101,6 +101,7 @@ extern bool have_bmi1;
extern bool have_popcnt;
extern bool have_avx1;
extern bool have_avx2;
extern bool have_movbe;

/* optional instructions */
#define TCG_TARGET_HAS_div2_i32 1
Expand Down Expand Up @@ -225,7 +226,7 @@ static inline void tb_target_set_jmp_target(uintptr_t tc_ptr,

#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)

#define TCG_TARGET_HAS_MEMORY_BSWAP 1
#define TCG_TARGET_HAS_MEMORY_BSWAP have_movbe

#ifdef CONFIG_SOFTMMU
#define TCG_TARGET_NEED_LDST_LABELS
Expand Down

0 comments on commit d2ef1b8

Please sign in to comment.