Skip to content

Commit

Permalink
tcg/i386: Improve TSTNE/TSTEQ vs powers of two
Browse files Browse the repository at this point in the history
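Use "test x,x" when the bit is one of the 4 sign bits (bit 7, 15, 31 or 63),
branching on the sign flag instead of the zero flag.
Use "bt imm,x" otherwise, branching on the carry flag.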

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
  • Loading branch information
rth7680 committed Feb 3, 2024
1 parent 303214a commit d3d1c30
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 8 deletions.
6 changes: 3 additions & 3 deletions tcg/i386/tcg-target-con-set.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ C_O0_I2(L, L)
C_O0_I2(qi, r)
C_O0_I2(re, r)
C_O0_I2(ri, r)
C_O0_I2(r, re)
C_O0_I2(r, reT)
C_O0_I2(s, L)
C_O0_I2(x, r)
C_O0_I3(L, L, L)
Expand All @@ -34,7 +34,7 @@ C_O1_I1(r, r)
C_O1_I1(x, r)
C_O1_I1(x, x)
C_O1_I2(q, 0, qi)
C_O1_I2(q, r, re)
C_O1_I2(q, r, reT)
C_O1_I2(r, 0, ci)
C_O1_I2(r, 0, r)
C_O1_I2(r, 0, re)
Expand All @@ -50,7 +50,7 @@ C_N1_I2(r, r, r)
C_N1_I2(r, r, rW)
C_O1_I3(x, 0, x, x)
C_O1_I3(x, x, x, x)
C_O1_I4(r, r, re, r, 0)
C_O1_I4(r, r, reT, r, 0)
C_O1_I4(r, r, r, ri, ri)
C_O2_I1(r, r, L)
C_O2_I2(a, d, a, r)
Expand Down
1 change: 1 addition & 0 deletions tcg/i386/tcg-target-con-str.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,6 @@ REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS) /* qemu_st8_i32 data */
*/
CONST('e', TCG_CT_CONST_S32)
CONST('I', TCG_CT_CONST_I32)
CONST('T', TCG_CT_CONST_TST)
CONST('W', TCG_CT_CONST_WSZ)
CONST('Z', TCG_CT_CONST_U32)
54 changes: 49 additions & 5 deletions tcg/i386/tcg-target.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
#define TCG_CT_CONST_U32 0x200
#define TCG_CT_CONST_I32 0x400
#define TCG_CT_CONST_WSZ 0x800
#define TCG_CT_CONST_TST 0x1000

/* Registers used with L constraint, which are the first argument
registers on x86_64, and two random call clobbered registers on
Expand Down Expand Up @@ -202,7 +203,8 @@ static bool tcg_target_const_match(int64_t val, int ct,
return 1;
}
if (type == TCG_TYPE_I32) {
if (ct & (TCG_CT_CONST_S32 | TCG_CT_CONST_U32 | TCG_CT_CONST_I32)) {
if (ct & (TCG_CT_CONST_S32 | TCG_CT_CONST_U32 |
TCG_CT_CONST_I32 | TCG_CT_CONST_TST)) {
return 1;
}
} else {
Expand All @@ -215,6 +217,17 @@ static bool tcg_target_const_match(int64_t val, int ct,
if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
return 1;
}
/*
* This will be used in combination with TCG_CT_CONST_S32,
* so "normal" TESTQ is already matched. Also accept:
* TESTQ -> TESTL (uint32_t)
* TESTQ -> BT (is_power_of_2)
*/
if ((ct & TCG_CT_CONST_TST)
&& is_tst_cond(cond)
&& (val == (uint32_t)val || is_power_of_2(val))) {
return 1;
}
}
if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
return 1;
Expand Down Expand Up @@ -396,6 +409,7 @@ static bool tcg_target_const_match(int64_t val, int ct,
#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_SHRD_Ib (0xac | P_EXT)
#define OPC_TESTB (0x84)
#define OPC_TESTL (0x85)
#define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3)
#define OPC_UD2 (0x0b | P_EXT)
Expand Down Expand Up @@ -442,6 +456,12 @@ static bool tcg_target_const_match(int64_t val, int ct,
#define OPC_GRP3_Ev (0xf7)
#define OPC_GRP5 (0xff)
#define OPC_GRP14 (0x73 | P_EXT | P_DATA16)
#define OPC_GRPBT (0xba | P_EXT)

/* Opcode extensions for OPC_GRPBT.
   The selected value is passed to tcg_out_modrm in the operand slot
   that precedes the register being tested (see the BT use in
   tcg_out_cmp). */
#define OPC_GRPBT_BT 4
#define OPC_GRPBT_BTS 5
#define OPC_GRPBT_BTR 6
#define OPC_GRPBT_BTC 7

/* Group 1 opcode extensions for 0x80-0x83.
These are also used as modifiers for OPC_ARITH. */
Expand Down Expand Up @@ -1454,7 +1474,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, bool small)
static int tcg_out_cmp(TCGContext *s, TCGCond cond, TCGArg arg1,
TCGArg arg2, int const_arg2, int rexw)
{
int jz;
int jz, js;

if (!is_tst_cond(cond)) {
if (!const_arg2) {
Expand All @@ -1469,24 +1489,48 @@ static int tcg_out_cmp(TCGContext *s, TCGCond cond, TCGArg arg1,
}

jz = tcg_cond_to_jcc[cond];
js = (cond == TCG_COND_TSTNE ? JCC_JS : JCC_JNS);

if (!const_arg2) {
tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg2);
return jz;
}

if (arg2 <= 0xff && (TCG_TARGET_REG_BITS == 64 || arg1 < 4)) {
if (arg2 == 0x80) {
tcg_out_modrm(s, OPC_TESTB | P_REXB_R, arg1, arg1);
return js;
}
tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, arg1);
tcg_out8(s, arg2);
return jz;
}

if ((arg2 & ~0xff00) == 0 && arg1 < 4) {
if (arg2 == 0x8000) {
tcg_out_modrm(s, OPC_TESTB, arg1 + 4, arg1 + 4);
return js;
}
tcg_out_modrm(s, OPC_GRP3_Eb, EXT3_TESTi, arg1 + 4);
tcg_out8(s, arg2 >> 8);
return jz;
}

if (is_power_of_2(rexw ? arg2 : (uint32_t)arg2)) {
int jc = (cond == TCG_COND_TSTNE ? JCC_JB : JCC_JAE);
int sh = ctz64(arg2);

rexw = (sh & 32 ? P_REXW : 0);
if ((sh & 31) == 31) {
tcg_out_modrm(s, OPC_TESTL | rexw, arg1, arg1);
return js;
} else {
tcg_out_modrm(s, OPC_GRPBT | rexw, OPC_GRPBT_BT, arg1);
tcg_out8(s, sh);
return jc;
}
}

if (rexw) {
if (arg2 == (uint32_t)arg2) {
rexw = 0;
Expand Down Expand Up @@ -3399,7 +3443,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)

case INDEX_op_brcond_i32:
case INDEX_op_brcond_i64:
return C_O0_I2(r, re);
return C_O0_I2(r, reT);

case INDEX_op_bswap16_i32:
case INDEX_op_bswap16_i64:
Expand Down Expand Up @@ -3447,11 +3491,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_setcond_i64:
case INDEX_op_negsetcond_i32:
case INDEX_op_negsetcond_i64:
return C_O1_I2(q, r, re);
return C_O1_I2(q, r, reT);

case INDEX_op_movcond_i32:
case INDEX_op_movcond_i64:
return C_O1_I4(r, r, re, r, 0);
return C_O1_I4(r, r, reT, r, 0);

case INDEX_op_div2_i32:
case INDEX_op_div2_i64:
Expand Down

0 comments on commit d3d1c30

Please sign in to comment.