Skip to content

Commit

Permalink
target/i386: add X86_SPECIALs for MOVSX and MOVZX
Browse files Browse the repository at this point in the history
Usually the registers are just moved into s->T0 without much care for
their operand size.  However, in some cases we can get more efficient
code if the operand fetching logic syncs with the emission function
on what is nicer.

All the current uses are mostly demonstrative and only reduce the code
in the emission functions, because the instructions do not support
memory operands.  However the logic is generic and applies to several
more instructions such as MOVSXD (aka movslq), one-byte shift
instructions, multiplications, XLAT, and indirect calls/jumps.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
  • Loading branch information
bonzini committed Dec 29, 2023
1 parent 5baf564 commit 8a36bbc
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 24 deletions.
18 changes: 13 additions & 5 deletions target/i386/tcg/decode-new.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@
#define op0_Rd .special = X86_SPECIAL_Op0_Rd,
#define op2_Ry .special = X86_SPECIAL_Op2_Ry,
#define avx_movx .special = X86_SPECIAL_AVXExtMov,
#define sextT0 .special = X86_SPECIAL_SExtT0,
#define zextT0 .special = X86_SPECIAL_ZExtT0,

#define vex1 .vex_class = 1,
#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar,
Expand Down Expand Up @@ -571,8 +573,8 @@ static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
[5] = {
X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)),
{},
X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 cpuid(BMI2)),
X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 cpuid(BMI2)),
X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
{},
},
[6] = {
Expand All @@ -583,10 +585,10 @@ static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
{},
},
[7] = {
X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 cpuid(BMI1)),
X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)),
X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 sextT0 cpuid(BMI1)),
X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)),
{},
},
};
Expand Down Expand Up @@ -1905,6 +1907,12 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
}
break;

case X86_SPECIAL_SExtT0:
case X86_SPECIAL_ZExtT0:
/* Handled in gen_load. */
assert(decode.op[1].unit == X86_OP_INT);
break;

default:
break;
}
Expand Down
4 changes: 4 additions & 0 deletions target/i386/tcg/decode-new.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,10 @@ typedef enum X86InsnSpecial {
* become P/P/Q/N, and size "x" becomes "q".
*/
X86_SPECIAL_MMX,

/* When loaded into s->T0, register operand 1 is zero/sign extended. */
X86_SPECIAL_SExtT0,
X86_SPECIAL_ZExtT0,
} X86InsnSpecial;

/*
Expand Down
42 changes: 23 additions & 19 deletions target/i386/tcg/emit.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,30 @@ static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
break;
case X86_OP_INT:
if (op->has_ea) {
gen_op_ld_v(s, op->ot, v, s->A0);
if (v == s->T0 && decode->e.special == X86_SPECIAL_SExtT0) {
gen_op_ld_v(s, op->ot | MO_SIGN, v, s->A0);
} else {
gen_op_ld_v(s, op->ot, v, s->A0);
}

} else if (op->ot == MO_8 && byte_reg_is_xH(s, op->n)) {
if (v == s->T0 && decode->e.special == X86_SPECIAL_SExtT0) {
tcg_gen_sextract_tl(v, cpu_regs[op->n - 4], 8, 8);
} else {
tcg_gen_extract_tl(v, cpu_regs[op->n - 4], 8, 8);
}

} else if (op->ot < MO_TL && v == s->T0 &&
(decode->e.special == X86_SPECIAL_SExtT0 ||
decode->e.special == X86_SPECIAL_ZExtT0)) {
if (decode->e.special == X86_SPECIAL_SExtT0) {
tcg_gen_ext_tl(v, cpu_regs[op->n], op->ot | MO_SIGN);
} else {
tcg_gen_ext_tl(v, cpu_regs[op->n], op->ot);
}

} else {
gen_op_mov_v_reg(s, op->ot, v, op->n);
tcg_gen_mov_tl(v, cpu_regs[op->n]);
}
break;
case X86_OP_IMM:
Expand Down Expand Up @@ -1084,9 +1105,6 @@ static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
* Shifts larger than operand size get zeros.
*/
tcg_gen_ext8u_tl(s->A0, s->T1);
if (TARGET_LONG_BITS == 64 && ot == MO_32) {
tcg_gen_ext32u_tl(s->T0, s->T0);
}
tcg_gen_shr_tl(s->T0, s->T0, s->A0);

tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero);
Expand Down Expand Up @@ -1428,19 +1446,11 @@ static void gen_PCMPISTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec

static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
if (ot < MO_64) {
tcg_gen_ext32u_tl(s->T0, s->T0);
}
gen_helper_pdep(s->T0, s->T0, s->T1);
}

static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
MemOp ot = decode->op[1].ot;
if (ot < MO_64) {
tcg_gen_ext32u_tl(s->T0, s->T0);
}
gen_helper_pext(s->T0, s->T0, s->T1);
}

Expand Down Expand Up @@ -1796,9 +1806,6 @@ static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)

mask = ot == MO_64 ? 63 : 31;
tcg_gen_andi_tl(s->T1, s->T1, mask);
if (ot != MO_64) {
tcg_gen_ext32s_tl(s->T0, s->T0);
}
tcg_gen_sar_tl(s->T0, s->T0, s->T1);
}

Expand Down Expand Up @@ -1873,9 +1880,6 @@ static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)

mask = ot == MO_64 ? 63 : 31;
tcg_gen_andi_tl(s->T1, s->T1, mask);
if (ot != MO_64) {
tcg_gen_ext32u_tl(s->T0, s->T0);
}
tcg_gen_shr_tl(s->T0, s->T0, s->T1);
}

Expand Down

0 comments on commit 8a36bbc

Please sign in to comment.