Skip to content

Commit

Permalink
tcg: Add muluh and mulsh opcodes
Browse files Browse the repository at this point in the history
Use them in places where mulu2 and muls2 are used.
Optimize mulx2 with dead low part to mulxh.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
  • Loading branch information
rth7680 committed Sep 2, 2013
1 parent 4ff78e0 commit 0327152
Show file tree
Hide file tree
Showing 16 changed files with 128 additions and 11 deletions.
4 changes: 4 additions & 0 deletions tcg/aarch64/tcg-target.h
Expand Up @@ -61,6 +61,8 @@ typedef enum {
#define TCG_TARGET_HAS_sub2_i32 0
#define TCG_TARGET_HAS_mulu2_i32 0
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0

#define TCG_TARGET_HAS_div_i64 0
#define TCG_TARGET_HAS_rem_i64 0
Expand All @@ -87,6 +89,8 @@ typedef enum {
#define TCG_TARGET_HAS_sub2_i64 0
#define TCG_TARGET_HAS_mulu2_i64 0
#define TCG_TARGET_HAS_muls2_i64 0
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0

enum {
TCG_AREG0 = TCG_REG_X19,
Expand Down
2 changes: 2 additions & 0 deletions tcg/arm/tcg-target.h
Expand Up @@ -80,6 +80,8 @@ extern bool use_idiv_instructions;
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_muls2_i32 1
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_div_i32 use_idiv_instructions
#define TCG_TARGET_HAS_rem_i32 0

Expand Down
2 changes: 2 additions & 0 deletions tcg/hppa/tcg-target.h
Expand Up @@ -100,6 +100,8 @@ typedef enum {
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0

/* optional instructions automatically implemented */
#define TCG_TARGET_HAS_neg_i32 0 /* sub rd, 0, rs */
Expand Down
4 changes: 4 additions & 0 deletions tcg/i386/tcg-target.h
Expand Up @@ -96,6 +96,8 @@ typedef enum {
#define TCG_TARGET_HAS_sub2_i32 1
#define TCG_TARGET_HAS_mulu2_i32 1
#define TCG_TARGET_HAS_muls2_i32 1
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0

#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div2_i64 1
Expand All @@ -122,6 +124,8 @@ typedef enum {
#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_mulu2_i64 1
#define TCG_TARGET_HAS_muls2_i64 1
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0
#endif

#define TCG_TARGET_deposit_i32_valid(ofs, len) \
Expand Down
4 changes: 4 additions & 0 deletions tcg/ia64/tcg-target.h
Expand Up @@ -146,6 +146,10 @@ typedef enum {
#define TCG_TARGET_HAS_mulu2_i64 0
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_muls2_i64 0
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_mulsh_i64 0

#define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
#define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
Expand Down
2 changes: 2 additions & 0 deletions tcg/mips/tcg-target.h
Expand Up @@ -89,6 +89,8 @@ typedef enum {
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_muls2_i32 1
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0

/* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */
#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
Expand Down
20 changes: 20 additions & 0 deletions tcg/optimize.c
Expand Up @@ -198,6 +198,8 @@ static TCGOpcode op_to_mov(TCGOpcode op)

static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
uint64_t l64, h64;

switch (op) {
CASE_OP_32_64(add):
return x + y;
Expand Down Expand Up @@ -290,6 +292,18 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
case INDEX_op_ext32u_i64:
return (uint32_t)x;

case INDEX_op_muluh_i32:
return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
case INDEX_op_mulsh_i32:
return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

case INDEX_op_muluh_i64:
mulu64(&l64, &h64, x, y);
return h64;
case INDEX_op_mulsh_i64:
muls64(&l64, &h64, x, y);
return h64;

default:
fprintf(stderr,
"Unrecognized operation %d in do_constant_folding.\n", op);
Expand Down Expand Up @@ -531,6 +545,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
CASE_OP_32_64(eqv):
CASE_OP_32_64(nand):
CASE_OP_32_64(nor):
CASE_OP_32_64(muluh):
CASE_OP_32_64(mulsh):
swap_commutative(args[0], &args[1], &args[2]);
break;
CASE_OP_32_64(brcond):
Expand Down Expand Up @@ -771,6 +787,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
switch (op) {
CASE_OP_32_64(and):
CASE_OP_32_64(mul):
CASE_OP_32_64(muluh):
CASE_OP_32_64(mulsh):
if ((temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == 0)) {
s->gen_opc_buf[op_index] = op_to_movi(op);
Expand Down Expand Up @@ -882,6 +900,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
CASE_OP_32_64(eqv):
CASE_OP_32_64(nand):
CASE_OP_32_64(nor):
CASE_OP_32_64(muluh):
CASE_OP_32_64(mulsh):
if (temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[2]].state == TCG_TEMP_CONST) {
s->gen_opc_buf[op_index] = op_to_movi(op);
Expand Down
2 changes: 2 additions & 0 deletions tcg/ppc/tcg-target.h
Expand Up @@ -96,6 +96,8 @@ typedef enum {
#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0

#define TCG_AREG0 TCG_REG_R27

Expand Down
4 changes: 4 additions & 0 deletions tcg/ppc64/tcg-target.h
Expand Up @@ -95,6 +95,8 @@ typedef enum {
#define TCG_TARGET_HAS_sub2_i32 0
#define TCG_TARGET_HAS_mulu2_i32 0
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0

#define TCG_TARGET_HAS_div_i64 1
#define TCG_TARGET_HAS_rem_i64 0
Expand All @@ -118,6 +120,8 @@ typedef enum {
#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_mulu2_i64 1
#define TCG_TARGET_HAS_muls2_i64 1
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0

#define TCG_AREG0 TCG_REG_R27

Expand Down
4 changes: 4 additions & 0 deletions tcg/s390/tcg-target.h
Expand Up @@ -69,6 +69,8 @@ typedef enum TCGReg {
#define TCG_TARGET_HAS_sub2_i32 1
#define TCG_TARGET_HAS_mulu2_i32 0
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0

#define TCG_TARGET_HAS_div2_i64 1
#define TCG_TARGET_HAS_rot_i64 1
Expand All @@ -94,6 +96,8 @@ typedef enum TCGReg {
#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_mulu2_i64 1
#define TCG_TARGET_HAS_muls2_i64 0
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0

extern bool tcg_target_deposit_valid(int ofs, int len);
#define TCG_TARGET_deposit_i32_valid tcg_target_deposit_valid
Expand Down
4 changes: 4 additions & 0 deletions tcg/sparc/tcg-target.h
Expand Up @@ -107,6 +107,8 @@ typedef enum {
#define TCG_TARGET_HAS_sub2_i32 1
#define TCG_TARGET_HAS_mulu2_i32 1
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0

#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div_i64 1
Expand Down Expand Up @@ -134,6 +136,8 @@ typedef enum {
#define TCG_TARGET_HAS_sub2_i64 0
#define TCG_TARGET_HAS_mulu2_i64 0
#define TCG_TARGET_HAS_muls2_i64 0
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0
#endif

#define TCG_AREG0 TCG_REG_I0
Expand Down
40 changes: 36 additions & 4 deletions tcg/tcg-op.h
Expand Up @@ -1039,10 +1039,18 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
t0 = tcg_temp_new_i64();
t1 = tcg_temp_new_i32();

tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
TCGV_LOW(arg1), TCGV_LOW(arg2));
/* Allow the optimizer room to replace mulu2 with two moves. */
tcg_gen_op0(INDEX_op_nop);
if (TCG_TARGET_HAS_mulu2_i32) {
tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
TCGV_LOW(arg1), TCGV_LOW(arg2));
/* Allow the optimizer room to replace mulu2 with two moves. */
tcg_gen_op0(INDEX_op_nop);
} else {
tcg_debug_assert(TCG_TARGET_HAS_muluh_i32);
tcg_gen_op3_i32(INDEX_op_mul_i32, TCGV_LOW(t0),
TCGV_LOW(arg1), TCGV_LOW(arg2));
tcg_gen_op3_i32(INDEX_op_muluh_i32, TCGV_HIGH(t0),
TCGV_LOW(arg1), TCGV_LOW(arg2));
}

tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
Expand Down Expand Up @@ -2401,6 +2409,12 @@ static inline void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh,
tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2);
/* Allow the optimizer room to replace mulu2 with two moves. */
tcg_gen_op0(INDEX_op_nop);
} else if (TCG_TARGET_HAS_muluh_i32) {
TCGv_i32 t = tcg_temp_new_i32();
tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
tcg_gen_op3_i32(INDEX_op_muluh_i32, rh, arg1, arg2);
tcg_gen_mov_i32(rl, t);
tcg_temp_free_i32(t);
} else {
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_i64 t1 = tcg_temp_new_i64();
Expand All @@ -2420,6 +2434,12 @@ static inline void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh,
tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2);
/* Allow the optimizer room to replace muls2 with two moves. */
tcg_gen_op0(INDEX_op_nop);
} else if (TCG_TARGET_HAS_mulsh_i32) {
TCGv_i32 t = tcg_temp_new_i32();
tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2);
tcg_gen_op3_i32(INDEX_op_mulsh_i32, rh, arg1, arg2);
tcg_gen_mov_i32(rl, t);
tcg_temp_free_i32(t);
} else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_mulu2_i32) {
TCGv_i32 t0 = tcg_temp_new_i32();
TCGv_i32 t1 = tcg_temp_new_i32();
Expand Down Expand Up @@ -2499,6 +2519,12 @@ static inline void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh,
tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2);
/* Allow the optimizer room to replace mulu2 with two moves. */
tcg_gen_op0(INDEX_op_nop);
} else if (TCG_TARGET_HAS_muluh_i64) {
TCGv_i64 t = tcg_temp_new_i64();
tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
tcg_gen_op3_i64(INDEX_op_muluh_i64, rh, arg1, arg2);
tcg_gen_mov_i64(rl, t);
tcg_temp_free_i64(t);
} else if (TCG_TARGET_HAS_mulu2_i64) {
TCGv_i64 t0 = tcg_temp_new_i64();
TCGv_i64 t1 = tcg_temp_new_i64();
Expand Down Expand Up @@ -2540,6 +2566,12 @@ static inline void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh,
tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2);
/* Allow the optimizer room to replace muls2 with two moves. */
tcg_gen_op0(INDEX_op_nop);
} else if (TCG_TARGET_HAS_mulsh_i64) {
TCGv_i64 t = tcg_temp_new_i64();
tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2);
tcg_gen_op3_i64(INDEX_op_mulsh_i64, rh, arg1, arg2);
tcg_gen_mov_i64(rl, t);
tcg_temp_free_i64(t);
} else {
TCGv_i64 t0 = tcg_temp_new_i64();
int sizemask = 0;
Expand Down
4 changes: 4 additions & 0 deletions tcg/tcg-opc.h
Expand Up @@ -91,6 +91,8 @@ DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_add2_i32))
DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_HAS_sub2_i32))
DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_mulu2_i32))
DEF(muls2_i32, 2, 2, 0, IMPL(TCG_TARGET_HAS_muls2_i32))
DEF(muluh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i32))
DEF(mulsh_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i32))
DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32))
DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))

Expand Down Expand Up @@ -167,6 +169,8 @@ DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
DEF(mulu2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulu2_i64))
DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
DEF(muluh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_muluh_i64))
DEF(mulsh_i64, 1, 2, 0, IMPL(TCG_TARGET_HAS_mulsh_i64))

/* QEMU specific */
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
Expand Down
36 changes: 30 additions & 6 deletions tcg/tcg.c
Expand Up @@ -1252,12 +1252,13 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
static void tcg_liveness_analysis(TCGContext *s)
{
int i, op_index, nb_args, nb_iargs, nb_oargs, arg, nb_ops;
TCGOpcode op, op_new;
TCGOpcode op, op_new, op_new2;
TCGArg *args;
const TCGOpDef *def;
uint8_t *dead_temps, *mem_temps;
uint16_t dead_args;
uint8_t sync_args;
bool have_op_new2;

s->gen_opc_ptr++; /* skip end */

Expand Down Expand Up @@ -1394,29 +1395,52 @@ static void tcg_liveness_analysis(TCGContext *s)
goto do_not_remove;

case INDEX_op_mulu2_i32:
op_new = INDEX_op_mul_i32;
op_new2 = INDEX_op_muluh_i32;
have_op_new2 = TCG_TARGET_HAS_muluh_i32;
goto do_mul2;
case INDEX_op_muls2_i32:
op_new = INDEX_op_mul_i32;
op_new2 = INDEX_op_mulsh_i32;
have_op_new2 = TCG_TARGET_HAS_mulsh_i32;
goto do_mul2;
case INDEX_op_mulu2_i64:
op_new = INDEX_op_mul_i64;
op_new2 = INDEX_op_muluh_i64;
have_op_new2 = TCG_TARGET_HAS_muluh_i64;
goto do_mul2;
case INDEX_op_muls2_i64:
op_new = INDEX_op_mul_i64;
op_new2 = INDEX_op_mulsh_i64;
have_op_new2 = TCG_TARGET_HAS_mulsh_i64;
goto do_mul2;
do_mul2:
args -= 4;
nb_iargs = 2;
nb_oargs = 2;
/* Likewise, test for the high part of the operation dead. */
if (dead_temps[args[1]] && !mem_temps[args[1]]) {
if (dead_temps[args[0]] && !mem_temps[args[0]]) {
/* Both parts of the operation are dead. */
goto do_remove;
}
/* The high part of the operation is dead; generate the low. */
s->gen_opc_buf[op_index] = op = op_new;
args[1] = args[2];
args[2] = args[3];
assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
/* Fall through and mark the single-word operation live. */
nb_oargs = 1;
} else if (have_op_new2 && dead_temps[args[0]]
&& !mem_temps[args[0]]) {
/* The low part of the operation is dead; generate the high. */
s->gen_opc_buf[op_index] = op = op_new2;
args[0] = args[1];
args[1] = args[2];
args[2] = args[3];
} else {
goto do_not_remove;
}
assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
/* Mark the single-word operation live. */
nb_oargs = 1;
goto do_not_remove;

default:
Expand Down
2 changes: 2 additions & 0 deletions tcg/tcg.h
Expand Up @@ -85,6 +85,8 @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_sub2_i64 0
#define TCG_TARGET_HAS_mulu2_i64 0
#define TCG_TARGET_HAS_muls2_i64 0
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0
/* Turn some undef macros into true macros. */
#define TCG_TARGET_HAS_add2_i32 1
#define TCG_TARGET_HAS_sub2_i32 1
Expand Down
5 changes: 4 additions & 1 deletion tcg/tci/tcg-target.h
Expand Up @@ -76,6 +76,8 @@
#define TCG_TARGET_HAS_rot_i32 1
#define TCG_TARGET_HAS_movcond_i32 0
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0

#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_bswap16_i64 1
Expand All @@ -100,13 +102,14 @@
#define TCG_TARGET_HAS_rot_i64 1
#define TCG_TARGET_HAS_movcond_i64 0
#define TCG_TARGET_HAS_muls2_i64 0

#define TCG_TARGET_HAS_add2_i32 0
#define TCG_TARGET_HAS_sub2_i32 0
#define TCG_TARGET_HAS_mulu2_i32 0
#define TCG_TARGET_HAS_add2_i64 0
#define TCG_TARGET_HAS_sub2_i64 0
#define TCG_TARGET_HAS_mulu2_i64 0
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0
#endif /* TCG_TARGET_REG_BITS == 64 */

/* Number of registers available.
Expand Down

0 comments on commit 0327152

Please sign in to comment.