From a4ce099a7a4b4734c372f6bf28f3362e370f23c1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 30 Mar 2014 17:14:02 -0700 Subject: [PATCH] tcg: Implement insert_op_before Rather than reserving space in the op stream for optimization, let the optimizer add ops as necessary. Reviewed-by: Bastian Koppelmann Signed-off-by: Richard Henderson --- tcg/optimize.c | 57 +++++++++++++++++++++++++++++++------------------- tcg/tcg-op.c | 21 ------------------- tcg/tcg-op.h | 1 - 3 files changed, 35 insertions(+), 44 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 973fbb472905..067917c396f4 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -67,6 +67,37 @@ static void reset_temp(TCGArg temp) temps[temp].mask = -1; } +static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op, + TCGOpcode opc, int nargs) +{ + int oi = s->gen_next_op_idx; + int pi = s->gen_next_parm_idx; + int prev = old_op->prev; + int next = old_op - s->gen_op_buf; + TCGOp *new_op; + + tcg_debug_assert(oi < OPC_BUF_SIZE); + tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); + s->gen_next_op_idx = oi + 1; + s->gen_next_parm_idx = pi + nargs; + + new_op = &s->gen_op_buf[oi]; + *new_op = (TCGOp){ + .opc = opc, + .args = pi, + .prev = prev, + .next = next + }; + if (prev >= 0) { + s->gen_op_buf[prev].next = oi; + } else { + s->gen_first_op_idx = oi; + } + old_op->prev = oi; + + return new_op; +} + /* Reset all temporaries, given that there are NB_TEMPS of them. */ static void reset_all_temps(int nb_temps) { @@ -1108,8 +1139,8 @@ static void tcg_constant_folding(TCGContext *s) uint64_t a = ((uint64_t)ah << 32) | al; uint64_t b = ((uint64_t)bh << 32) | bl; TCGArg rl, rh; - TCGOp *op2; - TCGArg *args2; + TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2); + TCGArg *args2 = &s->gen_opparam_buf[op2->args]; if (opc == INDEX_op_add2_i32) { a += b; @@ -1117,15 +1148,6 @@ static void tcg_constant_folding(TCGContext *s) a -= b; } - /* We emit the extra nop when we emit the add2/sub2. 
*/ - op2 = &s->gen_op_buf[oi_next]; - assert(op2->opc == INDEX_op_nop); - - /* But we still have to allocate args for the op. */ - op2->args = s->gen_next_parm_idx; - s->gen_next_parm_idx += 2; - args2 = &s->gen_opparam_buf[op2->args]; - rl = args[0]; rh = args[1]; tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)a); @@ -1144,17 +1166,8 @@ static void tcg_constant_folding(TCGContext *s) uint32_t b = temps[args[3]].val; uint64_t r = (uint64_t)a * b; TCGArg rl, rh; - TCGOp *op2; - TCGArg *args2; - - /* We emit the extra nop when we emit the mulu2. */ - op2 = &s->gen_op_buf[oi_next]; - assert(op2->opc == INDEX_op_nop); - - /* But we still have to allocate args for the op. */ - op2->args = s->gen_next_parm_idx; - s->gen_next_parm_idx += 2; - args2 = &s->gen_opparam_buf[op2->args]; + TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2); + TCGArg *args2 = &s->gen_opparam_buf[op2->args]; rl = args[0]; rh = args[1]; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index cbaa15ccb9b3..afa351dc70ce 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -57,11 +57,6 @@ static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args) }; } -void tcg_gen_op0(TCGContext *ctx, TCGOpcode opc) -{ - tcg_emit_op(ctx, opc, -1); -} - void tcg_gen_op1(TCGContext *ctx, TCGOpcode opc, TCGArg a1) { int pi = ctx->gen_next_parm_idx; @@ -571,8 +566,6 @@ void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, { if (TCG_TARGET_HAS_add2_i32) { tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh); - /* Allow the optimizer room to replace add2 with two moves. */ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -590,8 +583,6 @@ void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, { if (TCG_TARGET_HAS_sub2_i32) { tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh); - /* Allow the optimizer room to replace sub2 with two moves. 
*/ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -608,8 +599,6 @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { if (TCG_TARGET_HAS_mulu2_i32) { tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2); - /* Allow the optimizer room to replace mulu2 with two moves. */ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else if (TCG_TARGET_HAS_muluh_i32) { TCGv_i32 t = tcg_temp_new_i32(); tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); @@ -632,8 +621,6 @@ void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { if (TCG_TARGET_HAS_muls2_i32) { tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2); - /* Allow the optimizer room to replace muls2 with two moves. */ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else if (TCG_TARGET_HAS_mulsh_i32) { TCGv_i32 t = tcg_temp_new_i32(); tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); @@ -1648,8 +1635,6 @@ void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, { if (TCG_TARGET_HAS_add2_i64) { tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh); - /* Allow the optimizer room to replace add2 with two moves. */ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -1668,8 +1653,6 @@ void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, { if (TCG_TARGET_HAS_sub2_i64) { tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh); - /* Allow the optimizer room to replace sub2 with two moves. */ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -1687,8 +1670,6 @@ void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_mulu2_i64) { tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2); - /* Allow the optimizer room to replace mulu2 with two moves. 
*/ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else if (TCG_TARGET_HAS_muluh_i64) { TCGv_i64 t = tcg_temp_new_i64(); tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); @@ -1708,8 +1689,6 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_muls2_i64) { tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2); - /* Allow the optimizer room to replace muls2 with two moves. */ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else if (TCG_TARGET_HAS_mulsh_i64) { TCGv_i64 t = tcg_temp_new_i64(); tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index eacfd8aad1e8..96adf9af6a72 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -28,7 +28,6 @@ /* Basic output routines. Not for general consumption. */ -void tcg_gen_op0(TCGContext *, TCGOpcode); void tcg_gen_op1(TCGContext *, TCGOpcode, TCGArg); void tcg_gen_op2(TCGContext *, TCGOpcode, TCGArg, TCGArg); void tcg_gen_op3(TCGContext *, TCGOpcode, TCGArg, TCGArg, TCGArg);