From 40475087a5ee80f5251dac6087142458d8dc7d99 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 12 Oct 2012 16:40:41 +0200 Subject: [PATCH 01/61] test-i386: QEMU_PACKED is not defined here Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tests/tcg/test-i386.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/tcg/test-i386.c b/tests/tcg/test-i386.c index 6dc730d882fd..b18fe20c7686 100644 --- a/tests/tcg/test-i386.c +++ b/tests/tcg/test-i386.c @@ -925,7 +925,7 @@ void test_fbcd(double a) void test_fenv(void) { - struct QEMU_PACKED { + struct __attribute__((__packed__)) { uint16_t fpuc; uint16_t dummy1; uint16_t fpus; @@ -935,7 +935,7 @@ void test_fenv(void) uint32_t ignored[4]; long double fpregs[8]; } float_env32; - struct QEMU_PACKED { + struct __attribute__((__packed__)) { uint16_t fpuc; uint16_t fpus; uint16_t fptag; @@ -1280,7 +1280,7 @@ void test_segs(void) struct { uint32_t offset; uint16_t seg; - } QEMU_PACKED segoff; + } __attribute__((__packed__)) segoff; ldt.entry_number = 1; ldt.base_addr = (unsigned long)&seg_data1; From 1b99f83e3946c447eefb3417ec1ea4c2f3b44582 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 12 Oct 2012 16:40:21 +0200 Subject: [PATCH 02/61] test-i386: make it compile with a recent gcc Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- tests/tcg/test-i386.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tcg/test-i386.c b/tests/tcg/test-i386.c index b18fe20c7686..b05572b73422 100644 --- a/tests/tcg/test-i386.c +++ b/tests/tcg/test-i386.c @@ -209,7 +209,7 @@ static inline long i2l(long v) #define TEST_LEA16(STR)\ {\ asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\ - : "=wq" (res)\ + : "=r" (res)\ : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\ printf("lea %s = %08lx\n", STR, res);\ } @@ -1828,7 +1828,7 @@ void test_exceptions(void) printf("lock nop exception:\n"); if (setjmp(jmp_env) == 0) { /* now execute an invalid instruction */ - asm volatile(".byte 0xf0, 0x90"); /* lock nop */ + asm volatile(".byte 0xf0, 0x90"); } printf("INT exception:\n"); From 93ab25d7d129fbe47a99fd8c91292ea99bff747e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 6 Oct 2012 01:56:03 +0200 Subject: [PATCH 03/61] target-i386: use OT_* consistently Reviewed-by: Blue Swirl Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 88 +++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 39 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 112c3102a070..94e14342d31c 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -323,17 +323,17 @@ static inline void gen_op_mov_reg_T1(int ot, int reg) static inline void gen_op_mov_reg_A0(int size, int reg) { switch(size) { - case 0: + case OT_BYTE: tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_A0, 0, 16); break; default: /* XXX this shouldn't be reached; abort? */ - case 1: + case OT_WORD: /* For x86_64, this sets the higher half of register to zero. For i386, this is equivalent to a mov. 
*/ tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0); break; #ifdef TARGET_X86_64 - case 2: + case OT_LONG: tcg_gen_mov_tl(cpu_regs[reg], cpu_A0); break; #endif @@ -398,11 +398,11 @@ static inline void gen_op_jmp_T0(void) static inline void gen_op_add_reg_im(int size, int reg, int32_t val) { switch(size) { - case 0: + case OT_BYTE: tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val); tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16); break; - case 1: + case OT_WORD: tcg_gen_addi_tl(cpu_tmp0, cpu_regs[reg], val); /* For x86_64, this sets the higher half of register to zero. For i386, this is equivalent to a nop. */ @@ -410,7 +410,7 @@ static inline void gen_op_add_reg_im(int size, int reg, int32_t val) tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0); break; #ifdef TARGET_X86_64 - case 2: + case OT_LONG: tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val); break; #endif @@ -420,11 +420,11 @@ static inline void gen_op_add_reg_im(int size, int reg, int32_t val) static inline void gen_op_add_reg_T0(int size, int reg) { switch(size) { - case 0: + case OT_BYTE: tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]); tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], cpu_tmp0, 0, 16); break; - case 1: + case OT_WORD: tcg_gen_add_tl(cpu_tmp0, cpu_regs[reg], cpu_T[0]); /* For x86_64, this sets the higher half of register to zero. For i386, this is equivalent to a nop. */ @@ -432,7 +432,7 @@ static inline void gen_op_add_reg_T0(int size, int reg) tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0); break; #ifdef TARGET_X86_64 - case 2: + case OT_LONG: tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]); break; #endif @@ -506,14 +506,14 @@ static inline void gen_op_lds_T0_A0(int idx) { int mem_index = (idx >> 2) - 1; switch(idx & 3) { - case 0: + case OT_BYTE: tcg_gen_qemu_ld8s(cpu_T[0], cpu_A0, mem_index); break; - case 1: + case OT_WORD: tcg_gen_qemu_ld16s(cpu_T[0], cpu_A0, mem_index); break; default: - case 2: + case OT_LONG: tcg_gen_qemu_ld32s(cpu_T[0], cpu_A0, mem_index); break; } @@ -523,17 +523,17 @@ static inline void gen_op_ld_v(int idx, TCGv t0, TCGv a0) { int mem_index = (idx >> 2) - 1; switch(idx & 3) { - case 0: + case OT_BYTE: tcg_gen_qemu_ld8u(t0, a0, mem_index); break; - case 1: + case OT_WORD: tcg_gen_qemu_ld16u(t0, a0, mem_index); break; - case 2: + case OT_LONG: tcg_gen_qemu_ld32u(t0, a0, mem_index); break; default: - case 3: + case OT_QUAD: /* Should never happen on 32-bit targets. */ #ifdef TARGET_X86_64 tcg_gen_qemu_ld64(t0, a0, mem_index); @@ -562,17 +562,17 @@ static inline void gen_op_st_v(int idx, TCGv t0, TCGv a0) { int mem_index = (idx >> 2) - 1; switch(idx & 3) { - case 0: + case OT_BYTE: tcg_gen_qemu_st8(t0, a0, mem_index); break; - case 1: + case OT_WORD: tcg_gen_qemu_st16(t0, a0, mem_index); break; - case 2: + case OT_LONG: tcg_gen_qemu_st32(t0, a0, mem_index); break; default: - case 3: + case OT_QUAD: /* Should never happen on 32-bit targets. 
*/ #ifdef TARGET_X86_64 tcg_gen_qemu_st64(t0, a0, mem_index); @@ -710,21 +710,31 @@ static inline void gen_op_jz_ecx(int size, int label1) static void gen_helper_in_func(int ot, TCGv v, TCGv_i32 n) { switch (ot) { - case 0: gen_helper_inb(v, n); break; - case 1: gen_helper_inw(v, n); break; - case 2: gen_helper_inl(v, n); break; + case OT_BYTE: + gen_helper_inb(v, n); + break; + case OT_WORD: + gen_helper_inw(v, n); + break; + case OT_LONG: + gen_helper_inl(v, n); + break; } - } static void gen_helper_out_func(int ot, TCGv_i32 v, TCGv_i32 n) { switch (ot) { - case 0: gen_helper_outb(v, n); break; - case 1: gen_helper_outw(v, n); break; - case 2: gen_helper_outl(v, n); break; + case OT_BYTE: + gen_helper_outb(v, n); + break; + case OT_WORD: + gen_helper_outw(v, n); + break; + case OT_LONG: + gen_helper_outl(v, n); + break; } - } static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip, @@ -741,13 +751,13 @@ static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip, state_saved = 1; tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); switch (ot) { - case 0: + case OT_BYTE: gen_helper_check_iob(cpu_env, cpu_tmp2_i32); break; - case 1: + case OT_WORD: gen_helper_check_iow(cpu_env, cpu_tmp2_i32); break; - case 2: + case OT_LONG: gen_helper_check_iol(cpu_env, cpu_tmp2_i32); break; } @@ -1781,34 +1791,34 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, if (is_right) { switch (ot) { - case 0: + case OT_BYTE: gen_helper_rcrb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; - case 1: + case OT_WORD: gen_helper_rcrw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; - case 2: + case OT_LONG: gen_helper_rcrl(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; #ifdef TARGET_X86_64 - case 3: + case OT_QUAD: gen_helper_rcrq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; #endif } } else { switch (ot) { - case 0: + case OT_BYTE: gen_helper_rclb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; - case 1: + case OT_WORD: gen_helper_rclw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; - case 2: + case OT_LONG: gen_helper_rcll(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; #ifdef TARGET_X86_64 - case 3: + case OT_QUAD: gen_helper_rclq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); break; #endif From d824df34e8cdd2fbe55258f26731d7ef3ac7ced2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 5 Oct 2012 18:02:41 +0200 Subject: [PATCH 04/61] target-i386: introduce gen_ext_tl Introduce a function that abstracts extracting an 8, 16, 32 or 64-bit value with or without sign, generalizing gen_extu and gen_exts. 
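The extension being abstracted here is an ordinary zero- or sign-extension selected by operand size. As a standalone illustration in plain C (not TCG; the ext_tl name and SZ_* constants are invented for this sketch and only mirror OT_BYTE/OT_WORD/OT_LONG/OT_QUAD), the dispatch that gen_ext_tl centralizes looks like this:

#include <stdint.h>
#include <stdio.h>

/* Illustrative size codes, mirroring OT_BYTE/OT_WORD/OT_LONG/OT_QUAD. */
enum { SZ_BYTE, SZ_WORD, SZ_LONG, SZ_QUAD };

/* Plain-C analogue of what gen_ext_tl emits TCG ops for: take the low
   8/16/32/64 bits of src, zero- or sign-extended. */
static int64_t ext_tl(int64_t src, int size, int sign)
{
    switch (size) {
    case SZ_BYTE:
        return sign ? (int64_t)(int8_t)src : (int64_t)(uint8_t)src;
    case SZ_WORD:
        return sign ? (int64_t)(int16_t)src : (int64_t)(uint16_t)src;
    case SZ_LONG:
        return sign ? (int64_t)(int32_t)src : (int64_t)(uint32_t)src;
    default:
        return src;                  /* SZ_QUAD: already full width */
    }
}

int main(void)
{
    printf("%lld\n", (long long)ext_tl(0xff80, SZ_BYTE, 1));  /* -128 */
    printf("%lld\n", (long long)ext_tl(0xff80, SZ_BYTE, 0));  /* 128  */
    return 0;
}

Because the size and signedness are known at translation time, the generated code contains only the single extension op that is actually needed.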
Reviewed-by: Blue Swirl Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 146 ++++++++++------------------------------ 1 file changed, 37 insertions(+), 109 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 94e14342d31c..ccb06e27f59f 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -659,38 +659,45 @@ static inline void gen_op_movl_T0_Dshift(int ot) tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot); }; -static void gen_extu(int ot, TCGv reg) +static TCGv gen_ext_tl(TCGv dst, TCGv src, int size, bool sign) { - switch(ot) { + switch (size) { case OT_BYTE: - tcg_gen_ext8u_tl(reg, reg); - break; + if (sign) { + tcg_gen_ext8s_tl(dst, src); + } else { + tcg_gen_ext8u_tl(dst, src); + } + return dst; case OT_WORD: - tcg_gen_ext16u_tl(reg, reg); - break; + if (sign) { + tcg_gen_ext16s_tl(dst, src); + } else { + tcg_gen_ext16u_tl(dst, src); + } + return dst; +#ifdef TARGET_X86_64 case OT_LONG: - tcg_gen_ext32u_tl(reg, reg); - break; + if (sign) { + tcg_gen_ext32s_tl(dst, src); + } else { + tcg_gen_ext32u_tl(dst, src); + } + return dst; +#endif default: - break; + return src; } } +static void gen_extu(int ot, TCGv reg) +{ + gen_ext_tl(reg, reg, ot, false); +} + static void gen_exts(int ot, TCGv reg) { - switch(ot) { - case OT_BYTE: - tcg_gen_ext8s_tl(reg, reg); - break; - case OT_WORD: - tcg_gen_ext16s_tl(reg, reg); - break; - case OT_LONG: - tcg_gen_ext32s_tl(reg, reg); - break; - default: - break; - } + gen_ext_tl(reg, reg, ot, true); } static inline void gen_op_jnz_ecx(int size, int label1) @@ -966,54 +973,15 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) switch(jcc_op) { case JCC_Z: fast_jcc_z: - switch(size) { - case 0: - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xff); - t0 = cpu_tmp0; - break; - case 1: - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffff); - t0 = cpu_tmp0; - break; -#ifdef TARGET_X86_64 - case 2: - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0xffffffff); - t0 = cpu_tmp0; - break; -#endif - default: - t0 = cpu_cc_dst; - break; - } + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_dst, size, false); tcg_gen_brcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, t0, 0, l1); break; case JCC_S: fast_jcc_s: - switch(size) { - case 0: - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80); - tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, - 0, l1); - break; - case 1: - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x8000); - tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, - 0, l1); - break; -#ifdef TARGET_X86_64 - case 2: - tcg_gen_andi_tl(cpu_tmp0, cpu_cc_dst, 0x80000000); - tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_tmp0, - 0, l1); - break; -#endif - default: - tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, cpu_cc_dst, - 0, l1); - break; - } + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_dst, size, true); + tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, t0, 0, l1); break; - + case JCC_B: cond = inv ? TCG_COND_GEU : TCG_COND_LTU; goto fast_jcc_b; @@ -1021,28 +989,8 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) cond = inv ? 
TCG_COND_GTU : TCG_COND_LEU; fast_jcc_b: tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); - switch(size) { - case 0: - t0 = cpu_tmp0; - tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xff); - tcg_gen_andi_tl(t0, cpu_cc_src, 0xff); - break; - case 1: - t0 = cpu_tmp0; - tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffff); - tcg_gen_andi_tl(t0, cpu_cc_src, 0xffff); - break; -#ifdef TARGET_X86_64 - case 2: - t0 = cpu_tmp0; - tcg_gen_andi_tl(cpu_tmp4, cpu_tmp4, 0xffffffff); - tcg_gen_andi_tl(t0, cpu_cc_src, 0xffffffff); - break; -#endif - default: - t0 = cpu_cc_src; - break; - } + gen_extu(size, cpu_tmp4); + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1); break; @@ -1053,28 +1001,8 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) cond = inv ? TCG_COND_GT : TCG_COND_LE; fast_jcc_l: tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); - switch(size) { - case 0: - t0 = cpu_tmp0; - tcg_gen_ext8s_tl(cpu_tmp4, cpu_tmp4); - tcg_gen_ext8s_tl(t0, cpu_cc_src); - break; - case 1: - t0 = cpu_tmp0; - tcg_gen_ext16s_tl(cpu_tmp4, cpu_tmp4); - tcg_gen_ext16s_tl(t0, cpu_cc_src); - break; -#ifdef TARGET_X86_64 - case 2: - t0 = cpu_tmp0; - tcg_gen_ext32s_tl(cpu_tmp4, cpu_tmp4); - tcg_gen_ext32s_tl(t0, cpu_cc_src); - break; -#endif - default: - t0 = cpu_cc_src; - break; - } + gen_exts(size, cpu_tmp4); + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true); tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1); break; From 91642ff80607ad90c66ba044fe91e4a53b09bdbb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 6 Oct 2012 01:22:09 +0200 Subject: [PATCH 05/61] target-i386: factor setting of s->cc_op handling for string functions Set it to the appropriate CC_OP_SUBx constant in gen_scas/gen_cmps. In the repz case it can be overridden to CC_OP_DYNAMIC after generating the code. 
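For context, QEMU evaluates x86 flags lazily: rather than computing EFLAGS after every flag-setting instruction, the translator records which kind of operation ran last (cc_op) together with its operands (cc_src/cc_dst), and only materializes the flag bits when something reads them. A rough standalone model of the idea in plain C (struct and function names are invented for this sketch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { OP_LOGIC, OP_SUB };           /* stands in for CC_OP_LOGICB, CC_OP_SUBB, ... */

struct lazy_cc {
    int op;                          /* which operation set the flags last */
    uint8_t dst;                     /* its result (cc_dst) */
    uint8_t src;                     /* its second operand (cc_src) */
};

/* cmp/scas-style update: record the operands, do not compute EFLAGS yet. */
static void record_sub(struct lazy_cc *cc, uint8_t a, uint8_t b)
{
    cc->op = OP_SUB;
    cc->dst = a - b;
    cc->src = b;
}

/* Materialize ZF and CF from the recorded state, like cc_compute_all. */
static void compute_flags(const struct lazy_cc *cc, bool *zf, bool *cf)
{
    *zf = (cc->dst == 0);
    /* for a subtraction, carry means borrow: a < b, with a = dst + src */
    *cf = (cc->op == OP_SUB) && ((uint8_t)(cc->dst + cc->src) < cc->src);
}

int main(void)
{
    struct lazy_cc cc;
    bool zf, cf;
    record_sub(&cc, 3, 5);           /* like SCASB comparing AL=3 with 5 */
    compute_flags(&cc, &zf, &cf);
    printf("ZF=%d CF=%d\n", zf, cf); /* ZF=0 CF=1: 3 - 5 borrows */
    return 0;
}

Setting s->cc_op inside gen_scas/gen_cmps, as this patch does, amounts to doing the record step in one place instead of at every call site, with the repz wrappers downgrading to CC_OP_DYNAMIC when the loop makes the final state unknown at translation time.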
Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index ccb06e27f59f..9ac66b984fca 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1112,6 +1112,7 @@ static inline void gen_scas(DisasContext *s, int ot) gen_op_cmpl_T0_T1_cc(); gen_op_movl_T0_Dshift(ot); gen_op_add_reg_T0(s->aflag, R_EDI); + s->cc_op = CC_OP_SUBB + ot; } static inline void gen_cmps(DisasContext *s, int ot) @@ -1124,6 +1125,7 @@ static inline void gen_cmps(DisasContext *s, int ot) gen_op_movl_T0_Dshift(ot); gen_op_add_reg_T0(s->aflag, R_ESI); gen_op_add_reg_T0(s->aflag, R_EDI); + s->cc_op = CC_OP_SUBB + ot; } static inline void gen_ins(DisasContext *s, int ot) @@ -1194,11 +1196,12 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot, \ l2 = gen_jz_ecx_string(s, next_eip); \ gen_ ## op(s, ot); \ gen_op_add_reg_im(s->aflag, R_ECX, -1); \ - gen_op_set_cc_op(CC_OP_SUBB + ot); \ - gen_jcc1(s, CC_OP_SUBB + ot, (JCC_Z << 1) | (nz ^ 1), l2); \ + gen_op_set_cc_op(s->cc_op); \ + gen_jcc1(s, s->cc_op, (JCC_Z << 1) | (nz ^ 1), l2); \ if (!s->jmp_opt) \ gen_op_jz_ecx(s->aflag, l2); \ gen_jmp(s, cur_eip); \ + s->cc_op = CC_OP_DYNAMIC; \ } GEN_REPZ(movs) @@ -6088,7 +6091,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0); } else { gen_scas(s, ot); - s->cc_op = CC_OP_SUBB + ot; } break; @@ -6104,7 +6106,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0); } else { gen_cmps(s, ot); - s->cc_op = CC_OP_SUBB + ot; } break; case 0x6c: /* insS */ From b27fc131fe8dc18924904e4dd0b82dfd77dc51c7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 6 Oct 2012 01:36:45 +0200 Subject: [PATCH 06/61] target-i386: drop cc_op argument of gen_jcc1 As in the gen_repz_scas/gen_repz_cmps case, delay setting CC_OP_DYNAMIC in gen_jcc until after code generation. All of gen_jcc1/is_fast_jcc/gen_setcc_slow_T0 now work on s->cc_op, which makes things a bit easier to follow and to patch. Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 9ac66b984fca..48a3255a401f 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -954,7 +954,7 @@ static int is_fast_jcc_case(DisasContext *s, int b) /* generate a conditional jump to label 'l1' according to jump opcode value 'b'. In the fast case, T0 is guaranted not to be used. 
*/ -static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) +static inline void gen_jcc1(DisasContext *s, int b, int l1) { int inv, jcc_op, size, cond; TCGv t0; @@ -962,14 +962,14 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) inv = b & 1; jcc_op = (b >> 1) & 7; - switch(cc_op) { + switch (s->cc_op) { /* we optimize the cmp/jcc case */ case CC_OP_SUBB: case CC_OP_SUBW: case CC_OP_SUBL: case CC_OP_SUBQ: - size = cc_op - CC_OP_SUBB; + size = s->cc_op - CC_OP_SUBB; switch(jcc_op) { case JCC_Z: fast_jcc_z: @@ -1053,10 +1053,10 @@ static inline void gen_jcc1(DisasContext *s, int cc_op, int b, int l1) case CC_OP_SARQ: switch(jcc_op) { case JCC_Z: - size = (cc_op - CC_OP_ADDB) & 3; + size = (s->cc_op - CC_OP_ADDB) & 3; goto fast_jcc_z; case JCC_S: - size = (cc_op - CC_OP_ADDB) & 3; + size = (s->cc_op - CC_OP_ADDB) & 3; goto fast_jcc_s; default: goto slow_jcc; @@ -1197,7 +1197,7 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot, \ gen_ ## op(s, ot); \ gen_op_add_reg_im(s->aflag, R_ECX, -1); \ gen_op_set_cc_op(s->cc_op); \ - gen_jcc1(s, s->cc_op, (JCC_Z << 1) | (nz ^ 1), l2); \ + gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \ if (!s->jmp_opt) \ gen_op_jz_ecx(s->aflag, l2); \ gen_jmp(s, cur_eip); \ @@ -2303,13 +2303,15 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) static inline void gen_jcc(DisasContext *s, int b, target_ulong val, target_ulong next_eip) { - int l1, l2, cc_op; + int l1, l2; - cc_op = s->cc_op; - gen_update_cc_op(s); + if (s->cc_op != CC_OP_DYNAMIC) { + gen_op_set_cc_op(s->cc_op); + } if (s->jmp_opt) { l1 = gen_new_label(); - gen_jcc1(s, cc_op, b, l1); + gen_jcc1(s, b, l1); + s->cc_op = CC_OP_DYNAMIC; gen_goto_tb(s, 0, next_eip); @@ -2320,7 +2322,8 @@ static inline void gen_jcc(DisasContext *s, int b, l1 = gen_new_label(); l2 = gen_new_label(); - gen_jcc1(s, cc_op, b, l1); + gen_jcc1(s, b, l1); + s->cc_op = CC_OP_DYNAMIC; gen_jmp_im(next_eip); tcg_gen_br(l2); @@ -2343,7 +2346,7 @@ static void gen_setcc(DisasContext *s, int b) t0 = tcg_temp_local_new(); tcg_gen_movi_tl(t0, 0); l1 = gen_new_label(); - gen_jcc1(s, s->cc_op, b ^ 1, l1); + gen_jcc1(s, b ^ 1, l1); tcg_gen_movi_tl(t0, 1); gen_set_label(l1); tcg_gen_mov_tl(cpu_T[0], t0); @@ -6027,7 +6030,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, }; op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1); l1 = gen_new_label(); - gen_jcc1(s, s->cc_op, op1, l1); + gen_jcc1(s, op1, l1); gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg)); gen_set_label(l1); } @@ -6418,7 +6421,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (ot == OT_LONG) { /* XXX: specific Intel behaviour ? */ l1 = gen_new_label(); - gen_jcc1(s, s->cc_op, b ^ 1, l1); + gen_jcc1(s, b ^ 1, l1); tcg_gen_mov_tl(cpu_regs[reg], t0); gen_set_label(l1); tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]); @@ -6426,7 +6429,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, #endif { l1 = gen_new_label(); - gen_jcc1(s, s->cc_op, b ^ 1, l1); + gen_jcc1(s, b ^ 1, l1); gen_op_mov_reg_v(ot, reg, t0); gen_set_label(l1); } From 52320e15dbb0c2531501a924972e63cdb59742a7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 6 Oct 2012 00:18:55 +0200 Subject: [PATCH 07/61] target-i386: move carry computation for inc/dec closer to gen_op_set_cc_op This ensures the invariant that cpu_cc_op matches s->cc_op when calling the helpers. The next patches need this because gen_compute_eflags and gen_compute_eflags_c will take care of setting cpu_cc_op. 
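The ordering matters because INC and DEC leave CF untouched: the translator must read the carry out of the previous instruction's lazy flag state before cc_op is overwritten with CC_OP_INCB + ot, and the helper doing that read expects cpu_cc_op to still describe the previous operation. A small standalone illustration of the underlying x86 behaviour (plain C, names invented here):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct flags { bool cf, zf; };

static uint8_t do_add(uint8_t a, uint8_t b, struct flags *f)
{
    uint8_t r = a + b;
    f->cf = r < a;                    /* unsigned overflow */
    f->zf = (r == 0);
    return r;
}

/* INC updates ZF/SF/OF/... but must preserve the existing CF. */
static uint8_t do_inc(uint8_t a, struct flags *f)
{
    bool saved_cf = f->cf;            /* capture the old carry first ... */
    uint8_t r = a + 1;
    f->zf = (r == 0);                 /* ... because the rest is rewritten */
    f->cf = saved_cf;                 /* CF is unchanged by INC */
    return r;
}

int main(void)
{
    struct flags f = { false, false };
    do_add(0xff, 1, &f);              /* sets CF=1 */
    uint8_t r = do_inc(0xff, &f);     /* wraps to 0, CF stays 1 */
    printf("r=%u CF=%d ZF=%d\n", (unsigned)r, f.cf, f.zf);  /* r=0 CF=1 ZF=1 */
    return 0;
}

Moving gen_compute_eflags_c before the add/sub is the translation-time equivalent of the saved_cf line above.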
Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 48a3255a401f..ed373c3ad6cf 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1373,6 +1373,7 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c) gen_op_ld_T0_A0(ot + s1->mem_index); if (s1->cc_op != CC_OP_DYNAMIC) gen_op_set_cc_op(s1->cc_op); + gen_compute_eflags_c(cpu_cc_src); if (c > 0) { tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1); s1->cc_op = CC_OP_INCB + ot; @@ -1384,7 +1385,6 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c) gen_op_mov_reg_T0(ot, d); else gen_op_st_T0_A0(ot + s1->mem_index); - gen_compute_eflags_c(cpu_cc_src); tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); } From 0ff6addd92979b9759efa1c0945526e6ac78ce5b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 6 Oct 2012 00:18:55 +0200 Subject: [PATCH 08/61] target-i386: move eflags computation closer to gen_op_set_cc_op This ensures the invariant that cpu_cc_op matches s->cc_op when calling the helpers. The next patches need this because gen_compute_eflags and gen_compute_eflags_c will take care of setting cpu_cc_op. Always compute EFLAGS first since it is needed whenever the shift is non-zero, i.e. most of the time. This makes it possible to remove some writes of CC_OP_EFLAGS to cpu_cc_op and more importantly removes cases where s->cc_op becomes CC_OP_DYNAMIC. These are slow and we want to avoid them: CC_OP_EFLAGS is quite efficient once we paid the initial cost of computing the flags. Finally, always follow gen_compute_eflags(cpu_cc_src) by setting s->cc_op and discarding cpu_cc_dst. Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index ed373c3ad6cf..0970954217eb 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1597,14 +1597,16 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, gen_op_mov_reg_v(ot, op1, t0); } - /* update eflags */ + /* update eflags. It is needed anyway most of the time, do it always. 
*/ if (s->cc_op != CC_OP_DYNAMIC) gen_op_set_cc_op(s->cc_op); + gen_compute_eflags(cpu_cc_src); + tcg_gen_discard_tl(cpu_cc_dst); + s->cc_op = CC_OP_EFLAGS; label2 = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2); - gen_compute_eflags(cpu_cc_src); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C)); tcg_gen_xor_tl(cpu_tmp0, t2, t0); tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1)); @@ -1615,12 +1617,8 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, } tcg_gen_andi_tl(t0, t0, CC_C); tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0); - - tcg_gen_discard_tl(cpu_cc_dst); - tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS); - + gen_set_label(label2); - s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ tcg_temp_free(t0); tcg_temp_free(t1); @@ -1684,6 +1682,9 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2, gen_op_set_cc_op(s->cc_op); gen_compute_eflags(cpu_cc_src); + tcg_gen_discard_tl(cpu_cc_dst); + s->cc_op = CC_OP_EFLAGS; + tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C)); tcg_gen_xor_tl(cpu_tmp0, t1, t0); tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1)); @@ -1694,10 +1695,6 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2, } tcg_gen_andi_tl(t0, t0, CC_C); tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0); - - tcg_gen_discard_tl(cpu_cc_dst); - tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS); - s->cc_op = CC_OP_EFLAGS; } tcg_temp_free(t0); From f5847c912d62d60a9917ed1e88cd6d4548fd40f3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 6 Oct 2012 00:18:55 +0200 Subject: [PATCH 09/61] target-i386: compute eflags outside rcl/rcr helper Always compute EFLAGS first since it is needed whenever the shift is non-zero, i.e. most of the time. This makes it possible to remove some writes of CC_OP_EFLAGS to cpu_cc_op and more importantly removes cases where s->cc_op becomes CC_OP_DYNAMIC. Also, we can remove cc_tmp and just modify cc_src from within the helper. Finally, always follow gen_compute_eflags(cpu_cc_src) by setting s->cc_op and discarding cpu_cc_dst. 
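RCL and RCR rotate through the carry flag, so the incoming CF is an input to the data result as well as to the new flags; that is why EFLAGS is now materialized into cc_src before the helper runs, and why the helper can write the updated flags straight back to cc_src instead of going through the cc_tmp staging variable. A standalone sketch of 8-bit rotate-through-carry (plain C, not the helper template itself):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* The carry flag acts as a ninth bit of the operand. */
static uint8_t rcl8(uint8_t x, unsigned count, bool *cf)
{
    count %= 9;                       /* 8 data bits + CF = 9-bit rotate */
    while (count--) {
        bool new_cf = (x >> 7) & 1;   /* bit shifted out of the top */
        x = (uint8_t)((x << 1) | (*cf ? 1 : 0));
        *cf = new_cf;
    }
    return x;
}

int main(void)
{
    bool cf = true;
    uint8_t v = rcl8(0x80, 1, &cf);
    printf("v=0x%02x CF=%d\n", v, cf); /* v=0x01 CF=1 */
    return 0;
}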
Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/cpu.h | 1 - target-i386/shift_helper_template.h | 12 ++++-------- target-i386/translate.c | 20 ++++---------------- 3 files changed, 8 insertions(+), 25 deletions(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 7577e4f8bb56..cd35cd52c099 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -764,7 +764,6 @@ typedef struct CPUX86State { XMMReg xmm_regs[CPU_NB_REGS]; XMMReg xmm_t0; MMXReg mmx_t0; - target_ulong cc_tmp; /* temporary for rcr/rcl */ /* sysenter registers */ uint32_t sysenter_cs; diff --git a/target-i386/shift_helper_template.h b/target-i386/shift_helper_template.h index dda0da30cf8e..cf91a2d284d5 100644 --- a/target-i386/shift_helper_template.h +++ b/target-i386/shift_helper_template.h @@ -55,7 +55,7 @@ target_ulong glue(helper_rcl, SUFFIX)(CPUX86State *env, target_ulong t0, count = rclb_table[count]; #endif if (count) { - eflags = helper_cc_compute_all(env, CC_OP); + eflags = env->cc_src; t0 &= DATA_MASK; src = t0; res = (t0 << count) | ((target_ulong)(eflags & CC_C) << (count - 1)); @@ -63,11 +63,9 @@ target_ulong glue(helper_rcl, SUFFIX)(CPUX86State *env, target_ulong t0, res |= t0 >> (DATA_BITS + 1 - count); } t0 = res; - env->cc_tmp = (eflags & ~(CC_C | CC_O)) | + env->cc_src = (eflags & ~(CC_C | CC_O)) | (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) | ((src >> (DATA_BITS - count)) & CC_C); - } else { - env->cc_tmp = -1; } return t0; } @@ -86,7 +84,7 @@ target_ulong glue(helper_rcr, SUFFIX)(CPUX86State *env, target_ulong t0, count = rclb_table[count]; #endif if (count) { - eflags = helper_cc_compute_all(env, CC_OP); + eflags = env->cc_src; t0 &= DATA_MASK; src = t0; res = (t0 >> count) | @@ -95,11 +93,9 @@ target_ulong glue(helper_rcr, SUFFIX)(CPUX86State *env, target_ulong t0, res |= t0 << (DATA_BITS + 1 - count); } t0 = res; - env->cc_tmp = (eflags & ~(CC_C | CC_O)) | + env->cc_src = (eflags & ~(CC_C | CC_O)) | (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) | ((src >> (count - 1)) & CC_C); - } else { - env->cc_tmp = -1; } return t0; } diff --git a/target-i386/translate.c b/target-i386/translate.c index 0970954217eb..80483c0ffd03 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -51,7 +51,7 @@ /* global register indexes */ static TCGv_ptr cpu_env; -static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp; +static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst; static TCGv_i32 cpu_cc_op; static TCGv cpu_regs[CPU_NB_REGS]; /* local temps */ @@ -1706,10 +1706,11 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2, static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, int is_right) { - int label1; - if (s->cc_op != CC_OP_DYNAMIC) gen_op_set_cc_op(s->cc_op); + gen_compute_eflags(cpu_cc_src); + tcg_gen_discard_tl(cpu_cc_dst); + s->cc_op = CC_OP_EFLAGS; /* load */ if (op1 == OR_TMP0) @@ -1757,17 +1758,6 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, gen_op_st_T0_A0(ot + s->mem_index); else gen_op_mov_reg_T0(ot, op1); - - /* update eflags */ - label1 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_cc_tmp, -1, label1); - - tcg_gen_mov_tl(cpu_cc_src, cpu_cc_tmp); - tcg_gen_discard_tl(cpu_cc_dst); - tcg_gen_movi_i32(cpu_cc_op, CC_OP_EFLAGS); - - gen_set_label(label1); - s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ } /* XXX: add faster immediate case */ @@ -7763,8 +7753,6 @@ void optimize_flags_init(void) "cc_src"); cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_dst), "cc_dst"); - cpu_cc_tmp = 
tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_tmp), - "cc_tmp"); #ifdef TARGET_X86_64 cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0, From 6fa38ed219587723fcab9b878f42269489d51705 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 6 Oct 2012 00:18:55 +0200 Subject: [PATCH 10/61] target-i386: clean up sahf Discard CC_DST and set s->cc_op immediately after computing EFLAGS. Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 80483c0ffd03..64564e0712f9 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -6502,10 +6502,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->cc_op != CC_OP_DYNAMIC) gen_op_set_cc_op(s->cc_op); gen_compute_eflags(cpu_cc_src); + tcg_gen_discard_tl(cpu_cc_dst); + s->cc_op = CC_OP_EFLAGS; + tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O); tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C); tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]); - s->cc_op = CC_OP_EFLAGS; break; case 0x9f: /* lahf */ if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) From 5bdb91b0dd66b7e0fdfc801601c433ad4752aeb0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 12 Oct 2012 13:35:40 +0200 Subject: [PATCH 11/61] target-i386: use gen_jcc1 to compile loopz Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 64564e0712f9..6fcd0f6e2029 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -6896,13 +6896,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_set_cc_op(s->cc_op); gen_op_add_reg_im(s->aflag, R_ECX, -1); gen_op_jz_ecx(s->aflag, l3); - gen_compute_eflags(cpu_tmp0); - tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_Z); - if (b == 0) { - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1); - } else { - tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, l1); - } + gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1); break; case 2: /* loop */ gen_op_add_reg_im(s->aflag, R_ECX, -1); From c7b3c87397a3458d3d26499c483e0badaf79849c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 5 Oct 2012 18:29:21 +0200 Subject: [PATCH 12/61] target-i386: factor gen_op_set_cc_op/tcg_gen_discard_tl around computing flags Before computing flags we need to store the cc_op to memory. Move this to gen_compute_eflags_c and gen_compute_eflags rather than doing it all over the place. Alo, after computing the flags in cpu_cc_src we are in EFLAGS mode. Set s->cc_op and discard cpu_cc_dst in gen_compute_eflags, rather than doing it all over the place. 
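The pattern being factored out is: store the pending cc_op, materialize the flags once, then switch the lazy state to EFLAGS mode, in which cc_src simply holds the flag word and instructions such as cmc/clc/stc become plain bit operations on it. A toy standalone version of that state change (plain C; constants and names are illustrative, not QEMU's):

#include <stdint.h>
#include <stdio.h>

#define CC_C 0x0001
#define CC_Z 0x0040

enum { CC_OP_SUB, CC_OP_EFLAGS };

struct cc_state {
    int op;
    uint32_t src;                     /* operand, or flag word in EFLAGS mode */
    uint32_t dst;
};

/* Materialize the flags from the lazy state and cache them in src. */
static uint32_t compute_eflags(struct cc_state *s)
{
    if (s->op != CC_OP_EFLAGS) {
        uint32_t fl = 0;
        if (s->dst == 0) {
            fl |= CC_Z;
        }
        if (s->dst + s->src < s->src) {  /* borrow, since dst = a - src */
            fl |= CC_C;
        }
        s->src = fl;                     /* cache the computed flags ... */
        s->op = CC_OP_EFLAGS;            /* ... and change mode exactly once */
    }
    return s->src;
}

int main(void)
{
    struct cc_state s = { CC_OP_SUB, 5, (uint32_t)3 - 5 };  /* models cmp 3,5 */
    uint32_t fl = compute_eflags(&s);
    s.src ^= CC_C;                       /* cmc is now a one-line bit flip */
    printf("flags=%#x after cmc=%#x op=%d\n", fl, s.src, s.op);
    return 0;
}

Centralizing the store of cc_op, the discard of cpu_cc_dst and the switch to CC_OP_EFLAGS inside gen_compute_eflags is that same change of mode happening in exactly one place.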
Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 103 +++++++++++++++------------------------- 1 file changed, 37 insertions(+), 66 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 6fcd0f6e2029..89f290822bf9 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -834,55 +834,63 @@ static void gen_op_update_neg_cc(void) } /* compute eflags.C to reg */ -static void gen_compute_eflags_c(TCGv reg) +static void gen_compute_eflags_c(DisasContext *s, TCGv reg) { + if (s->cc_op != CC_OP_DYNAMIC) { + gen_op_set_cc_op(s->cc_op); + } gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op); tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); } -/* compute all eflags to cc_src */ -static void gen_compute_eflags(TCGv reg) +/* compute all eflags to reg */ +static void gen_compute_eflags(DisasContext *s, TCGv reg) { + if (s->cc_op != CC_OP_DYNAMIC) { + gen_op_set_cc_op(s->cc_op); + } gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op); + if (TCGV_EQUAL(reg, cpu_cc_src)) { + tcg_gen_discard_tl(cpu_cc_dst); + s->cc_op = CC_OP_EFLAGS; + } tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); } static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); switch(jcc_op) { case JCC_O: - gen_compute_eflags(cpu_T[0]); + gen_compute_eflags(s, cpu_T[0]); tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11); tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); break; case JCC_B: - gen_compute_eflags_c(cpu_T[0]); + gen_compute_eflags_c(s, cpu_T[0]); break; case JCC_Z: - gen_compute_eflags(cpu_T[0]); + gen_compute_eflags(s, cpu_T[0]); tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6); tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); break; case JCC_BE: - gen_compute_eflags(cpu_tmp0); + gen_compute_eflags(s, cpu_tmp0); tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6); tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); break; case JCC_S: - gen_compute_eflags(cpu_T[0]); + gen_compute_eflags(s, cpu_T[0]); tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7); tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); break; case JCC_P: - gen_compute_eflags(cpu_T[0]); + gen_compute_eflags(s, cpu_T[0]); tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2); tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); break; case JCC_L: - gen_compute_eflags(cpu_tmp0); + gen_compute_eflags(s, cpu_tmp0); tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */ tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */ tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0); @@ -890,7 +898,7 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) break; default: case JCC_LE: - gen_compute_eflags(cpu_tmp0); + gen_compute_eflags(s, cpu_tmp0); tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */ tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */ tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */ @@ -1278,9 +1286,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) } switch(op) { case OP_ADCL: - if (s1->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s1->cc_op); - gen_compute_eflags_c(cpu_tmp4); + gen_compute_eflags_c(s1, cpu_tmp4); tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]); tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4); if (d != OR_TMP0) @@ -1295,9 +1301,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) s1->cc_op = CC_OP_DYNAMIC; break; case OP_SBBL: - if (s1->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s1->cc_op); - gen_compute_eflags_c(cpu_tmp4); + gen_compute_eflags_c(s1, cpu_tmp4); tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]); tcg_gen_sub_tl(cpu_T[0], cpu_T[0], 
cpu_tmp4); if (d != OR_TMP0) @@ -1371,9 +1375,7 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c) gen_op_mov_TN_reg(ot, 0, d); else gen_op_ld_T0_A0(ot + s1->mem_index); - if (s1->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s1->cc_op); - gen_compute_eflags_c(cpu_cc_src); + gen_compute_eflags_c(s1, cpu_cc_src); if (c > 0) { tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1); s1->cc_op = CC_OP_INCB + ot; @@ -1598,11 +1600,8 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, } /* update eflags. It is needed anyway most of the time, do it always. */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - gen_compute_eflags(cpu_cc_src); - tcg_gen_discard_tl(cpu_cc_dst); - s->cc_op = CC_OP_EFLAGS; + gen_compute_eflags(s, cpu_cc_src); + assert(s->cc_op == CC_OP_EFLAGS); label2 = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2); @@ -1678,12 +1677,8 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2, if (op2 != 0) { /* update eflags */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - - gen_compute_eflags(cpu_cc_src); - tcg_gen_discard_tl(cpu_cc_dst); - s->cc_op = CC_OP_EFLAGS; + gen_compute_eflags(s, cpu_cc_src); + assert(s->cc_op == CC_OP_EFLAGS); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C)); tcg_gen_xor_tl(cpu_tmp0, t1, t0); @@ -1708,9 +1703,8 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, { if (s->cc_op != CC_OP_DYNAMIC) gen_op_set_cc_op(s->cc_op); - gen_compute_eflags(cpu_cc_src); - tcg_gen_discard_tl(cpu_cc_dst); - s->cc_op = CC_OP_EFLAGS; + gen_compute_eflags(s, cpu_cc_src); + assert(s->cc_op == CC_OP_EFLAGS); /* load */ if (op1 == OR_TMP0) @@ -6499,12 +6493,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) goto illegal_op; gen_op_mov_TN_reg(OT_BYTE, 0, R_AH); - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - gen_compute_eflags(cpu_cc_src); - tcg_gen_discard_tl(cpu_cc_dst); - s->cc_op = CC_OP_EFLAGS; - + gen_compute_eflags(s, cpu_cc_src); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O); tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C); tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]); @@ -6512,33 +6501,22 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0x9f: /* lahf */ if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - gen_compute_eflags(cpu_T[0]); + gen_compute_eflags(s, cpu_T[0]); /* Note: gen_compute_eflags() only gives the condition codes */ tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02); gen_op_mov_reg_T0(OT_BYTE, R_AH); break; case 0xf5: /* cmc */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - gen_compute_eflags(cpu_cc_src); + gen_compute_eflags(s, cpu_cc_src); tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C); - s->cc_op = CC_OP_EFLAGS; break; case 0xf8: /* clc */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - gen_compute_eflags(cpu_cc_src); + gen_compute_eflags(s, cpu_cc_src); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C); - s->cc_op = CC_OP_EFLAGS; break; case 0xf9: /* stc */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - gen_compute_eflags(cpu_cc_src); + gen_compute_eflags(s, cpu_cc_src); tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C); - s->cc_op = CC_OP_EFLAGS; break; case 0xfc: /* cld */ tcg_gen_movi_i32(cpu_tmp2_i32, 1); @@ -6866,9 +6844,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext 
*s, case 0xd6: /* salc */ if (CODE64(s)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - gen_compute_eflags_c(cpu_T[0]); + gen_compute_eflags_c(s, cpu_T[0]); tcg_gen_neg_tl(cpu_T[0], cpu_T[0]); gen_op_mov_reg_T0(OT_BYTE, R_EAX); break; @@ -6892,8 +6868,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, switch(b) { case 0: /* loopnz */ case 1: /* loopz */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); gen_op_add_reg_im(s->aflag, R_ECX, -1); gen_op_jz_ecx(s->aflag, l3); gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1); @@ -7432,12 +7406,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } else { gen_op_mov_reg_v(ot, rm, t0); } - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); - gen_compute_eflags(cpu_cc_src); + gen_compute_eflags(s, cpu_cc_src); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z); tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2); - s->cc_op = CC_OP_EFLAGS; tcg_temp_free(t0); tcg_temp_free(t1); tcg_temp_free(t2); From fee71888a29ab9f31b23386383812a4f5c953829 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 16 Jan 2013 16:23:46 -0800 Subject: [PATCH 13/61] target-i386: Name the cc_op enumeration Signed-off-by: Richard Henderson --- target-i386/cpu.h | 4 ++-- target-i386/translate.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index cd35cd52c099..8c4c6052998a 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -582,7 +582,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPU_INTERRUPT_TPR CPU_INTERRUPT_TGT_INT_3 -enum { +typedef enum { CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */ @@ -637,7 +637,7 @@ enum { CC_OP_SARQ, CC_OP_NB, -}; +} CCOp; typedef struct SegmentCache { uint32_t selector; diff --git a/target-i386/translate.c b/target-i386/translate.c index 89f290822bf9..cf71878e8b23 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -89,7 +89,7 @@ typedef struct DisasContext { int rex_x, rex_b; #endif int ss32; /* 32 bit stack segment */ - int cc_op; /* current CC operation */ + CCOp cc_op; /* current CC operation */ int addseg; /* non zero if either DS/ES/SS have a non zero base */ int f_st; /* currently unused */ int vm86; /* vm86 mode */ From 3ca51d07dae5b2d2301431c55b08d4faaad95d91 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 12:30:52 -0800 Subject: [PATCH 14/61] target-i386: Introduce set_cc_op This will provide a good hook into which we can consolidate all of the cc variable discards. 
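The point of routing every assignment through a trivial setter is that cross-cutting bookkeeping can later be added in exactly one place instead of at dozens of call sites. A compact sketch of the sort of consolidation meant by "cc variable discards" (plain C; the liveness rule shown is invented purely for illustration and is not the rule the later patches use):

#include <stdio.h>

enum cc_op { CC_OP_DYNAMIC, CC_OP_EFLAGS, CC_OP_SUB };

struct disas {
    enum cc_op cc_op;
    int cc_dst_live;                  /* stand-in for a live TCG temporary */
};

static void set_cc_op(struct disas *s, enum cc_op op)
{
    if (op == CC_OP_EFLAGS && s->cc_dst_live) {
        s->cc_dst_live = 0;           /* one central place to drop dead state */
        printf("discarding cc_dst\n");
    }
    s->cc_op = op;
}

int main(void)
{
    struct disas s = { CC_OP_SUB, 1 };
    set_cc_op(&s, CC_OP_EFLAGS);      /* EFLAGS mode only uses cc_src */
    printf("cc_op=%d cc_dst_live=%d\n", s.cc_op, s.cc_dst_live);
    return 0;
}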
Signed-off-by: Richard Henderson --- target-i386/translate.c | 134 +++++++++++++++++++++------------------- 1 file changed, 69 insertions(+), 65 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index cf71878e8b23..6df76d6a389d 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -173,6 +173,11 @@ enum { OR_A0, /* temporary register used when doing address evaluation */ }; +static inline void set_cc_op(DisasContext *s, CCOp op) +{ + s->cc_op = op; +} + static inline void gen_op_movl_T0_0(void) { tcg_gen_movi_tl(cpu_T[0], 0); @@ -799,7 +804,7 @@ static inline void gen_update_cc_op(DisasContext *s) { if (s->cc_op != CC_OP_DYNAMIC) { gen_op_set_cc_op(s->cc_op); - s->cc_op = CC_OP_DYNAMIC; + set_cc_op(s, CC_OP_DYNAMIC); } } @@ -852,7 +857,7 @@ static void gen_compute_eflags(DisasContext *s, TCGv reg) gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op); if (TCGV_EQUAL(reg, cpu_cc_src)) { tcg_gen_discard_tl(cpu_cc_dst); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); } tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); } @@ -1120,7 +1125,7 @@ static inline void gen_scas(DisasContext *s, int ot) gen_op_cmpl_T0_T1_cc(); gen_op_movl_T0_Dshift(ot); gen_op_add_reg_T0(s->aflag, R_EDI); - s->cc_op = CC_OP_SUBB + ot; + set_cc_op(s, CC_OP_SUBB + ot); } static inline void gen_cmps(DisasContext *s, int ot) @@ -1133,7 +1138,7 @@ static inline void gen_cmps(DisasContext *s, int ot) gen_op_movl_T0_Dshift(ot); gen_op_add_reg_T0(s->aflag, R_ESI); gen_op_add_reg_T0(s->aflag, R_EDI); - s->cc_op = CC_OP_SUBB + ot; + set_cc_op(s, CC_OP_SUBB + ot); } static inline void gen_ins(DisasContext *s, int ot) @@ -1209,7 +1214,7 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot, \ if (!s->jmp_opt) \ gen_op_jz_ecx(s->aflag, l2); \ gen_jmp(s, cur_eip); \ - s->cc_op = CC_OP_DYNAMIC; \ + set_cc_op(s, CC_OP_DYNAMIC); \ } GEN_REPZ(movs) @@ -1298,7 +1303,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4); tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2); tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot); - s1->cc_op = CC_OP_DYNAMIC; + set_cc_op(s1, CC_OP_DYNAMIC); break; case OP_SBBL: gen_compute_eflags_c(s1, cpu_tmp4); @@ -1313,7 +1318,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4); tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2); tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot); - s1->cc_op = CC_OP_DYNAMIC; + set_cc_op(s1, CC_OP_DYNAMIC); break; case OP_ADDL: gen_op_addl_T0_T1(); @@ -1322,7 +1327,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) else gen_op_st_T0_A0(ot + s1->mem_index); gen_op_update2_cc(); - s1->cc_op = CC_OP_ADDB + ot; + set_cc_op(s1, CC_OP_ADDB + ot); break; case OP_SUBL: tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]); @@ -1331,7 +1336,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) else gen_op_st_T0_A0(ot + s1->mem_index); gen_op_update2_cc(); - s1->cc_op = CC_OP_SUBB + ot; + set_cc_op(s1, CC_OP_SUBB + ot); break; default: case OP_ANDL: @@ -1341,7 +1346,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) else gen_op_st_T0_A0(ot + s1->mem_index); gen_op_update1_cc(); - s1->cc_op = CC_OP_LOGICB + ot; + set_cc_op(s1, CC_OP_LOGICB + ot); break; case OP_ORL: tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]); @@ -1350,7 +1355,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) else gen_op_st_T0_A0(ot + s1->mem_index); gen_op_update1_cc(); - s1->cc_op = CC_OP_LOGICB + 
ot; + set_cc_op(s1, CC_OP_LOGICB + ot); break; case OP_XORL: tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_T[1]); @@ -1359,11 +1364,11 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) else gen_op_st_T0_A0(ot + s1->mem_index); gen_op_update1_cc(); - s1->cc_op = CC_OP_LOGICB + ot; + set_cc_op(s1, CC_OP_LOGICB + ot); break; case OP_CMPL: gen_op_cmpl_T0_T1_cc(); - s1->cc_op = CC_OP_SUBB + ot; + set_cc_op(s1, CC_OP_SUBB + ot); break; } } @@ -1378,10 +1383,10 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c) gen_compute_eflags_c(s1, cpu_cc_src); if (c > 0) { tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1); - s1->cc_op = CC_OP_INCB + ot; + set_cc_op(s1, CC_OP_INCB + ot); } else { tcg_gen_addi_tl(cpu_T[0], cpu_T[0], -1); - s1->cc_op = CC_OP_DECB + ot; + set_cc_op(s1, CC_OP_DECB + ot); } if (d != OR_TMP0) gen_op_mov_reg_T0(ot, d); @@ -1468,7 +1473,7 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, } gen_set_label(shift_label); - s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ + set_cc_op(s, CC_OP_DYNAMIC); /* cannot predict flags after */ tcg_temp_free(t0); tcg_temp_free(t1); @@ -1519,10 +1524,7 @@ static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2, if (op2 != 0) { tcg_gen_mov_tl(cpu_cc_src, cpu_tmp4); tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); - if (is_right) - s->cc_op = CC_OP_SARB + ot; - else - s->cc_op = CC_OP_SHLB + ot; + set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot); } } @@ -1875,7 +1877,7 @@ static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1, tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot); } gen_set_label(label2); - s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ + set_cc_op(s, CC_OP_DYNAMIC); /* cannot predict flags after */ tcg_temp_free(t0); tcg_temp_free(t1); @@ -2292,7 +2294,7 @@ static inline void gen_jcc(DisasContext *s, int b, if (s->jmp_opt) { l1 = gen_new_label(); gen_jcc1(s, b, l1); - s->cc_op = CC_OP_DYNAMIC; + set_cc_op(s, CC_OP_DYNAMIC); gen_goto_tb(s, 0, next_eip); @@ -2304,7 +2306,7 @@ static inline void gen_jcc(DisasContext *s, int b, l1 = gen_new_label(); l2 = gen_new_label(); gen_jcc1(s, b, l1); - s->cc_op = CC_OP_DYNAMIC; + set_cc_op(s, CC_OP_DYNAMIC); gen_jmp_im(next_eip); tcg_gen_br(l2); @@ -3792,8 +3794,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1); - if (b == 0x17) - s->cc_op = CC_OP_EFLAGS; + if (b == 0x17) { + set_cc_op(s, CC_OP_EFLAGS); + } break; case 0x338: /* crc32 */ crc32: @@ -3995,7 +3998,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, val = cpu_ldub_code(env, s->pc++); if ((b & 0xfc) == 0x60) { /* pcmpXstrX */ - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); if (s->dflag == 2) /* The helper must use entire 64-bit gp registers */ @@ -4116,7 +4119,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, break; } if (b == 0x2e || b == 0x2f) { - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); } } } @@ -4300,7 +4303,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, xor_zero: /* xor reg, reg optimisation */ gen_op_movl_T0_0(); - s->cc_op = CC_OP_LOGICB + ot; + set_cc_op(s, CC_OP_LOGICB + ot); gen_op_mov_reg_T0(ot, reg); gen_op_update1_cc(); break; @@ -4415,7 +4418,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, val = insn_get(env, s, ot); gen_op_movl_T1_im(val); gen_op_testl_T0_T1_cc(); - s->cc_op = CC_OP_LOGICB + ot; + set_cc_op(s, CC_OP_LOGICB + ot); break; case 2: /* not */ 
tcg_gen_not_tl(cpu_T[0], cpu_T[0]); @@ -4433,7 +4436,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_mov_reg_T0(ot, rm); } gen_op_update_neg_cc(); - s->cc_op = CC_OP_SUBB + ot; + set_cc_op(s, CC_OP_SUBB + ot); break; case 4: /* mul */ switch(ot) { @@ -4446,7 +4449,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_mov_reg_T0(OT_WORD, R_EAX); tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00); - s->cc_op = CC_OP_MULB; + set_cc_op(s, CC_OP_MULB); break; case OT_WORD: gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); @@ -4459,7 +4462,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); gen_op_mov_reg_T0(OT_WORD, R_EDX); tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); - s->cc_op = CC_OP_MULW; + set_cc_op(s, CC_OP_MULW); break; default: case OT_LONG: @@ -4491,12 +4494,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); } #endif - s->cc_op = CC_OP_MULL; + set_cc_op(s, CC_OP_MULL); break; #ifdef TARGET_X86_64 case OT_QUAD: gen_helper_mulq_EAX_T0(cpu_env, cpu_T[0]); - s->cc_op = CC_OP_MULQ; + set_cc_op(s, CC_OP_MULQ); break; #endif } @@ -4513,7 +4516,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]); tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); - s->cc_op = CC_OP_MULB; + set_cc_op(s, CC_OP_MULB); break; case OT_WORD: gen_op_mov_TN_reg(OT_WORD, 1, R_EAX); @@ -4527,7 +4530,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16); gen_op_mov_reg_T0(OT_WORD, R_EDX); - s->cc_op = CC_OP_MULW; + set_cc_op(s, CC_OP_MULW); break; default: case OT_LONG: @@ -4561,12 +4564,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); } #endif - s->cc_op = CC_OP_MULL; + set_cc_op(s, CC_OP_MULL); break; #ifdef TARGET_X86_64 case OT_QUAD: gen_helper_imulq_EAX_T0(cpu_env, cpu_T[0]); - s->cc_op = CC_OP_MULQ; + set_cc_op(s, CC_OP_MULQ); break; #endif } @@ -4747,7 +4750,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); gen_op_mov_TN_reg(ot, 1, reg); gen_op_testl_T0_T1_cc(); - s->cc_op = CC_OP_LOGICB + ot; + set_cc_op(s, CC_OP_LOGICB + ot); break; case 0xa8: /* test eAX, Iv */ @@ -4761,7 +4764,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_mov_TN_reg(ot, 0, OR_EAX); gen_op_movl_T1_im(val); gen_op_testl_T0_T1_cc(); - s->cc_op = CC_OP_LOGICB + ot; + set_cc_op(s, CC_OP_LOGICB + ot); break; case 0x98: /* CWDE/CBW */ @@ -4862,7 +4865,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0); } gen_op_mov_reg_T0(ot, reg); - s->cc_op = CC_OP_MULB + ot; + set_cc_op(s, CC_OP_MULB + ot); break; case 0x1c0: case 0x1c1: /* xadd Ev, Gv */ @@ -4889,7 +4892,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_mov_reg_T1(ot, reg); } gen_op_update2_cc(); - s->cc_op = CC_OP_ADDB + ot; + set_cc_op(s, CC_OP_ADDB + ot); break; case 0x1b0: case 0x1b1: /* cmpxchg Ev, Gv */ @@ -4941,7 +4944,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_set_label(label2); tcg_gen_mov_tl(cpu_cc_src, t0); tcg_gen_mov_tl(cpu_cc_dst, t2); - s->cc_op = CC_OP_SUBB + ot; + set_cc_op(s, CC_OP_SUBB + ot); 
tcg_temp_free(t0); tcg_temp_free(t1); tcg_temp_free(t2); @@ -4973,7 +4976,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_lea_modrm(env, s, modrm, ®_addr, &offset_addr); gen_helper_cmpxchg8b(cpu_env, cpu_A0); } - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; /**************************/ @@ -5925,14 +5928,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_set_cc_op(s->cc_op); gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); gen_helper_fucomi_ST0_FT0(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x1e: /* fcomi */ if (s->cc_op != CC_OP_DYNAMIC) gen_op_set_cc_op(s->cc_op); gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); gen_helper_fcomi_ST0_FT0(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x28: /* ffree sti */ gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg)); @@ -5989,7 +5992,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); gen_helper_fucomi_ST0_FT0(cpu_env); gen_helper_fpop(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x3e: /* fcomip */ if (s->cc_op != CC_OP_DYNAMIC) @@ -5997,7 +6000,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); gen_helper_fcomi_ST0_FT0(cpu_env); gen_helper_fpop(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x10 ... 0x13: /* fcmovxx */ case 0x18 ... 0x1b: @@ -6277,13 +6280,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!s->pe) { /* real mode */ gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag)); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); } else if (s->vm86) { if (s->iopl != 3) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } else { gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag)); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); } } else { if (s->cc_op != CC_OP_DYNAMIC) @@ -6291,7 +6294,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_jmp_im(pc_start - s->cs_base); gen_helper_iret_protected(cpu_env, tcg_const_i32(s->dflag), tcg_const_i32(s->pc - s->cs_base)); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); } gen_eob(s); break; @@ -6483,7 +6486,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } } gen_pop_update(s); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); /* abort translation because TF/AC flag may change */ gen_jmp_im(s->pc - s->cs_base); gen_eob(s); @@ -6606,7 +6609,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0); break; } - s->cc_op = CC_OP_SARB + ot; + set_cc_op(s, CC_OP_SARB + ot); if (op != 0) { if (mod != 3) gen_op_st_T0_A0(ot + s->mem_index); @@ -6653,7 +6656,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_movi_tl(cpu_cc_dst, 1); gen_set_label(label1); tcg_gen_discard_tl(cpu_cc_src); - s->cc_op = CC_OP_LOGICB + ot; + set_cc_op(s, CC_OP_LOGICB + ot); } tcg_temp_free(t0); } @@ -6666,7 +6669,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->cc_op != CC_OP_DYNAMIC) gen_op_set_cc_op(s->cc_op); gen_helper_daa(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x2f: /* das */ if (CODE64(s)) @@ -6674,7 +6677,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->cc_op != CC_OP_DYNAMIC) 
gen_op_set_cc_op(s->cc_op); gen_helper_das(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x37: /* aaa */ if (CODE64(s)) @@ -6682,7 +6685,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->cc_op != CC_OP_DYNAMIC) gen_op_set_cc_op(s->cc_op); gen_helper_aaa(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x3f: /* aas */ if (CODE64(s)) @@ -6690,7 +6693,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->cc_op != CC_OP_DYNAMIC) gen_op_set_cc_op(s->cc_op); gen_helper_aas(cpu_env); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0xd4: /* aam */ if (CODE64(s)) @@ -6700,7 +6703,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base); } else { gen_helper_aam(cpu_env, tcg_const_i32(val)); - s->cc_op = CC_OP_LOGICB; + set_cc_op(s, CC_OP_LOGICB); } break; case 0xd5: /* aad */ @@ -6708,7 +6711,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, goto illegal_op; val = cpu_ldub_code(env, s->pc++); gen_helper_aad(cpu_env, tcg_const_i32(val)); - s->cc_op = CC_OP_LOGICB; + set_cc_op(s, CC_OP_LOGICB); break; /************************/ /* misc */ @@ -6967,8 +6970,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_jmp_im(pc_start - s->cs_base); gen_helper_sysret(cpu_env, tcg_const_i32(s->dflag)); /* condition codes are modified only in long mode */ - if (s->lma) - s->cc_op = CC_OP_EFLAGS; + if (s->lma) { + set_cc_op(s, CC_OP_EFLAGS); + } gen_eob(s); } break; @@ -7053,7 +7057,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } else { gen_helper_verw(cpu_env, cpu_T[0]); } - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; default: goto illegal_op; @@ -7438,7 +7442,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1); gen_op_mov_reg_v(ot, reg, t0); gen_set_label(label1); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); tcg_temp_free(t0); } break; @@ -7681,7 +7685,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot)); gen_op_mov_reg_T0(ot, reg); - s->cc_op = CC_OP_EFLAGS; + set_cc_op(s, CC_OP_EFLAGS); break; case 0x10e ... 0x10f: /* 3DNow! instructions, ignore prefixes */ From e207582f6660e0e2d10a2e79e664e456e80b2887 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 12:34:26 -0800 Subject: [PATCH 15/61] target-i386: Don't clobber s->cc_op in gen_update_cc_op Use a dirty flag to know whether env->cc_op is up to date, rather than forcing s->cc_op to DYNAMIC and losing info. 
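This is the standard dirty-flag write-back pattern: keep the translator's compile-time knowledge of cc_op, and spill it to env->cc_op only when it has changed since the last spill, instead of forgetting it by forcing CC_OP_DYNAMIC. A standalone plain-C model of the two functions involved (the stored_cc_op global stands in for env->cc_op; otherwise this follows the shape of the patch below):

#include <stdbool.h>
#include <stdio.h>

enum cc_op { CC_OP_DYNAMIC, CC_OP_EFLAGS, CC_OP_SUB };

static enum cc_op stored_cc_op = CC_OP_DYNAMIC;   /* models env->cc_op */

struct disas {
    enum cc_op cc_op;                 /* what the translator knows */
    bool cc_op_dirty;                 /* does the stored copy lag behind? */
};

static void set_cc_op(struct disas *s, enum cc_op op)
{
    if (s->cc_op != op) {
        s->cc_op = op;
        /* DYNAMIC is translator-only and never stored, so it is clean. */
        s->cc_op_dirty = (op != CC_OP_DYNAMIC);
    }
}

static void gen_update_cc_op(struct disas *s)
{
    if (s->cc_op_dirty) {
        stored_cc_op = s->cc_op;      /* spill once ... */
        s->cc_op_dirty = false;       /* ... and keep the known value */
    }
}

int main(void)
{
    struct disas s = { CC_OP_DYNAMIC, false };
    set_cc_op(&s, CC_OP_SUB);
    gen_update_cc_op(&s);
    gen_update_cc_op(&s);             /* second spill is a no-op */
    printf("stored=%d known=%d dirty=%d\n", stored_cc_op, s.cc_op, s.cc_op_dirty);
    return 0;
}

Because CC_OP_DYNAMIC is never written back, it is always treated as clean, which is what lets later code keep using s->cc_op after a spill instead of degrading it to DYNAMIC.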
Signed-off-by: Richard Henderson --- target-i386/translate.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 6df76d6a389d..cabdeda371af 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -90,6 +90,7 @@ typedef struct DisasContext { #endif int ss32; /* 32 bit stack segment */ CCOp cc_op; /* current CC operation */ + bool cc_op_dirty; int addseg; /* non zero if either DS/ES/SS have a non zero base */ int f_st; /* currently unused */ int vm86; /* vm86 mode */ @@ -173,9 +174,27 @@ enum { OR_A0, /* temporary register used when doing address evaluation */ }; -static inline void set_cc_op(DisasContext *s, CCOp op) +static void set_cc_op(DisasContext *s, CCOp op) { - s->cc_op = op; + if (s->cc_op != op) { + s->cc_op = op; + /* The DYNAMIC setting is translator only, and should never be + stored. Thus we always consider it clean. */ + s->cc_op_dirty = (op != CC_OP_DYNAMIC); + } +} + +static inline void gen_op_set_cc_op(int32_t val) +{ + tcg_gen_movi_i32(cpu_cc_op, val); +} + +static void gen_update_cc_op(DisasContext *s) +{ + if (s->cc_op_dirty) { + gen_op_set_cc_op(s->cc_op); + s->cc_op_dirty = false; + } } static inline void gen_op_movl_T0_0(void) @@ -444,11 +463,6 @@ static inline void gen_op_add_reg_T0(int size, int reg) } } -static inline void gen_op_set_cc_op(int32_t val) -{ - tcg_gen_movi_i32(cpu_cc_op, val); -} - static inline void gen_op_addl_A0_reg_sN(int shift, int reg) { tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]); @@ -800,14 +814,6 @@ static inline void gen_movs(DisasContext *s, int ot) gen_op_add_reg_T0(s->aflag, R_EDI); } -static inline void gen_update_cc_op(DisasContext *s) -{ - if (s->cc_op != CC_OP_DYNAMIC) { - gen_op_set_cc_op(s->cc_op); - set_cc_op(s, CC_OP_DYNAMIC); - } -} - static void gen_op_update1_cc(void) { tcg_gen_discard_tl(cpu_cc_src); @@ -7816,6 +7822,7 @@ static inline void gen_intermediate_code_internal(CPUX86State *env, dc->tf = (flags >> TF_SHIFT) & 1; dc->singlestep_enabled = env->singlestep_enabled; dc->cc_op = CC_OP_DYNAMIC; + dc->cc_op_dirty = false; dc->cs_base = cs_base; dc->tb = tb; dc->popl_esp_hack = 0; From 773cdfccb835cc82aca2b2ff34277b4bf58d6bb9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 12:43:12 -0800 Subject: [PATCH 16/61] target-i386: Use gen_update_cc_op everywhere All of the conditional calls to gen_op_set_cc_op go away, and gen_op_set_cc_op itself gets inlined into its only remaining caller. 
Signed-off-by: Richard Henderson --- target-i386/translate.c | 177 +++++++++++++--------------------------- 1 file changed, 57 insertions(+), 120 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index cabdeda371af..9dd3081a8f4e 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -184,15 +184,10 @@ static void set_cc_op(DisasContext *s, CCOp op) } } -static inline void gen_op_set_cc_op(int32_t val) -{ - tcg_gen_movi_i32(cpu_cc_op, val); -} - static void gen_update_cc_op(DisasContext *s) { if (s->cc_op_dirty) { - gen_op_set_cc_op(s->cc_op); + tcg_gen_movi_i32(cpu_cc_op, s->cc_op); s->cc_op_dirty = false; } } @@ -771,8 +766,7 @@ static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip, state_saved = 0; if (s->pe && (s->cpl > s->iopl || s->vm86)) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(cur_eip); state_saved = 1; tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); @@ -790,8 +784,7 @@ static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip, } if(s->flags & HF_SVMI_MASK) { if (!state_saved) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(cur_eip); } svm_flags |= (1 << (4 + ot)); @@ -847,9 +840,7 @@ static void gen_op_update_neg_cc(void) /* compute eflags.C to reg */ static void gen_compute_eflags_c(DisasContext *s, TCGv reg) { - if (s->cc_op != CC_OP_DYNAMIC) { - gen_op_set_cc_op(s->cc_op); - } + gen_update_cc_op(s); gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op); tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); } @@ -857,9 +848,7 @@ static void gen_compute_eflags_c(DisasContext *s, TCGv reg) /* compute all eflags to reg */ static void gen_compute_eflags(DisasContext *s, TCGv reg) { - if (s->cc_op != CC_OP_DYNAMIC) { - gen_op_set_cc_op(s->cc_op); - } + gen_update_cc_op(s); gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op); if (TCGV_EQUAL(reg, cpu_cc_src)) { tcg_gen_discard_tl(cpu_cc_dst); @@ -1215,7 +1204,7 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot, \ l2 = gen_jz_ecx_string(s, next_eip); \ gen_ ## op(s, ot); \ gen_op_add_reg_im(s->aflag, R_ECX, -1); \ - gen_op_set_cc_op(s->cc_op); \ + gen_update_cc_op(s); \ gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \ if (!s->jmp_opt) \ gen_op_jz_ecx(s->aflag, l2); \ @@ -1449,10 +1438,8 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, gen_op_mov_reg_T0(ot, op1); } - /* update eflags if non zero shift */ - if (s->cc_op != CC_OP_DYNAMIC) { - gen_op_set_cc_op(s->cc_op); - } + /* update eflags */ + gen_update_cc_op(s); tcg_gen_mov_tl(t1, cpu_T[0]); @@ -1709,8 +1696,7 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2, static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, int is_right) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_compute_eflags(s, cpu_cc_src); assert(s->cc_op == CC_OP_EFLAGS); @@ -1869,8 +1855,7 @@ static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1, } /* update eflags */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); label2 = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2); @@ -2294,9 +2279,7 @@ static inline void gen_jcc(DisasContext *s, int b, { int l1, l2; - if (s->cc_op != CC_OP_DYNAMIC) { - gen_op_set_cc_op(s->cc_op); - } + gen_update_cc_op(s); if (s->jmp_opt) { l1 = gen_new_label(); gen_jcc1(s, b, l1); @@ -2375,8 +2358,7 @@ static void gen_movl_seg_T0(DisasContext *s, int seg_reg, 
target_ulong cur_eip) { if (s->pe && !s->vm86) { /* XXX: optimize by finding processor state dynamically */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(cur_eip); tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32); @@ -2405,8 +2387,7 @@ gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start, /* no SVM activated; fast case */ if (likely(!(s->flags & HF_SVMI_MASK))) return; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type), tcg_const_i64(param)); @@ -2653,8 +2634,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level) static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(cur_eip); gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno)); s->is_jmp = DISAS_TB_JUMP; @@ -2665,8 +2645,7 @@ static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip) static void gen_interrupt(DisasContext *s, int intno, target_ulong cur_eip, target_ulong next_eip) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(cur_eip); gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno), tcg_const_i32(next_eip - cur_eip)); @@ -2675,8 +2654,7 @@ static void gen_interrupt(DisasContext *s, int intno, static void gen_debug(DisasContext *s, target_ulong cur_eip) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(cur_eip); gen_helper_debug(cpu_env); s->is_jmp = DISAS_TB_JUMP; @@ -2686,8 +2664,7 @@ static void gen_debug(DisasContext *s, target_ulong cur_eip) if needed */ static void gen_eob(DisasContext *s) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); if (s->tb->flags & HF_INHIBIT_IRQ_MASK) { gen_helper_reset_inhibit_irq(cpu_env); } @@ -4695,8 +4672,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_ldu_T0_A0(OT_WORD + s->mem_index); do_lcall: if (s->pe && !s->vm86) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T[1], @@ -4722,8 +4698,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_ldu_T0_A0(OT_WORD + s->mem_index); do_ljmp: if (s->pe && !s->vm86) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T[1], @@ -4967,8 +4942,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) goto illegal_op; gen_jmp_im(pc_start - s->cs_base); - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_lea_modrm(env, s, modrm, ®_addr, &offset_addr); gen_helper_cmpxchg16b(cpu_env, cpu_A0); } else @@ -4977,8 +4951,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!(s->cpuid_features & CPUID_CX8)) goto illegal_op; gen_jmp_im(pc_start - s->cs_base); - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_lea_modrm(env, s, modrm, ®_addr, &offset_addr); 
gen_helper_cmpxchg8b(cpu_env, cpu_A0); } @@ -5651,8 +5624,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 0x0c: /* fldenv mem */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(s->dflag)); break; @@ -5662,8 +5634,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_helper_fldcw(cpu_env, cpu_tmp2_i32); break; case 0x0e: /* fnstenv mem */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(s->dflag)); break; @@ -5673,27 +5644,23 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_st_T0_A0(OT_WORD + s->mem_index); break; case 0x1d: /* fldt mem */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fldt_ST0(cpu_env, cpu_A0); break; case 0x1f: /* fstpt mem */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fstt_ST0(cpu_env, cpu_A0); gen_helper_fpop(cpu_env); break; case 0x2c: /* frstor mem */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(s->dflag)); break; case 0x2e: /* fnsave mem */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(s->dflag)); break; @@ -5703,14 +5670,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_st_T0_A0(OT_WORD + s->mem_index); break; case 0x3c: /* fbld */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fbld_ST0(cpu_env, cpu_A0); break; case 0x3e: /* fbstp */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fbst_ST0(cpu_env, cpu_A0); gen_helper_fpop(cpu_env); @@ -5748,8 +5713,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, switch(rm) { case 0: /* fnop */ /* check exceptions (FreeBSD FPU probe) */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fwait(cpu_env); break; @@ -5930,15 +5894,13 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 0x1d: /* fucomi */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); gen_helper_fucomi_ST0_FT0(cpu_env); set_cc_op(s, CC_OP_EFLAGS); break; case 0x1e: /* fcomi */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); gen_helper_fcomi_ST0_FT0(cpu_env); set_cc_op(s, CC_OP_EFLAGS); @@ -5993,16 +5955,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 0x3d: /* fucomip */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); gen_helper_fucomi_ST0_FT0(cpu_env); gen_helper_fpop(cpu_env); set_cc_op(s, CC_OP_EFLAGS); break; case 0x3e: /* fcomip */ - if (s->cc_op != CC_OP_DYNAMIC) - 
gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); gen_helper_fcomi_ST0_FT0(cpu_env); gen_helper_fpop(cpu_env); @@ -6255,8 +6215,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, s->pc += 2; do_lret: if (s->pe && !s->vm86) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_lret_protected(cpu_env, tcg_const_i32(s->dflag), tcg_const_i32(val)); @@ -6295,8 +6254,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, set_cc_op(s, CC_OP_EFLAGS); } } else { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_iret_protected(cpu_env, tcg_const_i32(s->dflag), tcg_const_i32(s->pc - s->cs_base)); @@ -6434,8 +6392,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->vm86 && s->iopl != 3) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } else { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_read_eflags(cpu_T[0], cpu_env); gen_push_T0(s); } @@ -6672,32 +6629,28 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0x27: /* daa */ if (CODE64(s)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_daa(cpu_env); set_cc_op(s, CC_OP_EFLAGS); break; case 0x2f: /* das */ if (CODE64(s)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_das(cpu_env); set_cc_op(s, CC_OP_EFLAGS); break; case 0x37: /* aaa */ if (CODE64(s)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_aaa(cpu_env); set_cc_op(s, CC_OP_EFLAGS); break; case 0x3f: /* aas */ if (CODE64(s)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_helper_aas(cpu_env); set_cc_op(s, CC_OP_EFLAGS); break; @@ -6739,8 +6692,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, (HF_MP_MASK | HF_TS_MASK)) { gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); } else { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fwait(cpu_env); } @@ -6759,8 +6711,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0xce: /* into */ if (CODE64(s)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start)); break; @@ -6906,8 +6857,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->cpl != 0) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } else { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); if (b & 2) { gen_helper_rdmsr(cpu_env); @@ -6917,8 +6867,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 0x131: /* rdtsc */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); if (use_icount) gen_io_start(); @@ -6929,8 +6878,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 0x133: /* rdpmc */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - 
s->cs_base); gen_helper_rdpmc(cpu_env); break; @@ -6984,8 +6932,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, break; #endif case 0x1a2: /* cpuid */ - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_cpuid(cpu_env); break; @@ -6993,8 +6940,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->cpl != 0) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } else { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start)); s->is_jmp = DISAS_TB_JUMP; @@ -7056,8 +7002,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!s->pe || s->vm86) goto illegal_op; gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0); - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); if (op == 4) { gen_helper_verr(cpu_env, cpu_T[0]); } else { @@ -7095,8 +7040,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); #ifdef TARGET_X86_64 if (s->aflag == 2) { @@ -7156,8 +7100,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 2: /* lgdt */ case 3: /* lidt */ if (mod == 3) { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); switch(rm) { case 0: /* VMRUN */ @@ -7285,8 +7228,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (s->cpl != 0) { gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } else { - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_lea_modrm(env, s, modrm, ®_addr, &offset_addr); gen_helper_invlpg(cpu_env, cpu_A0); @@ -7319,8 +7261,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 1: /* rdtscp */ if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) goto illegal_op; - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); if (use_icount) gen_io_start(); @@ -7436,8 +7377,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, reg = ((modrm >> 3) & 7) | rex_r; gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0); t0 = tcg_temp_local_new(); - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); if (b == 0x102) { gen_helper_lar(t0, cpu_env, cpu_T[0]); } else { @@ -7502,8 +7442,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 3: case 4: case 8: - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); if (b & 2) { gen_op_mov_TN_reg(ot, 0, rm); @@ -7592,8 +7531,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, break; } gen_lea_modrm(env, s, modrm, ®_addr, &offset_addr); - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32((s->dflag == 2))); break; @@ -7606,8 +7544,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, break; } gen_lea_modrm(env, s, modrm, ®_addr, &offset_addr); - if (s->cc_op != CC_OP_DYNAMIC) - gen_op_set_cc_op(s->cc_op); + 
gen_update_cc_op(s); gen_jmp_im(pc_start - s->cs_base); gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32((s->dflag == 2))); From 1608ecca95188dcf4f78072be48f41dbe2062b25 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 5 Oct 2012 18:42:59 +0200 Subject: [PATCH 17/61] target-i386: add helper functions to get other flags Introduce new functions to extract PF, SF, OF, ZF in addition to CF. These provide single entry points for optimizing accesses to a single flag. Reviewed-by: Blue Swirl Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 48 ++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 9dd3081a8f4e..9bbe969cd646 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -857,21 +857,49 @@ static void gen_compute_eflags(DisasContext *s, TCGv reg) tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); } +/* compute eflags.P to reg */ +static void gen_compute_eflags_p(DisasContext *s, TCGv reg) +{ + gen_compute_eflags(s, reg); + tcg_gen_shri_tl(reg, reg, 2); + tcg_gen_andi_tl(reg, reg, 1); +} + +/* compute eflags.S to reg */ +static void gen_compute_eflags_s(DisasContext *s, TCGv reg) +{ + gen_compute_eflags(s, reg); + tcg_gen_shri_tl(reg, reg, 7); + tcg_gen_andi_tl(reg, reg, 1); +} + +/* compute eflags.O to reg */ +static void gen_compute_eflags_o(DisasContext *s, TCGv reg) +{ + gen_compute_eflags(s, reg); + tcg_gen_shri_tl(reg, reg, 11); + tcg_gen_andi_tl(reg, reg, 1); +} + +/* compute eflags.Z to reg */ +static void gen_compute_eflags_z(DisasContext *s, TCGv reg) +{ + gen_compute_eflags(s, reg); + tcg_gen_shri_tl(reg, reg, 6); + tcg_gen_andi_tl(reg, reg, 1); +} + static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) { switch(jcc_op) { case JCC_O: - gen_compute_eflags(s, cpu_T[0]); - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 11); - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + gen_compute_eflags_o(s, cpu_T[0]); break; case JCC_B: gen_compute_eflags_c(s, cpu_T[0]); break; case JCC_Z: - gen_compute_eflags(s, cpu_T[0]); - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 6); - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + gen_compute_eflags_z(s, cpu_T[0]); break; case JCC_BE: gen_compute_eflags(s, cpu_tmp0); @@ -880,14 +908,10 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); break; case JCC_S: - gen_compute_eflags(s, cpu_T[0]); - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 7); - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + gen_compute_eflags_s(s, cpu_T[0]); break; case JCC_P: - gen_compute_eflags(s, cpu_T[0]); - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 2); - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + gen_compute_eflags_p(s, cpu_T[0]); break; case JCC_L: gen_compute_eflags(s, cpu_tmp0); From d229edce1c58e6bb13d386bef4c31fc2e3850cb6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 13:03:26 -0800 Subject: [PATCH 18/61] target-i386: do not compute eflags multiple times consecutively After calling gen_compute_eflags, leave the computed value in cc_reg_src and set cc_op to CC_OP_EFLAGS. The next few patches will remove anyway most calls to gen_compute_eflags. As a result of this change it is more natural to remove the register argument from gen_compute_eflags and change all the callers. 
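(Illustration only: a standalone C model of the caching effect. The names and the direct helper call are made up for the sketch; in translate.c the cached value lives in cpu_cc_src and cc_compute_all is a TCG helper call.)

    #include <assert.h>

    enum { CC_OP_DYNAMIC, CC_OP_EFLAGS, CC_OP_SUBL };

    static int helper_calls;

    struct model { int cc_op; unsigned cc_src; };

    /* Stand-in for the (expensive) cc_compute_all helper. */
    static unsigned cc_compute_all(void)
    {
        helper_calls++;
        return 0x46;            /* arbitrary flags value */
    }

    static void gen_compute_eflags(struct model *s)
    {
        if (s->cc_op == CC_OP_EFLAGS) {
            return;             /* already cached in cc_src */
        }
        s->cc_src = cc_compute_all();
        s->cc_op = CC_OP_EFLAGS;
    }

    int main(void)
    {
        struct model s = { CC_OP_SUBL, 0 };
        gen_compute_eflags(&s);
        gen_compute_eflags(&s); /* second call hits the CC_OP_EFLAGS early exit */
        assert(helper_calls == 1 && s.cc_src == 0x46);
        return 0;
    }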
Reviewed-by: Blue Swirl Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 72 ++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 9bbe969cd646..6204764a5caf 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -845,47 +845,48 @@ static void gen_compute_eflags_c(DisasContext *s, TCGv reg) tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); } -/* compute all eflags to reg */ -static void gen_compute_eflags(DisasContext *s, TCGv reg) +/* compute all eflags to cc_src */ +static void gen_compute_eflags(DisasContext *s) { + if (s->cc_op == CC_OP_EFLAGS) { + return; + } gen_update_cc_op(s); gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op); - if (TCGV_EQUAL(reg, cpu_cc_src)) { - tcg_gen_discard_tl(cpu_cc_dst); - set_cc_op(s, CC_OP_EFLAGS); - } - tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); + tcg_gen_discard_tl(cpu_cc_dst); + set_cc_op(s, CC_OP_EFLAGS); + tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32); } /* compute eflags.P to reg */ static void gen_compute_eflags_p(DisasContext *s, TCGv reg) { - gen_compute_eflags(s, reg); - tcg_gen_shri_tl(reg, reg, 2); + gen_compute_eflags(s); + tcg_gen_shri_tl(reg, cpu_cc_src, 2); tcg_gen_andi_tl(reg, reg, 1); } /* compute eflags.S to reg */ static void gen_compute_eflags_s(DisasContext *s, TCGv reg) { - gen_compute_eflags(s, reg); - tcg_gen_shri_tl(reg, reg, 7); + gen_compute_eflags(s); + tcg_gen_shri_tl(reg, cpu_cc_src, 7); tcg_gen_andi_tl(reg, reg, 1); } /* compute eflags.O to reg */ static void gen_compute_eflags_o(DisasContext *s, TCGv reg) { - gen_compute_eflags(s, reg); - tcg_gen_shri_tl(reg, reg, 11); + gen_compute_eflags(s); + tcg_gen_shri_tl(reg, cpu_cc_src, 11); tcg_gen_andi_tl(reg, reg, 1); } /* compute eflags.Z to reg */ static void gen_compute_eflags_z(DisasContext *s, TCGv reg) { - gen_compute_eflags(s, reg); - tcg_gen_shri_tl(reg, reg, 6); + gen_compute_eflags(s); + tcg_gen_shri_tl(reg, cpu_cc_src, 6); tcg_gen_andi_tl(reg, reg, 1); } @@ -902,9 +903,9 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) gen_compute_eflags_z(s, cpu_T[0]); break; case JCC_BE: - gen_compute_eflags(s, cpu_tmp0); - tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 6); - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); + gen_compute_eflags(s); + tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 6); + tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_cc_src); tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); break; case JCC_S: @@ -914,18 +915,18 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) gen_compute_eflags_p(s, cpu_T[0]); break; case JCC_L: - gen_compute_eflags(s, cpu_tmp0); - tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */ - tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 7); /* CC_S */ + gen_compute_eflags(s); + tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */ + tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 7); /* CC_S */ tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0); tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); break; default: case JCC_LE: - gen_compute_eflags(s, cpu_tmp0); - tcg_gen_shri_tl(cpu_T[0], cpu_tmp0, 11); /* CC_O */ - tcg_gen_shri_tl(cpu_tmp4, cpu_tmp0, 7); /* CC_S */ - tcg_gen_shri_tl(cpu_tmp0, cpu_tmp0, 6); /* CC_Z */ + gen_compute_eflags(s); + tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */ + tcg_gen_shri_tl(cpu_tmp4, cpu_cc_src, 7); /* CC_S */ + tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 6); /* CC_Z */ tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4); tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); tcg_gen_andi_tl(cpu_T[0], 
cpu_T[0], 1); @@ -1619,7 +1620,7 @@ static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, } /* update eflags. It is needed anyway most of the time, do it always. */ - gen_compute_eflags(s, cpu_cc_src); + gen_compute_eflags(s); assert(s->cc_op == CC_OP_EFLAGS); label2 = gen_new_label(); @@ -1696,7 +1697,7 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2, if (op2 != 0) { /* update eflags */ - gen_compute_eflags(s, cpu_cc_src); + gen_compute_eflags(s); assert(s->cc_op == CC_OP_EFLAGS); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C)); @@ -1720,8 +1721,7 @@ static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2, static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, int is_right) { - gen_update_cc_op(s); - gen_compute_eflags(s, cpu_cc_src); + gen_compute_eflags(s); assert(s->cc_op == CC_OP_EFLAGS); /* load */ @@ -6483,7 +6483,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) goto illegal_op; gen_op_mov_TN_reg(OT_BYTE, 0, R_AH); - gen_compute_eflags(s, cpu_cc_src); + gen_compute_eflags(s); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O); tcg_gen_andi_tl(cpu_T[0], cpu_T[0], CC_S | CC_Z | CC_A | CC_P | CC_C); tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_T[0]); @@ -6491,21 +6491,21 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0x9f: /* lahf */ if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) goto illegal_op; - gen_compute_eflags(s, cpu_T[0]); + gen_compute_eflags(s); /* Note: gen_compute_eflags() only gives the condition codes */ - tcg_gen_ori_tl(cpu_T[0], cpu_T[0], 0x02); + tcg_gen_ori_tl(cpu_T[0], cpu_cc_src, 0x02); gen_op_mov_reg_T0(OT_BYTE, R_AH); break; case 0xf5: /* cmc */ - gen_compute_eflags(s, cpu_cc_src); + gen_compute_eflags(s); tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C); break; case 0xf8: /* clc */ - gen_compute_eflags(s, cpu_cc_src); + gen_compute_eflags(s); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C); break; case 0xf9: /* stc */ - gen_compute_eflags(s, cpu_cc_src); + gen_compute_eflags(s); tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C); break; case 0xfc: /* cld */ @@ -7381,7 +7381,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } else { gen_op_mov_reg_v(ot, rm, t0); } - gen_compute_eflags(s, cpu_cc_src); + gen_compute_eflags(s); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z); tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2); tcg_temp_free(t0); From ccfcdd09bf91aabe039d2dae0b5ec3a05f083e59 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 13:07:10 -0800 Subject: [PATCH 19/61] target-i386: no need to flush out cc_op before gen_eob This makes code more similar to the other callers of gen_eob, especially loopz/loopnz/jcxz. 
Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 6204764a5caf..71104fb92623 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -2303,8 +2303,8 @@ static inline void gen_jcc(DisasContext *s, int b, { int l1, l2; - gen_update_cc_op(s); if (s->jmp_opt) { + gen_update_cc_op(s); l1 = gen_new_label(); gen_jcc1(s, b, l1); set_cc_op(s, CC_OP_DYNAMIC); @@ -2315,11 +2315,9 @@ static inline void gen_jcc(DisasContext *s, int b, gen_goto_tb(s, 1, val); s->is_jmp = DISAS_TB_JUMP; } else { - l1 = gen_new_label(); l2 = gen_new_label(); gen_jcc1(s, b, l1); - set_cc_op(s, CC_OP_DYNAMIC); gen_jmp_im(next_eip); tcg_gen_br(l2); From b666265b2071e4288110f6553b598efe00246d06 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 13:26:38 -0800 Subject: [PATCH 20/61] target-i386: Move CC discards to set_cc_op This gets us universal coverage, rather than scattering discards around at various places. As a bonus, we do not emit redundant discards e.g. between sequential logic insns. Signed-off-by: Richard Henderson --- target-i386/translate.c | 48 +++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 71104fb92623..a767b50b57ee 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -174,14 +174,48 @@ enum { OR_A0, /* temporary register used when doing address evaluation */ }; +enum { + USES_CC_DST = 1, + USES_CC_SRC = 2, +}; + +/* Bit set if the global variable is live after setting CC_OP to X. */ +static const uint8_t cc_op_live[CC_OP_NB] = { + [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC, + [CC_OP_EFLAGS] = USES_CC_SRC, + [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST, + [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC, +}; + static void set_cc_op(DisasContext *s, CCOp op) { - if (s->cc_op != op) { - s->cc_op = op; - /* The DYNAMIC setting is translator only, and should never be - stored. Thus we always consider it clean. */ - s->cc_op_dirty = (op != CC_OP_DYNAMIC); + int dead; + + if (s->cc_op == op) { + return; + } + + /* Discard CC computation that will no longer be used. */ + dead = cc_op_live[s->cc_op] & ~cc_op_live[op]; + if (dead & USES_CC_DST) { + tcg_gen_discard_tl(cpu_cc_dst); } + if (dead & USES_CC_SRC) { + tcg_gen_discard_tl(cpu_cc_src); + } + + s->cc_op = op; + /* The DYNAMIC setting is translator only, and should never be + stored. Thus we always consider it clean. 
*/ + s->cc_op_dirty = (op != CC_OP_DYNAMIC); } static void gen_update_cc_op(DisasContext *s) @@ -809,7 +843,6 @@ static inline void gen_movs(DisasContext *s, int ot) static void gen_op_update1_cc(void) { - tcg_gen_discard_tl(cpu_cc_src); tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); } @@ -827,7 +860,6 @@ static inline void gen_op_cmpl_T0_T1_cc(void) static inline void gen_op_testl_T0_T1_cc(void) { - tcg_gen_discard_tl(cpu_cc_src); tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]); } @@ -853,7 +885,6 @@ static void gen_compute_eflags(DisasContext *s) } gen_update_cc_op(s); gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op); - tcg_gen_discard_tl(cpu_cc_dst); set_cc_op(s, CC_OP_EFLAGS); tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32); } @@ -6640,7 +6671,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_mov_reg_T0(ot, reg); tcg_gen_movi_tl(cpu_cc_dst, 1); gen_set_label(label1); - tcg_gen_discard_tl(cpu_cc_src); set_cc_op(s, CC_OP_LOGICB + ot); } tcg_temp_free(t0); From 086c40778485f9a52d41a66fd4ef0d8723a2ac0a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 13:33:59 -0800 Subject: [PATCH 21/61] target-i386: do not call helper to compute ZF/SF ZF, SF and PF can always be computed from CC_DST except in the CC_OP_EFLAGS case (and CC_OP_DYNAMIC, which just resolves to CC_OP_EFLAGS in gen_compute_eflags). Use setcond to compute ZF and SF. We could also use a table lookup to compute PF. Reviewed-by: Blue Swirl Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index a767b50b57ee..026fbd685297 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -900,9 +900,22 @@ static void gen_compute_eflags_p(DisasContext *s, TCGv reg) /* compute eflags.S to reg */ static void gen_compute_eflags_s(DisasContext *s, TCGv reg) { - gen_compute_eflags(s); - tcg_gen_shri_tl(reg, cpu_cc_src, 7); - tcg_gen_andi_tl(reg, reg, 1); + switch (s->cc_op) { + case CC_OP_DYNAMIC: + gen_compute_eflags(s); + /* FALLTHRU */ + case CC_OP_EFLAGS: + tcg_gen_shri_tl(reg, cpu_cc_src, 7); + tcg_gen_andi_tl(reg, reg, 1); + break; + default: + { + int size = (s->cc_op - CC_OP_ADDB) & 3; + TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true); + tcg_gen_setcondi_tl(TCG_COND_LT, reg, t0, 0); + } + break; + } } /* compute eflags.O to reg */ @@ -916,9 +929,21 @@ static void gen_compute_eflags_o(DisasContext *s, TCGv reg) /* compute eflags.Z to reg */ static void gen_compute_eflags_z(DisasContext *s, TCGv reg) { - gen_compute_eflags(s); - tcg_gen_shri_tl(reg, cpu_cc_src, 6); - tcg_gen_andi_tl(reg, reg, 1); + switch (s->cc_op) { + case CC_OP_DYNAMIC: + gen_compute_eflags(s); + /* FALLTHRU */ + case CC_OP_EFLAGS: + tcg_gen_shri_tl(reg, cpu_cc_src, 6); + tcg_gen_andi_tl(reg, reg, 1); + break; + default: + { + int size = (s->cc_op - CC_OP_ADDB) & 3; + TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false); + tcg_gen_setcondi_tl(TCG_COND_EQ, reg, t0, 0); + } + } } static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) From 8115f117357a63bff84522caac6c3bcadee0a285 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 13:37:28 -0800 Subject: [PATCH 22/61] target-i386: use inverted setcond when computing NS or NZ Make gen_compute_eflags_z and gen_compute_eflags_s able to compute the inverted condition, and use this in gen_setcc_slow_T0. 
We cannot do it yet in gen_compute_eflags_c, but prepare the code for it anyway. It is not worthwhile for PF, as usual. shr+and+xor could be replaced by and+setcond. I'm not doing it yet. Reviewed-by: Blue Swirl Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 49 ++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 026fbd685297..06aa7bf639e1 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -870,11 +870,14 @@ static void gen_op_update_neg_cc(void) } /* compute eflags.C to reg */ -static void gen_compute_eflags_c(DisasContext *s, TCGv reg) +static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv) { gen_update_cc_op(s); gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op); tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); + if (inv) { + tcg_gen_xori_tl(reg, reg, 1); + } } /* compute all eflags to cc_src */ @@ -898,7 +901,7 @@ static void gen_compute_eflags_p(DisasContext *s, TCGv reg) } /* compute eflags.S to reg */ -static void gen_compute_eflags_s(DisasContext *s, TCGv reg) +static void gen_compute_eflags_s(DisasContext *s, TCGv reg, bool inv) { switch (s->cc_op) { case CC_OP_DYNAMIC: @@ -907,12 +910,15 @@ static void gen_compute_eflags_s(DisasContext *s, TCGv reg) case CC_OP_EFLAGS: tcg_gen_shri_tl(reg, cpu_cc_src, 7); tcg_gen_andi_tl(reg, reg, 1); + if (inv) { + tcg_gen_xori_tl(reg, reg, 1); + } break; default: { int size = (s->cc_op - CC_OP_ADDB) & 3; TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true); - tcg_gen_setcondi_tl(TCG_COND_LT, reg, t0, 0); + tcg_gen_setcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, reg, t0, 0); } break; } @@ -927,7 +933,7 @@ static void gen_compute_eflags_o(DisasContext *s, TCGv reg) } /* compute eflags.Z to reg */ -static void gen_compute_eflags_z(DisasContext *s, TCGv reg) +static void gen_compute_eflags_z(DisasContext *s, TCGv reg, bool inv) { switch (s->cc_op) { case CC_OP_DYNAMIC: @@ -936,27 +942,33 @@ static void gen_compute_eflags_z(DisasContext *s, TCGv reg) case CC_OP_EFLAGS: tcg_gen_shri_tl(reg, cpu_cc_src, 6); tcg_gen_andi_tl(reg, reg, 1); + if (inv) { + tcg_gen_xori_tl(reg, reg, 1); + } break; default: { int size = (s->cc_op - CC_OP_ADDB) & 3; TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false); - tcg_gen_setcondi_tl(TCG_COND_EQ, reg, t0, 0); + tcg_gen_setcondi_tl(inv ? 
TCG_COND_NE : TCG_COND_EQ, reg, t0, 0); } + break; } } -static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) +static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op, bool inv) { switch(jcc_op) { case JCC_O: gen_compute_eflags_o(s, cpu_T[0]); break; case JCC_B: - gen_compute_eflags_c(s, cpu_T[0]); + gen_compute_eflags_c(s, cpu_T[0], inv); + inv = false; break; case JCC_Z: - gen_compute_eflags_z(s, cpu_T[0]); + gen_compute_eflags_z(s, cpu_T[0], inv); + inv = false; break; case JCC_BE: gen_compute_eflags(s); @@ -965,7 +977,8 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); break; case JCC_S: - gen_compute_eflags_s(s, cpu_T[0]); + gen_compute_eflags_s(s, cpu_T[0], inv); + inv = false; break; case JCC_P: gen_compute_eflags_p(s, cpu_T[0]); @@ -988,6 +1001,9 @@ static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op) tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); break; } + if (inv) { + tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1); + } } /* return true if setcc_slow is not needed (WARNING: must be kept in @@ -1153,7 +1169,7 @@ static inline void gen_jcc1(DisasContext *s, int b, int l1) break; default: slow_jcc: - gen_setcc_slow_T0(s, jcc_op); + gen_setcc_slow_T0(s, jcc_op, false); tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, cpu_T[0], 0, l1); break; @@ -1367,7 +1383,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) } switch(op) { case OP_ADCL: - gen_compute_eflags_c(s1, cpu_tmp4); + gen_compute_eflags_c(s1, cpu_tmp4, false); tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]); tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4); if (d != OR_TMP0) @@ -1382,7 +1398,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) set_cc_op(s1, CC_OP_DYNAMIC); break; case OP_SBBL: - gen_compute_eflags_c(s1, cpu_tmp4); + gen_compute_eflags_c(s1, cpu_tmp4, false); tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]); tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4); if (d != OR_TMP0) @@ -1456,7 +1472,7 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c) gen_op_mov_TN_reg(ot, 0, d); else gen_op_ld_T0_A0(ot + s1->mem_index); - gen_compute_eflags_c(s1, cpu_cc_src); + gen_compute_eflags_c(s1, cpu_cc_src, false); if (c > 0) { tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1); set_cc_op(s1, CC_OP_INCB + ot); @@ -2407,10 +2423,7 @@ static void gen_setcc(DisasContext *s, int b) worth to */ inv = b & 1; jcc_op = (b >> 1) & 7; - gen_setcc_slow_T0(s, jcc_op); - if (inv) { - tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1); - } + gen_setcc_slow_T0(s, jcc_op, inv); } } @@ -6881,7 +6894,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0xd6: /* salc */ if (CODE64(s)) goto illegal_op; - gen_compute_eflags_c(s, cpu_T[0]); + gen_compute_eflags_c(s, cpu_T[0], false); tcg_gen_neg_tl(cpu_T[0], cpu_T[0]); gen_op_mov_reg_T0(OT_BYTE, R_EAX); break; From 06847f1f1a7cff71f68dc6416cdd729c01ae2305 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 13:46:02 -0800 Subject: [PATCH 23/61] target-i386: convert gen_compute_eflags_c to TCG Do the switch at translation time, converting the helper templates to TCG opcodes. In some cases CF can be computed with a single setcond, though others it may require a little more work. In the CC_OP_DYNAMIC case, compute the whole EFLAGS, same as for ZF/SF/PF. 
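(Illustration only: the per-CC_OP carry formulas used in the patch can be sanity-checked with ordinary 8-bit arithmetic. Plain C, not TCG; the translator only selects the formula at translation time and emits the equivalent setcond/shift ops.)

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint8_t dst, src;

        /* ADD: CC_DST = result, CC_SRC = second operand.
           CF = (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC. */
        dst = (uint8_t)(0xf0 + 0x20);          /* 0x10, carry out of 8 bits */
        src = 0x20;
        assert((dst < src) == 1);

        /* ADC: CC_DST = a + b + carry_in, CC_SRC = b.
           CF = (DATA_TYPE)CC_DST <= (DATA_TYPE)CC_SRC. */
        dst = (uint8_t)(0xff + 0x00 + 1);      /* wraps to 0x00 */
        src = 0x00;
        assert((dst <= src) == 1);

        /* SUB: CC_DST = result, CC_SRC = subtrahend.
           CF = (DATA_TYPE)(CC_DST + CC_SRC) < (DATA_TYPE)CC_SRC,
           i.e. the reconstructed first operand compared with the second. */
        dst = (uint8_t)(0x10 - 0x20);          /* borrow */
        src = 0x20;
        assert(((uint8_t)(dst + src) < src) == 1);

        return 0;
    }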
Reviewed-by: Blue Swirl Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 109 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 98 insertions(+), 11 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 06aa7bf639e1..ea1b003749b4 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -869,17 +869,6 @@ static void gen_op_update_neg_cc(void) tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); } -/* compute eflags.C to reg */ -static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv) -{ - gen_update_cc_op(s); - gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op); - tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); - if (inv) { - tcg_gen_xori_tl(reg, reg, 1); - } -} - /* compute all eflags to cc_src */ static void gen_compute_eflags(DisasContext *s) { @@ -892,6 +881,104 @@ static void gen_compute_eflags(DisasContext *s) tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32); } +/* compute eflags.C to reg */ +static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv) +{ + TCGv t0, t1; + int size; + + switch (s->cc_op) { + case CC_OP_SUBB ... CC_OP_SUBQ: + /* (DATA_TYPE)(CC_DST + CC_SRC) < (DATA_TYPE)CC_SRC */ + size = s->cc_op - CC_OP_SUBB; + t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); + /* If no temporary was used, be careful not to alias t1 and t0. */ + t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg; + tcg_gen_add_tl(t0, cpu_cc_dst, cpu_cc_src); + gen_extu(size, t0); + goto add_sub; + + case CC_OP_ADDB ... CC_OP_ADDQ: + /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */ + size = s->cc_op - CC_OP_ADDB; + t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); + t0 = gen_ext_tl(reg, cpu_cc_dst, size, false); + add_sub: + tcg_gen_setcond_tl(inv ? TCG_COND_GEU : TCG_COND_LTU, reg, t0, t1); + inv = false; + break; + + case CC_OP_SBBB ... CC_OP_SBBQ: + /* (DATA_TYPE)(CC_DST + CC_SRC + 1) <= (DATA_TYPE)CC_SRC */ + size = s->cc_op - CC_OP_SBBB; + t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); + if (TCGV_EQUAL(t1, reg) && TCGV_EQUAL(reg, cpu_cc_src)) { + tcg_gen_mov_tl(cpu_tmp0, cpu_cc_src); + t1 = cpu_tmp0; + } + + tcg_gen_add_tl(reg, cpu_cc_dst, cpu_cc_src); + tcg_gen_addi_tl(reg, reg, 1); + gen_extu(size, reg); + t0 = reg; + goto adc_sbb; + + case CC_OP_ADCB ... CC_OP_ADCQ: + /* (DATA_TYPE)CC_DST <= (DATA_TYPE)CC_SRC */ + size = s->cc_op - CC_OP_ADCB; + t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); + t0 = gen_ext_tl(reg, cpu_cc_dst, size, false); + adc_sbb: + tcg_gen_setcond_tl(inv ? TCG_COND_GTU : TCG_COND_LEU, reg, t0, t1); + inv = false; + break; + + case CC_OP_LOGICB ... CC_OP_LOGICQ: + tcg_gen_movi_tl(reg, 0); + break; + + case CC_OP_INCB ... CC_OP_INCQ: + case CC_OP_DECB ... CC_OP_DECQ: + if (inv) { + tcg_gen_xori_tl(reg, cpu_cc_src, 1); + } else { + tcg_gen_mov_tl(reg, cpu_cc_src); + } + inv = false; + break; + + case CC_OP_SHLB ... CC_OP_SHLQ: + /* (CC_SRC >> (DATA_BITS - 1)) & 1 */ + size = s->cc_op - CC_OP_SHLB; + tcg_gen_shri_tl(reg, cpu_cc_src, (8 << size) - 1); + tcg_gen_andi_tl(reg, reg, 1); + break; + + case CC_OP_MULB ... CC_OP_MULQ: + tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, + reg, cpu_cc_src, 0); + inv = false; + break; + + case CC_OP_EFLAGS: + case CC_OP_SARB ... CC_OP_SARQ: + /* CC_SRC & 1 */ + tcg_gen_andi_tl(reg, cpu_cc_src, 1); + break; + + default: + /* The need to compute only C from CC_OP_DYNAMIC is important + in efficiently implementing e.g. INC at the start of a TB. 
*/ + gen_update_cc_op(s); + gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op); + tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); + break; + } + if (inv) { + tcg_gen_xori_tl(reg, reg, 1); + } +} + /* compute eflags.P to reg */ static void gen_compute_eflags_p(DisasContext *s, TCGv reg) { From 1a5c635947e60167c4626dd274531b8b0eacc2e5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 5 Oct 2012 22:54:34 +0200 Subject: [PATCH 24/61] target-i386: change gen_setcc_slow_T0 to gen_setcc_slow Do not hard code the destination register. Reviewed-by: Blue Swirl Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index ea1b003749b4..c510732765a6 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1043,53 +1043,54 @@ static void gen_compute_eflags_z(DisasContext *s, TCGv reg, bool inv) } } -static inline void gen_setcc_slow_T0(DisasContext *s, int jcc_op, bool inv) +static void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool inv) { + assert(!TCGV_EQUAL(reg, cpu_cc_src)); switch(jcc_op) { case JCC_O: - gen_compute_eflags_o(s, cpu_T[0]); + gen_compute_eflags_o(s, reg); break; case JCC_B: - gen_compute_eflags_c(s, cpu_T[0], inv); + gen_compute_eflags_c(s, reg, inv); inv = false; break; case JCC_Z: - gen_compute_eflags_z(s, cpu_T[0], inv); + gen_compute_eflags_z(s, reg, inv); inv = false; break; case JCC_BE: gen_compute_eflags(s); - tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 6); - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_cc_src); - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + tcg_gen_shri_tl(reg, cpu_cc_src, 6); + tcg_gen_or_tl(reg, reg, cpu_cc_src); + tcg_gen_andi_tl(reg, reg, 1); break; case JCC_S: - gen_compute_eflags_s(s, cpu_T[0], inv); + gen_compute_eflags_s(s, reg, inv); inv = false; break; case JCC_P: - gen_compute_eflags_p(s, cpu_T[0]); + gen_compute_eflags_p(s, reg); break; case JCC_L: gen_compute_eflags(s); - tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */ + tcg_gen_shri_tl(reg, cpu_cc_src, 11); /* CC_O */ tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 7); /* CC_S */ - tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp0); - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + tcg_gen_xor_tl(reg, reg, cpu_tmp0); + tcg_gen_andi_tl(reg, reg, 1); break; default: case JCC_LE: gen_compute_eflags(s); - tcg_gen_shri_tl(cpu_T[0], cpu_cc_src, 11); /* CC_O */ + tcg_gen_shri_tl(reg, cpu_cc_src, 11); /* CC_O */ tcg_gen_shri_tl(cpu_tmp4, cpu_cc_src, 7); /* CC_S */ tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 6); /* CC_Z */ - tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_tmp4); - tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); - tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 1); + tcg_gen_xor_tl(reg, reg, cpu_tmp4); + tcg_gen_or_tl(reg, reg, cpu_tmp0); + tcg_gen_andi_tl(reg, reg, 1); break; } if (inv) { - tcg_gen_xori_tl(cpu_T[0], cpu_T[0], 1); + tcg_gen_xori_tl(reg, reg, 1); } } @@ -1256,7 +1257,7 @@ static inline void gen_jcc1(DisasContext *s, int b, int l1) break; default: slow_jcc: - gen_setcc_slow_T0(s, jcc_op, false); + gen_setcc_slow(s, jcc_op, cpu_T[0], false); tcg_gen_brcondi_tl(inv ? 
TCG_COND_EQ : TCG_COND_NE, cpu_T[0], 0, l1); break; @@ -2510,7 +2511,7 @@ static void gen_setcc(DisasContext *s, int b) worth to */ inv = b & 1; jcc_op = (b >> 1) & 7; - gen_setcc_slow_T0(s, jcc_op, inv); + gen_setcc_slow(s, jcc_op, cpu_T[0], inv); } } From 2cb4764577f270eec259123955a6396ad6a2f161 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 13:49:43 -0800 Subject: [PATCH 25/61] target-i386: optimize setbe This is looking at EFLAGS, but it can do so more efficiently with setcond. Reviewed-by: Blue Swirl Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index c510732765a6..dab69839a782 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1060,10 +1060,9 @@ static void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool inv) break; case JCC_BE: gen_compute_eflags(s); - tcg_gen_shri_tl(reg, cpu_cc_src, 6); - tcg_gen_or_tl(reg, reg, cpu_cc_src); - tcg_gen_andi_tl(reg, reg, 1); - break; + tcg_gen_andi_tl(reg, cpu_cc_src, CC_Z | CC_C); + tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, reg, reg, 0); + return; case JCC_S: gen_compute_eflags_s(s, reg, inv); inv = false; From be10b289d697420b6e0d8d1a681aa64555066639 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 13:53:51 -0800 Subject: [PATCH 26/61] target-i386: optimize setle And allow gen_setcc_slow to operate on cpu_cc_src. Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index dab69839a782..fea43c702e0d 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1045,7 +1045,6 @@ static void gen_compute_eflags_z(DisasContext *s, TCGv reg, bool inv) static void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool inv) { - assert(!TCGV_EQUAL(reg, cpu_cc_src)); switch(jcc_op) { case JCC_O: gen_compute_eflags_o(s, reg); @@ -1072,20 +1071,18 @@ static void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool inv) break; case JCC_L: gen_compute_eflags(s); - tcg_gen_shri_tl(reg, cpu_cc_src, 11); /* CC_O */ - tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 7); /* CC_S */ + tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 11); /* CC_O */ + tcg_gen_shri_tl(reg, cpu_cc_src, 7); /* CC_S */ tcg_gen_xor_tl(reg, reg, cpu_tmp0); tcg_gen_andi_tl(reg, reg, 1); break; default: case JCC_LE: gen_compute_eflags(s); - tcg_gen_shri_tl(reg, cpu_cc_src, 11); /* CC_O */ - tcg_gen_shri_tl(cpu_tmp4, cpu_cc_src, 7); /* CC_S */ - tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 6); /* CC_Z */ - tcg_gen_xor_tl(reg, reg, cpu_tmp4); - tcg_gen_or_tl(reg, reg, cpu_tmp0); - tcg_gen_andi_tl(reg, reg, 1); + tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 4); /* CC_O -> CC_S */ + tcg_gen_xor_tl(reg, cpu_tmp0, cpu_cc_src); + tcg_gen_andi_tl(reg, reg, CC_S | CC_Z); + tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, reg, reg, 0); break; } if (inv) { From c365395e9bd2b3bcac48ef562c187ea6ab9820ad Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 5 Oct 2012 23:00:10 +0200 Subject: [PATCH 27/61] target-i386: optimize setcc instructions Reconstruct the arguments for complex conditions involving CC_OP_SUBx (BE, L, LE). In the others do it via setcond and gen_setcc_slow (which is not that slow in many cases). 
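(Illustration only: the operand reconstruction this relies on, checked with ordinary integers. In the real code gen_exts/gen_extu and the cpu_tmp temporaries take care of operand width.)

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        int8_t a = -5, b = 3;

        /* After cmp/sub with a SUB cc_op: CC_DST = a - b, CC_SRC = b. */
        int8_t cc_dst = (int8_t)(a - b);
        int8_t cc_src = b;

        /* The first operand is recovered as CC_DST + CC_SRC ... */
        int8_t src1 = (int8_t)(cc_dst + cc_src);
        assert(src1 == a);

        /* ... so the relational conditions become single compares:
           BE = unsigned <=, L = signed <, LE = signed <=. */
        assert(((uint8_t)src1 <= (uint8_t)cc_src) == 0);   /* -5 is 0xfb unsigned */
        assert((src1 < cc_src) == 1);
        assert((src1 <= cc_src) == 1);
        return 0;
    }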
Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 95 ++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 58 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index fea43c702e0d..5c9211f7dba7 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1090,55 +1090,55 @@ static void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool inv) } } -/* return true if setcc_slow is not needed (WARNING: must be kept in - sync with gen_jcc1) */ -static int is_fast_jcc_case(DisasContext *s, int b) +/* perform a conditional store into register 'reg' according to jump opcode + value 'b'. In the fast case, T0 is guaranted not to be used. */ +static inline void gen_setcc1(DisasContext *s, int b, TCGv reg) { - int jcc_op; + int inv, jcc_op, size, cond; + TCGv t0; + + inv = b & 1; jcc_op = (b >> 1) & 7; - switch(s->cc_op) { - /* we optimize the cmp/jcc case */ + + switch (s->cc_op) { + /* we optimize relational operators for the cmp/jcc case */ case CC_OP_SUBB: case CC_OP_SUBW: case CC_OP_SUBL: case CC_OP_SUBQ: - if (jcc_op == JCC_O || jcc_op == JCC_P) - goto slow_jcc; - break; - - /* some jumps are easy to compute */ - case CC_OP_ADDB: - case CC_OP_ADDW: - case CC_OP_ADDL: - case CC_OP_ADDQ: - - case CC_OP_LOGICB: - case CC_OP_LOGICW: - case CC_OP_LOGICL: - case CC_OP_LOGICQ: - - case CC_OP_INCB: - case CC_OP_INCW: - case CC_OP_INCL: - case CC_OP_INCQ: + size = s->cc_op - CC_OP_SUBB; + switch (jcc_op) { + case JCC_BE: + cond = inv ? TCG_COND_GTU : TCG_COND_LEU; + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); + gen_extu(size, cpu_tmp4); + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); + tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0); + break; - case CC_OP_DECB: - case CC_OP_DECW: - case CC_OP_DECL: - case CC_OP_DECQ: + case JCC_L: + cond = inv ? TCG_COND_GE : TCG_COND_LT; + goto fast_jcc_l; + case JCC_LE: + cond = inv ? 
TCG_COND_GT : TCG_COND_LE; + fast_jcc_l: + tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); + gen_exts(size, cpu_tmp4); + t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true); + tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0); + break; - case CC_OP_SHLB: - case CC_OP_SHLW: - case CC_OP_SHLL: - case CC_OP_SHLQ: - if (jcc_op != JCC_Z && jcc_op != JCC_S) + default: goto slow_jcc; + } break; + default: slow_jcc: - return 0; + /* gen_setcc_slow actually generates good code for JC, JZ and JS */ + gen_setcc_slow(s, jcc_op, reg, inv); + break; } - return 1; } /* generate a conditional jump to label 'l1' according to jump opcode @@ -2487,28 +2487,7 @@ static inline void gen_jcc(DisasContext *s, int b, static void gen_setcc(DisasContext *s, int b) { - int inv, jcc_op, l1; - TCGv t0; - - if (is_fast_jcc_case(s, b)) { - /* nominal case: we use a jump */ - /* XXX: make it faster by adding new instructions in TCG */ - t0 = tcg_temp_local_new(); - tcg_gen_movi_tl(t0, 0); - l1 = gen_new_label(); - gen_jcc1(s, b ^ 1, l1); - tcg_gen_movi_tl(t0, 1); - gen_set_label(l1); - tcg_gen_mov_tl(cpu_T[0], t0); - tcg_temp_free(t0); - } else { - /* slow case: it is more efficient not to generate a jump, - although it is questionnable whether this optimization is - worth to */ - inv = b & 1; - jcc_op = (b >> 1) & 7; - gen_setcc_slow(s, jcc_op, cpu_T[0], inv); - } + gen_setcc1(s, b, cpu_T[0]); } static inline void gen_op_movl_T0_seg(int seg_reg) From bec93d7283b635aabaf0bbff67b6da7fc99e020a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 14:21:52 -0800 Subject: [PATCH 28/61] target-i386: introduce CCPrepare Introduce a struct that describes how to build a *cond operation that checks for a given x86 condition code. For now, just change gen_compute_eflags_* to return the new struct, generate code for the CCPrepare struct, and go on as before. [rth: Use ctz with the proper width rather than ffs.] Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 147 +++++++++++++++++++++++++--------------- 1 file changed, 93 insertions(+), 54 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 5c9211f7dba7..06f0fbced0f8 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -23,6 +23,7 @@ #include #include +#include "qemu/host-utils.h" #include "cpu.h" #include "disas/disas.h" #include "tcg-op.h" @@ -47,6 +48,14 @@ #define REX_B(s) 0 #endif +#ifdef TARGET_X86_64 +# define ctztl ctz64 +# define clztl clz64 +#else +# define ctztl ctz32 +# define clztl clz32 +#endif + //#define MACRO_TEST 1 /* global register indexes */ @@ -881,11 +890,21 @@ static void gen_compute_eflags(DisasContext *s) tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32); } +typedef struct CCPrepare { + TCGCond cond; + TCGv reg; + TCGv reg2; + target_ulong imm; + target_ulong mask; + bool use_reg2; + bool no_setcond; +} CCPrepare; + /* compute eflags.C to reg */ -static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv) +static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) { TCGv t0, t1; - int size; + int size, shift; switch (s->cc_op) { case CC_OP_SUBB ... CC_OP_SUBQ: @@ -904,9 +923,8 @@ static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv) t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); t0 = gen_ext_tl(reg, cpu_cc_dst, size, false); add_sub: - tcg_gen_setcond_tl(inv ? 
TCG_COND_GEU : TCG_COND_LTU, reg, t0, t1); - inv = false; - break; + return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0, + .reg2 = t1, .mask = -1, .use_reg2 = true }; case CC_OP_SBBB ... CC_OP_SBBQ: /* (DATA_TYPE)(CC_DST + CC_SRC + 1) <= (DATA_TYPE)CC_SRC */ @@ -929,42 +947,33 @@ static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv) t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); t0 = gen_ext_tl(reg, cpu_cc_dst, size, false); adc_sbb: - tcg_gen_setcond_tl(inv ? TCG_COND_GTU : TCG_COND_LEU, reg, t0, t1); - inv = false; - break; + return (CCPrepare) { .cond = TCG_COND_LEU, .reg = t0, + .reg2 = t1, .mask = -1, .use_reg2 = true }; case CC_OP_LOGICB ... CC_OP_LOGICQ: - tcg_gen_movi_tl(reg, 0); - break; + return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; case CC_OP_INCB ... CC_OP_INCQ: case CC_OP_DECB ... CC_OP_DECQ: - if (inv) { - tcg_gen_xori_tl(reg, cpu_cc_src, 1); - } else { - tcg_gen_mov_tl(reg, cpu_cc_src); - } - inv = false; - break; + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = -1, .no_setcond = true }; case CC_OP_SHLB ... CC_OP_SHLQ: /* (CC_SRC >> (DATA_BITS - 1)) & 1 */ size = s->cc_op - CC_OP_SHLB; - tcg_gen_shri_tl(reg, cpu_cc_src, (8 << size) - 1); - tcg_gen_andi_tl(reg, reg, 1); - break; + shift = (8 << size) - 1; + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = (target_ulong)1 << shift }; case CC_OP_MULB ... CC_OP_MULQ: - tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, - reg, cpu_cc_src, 0); - inv = false; - break; + return (CCPrepare) { .cond = TCG_COND_NE, + .reg = cpu_cc_src, .mask = -1 }; case CC_OP_EFLAGS: case CC_OP_SARB ... CC_OP_SARQ: /* CC_SRC & 1 */ - tcg_gen_andi_tl(reg, cpu_cc_src, 1); - break; + return (CCPrepare) { .cond = TCG_COND_NE, + .reg = cpu_cc_src, .mask = CC_C }; default: /* The need to compute only C from CC_OP_DYNAMIC is important @@ -972,74 +981,104 @@ static void gen_compute_eflags_c(DisasContext *s, TCGv reg, bool inv) gen_update_cc_op(s); gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op); tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); - break; - } - if (inv) { - tcg_gen_xori_tl(reg, reg, 1); + return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, + .mask = -1, .no_setcond = true }; } } /* compute eflags.P to reg */ -static void gen_compute_eflags_p(DisasContext *s, TCGv reg) +static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg) { gen_compute_eflags(s); - tcg_gen_shri_tl(reg, cpu_cc_src, 2); - tcg_gen_andi_tl(reg, reg, 1); + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = CC_P }; } /* compute eflags.S to reg */ -static void gen_compute_eflags_s(DisasContext *s, TCGv reg, bool inv) +static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg) { switch (s->cc_op) { case CC_OP_DYNAMIC: gen_compute_eflags(s); /* FALLTHRU */ case CC_OP_EFLAGS: - tcg_gen_shri_tl(reg, cpu_cc_src, 7); - tcg_gen_andi_tl(reg, reg, 1); - if (inv) { - tcg_gen_xori_tl(reg, reg, 1); - } - break; + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = CC_S }; default: { int size = (s->cc_op - CC_OP_ADDB) & 3; TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true); - tcg_gen_setcondi_tl(inv ? 
TCG_COND_GE : TCG_COND_LT, reg, t0, 0); + return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 }; } - break; } } /* compute eflags.O to reg */ -static void gen_compute_eflags_o(DisasContext *s, TCGv reg) +static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg) { gen_compute_eflags(s); - tcg_gen_shri_tl(reg, cpu_cc_src, 11); - tcg_gen_andi_tl(reg, reg, 1); + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = CC_O }; } /* compute eflags.Z to reg */ -static void gen_compute_eflags_z(DisasContext *s, TCGv reg, bool inv) +static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) { switch (s->cc_op) { case CC_OP_DYNAMIC: gen_compute_eflags(s); /* FALLTHRU */ case CC_OP_EFLAGS: - tcg_gen_shri_tl(reg, cpu_cc_src, 6); - tcg_gen_andi_tl(reg, reg, 1); - if (inv) { - tcg_gen_xori_tl(reg, reg, 1); - } - break; + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = CC_Z }; default: { int size = (s->cc_op - CC_OP_ADDB) & 3; TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false); - tcg_gen_setcondi_tl(inv ? TCG_COND_NE : TCG_COND_EQ, reg, t0, 0); + return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 }; } - break; + } +} + +#define gen_compute_eflags_c(s, reg, inv) \ + gen_do_setcc(reg, gen_prepare_eflags_c(s, reg), inv) +#define gen_compute_eflags_p(s, reg) \ + gen_do_setcc(reg, gen_prepare_eflags_p(s, reg), false) +#define gen_compute_eflags_s(s, reg, inv) \ + gen_do_setcc(reg, gen_prepare_eflags_s(s, reg), inv) +#define gen_compute_eflags_o(s, reg) \ + gen_do_setcc(reg, gen_prepare_eflags_o(s, reg), false) +#define gen_compute_eflags_z(s, reg, inv) \ + gen_do_setcc(reg, gen_prepare_eflags_z(s, reg), inv) + +static void gen_do_setcc(TCGv reg, struct CCPrepare cc, bool inv) +{ + if (inv) { + cc.cond = tcg_invert_cond(cc.cond); + } + + if (cc.no_setcond) { + if (cc.cond == TCG_COND_EQ) { + tcg_gen_xori_tl(reg, cc.reg, 1); + } else { + tcg_gen_mov_tl(reg, cc.reg); + } + return; + } + + if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 && + cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) { + tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask)); + tcg_gen_andi_tl(reg, reg, 1); + return; + } + if (cc.mask != -1) { + tcg_gen_andi_tl(reg, cc.reg, cc.mask); + } + if (cc.use_reg2) { + tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2); + } else { + tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm); } } From 276e6b5f069e189e204a4320f824daa07db10286 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 14:33:45 -0800 Subject: [PATCH 29/61] target-i386: introduce gen_prepare_cc This makes the i386 front-end able to create CCPrepare structs for all condition, not just those that come from a single flag. In particular, JCC_L and JCC_LE can be optimized because gen_prepare_cc is not forced to return a result in bit 0 (unlike gen_setcc_slow). However, for now the slow jcc operations will still go through CC computation in a single-bit temporary, followed by a brcond if the temporary is nonzero. 
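(As an aside, not part of the patch: the consumption pattern for a CCPrepare, which the following patches in this series install as gen_jcc1/gen_setcc1, looks roughly like the sketch below. The helper name emit_jcc_from_prepare is invented for this illustration only.)

    static void emit_jcc_from_prepare(DisasContext *s, int b, int l1)
    {
        /* Describe the x86 condition 'b' as a single TCG comparison. */
        CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);

        /* Isolate the interesting bits first, if a mask was requested. */
        if (cc.mask != -1) {
            tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
            cc.reg = cpu_T[0];
        }
        /* Branch on a reg/reg or reg/immediate comparison. */
        if (cc.use_reg2) {
            tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
        } else {
            tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
        }
    }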
Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 91 +++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 49 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 06f0fbced0f8..046d82f43de2 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1042,14 +1042,6 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) #define gen_compute_eflags_c(s, reg, inv) \ gen_do_setcc(reg, gen_prepare_eflags_c(s, reg), inv) -#define gen_compute_eflags_p(s, reg) \ - gen_do_setcc(reg, gen_prepare_eflags_p(s, reg), false) -#define gen_compute_eflags_s(s, reg, inv) \ - gen_do_setcc(reg, gen_prepare_eflags_s(s, reg), inv) -#define gen_compute_eflags_o(s, reg) \ - gen_do_setcc(reg, gen_prepare_eflags_o(s, reg), false) -#define gen_compute_eflags_z(s, reg, inv) \ - gen_do_setcc(reg, gen_prepare_eflags_z(s, reg), inv) static void gen_do_setcc(TCGv reg, struct CCPrepare cc, bool inv) { @@ -1074,6 +1066,7 @@ static void gen_do_setcc(TCGv reg, struct CCPrepare cc, bool inv) } if (cc.mask != -1) { tcg_gen_andi_tl(reg, cc.reg, cc.mask); + cc.reg = reg; } if (cc.use_reg2) { tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2); @@ -1082,58 +1075,50 @@ static void gen_do_setcc(TCGv reg, struct CCPrepare cc, bool inv) } } -static void gen_setcc_slow(DisasContext *s, int jcc_op, TCGv reg, bool inv) +static CCPrepare gen_prepare_cc_slow(DisasContext *s, int jcc_op, TCGv reg) { switch(jcc_op) { case JCC_O: - gen_compute_eflags_o(s, reg); - break; + return gen_prepare_eflags_o(s, reg); case JCC_B: - gen_compute_eflags_c(s, reg, inv); - inv = false; - break; + return gen_prepare_eflags_c(s, reg); case JCC_Z: - gen_compute_eflags_z(s, reg, inv); - inv = false; - break; + return gen_prepare_eflags_z(s, reg); case JCC_BE: gen_compute_eflags(s); - tcg_gen_andi_tl(reg, cpu_cc_src, CC_Z | CC_C); - tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, reg, reg, 0); - return; + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = CC_Z | CC_C }; case JCC_S: - gen_compute_eflags_s(s, reg, inv); - inv = false; - break; + return gen_prepare_eflags_s(s, reg); case JCC_P: - gen_compute_eflags_p(s, reg); - break; + return gen_prepare_eflags_p(s, reg); case JCC_L: gen_compute_eflags(s); - tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 11); /* CC_O */ - tcg_gen_shri_tl(reg, cpu_cc_src, 7); /* CC_S */ - tcg_gen_xor_tl(reg, reg, cpu_tmp0); - tcg_gen_andi_tl(reg, reg, 1); - break; + if (TCGV_EQUAL(reg, cpu_cc_src)) { + reg = cpu_tmp0; + } + tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ + tcg_gen_xor_tl(reg, reg, cpu_cc_src); + return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, .mask = CC_S }; default: case JCC_LE: gen_compute_eflags(s); - tcg_gen_shri_tl(cpu_tmp0, cpu_cc_src, 4); /* CC_O -> CC_S */ - tcg_gen_xor_tl(reg, cpu_tmp0, cpu_cc_src); - tcg_gen_andi_tl(reg, reg, CC_S | CC_Z); - tcg_gen_setcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, reg, reg, 0); - break; - } - if (inv) { - tcg_gen_xori_tl(reg, reg, 1); + if (TCGV_EQUAL(reg, cpu_cc_src)) { + reg = cpu_tmp0; + } + tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ + tcg_gen_xor_tl(reg, reg, cpu_cc_src); + return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, + .mask = CC_S | CC_Z }; } } /* perform a conditional store into register 'reg' according to jump opcode value 'b'. In the fast case, T0 is guaranted not to be used. 
*/ -static inline void gen_setcc1(DisasContext *s, int b, TCGv reg) +static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) { int inv, jcc_op, size, cond; + CCPrepare cc; TCGv t0; inv = b & 1; @@ -1148,23 +1133,24 @@ static inline void gen_setcc1(DisasContext *s, int b, TCGv reg) size = s->cc_op - CC_OP_SUBB; switch (jcc_op) { case JCC_BE: - cond = inv ? TCG_COND_GTU : TCG_COND_LEU; tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); gen_extu(size, cpu_tmp4); t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); - tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0); + cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4, + .reg2 = t0, .mask = -1, .use_reg2 = true }; break; case JCC_L: - cond = inv ? TCG_COND_GE : TCG_COND_LT; + cond = TCG_COND_LT; goto fast_jcc_l; case JCC_LE: - cond = inv ? TCG_COND_GT : TCG_COND_LE; + cond = TCG_COND_LE; fast_jcc_l: tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); gen_exts(size, cpu_tmp4); t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true); - tcg_gen_setcond_tl(cond, reg, cpu_tmp4, t0); + cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4, + .reg2 = t0, .mask = -1, .use_reg2 = true }; break; default: @@ -1174,12 +1160,20 @@ static inline void gen_setcc1(DisasContext *s, int b, TCGv reg) default: slow_jcc: - /* gen_setcc_slow actually generates good code for JC, JZ and JS */ - gen_setcc_slow(s, jcc_op, reg, inv); + /* gen_prepare_cc_slow actually generates good code for JC, JZ and JS */ + cc = gen_prepare_cc_slow(s, jcc_op, reg); break; } + + if (inv) { + cc.cond = tcg_invert_cond(cc.cond); + } + return cc; } +#define gen_setcc1(s, b, reg) \ + gen_do_setcc(reg, gen_prepare_cc(s, b, reg), false) + /* generate a conditional jump to label 'l1' according to jump opcode value 'b'. In the fast case, T0 is guaranted not to be used. */ static inline void gen_jcc1(DisasContext *s, int b, int l1) @@ -1292,9 +1286,8 @@ static inline void gen_jcc1(DisasContext *s, int b, int l1) break; default: slow_jcc: - gen_setcc_slow(s, jcc_op, cpu_T[0], false); - tcg_gen_brcondi_tl(inv ? TCG_COND_EQ : TCG_COND_NE, - cpu_T[0], 0, l1); + gen_setcc1(s, b, cpu_T[0]); + tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1); break; } } From 943131ca98af142da7b99111b410e741a5d42338 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 7 Oct 2012 15:53:23 +0200 Subject: [PATCH 30/61] target-i386: use CCPrepare to generate conditional jumps This simplifies all the jump generation code. CCPrepare allows the code to create an efficient brcond always, so there is no need to duplicate the setcc and jcc code. Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 119 +++------------------------------------- 1 file changed, 9 insertions(+), 110 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 046d82f43de2..b081fc0cec43 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1178,117 +1178,16 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) value 'b'. In the fast case, T0 is guaranted not to be used. */ static inline void gen_jcc1(DisasContext *s, int b, int l1) { - int inv, jcc_op, size, cond; - TCGv t0; + CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]); - inv = b & 1; - jcc_op = (b >> 1) & 7; - - switch (s->cc_op) { - /* we optimize the cmp/jcc case */ - case CC_OP_SUBB: - case CC_OP_SUBW: - case CC_OP_SUBL: - case CC_OP_SUBQ: - - size = s->cc_op - CC_OP_SUBB; - switch(jcc_op) { - case JCC_Z: - fast_jcc_z: - t0 = gen_ext_tl(cpu_tmp0, cpu_cc_dst, size, false); - tcg_gen_brcondi_tl(inv ? 
TCG_COND_NE : TCG_COND_EQ, t0, 0, l1); - break; - case JCC_S: - fast_jcc_s: - t0 = gen_ext_tl(cpu_tmp0, cpu_cc_dst, size, true); - tcg_gen_brcondi_tl(inv ? TCG_COND_GE : TCG_COND_LT, t0, 0, l1); - break; - - case JCC_B: - cond = inv ? TCG_COND_GEU : TCG_COND_LTU; - goto fast_jcc_b; - case JCC_BE: - cond = inv ? TCG_COND_GTU : TCG_COND_LEU; - fast_jcc_b: - tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); - gen_extu(size, cpu_tmp4); - t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); - tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1); - break; - - case JCC_L: - cond = inv ? TCG_COND_GE : TCG_COND_LT; - goto fast_jcc_l; - case JCC_LE: - cond = inv ? TCG_COND_GT : TCG_COND_LE; - fast_jcc_l: - tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); - gen_exts(size, cpu_tmp4); - t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true); - tcg_gen_brcond_tl(cond, cpu_tmp4, t0, l1); - break; - - default: - goto slow_jcc; - } - break; - - /* some jumps are easy to compute */ - case CC_OP_ADDB: - case CC_OP_ADDW: - case CC_OP_ADDL: - case CC_OP_ADDQ: - - case CC_OP_ADCB: - case CC_OP_ADCW: - case CC_OP_ADCL: - case CC_OP_ADCQ: - - case CC_OP_SBBB: - case CC_OP_SBBW: - case CC_OP_SBBL: - case CC_OP_SBBQ: - - case CC_OP_LOGICB: - case CC_OP_LOGICW: - case CC_OP_LOGICL: - case CC_OP_LOGICQ: - - case CC_OP_INCB: - case CC_OP_INCW: - case CC_OP_INCL: - case CC_OP_INCQ: - - case CC_OP_DECB: - case CC_OP_DECW: - case CC_OP_DECL: - case CC_OP_DECQ: - - case CC_OP_SHLB: - case CC_OP_SHLW: - case CC_OP_SHLL: - case CC_OP_SHLQ: - - case CC_OP_SARB: - case CC_OP_SARW: - case CC_OP_SARL: - case CC_OP_SARQ: - switch(jcc_op) { - case JCC_Z: - size = (s->cc_op - CC_OP_ADDB) & 3; - goto fast_jcc_z; - case JCC_S: - size = (s->cc_op - CC_OP_ADDB) & 3; - goto fast_jcc_s; - default: - goto slow_jcc; - } - break; - default: - slow_jcc: - gen_setcc1(s, b, cpu_T[0]); - tcg_gen_brcondi_tl(TCG_COND_NE, cpu_T[0], 0, l1); - break; + if (cc.mask != -1) { + tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask); + cc.reg = cpu_T[0]; + } + if (cc.use_reg2) { + tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1); + } else { + tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1); } } From 69d1aa31f7551050bf918dc22f0fe3307b779186 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 14:41:21 -0800 Subject: [PATCH 31/61] target-i386: inline gen_prepare_cc_slow Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 91 +++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 45 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index b081fc0cec43..0b88eaed7831 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1075,44 +1075,6 @@ static void gen_do_setcc(TCGv reg, struct CCPrepare cc, bool inv) } } -static CCPrepare gen_prepare_cc_slow(DisasContext *s, int jcc_op, TCGv reg) -{ - switch(jcc_op) { - case JCC_O: - return gen_prepare_eflags_o(s, reg); - case JCC_B: - return gen_prepare_eflags_c(s, reg); - case JCC_Z: - return gen_prepare_eflags_z(s, reg); - case JCC_BE: - gen_compute_eflags(s); - return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, - .mask = CC_Z | CC_C }; - case JCC_S: - return gen_prepare_eflags_s(s, reg); - case JCC_P: - return gen_prepare_eflags_p(s, reg); - case JCC_L: - gen_compute_eflags(s); - if (TCGV_EQUAL(reg, cpu_cc_src)) { - reg = cpu_tmp0; - } - tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ - tcg_gen_xor_tl(reg, reg, cpu_cc_src); - return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, .mask = CC_S }; - default: - 
case JCC_LE: - gen_compute_eflags(s); - if (TCGV_EQUAL(reg, cpu_cc_src)) { - reg = cpu_tmp0; - } - tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ - tcg_gen_xor_tl(reg, reg, cpu_cc_src); - return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, - .mask = CC_S | CC_Z }; - } -} - /* perform a conditional store into register 'reg' according to jump opcode value 'b'. In the fast case, T0 is guaranted not to be used. */ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) @@ -1125,11 +1087,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) jcc_op = (b >> 1) & 7; switch (s->cc_op) { - /* we optimize relational operators for the cmp/jcc case */ - case CC_OP_SUBB: - case CC_OP_SUBW: - case CC_OP_SUBL: - case CC_OP_SUBQ: + case CC_OP_SUBB ... CC_OP_SUBQ: + /* We optimize relational operators for the cmp/jcc case. */ size = s->cc_op - CC_OP_SUBB; switch (jcc_op) { case JCC_BE: @@ -1160,8 +1119,50 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) default: slow_jcc: - /* gen_prepare_cc_slow actually generates good code for JC, JZ and JS */ - cc = gen_prepare_cc_slow(s, jcc_op, reg); + /* This actually generates good code for JC, JZ and JS. */ + switch (jcc_op) { + case JCC_O: + cc = gen_prepare_eflags_o(s, reg); + break; + case JCC_B: + cc = gen_prepare_eflags_c(s, reg); + break; + case JCC_Z: + cc = gen_prepare_eflags_z(s, reg); + break; + case JCC_BE: + gen_compute_eflags(s); + cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = CC_Z | CC_C }; + break; + case JCC_S: + cc = gen_prepare_eflags_s(s, reg); + break; + case JCC_P: + cc = gen_prepare_eflags_p(s, reg); + break; + case JCC_L: + gen_compute_eflags(s); + if (TCGV_EQUAL(reg, cpu_cc_src)) { + reg = cpu_tmp0; + } + tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ + tcg_gen_xor_tl(reg, reg, cpu_cc_src); + cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, + .mask = CC_S }; + break; + default: + case JCC_LE: + gen_compute_eflags(s); + if (TCGV_EQUAL(reg, cpu_cc_src)) { + reg = cpu_tmp0; + } + tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ + tcg_gen_xor_tl(reg, reg, cpu_cc_src); + cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, + .mask = CC_S | CC_Z }; + break; + } break; } From cc8b6f5b39ae47a93074a5384faa734bf2a6ae61 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 8 Oct 2012 09:42:48 +0200 Subject: [PATCH 32/61] target-i386: cleanup temporary macros for CCPrepare Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 86 +++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 47 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 0b88eaed7831..c83b56f50741 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1040,41 +1040,6 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) } } -#define gen_compute_eflags_c(s, reg, inv) \ - gen_do_setcc(reg, gen_prepare_eflags_c(s, reg), inv) - -static void gen_do_setcc(TCGv reg, struct CCPrepare cc, bool inv) -{ - if (inv) { - cc.cond = tcg_invert_cond(cc.cond); - } - - if (cc.no_setcond) { - if (cc.cond == TCG_COND_EQ) { - tcg_gen_xori_tl(reg, cc.reg, 1); - } else { - tcg_gen_mov_tl(reg, cc.reg); - } - return; - } - - if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 && - cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) { - tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask)); - tcg_gen_andi_tl(reg, reg, 1); - return; - } - if (cc.mask != -1) { - tcg_gen_andi_tl(reg, cc.reg, cc.mask); - 
cc.reg = reg; - } - if (cc.use_reg2) { - tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2); - } else { - tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm); - } -} - /* perform a conditional store into register 'reg' according to jump opcode value 'b'. In the fast case, T0 is guaranted not to be used. */ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) @@ -1172,8 +1137,40 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) return cc; } -#define gen_setcc1(s, b, reg) \ - gen_do_setcc(reg, gen_prepare_cc(s, b, reg), false) +static void gen_setcc1(DisasContext *s, int b, TCGv reg) +{ + CCPrepare cc = gen_prepare_cc(s, b, reg); + + if (cc.no_setcond) { + if (cc.cond == TCG_COND_EQ) { + tcg_gen_xori_tl(reg, cc.reg, 1); + } else { + tcg_gen_mov_tl(reg, cc.reg); + } + return; + } + + if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 && + cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) { + tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask)); + tcg_gen_andi_tl(reg, reg, 1); + return; + } + if (cc.mask != -1) { + tcg_gen_andi_tl(reg, cc.reg, cc.mask); + cc.reg = reg; + } + if (cc.use_reg2) { + tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2); + } else { + tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm); + } +} + +static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg) +{ + gen_setcc1(s, JCC_B << 1, reg); +} /* generate a conditional jump to label 'l1' according to jump opcode value 'b'. In the fast case, T0 is guaranted not to be used. */ @@ -1399,7 +1396,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) } switch(op) { case OP_ADCL: - gen_compute_eflags_c(s1, cpu_tmp4, false); + gen_compute_eflags_c(s1, cpu_tmp4); tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]); tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_tmp4); if (d != OR_TMP0) @@ -1414,7 +1411,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) set_cc_op(s1, CC_OP_DYNAMIC); break; case OP_SBBL: - gen_compute_eflags_c(s1, cpu_tmp4, false); + gen_compute_eflags_c(s1, cpu_tmp4); tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]); tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4); if (d != OR_TMP0) @@ -1488,7 +1485,7 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c) gen_op_mov_TN_reg(ot, 0, d); else gen_op_ld_T0_A0(ot + s1->mem_index); - gen_compute_eflags_c(s1, cpu_cc_src, false); + gen_compute_eflags_c(s1, cpu_cc_src); if (c > 0) { tcg_gen_addi_tl(cpu_T[0], cpu_T[0], 1); set_cc_op(s1, CC_OP_INCB + ot); @@ -2417,11 +2414,6 @@ static inline void gen_jcc(DisasContext *s, int b, } } -static void gen_setcc(DisasContext *s, int b) -{ - gen_setcc1(s, b, cpu_T[0]); -} - static inline void gen_op_movl_T0_seg(int seg_reg) { tcg_gen_ld32u_tl(cpu_T[0], cpu_env, @@ -6431,7 +6423,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0x190 ... 0x19f: /* setcc Gv */ modrm = cpu_ldub_code(env, s->pc++); - gen_setcc(s, b); + gen_setcc1(s, b, cpu_T[0]); gen_ldst_modrm(env, s, modrm, OT_BYTE, OR_TMP0, 1); break; case 0x140 ... 
0x14f: /* cmov Gv, Ev */ @@ -6889,7 +6881,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0xd6: /* salc */ if (CODE64(s)) goto illegal_op; - gen_compute_eflags_c(s, cpu_T[0], false); + gen_compute_eflags_c(s, cpu_T[0]); tcg_gen_neg_tl(cpu_T[0], cpu_T[0]); gen_op_mov_reg_T0(OT_BYTE, R_EAX); break; From f32d3781de8328237c2db45ff774cbd4b30134d6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 7 Oct 2012 17:55:26 +0200 Subject: [PATCH 33/61] target-i386: introduce gen_cmovcc1 Signed-off-by: Richard Henderson --- target-i386/translate.c | 72 ++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index c83b56f50741..4b0a701d8cbf 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -2414,6 +2414,40 @@ static inline void gen_jcc(DisasContext *s, int b, } } +static void gen_cmovcc1(CPUX86State *env, DisasContext *s, int ot, int b, + int modrm, int reg) +{ + int l1, mod = (modrm >> 6) & 3; + TCGv t0 = tcg_temp_local_new(); + + if (mod != 3) { + int reg_addr, offset_addr; + gen_lea_modrm(env, s, modrm, ®_addr, &offset_addr); + gen_op_ld_v(ot + s->mem_index, t0, cpu_A0); + } else { + int rm = (modrm & 7) | REX_B(s); + gen_op_mov_v_reg(ot, t0, rm); + } + + l1 = gen_new_label(); + gen_jcc1(s, b ^ 1, l1); + switch (ot) { +#ifdef TARGET_X86_64 + case OT_LONG: + tcg_gen_mov_tl(cpu_regs[reg], t0); + gen_set_label(l1); + tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]); + break; +#endif + default: + gen_op_mov_reg_v(ot, reg, t0); + gen_set_label(l1); + break; + } + + tcg_temp_free(t0); +} + static inline void gen_op_movl_T0_seg(int seg_reg) { tcg_gen_ld32u_tl(cpu_T[0], cpu_env, @@ -6427,40 +6461,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_ldst_modrm(env, s, modrm, OT_BYTE, OR_TMP0, 1); break; case 0x140 ... 0x14f: /* cmov Gv, Ev */ - { - int l1; - TCGv t0; - - ot = dflag + OT_WORD; - modrm = cpu_ldub_code(env, s->pc++); - reg = ((modrm >> 3) & 7) | rex_r; - mod = (modrm >> 6) & 3; - t0 = tcg_temp_local_new(); - if (mod != 3) { - gen_lea_modrm(env, s, modrm, ®_addr, &offset_addr); - gen_op_ld_v(ot + s->mem_index, t0, cpu_A0); - } else { - rm = (modrm & 7) | REX_B(s); - gen_op_mov_v_reg(ot, t0, rm); - } -#ifdef TARGET_X86_64 - if (ot == OT_LONG) { - /* XXX: specific Intel behaviour ? 
*/ - l1 = gen_new_label(); - gen_jcc1(s, b ^ 1, l1); - tcg_gen_mov_tl(cpu_regs[reg], t0); - gen_set_label(l1); - tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]); - } else -#endif - { - l1 = gen_new_label(); - gen_jcc1(s, b ^ 1, l1); - gen_op_mov_reg_v(ot, reg, t0); - gen_set_label(l1); - } - tcg_temp_free(t0); - } + ot = dflag + OT_WORD; + modrm = cpu_ldub_code(env, s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + gen_cmovcc1(env, s, ot, b, modrm, reg); break; /************************/ From 57eb0cc85469a8948d1036ab830951e63aa32f66 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 16 Jan 2013 11:00:14 -0800 Subject: [PATCH 34/61] target-i386: expand cmov via movcond Signed-off-by: Richard Henderson --- target-i386/translate.c | 45 ++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 4b0a701d8cbf..9d5467dbfb15 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -2417,35 +2417,30 @@ static inline void gen_jcc(DisasContext *s, int b, static void gen_cmovcc1(CPUX86State *env, DisasContext *s, int ot, int b, int modrm, int reg) { - int l1, mod = (modrm >> 6) & 3; - TCGv t0 = tcg_temp_local_new(); + CCPrepare cc; - if (mod != 3) { - int reg_addr, offset_addr; - gen_lea_modrm(env, s, modrm, ®_addr, &offset_addr); - gen_op_ld_v(ot + s->mem_index, t0, cpu_A0); - } else { - int rm = (modrm & 7) | REX_B(s); - gen_op_mov_v_reg(ot, t0, rm); - } + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - l1 = gen_new_label(); - gen_jcc1(s, b ^ 1, l1); - switch (ot) { -#ifdef TARGET_X86_64 - case OT_LONG: - tcg_gen_mov_tl(cpu_regs[reg], t0); - gen_set_label(l1); - tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]); - break; -#endif - default: - gen_op_mov_reg_v(ot, reg, t0); - gen_set_label(l1); - break; + cc = gen_prepare_cc(s, b, cpu_T[1]); + if (cc.mask != -1) { + TCGv t0 = tcg_temp_new(); + tcg_gen_andi_tl(t0, cc.reg, cc.mask); + cc.reg = t0; + } + if (!cc.use_reg2) { + cc.reg2 = tcg_const_tl(cc.imm); } - tcg_temp_free(t0); + tcg_gen_movcond_tl(cc.cond, cpu_T[0], cc.reg, cc.reg2, + cpu_T[0], cpu_regs[reg]); + gen_op_mov_reg_T0(ot, reg); + + if (cc.mask != -1) { + tcg_temp_free(cc.reg); + } + if (!cc.use_reg2) { + tcg_temp_free(cc.reg2); + } } static inline void gen_op_movl_T0_seg(int seg_reg) From 3b9d3cf1609ec98411508c1e8b6dde711117825f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 12 Oct 2012 15:04:10 +0200 Subject: [PATCH 35/61] target-i386: kill cpu_T3 It is almost unused, and it is simpler to pass a TCG value directly to gen_shiftd_rm_T1_T3. This value is then written to t2 without going through a temporary register. 
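(For illustration only, condensed from the hunk below: the shift count now reaches gen_shiftd_rm_T1 either as a throwaway constant built from the immediate byte or as cpu_regs[R_ECX] directly, with no intermediate cpu_T3 copy.)

    if (shift) {
        /* shld/shrd with an immediate count byte */
        TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++));
        gen_shiftd_rm_T1(s, ot, opreg, op, imm);
        tcg_temp_free(imm);
    } else {
        /* shld/shrd with the count in CL */
        gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
    }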
Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 9d5467dbfb15..60c1fdd29a25 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -64,7 +64,7 @@ static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst; static TCGv_i32 cpu_cc_op; static TCGv cpu_regs[CPU_NB_REGS]; /* local temps */ -static TCGv cpu_T[2], cpu_T3; +static TCGv cpu_T[2]; /* local register indexes (only used inside old micro ops) */ static TCGv cpu_tmp0, cpu_tmp4; static TCGv_ptr cpu_ptr0, cpu_ptr1; @@ -1858,8 +1858,8 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, } /* XXX: add faster immediate case */ -static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1, - int is_right) +static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1, + int is_right, TCGv count) { int label1, label2, data_bits; target_ulong mask; @@ -1883,10 +1883,8 @@ static void gen_shiftd_rm_T1_T3(DisasContext *s, int ot, int op1, gen_op_mov_v_reg(ot, t0, op1); } - tcg_gen_andi_tl(cpu_T3, cpu_T3, mask); - + tcg_gen_andi_tl(t2, count, mask); tcg_gen_mov_tl(t1, cpu_T[1]); - tcg_gen_mov_tl(t2, cpu_T3); /* Must test zero case to avoid using undefined behaviour in TCG shifts. */ @@ -5583,12 +5581,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_mov_TN_reg(ot, 1, reg); if (shift) { - val = cpu_ldub_code(env, s->pc++); - tcg_gen_movi_tl(cpu_T3, val); + TCGv imm = tcg_const_tl(cpu_ldub_code(env, s->pc++)); + gen_shiftd_rm_T1(s, ot, opreg, op, imm); + tcg_temp_free(imm); } else { - tcg_gen_mov_tl(cpu_T3, cpu_regs[R_ECX]); + gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]); } - gen_shiftd_rm_T1_T3(s, ot, opreg, op); break; /************************/ @@ -7869,7 +7867,6 @@ static inline void gen_intermediate_code_internal(CPUX86State *env, cpu_T[0] = tcg_temp_new(); cpu_T[1] = tcg_temp_new(); cpu_A0 = tcg_temp_new(); - cpu_T3 = tcg_temp_new(); cpu_tmp0 = tcg_temp_new(); cpu_tmp1_i64 = tcg_temp_new_i64(); From 63633fe6eb15107d688f3b7f61a4b379f57fc4ca Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 14:51:34 -0800 Subject: [PATCH 36/61] target-i386: use gen_op for cmps/scas Replace low-level ops with a higher-level "cmp %al, (A0)" in the case of scas, and "cmp T0, (A0)" in the case of cmps. 
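(For illustration only: gen_scas as it reads after this patch, copied from the hunk below with explanatory comments added; the comments are not part of the patch.)

    static inline void gen_scas(DisasContext *s, int ot)
    {
        gen_string_movl_A0_EDI(s);            /* A0 = the [E]DI string operand */
        gen_op_ld_T1_A0(ot + s->mem_index);   /* T1 = memory operand */
        gen_op(s, OP_CMPL, ot, R_EAX);        /* "cmp %al, (A0)": also sets cc_op */
        gen_op_movl_T0_Dshift(ot);            /* T0 = +/- operand size per DF */
        gen_op_add_reg_T0(s->aflag, R_EDI);   /* advance [E]DI */
    }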
Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 60c1fdd29a25..f8d5e6874273 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -123,6 +123,7 @@ typedef struct DisasContext { static void gen_eob(DisasContext *s); static void gen_jmp(DisasContext *s, target_ulong eip); static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num); +static void gen_op(DisasContext *s1, int op, int ot, int d); /* i386 arith/logic operations */ enum { @@ -861,12 +862,6 @@ static void gen_op_update2_cc(void) tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); } -static inline void gen_op_cmpl_T0_T1_cc(void) -{ - tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); - tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]); -} - static inline void gen_op_testl_T0_T1_cc(void) { tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]); @@ -1224,26 +1219,22 @@ static inline void gen_lods(DisasContext *s, int ot) static inline void gen_scas(DisasContext *s, int ot) { - gen_op_mov_TN_reg(OT_LONG, 0, R_EAX); gen_string_movl_A0_EDI(s); gen_op_ld_T1_A0(ot + s->mem_index); - gen_op_cmpl_T0_T1_cc(); + gen_op(s, OP_CMPL, ot, R_EAX); gen_op_movl_T0_Dshift(ot); gen_op_add_reg_T0(s->aflag, R_EDI); - set_cc_op(s, CC_OP_SUBB + ot); } static inline void gen_cmps(DisasContext *s, int ot) { - gen_string_movl_A0_ESI(s); - gen_op_ld_T0_A0(ot + s->mem_index); gen_string_movl_A0_EDI(s); gen_op_ld_T1_A0(ot + s->mem_index); - gen_op_cmpl_T0_T1_cc(); + gen_string_movl_A0_ESI(s); + gen_op(s, OP_CMPL, ot, OR_TMP0); gen_op_movl_T0_Dshift(ot); gen_op_add_reg_T0(s->aflag, R_ESI); gen_op_add_reg_T0(s->aflag, R_EDI); - set_cc_op(s, CC_OP_SUBB + ot); } static inline void gen_ins(DisasContext *s, int ot) @@ -1472,7 +1463,8 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) set_cc_op(s1, CC_OP_LOGICB + ot); break; case OP_CMPL: - gen_op_cmpl_T0_T1_cc(); + tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); + tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]); set_cc_op(s1, CC_OP_SUBB + ot); break; } From dc259201f8b471f27136ffe50cc7019c8311ccb6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 15:01:35 -0800 Subject: [PATCH 37/61] target-i386: introduce gen_jcc1_noeob A jump that ends a basic block or otherwise falls back to CC_OP_DYNAMIC will always have to call gen_op_set_cc_op. However, not all jumps end a basic block, so introduce a variant that does not do this. This was partially undone earlier (i386: drop cc_op argument of gen_jcc1), redo it now also to prepare for the introduction of src2. Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index f8d5e6874273..948a04831ad8 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1169,14 +1169,34 @@ static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg) /* generate a conditional jump to label 'l1' according to jump opcode value 'b'. In the fast case, T0 is guaranted not to be used. 
*/ +static inline void gen_jcc1_noeob(DisasContext *s, int b, int l1) +{ + CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]); + + if (cc.mask != -1) { + tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask); + cc.reg = cpu_T[0]; + } + if (cc.use_reg2) { + tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1); + } else { + tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1); + } +} + +/* Generate a conditional jump to label 'l1' according to jump opcode + value 'b'. In the fast case, T0 is guaranted not to be used. + A translation block must end soon. */ static inline void gen_jcc1(DisasContext *s, int b, int l1) { CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]); + gen_update_cc_op(s); if (cc.mask != -1) { tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask); cc.reg = cpu_T[0]; } + set_cc_op(s, CC_OP_DYNAMIC); if (cc.use_reg2) { tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1); } else { @@ -1310,7 +1330,6 @@ static inline void gen_repz_ ## op(DisasContext *s, int ot, \ if (!s->jmp_opt) \ gen_op_jz_ecx(s->aflag, l2); \ gen_jmp(s, cur_eip); \ - set_cc_op(s, CC_OP_DYNAMIC); \ } GEN_REPZ(movs) @@ -2379,11 +2398,9 @@ static inline void gen_jcc(DisasContext *s, int b, int l1, l2; if (s->jmp_opt) { - gen_update_cc_op(s); l1 = gen_new_label(); gen_jcc1(s, b, l1); - set_cc_op(s, CC_OP_DYNAMIC); - + gen_goto_tb(s, 0, next_eip); gen_set_label(l1); @@ -6077,7 +6094,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, }; op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1); l1 = gen_new_label(); - gen_jcc1(s, op1, l1); + gen_jcc1_noeob(s, op1, l1); gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg)); gen_set_label(l1); } From 891a5133f1637296c3823229180b5851132ed5f5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 18 Jan 2013 10:06:55 -0800 Subject: [PATCH 38/61] target-i386: Update cc_op before TCG branches Placing the CC_OP_DYNAMIC at the join is less effective than before the branch, as the branch will have forced global registers to their home locations. This way we have a chance to discard CC_SRC2 before it gets stored. Signed-off-by: Richard Henderson --- target-i386/translate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 948a04831ad8..7f2d65f250b6 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1559,8 +1559,9 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, gen_op_mov_reg_T0(ot, op1); } - /* update eflags */ + /* Update eflags data because we cannot predict flags afterward. */ gen_update_cc_op(s); + set_cc_op(s, CC_OP_DYNAMIC); tcg_gen_mov_tl(t1, cpu_T[0]); @@ -1587,7 +1588,6 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, } gen_set_label(shift_label); - set_cc_op(s, CC_OP_DYNAMIC); /* cannot predict flags after */ tcg_temp_free(t0); tcg_temp_free(t1); @@ -1972,8 +1972,9 @@ static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1, gen_op_mov_reg_v(ot, op1, t0); } - /* update eflags */ + /* Update eflags data because we cannot predict flags afterward. 
*/ gen_update_cc_op(s); + set_cc_op(s, CC_OP_DYNAMIC); label2 = gen_new_label(); tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2); @@ -1986,7 +1987,6 @@ static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1, tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot); } gen_set_label(label2); - set_cc_op(s, CC_OP_DYNAMIC); /* cannot predict flags after */ tcg_temp_free(t0); tcg_temp_free(t1); From a3251186fc6a04d421e9c4b65aa04ec32379ec38 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 15:43:03 -0800 Subject: [PATCH 39/61] target-i386: optimize flags checking after sub using CC_SRCT After a comparison or subtraction, the original value of the LHS will currently be reconstructed using an addition. However, in most cases it is already available: store it in a temp-local variable and save 1 or 2 TCG ops (2 if the result of the addition needs to be extended). The temp-local can be declared dead as soon as the cc_op changes again, or also before the translation block ends because gen_prepare_cc will always make a copy before returning it. All this magic, plus copy propagation and dead-code elimination, ensures that the temp local will (almost) never be spilled. Example (cmp $0x21,%rax + jbe): Before After ---------------------------------------------------------------------------- movi_i64 tmp1,$0x21 movi_i64 tmp1,$0x21 movi_i64 cc_src,$0x21 movi_i64 cc_src,$0x21 sub_i64 cc_dst,rax,tmp1 sub_i64 cc_dst,rax,tmp1 add_i64 tmp7,cc_dst,cc_src movi_i32 cc_op,$0x11 movi_i32 cc_op,$0x11 brcond_i64 tmp7,cc_src,leu,$0x0 discard loc11 brcond_i64 rax,cc_src,leu,$0x0 Before After ---------------------------------------------------------------------------- mov (%r14),%rbp mov (%r14),%rbp mov %rbp,%rbx mov %rbp,%rbx sub $0x21,%rbx sub $0x21,%rbx lea 0x21(%rbx),%r12 movl $0x11,0xa0(%r14) movl $0x11,0xa0(%r14) movq $0x21,0x90(%r14) movq $0x21,0x90(%r14) mov %rbx,0x98(%r14) mov %rbx,0x98(%r14) cmp $0x21,%r12 | cmp $0x21,%rbp jbe ... jbe ... Signed-off-by: Paolo Bonzini Signed-off-by: Richard Henderson --- target-i386/translate.c | 46 +++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 7f2d65f250b6..31e344244290 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -60,7 +60,8 @@ /* global register indexes */ static TCGv_ptr cpu_env; -static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst; +static TCGv cpu_A0; +static TCGv cpu_cc_src, cpu_cc_dst, cpu_cc_srcT; static TCGv_i32 cpu_cc_op; static TCGv cpu_regs[CPU_NB_REGS]; /* local temps */ @@ -185,8 +186,9 @@ enum { }; enum { - USES_CC_DST = 1, - USES_CC_SRC = 2, + USES_CC_DST = 1, + USES_CC_SRC = 2, + USES_CC_SRCT = 4, }; /* Bit set if the global variable is live after setting CC_OP to X. */ @@ -196,7 +198,7 @@ static const uint8_t cc_op_live[CC_OP_NB] = { [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC, [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC, [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC, - [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT, [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC, [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST, [CC_OP_INCB ... 
CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC, @@ -221,6 +223,9 @@ static void set_cc_op(DisasContext *s, CCOp op) if (dead & USES_CC_SRC) { tcg_gen_discard_tl(cpu_cc_src); } + if (dead & USES_CC_SRCT) { + tcg_gen_discard_tl(cpu_cc_srcT); + } s->cc_op = op; /* The DYNAMIC setting is translator only, and should never be @@ -869,8 +874,9 @@ static inline void gen_op_testl_T0_T1_cc(void) static void gen_op_update_neg_cc(void) { - tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]); tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + tcg_gen_neg_tl(cpu_cc_src, cpu_T[0]); + tcg_gen_movi_tl(cpu_cc_srcT, 0); } /* compute all eflags to cc_src */ @@ -903,12 +909,12 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) switch (s->cc_op) { case CC_OP_SUBB ... CC_OP_SUBQ: - /* (DATA_TYPE)(CC_DST + CC_SRC) < (DATA_TYPE)CC_SRC */ + /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */ size = s->cc_op - CC_OP_SUBB; t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); /* If no temporary was used, be careful not to alias t1 and t0. */ t0 = TCGV_EQUAL(t1, cpu_cc_src) ? cpu_tmp0 : reg; - tcg_gen_add_tl(t0, cpu_cc_dst, cpu_cc_src); + tcg_gen_mov_tl(t0, cpu_cc_srcT); gen_extu(size, t0); goto add_sub; @@ -1052,7 +1058,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) size = s->cc_op - CC_OP_SUBB; switch (jcc_op) { case JCC_BE: - tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); + tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT); gen_extu(size, cpu_tmp4); t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = cpu_tmp4, @@ -1065,7 +1071,7 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) case JCC_LE: cond = TCG_COND_LE; fast_jcc_l: - tcg_gen_add_tl(cpu_tmp4, cpu_cc_dst, cpu_cc_src); + tcg_gen_mov_tl(cpu_tmp4, cpu_cc_srcT); gen_exts(size, cpu_tmp4); t0 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, true); cc = (CCPrepare) { .cond = cond, .reg = cpu_tmp4, @@ -1421,6 +1427,10 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) set_cc_op(s1, CC_OP_DYNAMIC); break; case OP_SBBL: + /* + * No need to store cpu_cc_srcT, because it is used only + * when the cc_op is known. 
+ */ gen_compute_eflags_c(s1, cpu_tmp4); tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]); tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4); @@ -1445,6 +1455,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) set_cc_op(s1, CC_OP_ADDB + ot); break; case OP_SUBL: + tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]); tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]); if (d != OR_TMP0) gen_op_mov_reg_T0(ot, d); @@ -1483,6 +1494,7 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) break; case OP_CMPL: tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_srcT, cpu_T[0]); tcg_gen_sub_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]); set_cc_op(s1, CC_OP_SUBB + ot); break; @@ -2799,8 +2811,9 @@ static void gen_eob(DisasContext *s) direct call to the next block may occur */ static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num) { + gen_update_cc_op(s); + set_cc_op(s, CC_OP_DYNAMIC); if (s->jmp_opt) { - gen_update_cc_op(s); gen_goto_tb(s, tb_num, eip); s->is_jmp = DISAS_TB_JUMP; } else { @@ -5017,9 +5030,10 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, rm = 0; /* avoid warning */ } label1 = gen_new_label(); - tcg_gen_sub_tl(t2, cpu_regs[R_EAX], t0); + tcg_gen_mov_tl(t2, cpu_regs[R_EAX]); + gen_extu(ot, t0); gen_extu(ot, t2); - tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1); + tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, label1); label2 = gen_new_label(); if (mod == 3) { gen_op_mov_reg_v(ot, R_EAX, t0); @@ -5038,7 +5052,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } gen_set_label(label2); tcg_gen_mov_tl(cpu_cc_src, t0); - tcg_gen_mov_tl(cpu_cc_dst, t2); + tcg_gen_mov_tl(cpu_cc_srcT, t2); + tcg_gen_sub_tl(cpu_cc_dst, t2, t0); set_cc_op(s, CC_OP_SUBB + ot); tcg_temp_free(t0); tcg_temp_free(t1); @@ -7746,10 +7761,10 @@ void optimize_flags_init(void) cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env"); cpu_cc_op = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, cc_op), "cc_op"); - cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src), - "cc_src"); cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_dst), "cc_dst"); + cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src), + "cc_src"); #ifdef TARGET_X86_64 cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0, @@ -7885,6 +7900,7 @@ static inline void gen_intermediate_code_internal(CPUX86State *env, cpu_tmp5 = tcg_temp_new(); cpu_ptr0 = tcg_temp_new_ptr(); cpu_ptr1 = tcg_temp_new_ptr(); + cpu_cc_srcT = tcg_temp_local_new(); gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE; From 8601c0b6c553a018fc62007efa8ac2a71d77f449 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 16:06:38 -0800 Subject: [PATCH 40/61] target-i386: Don't reference ENV through most of cc helpers In preparation for making this a const helper. By using the proper types in the parameters to the helper functions, we get to avoid quite a lot of subsequent casting. 
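(To make the shape of the change concrete, here is one representative helper from cc_helper_template.h before and after; both versions are taken from the hunk below, and SUFFIX/DATA_TYPE are the template parameters that header already uses.)

    /* Before: operands fetched indirectly through env. */
    static int glue(compute_c_add, SUFFIX)(CPUX86State *env)
    {
        int cf;
        target_long src1;

        src1 = CC_SRC;
        cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
        return cf;
    }

    /* After: operands passed in as properly typed parameters. */
    static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
    {
        return dst < src1;
    }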
Signed-off-by: Richard Henderson --- target-i386/cc_helper.c | 217 +++++++++++---------------- target-i386/cc_helper_template.h | 245 ++++++++++++------------------- 2 files changed, 180 insertions(+), 282 deletions(-) diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c index 9422003f2466..61427ddeac8e 100644 --- a/target-i386/cc_helper.c +++ b/target-i386/cc_helper.c @@ -75,125 +75,108 @@ const uint8_t parity_table[256] = { #endif -static int compute_all_eflags(CPUX86State *env) -{ - return CC_SRC; -} - -static int compute_c_eflags(CPUX86State *env) -{ - return CC_SRC & CC_C; -} - uint32_t helper_cc_compute_all(CPUX86State *env, int op) { + target_ulong dst = CC_DST, src1 = CC_SRC; + switch (op) { default: /* should never happen */ return 0; case CC_OP_EFLAGS: - return compute_all_eflags(env); + return src1; case CC_OP_MULB: - return compute_all_mulb(env); + return compute_all_mulb(dst, src1); case CC_OP_MULW: - return compute_all_mulw(env); + return compute_all_mulw(dst, src1); case CC_OP_MULL: - return compute_all_mull(env); + return compute_all_mull(dst, src1); case CC_OP_ADDB: - return compute_all_addb(env); + return compute_all_addb(dst, src1); case CC_OP_ADDW: - return compute_all_addw(env); + return compute_all_addw(dst, src1); case CC_OP_ADDL: - return compute_all_addl(env); + return compute_all_addl(dst, src1); case CC_OP_ADCB: - return compute_all_adcb(env); + return compute_all_adcb(dst, src1); case CC_OP_ADCW: - return compute_all_adcw(env); + return compute_all_adcw(dst, src1); case CC_OP_ADCL: - return compute_all_adcl(env); + return compute_all_adcl(dst, src1); case CC_OP_SUBB: - return compute_all_subb(env); + return compute_all_subb(dst, src1); case CC_OP_SUBW: - return compute_all_subw(env); + return compute_all_subw(dst, src1); case CC_OP_SUBL: - return compute_all_subl(env); + return compute_all_subl(dst, src1); case CC_OP_SBBB: - return compute_all_sbbb(env); + return compute_all_sbbb(dst, src1); case CC_OP_SBBW: - return compute_all_sbbw(env); + return compute_all_sbbw(dst, src1); case CC_OP_SBBL: - return compute_all_sbbl(env); + return compute_all_sbbl(dst, src1); case CC_OP_LOGICB: - return compute_all_logicb(env); + return compute_all_logicb(dst, src1); case CC_OP_LOGICW: - return compute_all_logicw(env); + return compute_all_logicw(dst, src1); case CC_OP_LOGICL: - return compute_all_logicl(env); + return compute_all_logicl(dst, src1); case CC_OP_INCB: - return compute_all_incb(env); + return compute_all_incb(dst, src1); case CC_OP_INCW: - return compute_all_incw(env); + return compute_all_incw(dst, src1); case CC_OP_INCL: - return compute_all_incl(env); + return compute_all_incl(dst, src1); case CC_OP_DECB: - return compute_all_decb(env); + return compute_all_decb(dst, src1); case CC_OP_DECW: - return compute_all_decw(env); + return compute_all_decw(dst, src1); case CC_OP_DECL: - return compute_all_decl(env); + return compute_all_decl(dst, src1); case CC_OP_SHLB: - return compute_all_shlb(env); + return compute_all_shlb(dst, src1); case CC_OP_SHLW: - return compute_all_shlw(env); + return compute_all_shlw(dst, src1); case CC_OP_SHLL: - return compute_all_shll(env); + return compute_all_shll(dst, src1); case CC_OP_SARB: - return compute_all_sarb(env); + return compute_all_sarb(dst, src1); case CC_OP_SARW: - return compute_all_sarw(env); + return compute_all_sarw(dst, src1); case CC_OP_SARL: - return compute_all_sarl(env); + return compute_all_sarl(dst, src1); #ifdef TARGET_X86_64 case CC_OP_MULQ: - return compute_all_mulq(env); - + return 
compute_all_mulq(dst, src1); case CC_OP_ADDQ: - return compute_all_addq(env); - + return compute_all_addq(dst, src1); case CC_OP_ADCQ: - return compute_all_adcq(env); - + return compute_all_adcq(dst, src1); case CC_OP_SUBQ: - return compute_all_subq(env); - + return compute_all_subq(dst, src1); case CC_OP_SBBQ: - return compute_all_sbbq(env); - + return compute_all_sbbq(dst, src1); case CC_OP_LOGICQ: - return compute_all_logicq(env); - + return compute_all_logicq(dst, src1); case CC_OP_INCQ: - return compute_all_incq(env); - + return compute_all_incq(dst, src1); case CC_OP_DECQ: - return compute_all_decq(env); - + return compute_all_decq(dst, src1); case CC_OP_SHLQ: - return compute_all_shlq(env); - + return compute_all_shlq(dst, src1); case CC_OP_SARQ: - return compute_all_sarq(env); + return compute_all_sarq(dst, src1); #endif } } @@ -205,113 +188,85 @@ uint32_t cpu_cc_compute_all(CPUX86State *env, int op) uint32_t helper_cc_compute_c(CPUX86State *env, int op) { + target_ulong dst = CC_DST, src1 = CC_SRC; + switch (op) { default: /* should never happen */ + case CC_OP_LOGICB: + case CC_OP_LOGICW: + case CC_OP_LOGICL: + case CC_OP_LOGICQ: return 0; case CC_OP_EFLAGS: - return compute_c_eflags(env); + case CC_OP_SARB: + case CC_OP_SARW: + case CC_OP_SARL: + case CC_OP_SARQ: + return src1 & 1; + + case CC_OP_INCB: + case CC_OP_INCW: + case CC_OP_INCL: + case CC_OP_INCQ: + case CC_OP_DECB: + case CC_OP_DECW: + case CC_OP_DECL: + case CC_OP_DECQ: + return src1; case CC_OP_MULB: - return compute_c_mull(env); case CC_OP_MULW: - return compute_c_mull(env); case CC_OP_MULL: - return compute_c_mull(env); + case CC_OP_MULQ: + return src1 != 0; case CC_OP_ADDB: - return compute_c_addb(env); + return compute_c_addb(dst, src1); case CC_OP_ADDW: - return compute_c_addw(env); + return compute_c_addw(dst, src1); case CC_OP_ADDL: - return compute_c_addl(env); + return compute_c_addl(dst, src1); case CC_OP_ADCB: - return compute_c_adcb(env); + return compute_c_adcb(dst, src1); case CC_OP_ADCW: - return compute_c_adcw(env); + return compute_c_adcw(dst, src1); case CC_OP_ADCL: - return compute_c_adcl(env); + return compute_c_adcl(dst, src1); case CC_OP_SUBB: - return compute_c_subb(env); + return compute_c_subb(dst, src1); case CC_OP_SUBW: - return compute_c_subw(env); + return compute_c_subw(dst, src1); case CC_OP_SUBL: - return compute_c_subl(env); + return compute_c_subl(dst, src1); case CC_OP_SBBB: - return compute_c_sbbb(env); + return compute_c_sbbb(dst, src1); case CC_OP_SBBW: - return compute_c_sbbw(env); + return compute_c_sbbw(dst, src1); case CC_OP_SBBL: - return compute_c_sbbl(env); - - case CC_OP_LOGICB: - return compute_c_logicb(); - case CC_OP_LOGICW: - return compute_c_logicw(); - case CC_OP_LOGICL: - return compute_c_logicl(); - - case CC_OP_INCB: - return compute_c_incl(env); - case CC_OP_INCW: - return compute_c_incl(env); - case CC_OP_INCL: - return compute_c_incl(env); - - case CC_OP_DECB: - return compute_c_incl(env); - case CC_OP_DECW: - return compute_c_incl(env); - case CC_OP_DECL: - return compute_c_incl(env); + return compute_c_sbbl(dst, src1); case CC_OP_SHLB: - return compute_c_shlb(env); + return compute_c_shlb(dst, src1); case CC_OP_SHLW: - return compute_c_shlw(env); + return compute_c_shlw(dst, src1); case CC_OP_SHLL: - return compute_c_shll(env); - - case CC_OP_SARB: - return compute_c_sarl(env); - case CC_OP_SARW: - return compute_c_sarl(env); - case CC_OP_SARL: - return compute_c_sarl(env); + return compute_c_shll(dst, src1); #ifdef TARGET_X86_64 - case CC_OP_MULQ: - 
return compute_c_mull(env); - case CC_OP_ADDQ: - return compute_c_addq(env); - + return compute_c_addq(dst, src1); case CC_OP_ADCQ: - return compute_c_adcq(env); - + return compute_c_adcq(dst, src1); case CC_OP_SUBQ: - return compute_c_subq(env); - + return compute_c_subq(dst, src1); case CC_OP_SBBQ: - return compute_c_sbbq(env); - - case CC_OP_LOGICQ: - return compute_c_logicq(); - - case CC_OP_INCQ: - return compute_c_incl(env); - - case CC_OP_DECQ: - return compute_c_incl(env); - + return compute_c_sbbq(dst, src1); case CC_OP_SHLQ: - return compute_c_shlq(env); - - case CC_OP_SARQ: - return compute_c_sarl(env); + return compute_c_shlq(dst, src1); #endif } } diff --git a/target-i386/cc_helper_template.h b/target-i386/cc_helper_template.h index 1f94e11dcf06..522b4622854a 100644 --- a/target-i386/cc_helper_template.h +++ b/target-i386/cc_helper_template.h @@ -18,255 +18,198 @@ */ #define DATA_BITS (1 << (3 + SHIFT)) -#define SIGN_MASK (((target_ulong)1) << (DATA_BITS - 1)) #if DATA_BITS == 8 #define SUFFIX b #define DATA_TYPE uint8_t -#define DATA_MASK 0xff #elif DATA_BITS == 16 #define SUFFIX w #define DATA_TYPE uint16_t -#define DATA_MASK 0xffff #elif DATA_BITS == 32 #define SUFFIX l #define DATA_TYPE uint32_t -#define DATA_MASK 0xffffffff #elif DATA_BITS == 64 #define SUFFIX q #define DATA_TYPE uint64_t -#define DATA_MASK 0xffffffffffffffffULL #else #error unhandled operand size #endif +#define SIGN_MASK (((DATA_TYPE)1) << (DATA_BITS - 1)) + /* dynamic flags computation */ -static int glue(compute_all_add, SUFFIX)(CPUX86State *env) +static int glue(compute_all_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { int cf, pf, af, zf, sf, of; - target_long src1, src2; - - src1 = CC_SRC; - src2 = CC_DST - CC_SRC; - cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1; - pf = parity_table[(uint8_t)CC_DST]; - af = (CC_DST ^ src1 ^ src2) & 0x10; - zf = ((DATA_TYPE)CC_DST == 0) << 6; - sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; - of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + DATA_TYPE src2 = dst - src1; + + cf = dst < src1; + pf = parity_table[(uint8_t)dst]; + af = (dst ^ src1 ^ src2) & CC_A; + zf = (dst == 0) * CC_Z; + sf = lshift(dst, 8 - DATA_BITS) & CC_S; + of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; return cf | pf | af | zf | sf | of; } -static int glue(compute_c_add, SUFFIX)(CPUX86State *env) +static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { - int cf; - target_long src1; - - src1 = CC_SRC; - cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1; - return cf; + return dst < src1; } -static int glue(compute_all_adc, SUFFIX)(CPUX86State *env) +static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { int cf, pf, af, zf, sf, of; - target_long src1, src2; - - src1 = CC_SRC; - src2 = CC_DST - CC_SRC - 1; - cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1; - pf = parity_table[(uint8_t)CC_DST]; - af = (CC_DST ^ src1 ^ src2) & 0x10; - zf = ((DATA_TYPE)CC_DST == 0) << 6; - sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; - of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + DATA_TYPE src2 = dst - src1 - 1; + + cf = dst <= src1; + pf = parity_table[(uint8_t)dst]; + af = (dst ^ src1 ^ src2) & 0x10; + zf = (dst == 0) << 6; + sf = lshift(dst, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2 ^ -1) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; return cf | pf | af | zf | sf | of; } -static int glue(compute_c_adc, SUFFIX)(CPUX86State *env) +static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { - int cf; - target_long 
src1; - - src1 = CC_SRC; - cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1; - return cf; + return dst <= src1; } -static int glue(compute_all_sub, SUFFIX)(CPUX86State *env) +static int glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) { int cf, pf, af, zf, sf, of; - target_long src1, src2; - - src1 = CC_DST + CC_SRC; - src2 = CC_SRC; - cf = (DATA_TYPE)src1 < (DATA_TYPE)src2; - pf = parity_table[(uint8_t)CC_DST]; - af = (CC_DST ^ src1 ^ src2) & 0x10; - zf = ((DATA_TYPE)CC_DST == 0) << 6; - sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; - of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + DATA_TYPE src1 = dst + src2; + + cf = src1 < src2; + pf = parity_table[(uint8_t)dst]; + af = (dst ^ src1 ^ src2) & CC_A; + zf = (dst == 0) * CC_Z; + sf = lshift(dst, 8 - DATA_BITS) & CC_S; + of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; return cf | pf | af | zf | sf | of; } -static int glue(compute_c_sub, SUFFIX)(CPUX86State *env) +static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) { - int cf; - target_long src1, src2; + DATA_TYPE src1 = dst + src2; - src1 = CC_DST + CC_SRC; - src2 = CC_SRC; - cf = (DATA_TYPE)src1 < (DATA_TYPE)src2; - return cf; + return src1 < src2; } -static int glue(compute_all_sbb, SUFFIX)(CPUX86State *env) +static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) { int cf, pf, af, zf, sf, of; - target_long src1, src2; - - src1 = CC_DST + CC_SRC + 1; - src2 = CC_SRC; - cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2; - pf = parity_table[(uint8_t)CC_DST]; - af = (CC_DST ^ src1 ^ src2) & 0x10; - zf = ((DATA_TYPE)CC_DST == 0) << 6; - sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; - of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + DATA_TYPE src1 = dst + src2 + 1; + + cf = src1 <= src2; + pf = parity_table[(uint8_t)dst]; + af = (dst ^ src1 ^ src2) & 0x10; + zf = (dst == 0) << 6; + sf = lshift(dst, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2) & (src1 ^ dst), 12 - DATA_BITS) & CC_O; return cf | pf | af | zf | sf | of; } -static int glue(compute_c_sbb, SUFFIX)(CPUX86State *env) +static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) { - int cf; - target_long src1, src2; + DATA_TYPE src1 = dst + src2 + 1; - src1 = CC_DST + CC_SRC + 1; - src2 = CC_SRC; - cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2; - return cf; + return src1 <= src2; } -static int glue(compute_all_logic, SUFFIX)(CPUX86State *env) +static int glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { int cf, pf, af, zf, sf, of; cf = 0; - pf = parity_table[(uint8_t)CC_DST]; + pf = parity_table[(uint8_t)dst]; af = 0; - zf = ((DATA_TYPE)CC_DST == 0) << 6; - sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + zf = (dst == 0) * CC_Z; + sf = lshift(dst, 8 - DATA_BITS) & CC_S; of = 0; return cf | pf | af | zf | sf | of; } -static int glue(compute_c_logic, SUFFIX)(void) -{ - return 0; -} - -static int glue(compute_all_inc, SUFFIX)(CPUX86State *env) +static int glue(compute_all_inc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { int cf, pf, af, zf, sf, of; - target_long src1, src2; + DATA_TYPE src2; - src1 = CC_DST - 1; + cf = src1; + src1 = dst - 1; src2 = 1; - cf = CC_SRC; - pf = parity_table[(uint8_t)CC_DST]; - af = (CC_DST ^ src1 ^ src2) & 0x10; - zf = ((DATA_TYPE)CC_DST == 0) << 6; - sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; - of = ((CC_DST & DATA_MASK) == SIGN_MASK) << 11; + pf = parity_table[(uint8_t)dst]; + af = (dst ^ src1 ^ src2) & CC_A; + zf = (dst == 0) * CC_Z; + sf = lshift(dst, 8 - DATA_BITS) & CC_S; + of = (dst == SIGN_MASK) * 
CC_O; return cf | pf | af | zf | sf | of; } -#if DATA_BITS == 32 -static int glue(compute_c_inc, SUFFIX)(CPUX86State *env) -{ - return CC_SRC; -} -#endif - -static int glue(compute_all_dec, SUFFIX)(CPUX86State *env) +static int glue(compute_all_dec, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { int cf, pf, af, zf, sf, of; - target_long src1, src2; + DATA_TYPE src2; - src1 = CC_DST + 1; + cf = src1; + src1 = dst + 1; src2 = 1; - cf = CC_SRC; - pf = parity_table[(uint8_t)CC_DST]; - af = (CC_DST ^ src1 ^ src2) & 0x10; - zf = ((DATA_TYPE)CC_DST == 0) << 6; - sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; - of = ((CC_DST & DATA_MASK) == ((target_ulong)SIGN_MASK - 1)) << 11; + pf = parity_table[(uint8_t)dst]; + af = (dst ^ src1 ^ src2) & CC_A; + zf = (dst == 0) * CC_Z; + sf = lshift(dst, 8 - DATA_BITS) & CC_S; + of = (dst == SIGN_MASK - 1) * CC_O; return cf | pf | af | zf | sf | of; } -static int glue(compute_all_shl, SUFFIX)(CPUX86State *env) +static int glue(compute_all_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { int cf, pf, af, zf, sf, of; - cf = (CC_SRC >> (DATA_BITS - 1)) & CC_C; - pf = parity_table[(uint8_t)CC_DST]; + cf = (src1 >> (DATA_BITS - 1)) & CC_C; + pf = parity_table[(uint8_t)dst]; af = 0; /* undefined */ - zf = ((DATA_TYPE)CC_DST == 0) << 6; - sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; - /* of is defined if shift count == 1 */ - of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O; + zf = (dst == 0) * CC_Z; + sf = lshift(dst, 8 - DATA_BITS) & CC_S; + /* of is defined iff shift count == 1 */ + of = lshift(src1 ^ dst, 12 - DATA_BITS) & CC_O; return cf | pf | af | zf | sf | of; } -static int glue(compute_c_shl, SUFFIX)(CPUX86State *env) +static int glue(compute_c_shl, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { - return (CC_SRC >> (DATA_BITS - 1)) & CC_C; + return (src1 >> (DATA_BITS - 1)) & CC_C; } -#if DATA_BITS == 32 -static int glue(compute_c_sar, SUFFIX)(CPUX86State *env) -{ - return CC_SRC & 1; -} -#endif - -static int glue(compute_all_sar, SUFFIX)(CPUX86State *env) +static int glue(compute_all_sar, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) { int cf, pf, af, zf, sf, of; - cf = CC_SRC & 1; - pf = parity_table[(uint8_t)CC_DST]; + cf = src1 & 1; + pf = parity_table[(uint8_t)dst]; af = 0; /* undefined */ - zf = ((DATA_TYPE)CC_DST == 0) << 6; - sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; - /* of is defined if shift count == 1 */ - of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O; + zf = (dst == 0) * CC_Z; + sf = lshift(dst, 8 - DATA_BITS) & CC_S; + /* of is defined iff shift count == 1 */ + of = lshift(src1 ^ dst, 12 - DATA_BITS) & CC_O; return cf | pf | af | zf | sf | of; } -#if DATA_BITS == 32 -static int glue(compute_c_mul, SUFFIX)(CPUX86State *env) -{ - int cf; - - cf = (CC_SRC != 0); - return cf; -} -#endif - /* NOTE: we compute the flags like the P4. On olders CPUs, only OF and - CF are modified and it is slower to do that. */ -static int glue(compute_all_mul, SUFFIX)(CPUX86State *env) + CF are modified and it is slower to do that. Note as well that we + don't truncate SRC1 for computing carry to DATA_TYPE. 
*/ +static int glue(compute_all_mul, SUFFIX)(DATA_TYPE dst, target_long src1) { int cf, pf, af, zf, sf, of; - cf = (CC_SRC != 0); - pf = parity_table[(uint8_t)CC_DST]; + cf = (src1 != 0); + pf = parity_table[(uint8_t)dst]; af = 0; /* undefined */ - zf = ((DATA_TYPE)CC_DST == 0) << 6; - sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; - of = cf << 11; + zf = (dst == 0) * CC_Z; + sf = lshift(dst, 8 - DATA_BITS) & CC_S; + of = cf * CC_O; return cf | pf | af | zf | sf | of; } From db9f2597722d5d8bc5f2330f186288d893114338 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 16:10:49 -0800 Subject: [PATCH 41/61] target-i386: Make helper_cc_compute_{all,c} const Pass the data in explicitly, rather than indirectly via env. This avoids all sorts of unnecessary register spillage. Signed-off-by: Richard Henderson --- target-i386/cc_helper.c | 12 ++++-------- target-i386/helper.h | 4 ++-- target-i386/translate.c | 31 +++++++++++++++++++++++++++---- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c index 61427ddeac8e..a5d81818047d 100644 --- a/target-i386/cc_helper.c +++ b/target-i386/cc_helper.c @@ -75,10 +75,8 @@ const uint8_t parity_table[256] = { #endif -uint32_t helper_cc_compute_all(CPUX86State *env, int op) +target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op) { - target_ulong dst = CC_DST, src1 = CC_SRC; - switch (op) { default: /* should never happen */ return 0; @@ -183,13 +181,11 @@ uint32_t helper_cc_compute_all(CPUX86State *env, int op) uint32_t cpu_cc_compute_all(CPUX86State *env, int op) { - return helper_cc_compute_all(env, op); + return helper_cc_compute_all(CC_DST, CC_SRC, op); } -uint32_t helper_cc_compute_c(CPUX86State *env, int op) +target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op) { - target_ulong dst = CC_DST, src1 = CC_SRC; - switch (op) { default: /* should never happen */ case CC_OP_LOGICB: @@ -281,7 +277,7 @@ target_ulong helper_read_eflags(CPUX86State *env) { uint32_t eflags; - eflags = helper_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env, CC_OP); eflags |= (DF & DF_MASK); eflags |= env->eflags & ~(VM_MASK | RF_MASK); return eflags; diff --git a/target-i386/helper.h b/target-i386/helper.h index 9ed720d0ed85..901ff73c12fc 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -1,7 +1,7 @@ #include "exec/def-helper.h" -DEF_HELPER_FLAGS_2(cc_compute_all, TCG_CALL_NO_SE, i32, env, int) -DEF_HELPER_FLAGS_2(cc_compute_c, TCG_CALL_NO_SE, i32, env, int) +DEF_HELPER_FLAGS_3(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, int) +DEF_HELPER_FLAGS_3(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, int) DEF_HELPER_0(lock, void) DEF_HELPER_0(unlock, void) diff --git a/target-i386/translate.c b/target-i386/translate.c index 31e344244290..5235aff15e02 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -882,13 +882,37 @@ static void gen_op_update_neg_cc(void) /* compute all eflags to cc_src */ static void gen_compute_eflags(DisasContext *s) { + TCGv zero, dst, src1; + int live, dead; + if (s->cc_op == CC_OP_EFLAGS) { return; } + + TCGV_UNUSED(zero); + dst = cpu_cc_dst; + src1 = cpu_cc_src; + + /* Take care to not read values that are not live. 
*/ + live = cc_op_live[s->cc_op] & ~USES_CC_SRCT; + dead = live ^ (USES_CC_DST | USES_CC_SRC); + if (dead) { + zero = tcg_const_tl(0); + if (dead & USES_CC_DST) { + dst = zero; + } + if (dead & USES_CC_SRC) { + src1 = zero; + } + } + gen_update_cc_op(s); - gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op); + gen_helper_cc_compute_all(cpu_cc_src, dst, src1, cpu_cc_op); set_cc_op(s, CC_OP_EFLAGS); - tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32); + + if (dead) { + tcg_temp_free(zero); + } } typedef struct CCPrepare { @@ -980,8 +1004,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) /* The need to compute only C from CC_OP_DYNAMIC is important in efficiently implementing e.g. INC at the start of a TB. */ gen_update_cc_op(s); - gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op); - tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32); + gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src, cpu_cc_op); return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, .mask = -1, .no_setcond = true }; } From 988c3eb0d6f41ac13f4ec145c637f12c776de602 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 16:03:16 -0800 Subject: [PATCH 42/61] target-i386: Use CC_SRC2 for ADC and SBB Add another slot in ENV and store two of the three inputs. This lets us do less work when carry-out is not needed, and avoids the unpredictable CC_OP after translating these insns. Signed-off-by: Richard Henderson --- target-i386/cc_helper.c | 40 ++++++++-------- target-i386/cc_helper_template.h | 26 ++++++----- target-i386/cpu.h | 10 ++-- target-i386/helper.h | 4 +- target-i386/translate.c | 80 +++++++++++++------------------- 5 files changed, 75 insertions(+), 85 deletions(-) diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c index a5d81818047d..218a9b519f9a 100644 --- a/target-i386/cc_helper.c +++ b/target-i386/cc_helper.c @@ -75,7 +75,8 @@ const uint8_t parity_table[256] = { #endif -target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op) +target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, + target_ulong src2, int op) { switch (op) { default: /* should never happen */ @@ -99,11 +100,11 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op) return compute_all_addl(dst, src1); case CC_OP_ADCB: - return compute_all_adcb(dst, src1); + return compute_all_adcb(dst, src1, src2); case CC_OP_ADCW: - return compute_all_adcw(dst, src1); + return compute_all_adcw(dst, src1, src2); case CC_OP_ADCL: - return compute_all_adcl(dst, src1); + return compute_all_adcl(dst, src1, src2); case CC_OP_SUBB: return compute_all_subb(dst, src1); @@ -113,11 +114,11 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op) return compute_all_subl(dst, src1); case CC_OP_SBBB: - return compute_all_sbbb(dst, src1); + return compute_all_sbbb(dst, src1, src2); case CC_OP_SBBW: - return compute_all_sbbw(dst, src1); + return compute_all_sbbw(dst, src1, src2); case CC_OP_SBBL: - return compute_all_sbbl(dst, src1); + return compute_all_sbbl(dst, src1, src2); case CC_OP_LOGICB: return compute_all_logicb(dst, src1); @@ -160,11 +161,11 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op) case CC_OP_ADDQ: return compute_all_addq(dst, src1); case CC_OP_ADCQ: - return compute_all_adcq(dst, src1); + return compute_all_adcq(dst, src1, src2); case CC_OP_SUBQ: return compute_all_subq(dst, src1); case CC_OP_SBBQ: - return compute_all_sbbq(dst, src1); + return compute_all_sbbq(dst, src1, src2); case 
CC_OP_LOGICQ: return compute_all_logicq(dst, src1); case CC_OP_INCQ: @@ -181,10 +182,11 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, int op) uint32_t cpu_cc_compute_all(CPUX86State *env, int op) { - return helper_cc_compute_all(CC_DST, CC_SRC, op); + return helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, op); } -target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op) +target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, + target_ulong src2, int op) { switch (op) { default: /* should never happen */ @@ -225,11 +227,11 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op) return compute_c_addl(dst, src1); case CC_OP_ADCB: - return compute_c_adcb(dst, src1); + return compute_c_adcb(dst, src1, src2); case CC_OP_ADCW: - return compute_c_adcw(dst, src1); + return compute_c_adcw(dst, src1, src2); case CC_OP_ADCL: - return compute_c_adcl(dst, src1); + return compute_c_adcl(dst, src1, src2); case CC_OP_SUBB: return compute_c_subb(dst, src1); @@ -239,11 +241,11 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op) return compute_c_subl(dst, src1); case CC_OP_SBBB: - return compute_c_sbbb(dst, src1); + return compute_c_sbbb(dst, src1, src2); case CC_OP_SBBW: - return compute_c_sbbw(dst, src1); + return compute_c_sbbw(dst, src1, src2); case CC_OP_SBBL: - return compute_c_sbbl(dst, src1); + return compute_c_sbbl(dst, src1, src2); case CC_OP_SHLB: return compute_c_shlb(dst, src1); @@ -256,11 +258,11 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, int op) case CC_OP_ADDQ: return compute_c_addq(dst, src1); case CC_OP_ADCQ: - return compute_c_adcq(dst, src1); + return compute_c_adcq(dst, src1, src2); case CC_OP_SUBQ: return compute_c_subq(dst, src1); case CC_OP_SBBQ: - return compute_c_sbbq(dst, src1); + return compute_c_sbbq(dst, src1, src2); case CC_OP_SHLQ: return compute_c_shlq(dst, src1); #endif diff --git a/target-i386/cc_helper_template.h b/target-i386/cc_helper_template.h index 522b4622854a..87f47d2e972b 100644 --- a/target-i386/cc_helper_template.h +++ b/target-i386/cc_helper_template.h @@ -58,12 +58,13 @@ static int glue(compute_c_add, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) return dst < src1; } -static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) +static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, + DATA_TYPE src3) { int cf, pf, af, zf, sf, of; - DATA_TYPE src2 = dst - src1 - 1; + DATA_TYPE src2 = dst - src1 - src3; - cf = dst <= src1; + cf = (src3 ? dst <= src1 : dst < src1); pf = parity_table[(uint8_t)dst]; af = (dst ^ src1 ^ src2) & 0x10; zf = (dst == 0) << 6; @@ -72,9 +73,10 @@ static int glue(compute_all_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) return cf | pf | af | zf | sf | of; } -static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) +static int glue(compute_c_adc, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1, + DATA_TYPE src3) { - return dst <= src1; + return src3 ? dst <= src1 : dst < src1; } static int glue(compute_all_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) @@ -98,12 +100,13 @@ static int glue(compute_c_sub, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) return src1 < src2; } -static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) +static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2, + DATA_TYPE src3) { int cf, pf, af, zf, sf, of; - DATA_TYPE src1 = dst + src2 + 1; + DATA_TYPE src1 = dst + src2 + src3; - cf = src1 <= src2; + cf = (src3 ? 
src1 <= src2 : src1 < src2); pf = parity_table[(uint8_t)dst]; af = (dst ^ src1 ^ src2) & 0x10; zf = (dst == 0) << 6; @@ -112,11 +115,12 @@ static int glue(compute_all_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) return cf | pf | af | zf | sf | of; } -static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2) +static int glue(compute_c_sbb, SUFFIX)(DATA_TYPE dst, DATA_TYPE src2, + DATA_TYPE src3) { - DATA_TYPE src1 = dst + src2 + 1; + DATA_TYPE src1 = dst + src2 + src3; - return src1 <= src2; + return (src3 ? src1 <= src2 : src1 < src2); } static int glue(compute_all_logic, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 8c4c6052998a..1fa9dc82679a 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -725,8 +725,9 @@ typedef struct CPUX86State { stored elsewhere */ /* emulator internal eflags handling */ - target_ulong cc_src; target_ulong cc_dst; + target_ulong cc_src; + target_ulong cc_src2; uint32_t cc_op; int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */ uint32_t hflags; /* TB flags, see HF_xxx constants. These flags @@ -1116,9 +1117,10 @@ static inline int cpu_mmu_index (CPUX86State *env) #define EIP (env->eip) #define DF (env->df) -#define CC_SRC (env->cc_src) -#define CC_DST (env->cc_dst) -#define CC_OP (env->cc_op) +#define CC_DST (env->cc_dst) +#define CC_SRC (env->cc_src) +#define CC_SRC2 (env->cc_src2) +#define CC_OP (env->cc_op) /* n must be a constant to be efficient */ static inline target_long lshift(target_long x, int n) diff --git a/target-i386/helper.h b/target-i386/helper.h index 901ff73c12fc..4c46ab1b40d4 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -1,7 +1,7 @@ #include "exec/def-helper.h" -DEF_HELPER_FLAGS_3(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, int) -DEF_HELPER_FLAGS_3(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, int) +DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int) +DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int) DEF_HELPER_0(lock, void) DEF_HELPER_0(unlock, void) diff --git a/target-i386/translate.c b/target-i386/translate.c index 5235aff15e02..f667f9333bfe 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -61,7 +61,7 @@ /* global register indexes */ static TCGv_ptr cpu_env; static TCGv cpu_A0; -static TCGv cpu_cc_src, cpu_cc_dst, cpu_cc_srcT; +static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT; static TCGv_i32 cpu_cc_op; static TCGv cpu_regs[CPU_NB_REGS]; /* local temps */ @@ -188,18 +188,19 @@ enum { enum { USES_CC_DST = 1, USES_CC_SRC = 2, - USES_CC_SRCT = 4, + USES_CC_SRC2 = 4, + USES_CC_SRCT = 8, }; /* Bit set if the global variable is live after setting CC_OP to X. */ static const uint8_t cc_op_live[CC_OP_NB] = { - [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC, + [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2, [CC_OP_EFLAGS] = USES_CC_SRC, [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC, [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC, - [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2, [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT, - [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2, [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST, [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC, [CC_OP_DECB ... 
CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC, @@ -223,6 +224,9 @@ static void set_cc_op(DisasContext *s, CCOp op) if (dead & USES_CC_SRC) { tcg_gen_discard_tl(cpu_cc_src); } + if (dead & USES_CC_SRC2) { + tcg_gen_discard_tl(cpu_cc_src2); + } if (dead & USES_CC_SRCT) { tcg_gen_discard_tl(cpu_cc_srcT); } @@ -867,6 +871,13 @@ static void gen_op_update2_cc(void) tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); } +static void gen_op_update3_cc(TCGv reg) +{ + tcg_gen_mov_tl(cpu_cc_src2, reg); + tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); +} + static inline void gen_op_testl_T0_T1_cc(void) { tcg_gen_and_tl(cpu_cc_dst, cpu_T[0], cpu_T[1]); @@ -882,7 +893,7 @@ static void gen_op_update_neg_cc(void) /* compute all eflags to cc_src */ static void gen_compute_eflags(DisasContext *s) { - TCGv zero, dst, src1; + TCGv zero, dst, src1, src2; int live, dead; if (s->cc_op == CC_OP_EFLAGS) { @@ -892,10 +903,11 @@ static void gen_compute_eflags(DisasContext *s) TCGV_UNUSED(zero); dst = cpu_cc_dst; src1 = cpu_cc_src; + src2 = cpu_cc_src2; /* Take care to not read values that are not live. */ live = cc_op_live[s->cc_op] & ~USES_CC_SRCT; - dead = live ^ (USES_CC_DST | USES_CC_SRC); + dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2); if (dead) { zero = tcg_const_tl(0); if (dead & USES_CC_DST) { @@ -904,10 +916,13 @@ static void gen_compute_eflags(DisasContext *s) if (dead & USES_CC_SRC) { src1 = zero; } + if (dead & USES_CC_SRC2) { + src2 = zero; + } } gen_update_cc_op(s); - gen_helper_cc_compute_all(cpu_cc_src, dst, src1, cpu_cc_op); + gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op); set_cc_op(s, CC_OP_EFLAGS); if (dead) { @@ -951,30 +966,6 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0, .reg2 = t1, .mask = -1, .use_reg2 = true }; - case CC_OP_SBBB ... CC_OP_SBBQ: - /* (DATA_TYPE)(CC_DST + CC_SRC + 1) <= (DATA_TYPE)CC_SRC */ - size = s->cc_op - CC_OP_SBBB; - t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); - if (TCGV_EQUAL(t1, reg) && TCGV_EQUAL(reg, cpu_cc_src)) { - tcg_gen_mov_tl(cpu_tmp0, cpu_cc_src); - t1 = cpu_tmp0; - } - - tcg_gen_add_tl(reg, cpu_cc_dst, cpu_cc_src); - tcg_gen_addi_tl(reg, reg, 1); - gen_extu(size, reg); - t0 = reg; - goto adc_sbb; - - case CC_OP_ADCB ... CC_OP_ADCQ: - /* (DATA_TYPE)CC_DST <= (DATA_TYPE)CC_SRC */ - size = s->cc_op - CC_OP_ADCB; - t1 = gen_ext_tl(cpu_tmp0, cpu_cc_src, size, false); - t0 = gen_ext_tl(reg, cpu_cc_dst, size, false); - adc_sbb: - return (CCPrepare) { .cond = TCG_COND_LEU, .reg = t0, - .reg2 = t1, .mask = -1, .use_reg2 = true }; - case CC_OP_LOGICB ... CC_OP_LOGICQ: return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; @@ -1004,7 +995,8 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) /* The need to compute only C from CC_OP_DYNAMIC is important in efficiently implementing e.g. INC at the start of a TB. 
*/ gen_update_cc_op(s); - gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src, cpu_cc_op); + gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src, + cpu_cc_src2, cpu_cc_op); return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, .mask = -1, .no_setcond = true }; } @@ -1442,18 +1434,10 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) gen_op_mov_reg_T0(ot, d); else gen_op_st_T0_A0(ot + s1->mem_index); - tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4); - tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2); - tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_ADDB + ot); - set_cc_op(s1, CC_OP_DYNAMIC); + gen_op_update3_cc(cpu_tmp4); + set_cc_op(s1, CC_OP_ADCB + ot); break; case OP_SBBL: - /* - * No need to store cpu_cc_srcT, because it is used only - * when the cc_op is known. - */ gen_compute_eflags_c(s1, cpu_tmp4); tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_T[1]); tcg_gen_sub_tl(cpu_T[0], cpu_T[0], cpu_tmp4); @@ -1461,12 +1445,8 @@ static void gen_op(DisasContext *s1, int op, int ot, int d) gen_op_mov_reg_T0(ot, d); else gen_op_st_T0_A0(ot + s1->mem_index); - tcg_gen_mov_tl(cpu_cc_src, cpu_T[1]); - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp4); - tcg_gen_shli_i32(cpu_tmp2_i32, cpu_tmp2_i32, 2); - tcg_gen_addi_i32(cpu_cc_op, cpu_tmp2_i32, CC_OP_SUBB + ot); - set_cc_op(s1, CC_OP_DYNAMIC); + gen_op_update3_cc(cpu_tmp4); + set_cc_op(s1, CC_OP_SBBB + ot); break; case OP_ADDL: gen_op_addl_T0_T1(); @@ -7788,6 +7768,8 @@ void optimize_flags_init(void) "cc_dst"); cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src), "cc_src"); + cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2), + "cc_src2"); #ifdef TARGET_X86_64 cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0, From 4a6fd938f5457ee161d2acbd9364608a2a68b7a1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 10 Jan 2013 13:29:23 -0800 Subject: [PATCH 43/61] target-i386: Tidy prefix parsing Avoid duplicating switch statement between 32 and 64-bit modes. Signed-off-by: Richard Henderson --- target-i386/translate.c | 134 ++++++++++++++++------------------------ 1 file changed, 52 insertions(+), 82 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index f667f9333bfe..e5cda94805f9 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -4267,44 +4267,44 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, next_byte: b = cpu_ldub_code(env, s->pc); s->pc++; - /* check prefixes */ + /* Collect prefixes. 
*/ + switch (b) { + case 0xf3: + prefixes |= PREFIX_REPZ; + goto next_byte; + case 0xf2: + prefixes |= PREFIX_REPNZ; + goto next_byte; + case 0xf0: + prefixes |= PREFIX_LOCK; + goto next_byte; + case 0x2e: + s->override = R_CS; + goto next_byte; + case 0x36: + s->override = R_SS; + goto next_byte; + case 0x3e: + s->override = R_DS; + goto next_byte; + case 0x26: + s->override = R_ES; + goto next_byte; + case 0x64: + s->override = R_FS; + goto next_byte; + case 0x65: + s->override = R_GS; + goto next_byte; + case 0x66: + prefixes |= PREFIX_DATA; + goto next_byte; + case 0x67: + prefixes |= PREFIX_ADR; + goto next_byte; #ifdef TARGET_X86_64 - if (CODE64(s)) { - switch (b) { - case 0xf3: - prefixes |= PREFIX_REPZ; - goto next_byte; - case 0xf2: - prefixes |= PREFIX_REPNZ; - goto next_byte; - case 0xf0: - prefixes |= PREFIX_LOCK; - goto next_byte; - case 0x2e: - s->override = R_CS; - goto next_byte; - case 0x36: - s->override = R_SS; - goto next_byte; - case 0x3e: - s->override = R_DS; - goto next_byte; - case 0x26: - s->override = R_ES; - goto next_byte; - case 0x64: - s->override = R_FS; - goto next_byte; - case 0x65: - s->override = R_GS; - goto next_byte; - case 0x66: - prefixes |= PREFIX_DATA; - goto next_byte; - case 0x67: - prefixes |= PREFIX_ADR; - goto next_byte; - case 0x40 ... 0x4f: + case 0x40 ... 0x4f: + if (CODE64(s)) { /* REX prefix */ rex_w = (b >> 3) & 1; rex_r = (b & 0x4) << 1; @@ -4313,58 +4313,28 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, x86_64_hregs = 1; /* select uniform byte register addressing */ goto next_byte; } + break; +#endif + } + + /* Post-process prefixes. */ + if (prefixes & PREFIX_DATA) { + dflag ^= 1; + } + if (prefixes & PREFIX_ADR) { + aflag ^= 1; + } +#ifdef TARGET_X86_64 + if (CODE64(s)) { if (rex_w == 1) { /* 0x66 is ignored if rex.w is set */ dflag = 2; - } else { - if (prefixes & PREFIX_DATA) - dflag ^= 1; } - if (!(prefixes & PREFIX_ADR)) + if (!(prefixes & PREFIX_ADR)) { aflag = 2; - } else -#endif - { - switch (b) { - case 0xf3: - prefixes |= PREFIX_REPZ; - goto next_byte; - case 0xf2: - prefixes |= PREFIX_REPNZ; - goto next_byte; - case 0xf0: - prefixes |= PREFIX_LOCK; - goto next_byte; - case 0x2e: - s->override = R_CS; - goto next_byte; - case 0x36: - s->override = R_SS; - goto next_byte; - case 0x3e: - s->override = R_DS; - goto next_byte; - case 0x26: - s->override = R_ES; - goto next_byte; - case 0x64: - s->override = R_FS; - goto next_byte; - case 0x65: - s->override = R_GS; - goto next_byte; - case 0x66: - prefixes |= PREFIX_DATA; - goto next_byte; - case 0x67: - prefixes |= PREFIX_ADR; - goto next_byte; } - if (prefixes & PREFIX_DATA) - dflag ^= 1; - if (prefixes & PREFIX_ADR) - aflag ^= 1; } +#endif s->prefix = prefixes; s->aflag = aflag; From 701ed211d62b2b0dba732d75997c4bbf37010c1e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 11 Jan 2013 11:35:02 -0800 Subject: [PATCH 44/61] target-i386: Decode the VEX prefixes No actual required uses of these encodings yet. 
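For reference, the bit layout being unpacked here is the standard VEX encoding. A minimal standalone sketch follows (the struct and function names are made up for illustration; the patch itself does this inline in disas_insn and keeps rex_r/rex_x/rex_b pre-shifted for register numbering):

    #include <stdint.h>

    /* Three-byte VEX: 0xC4, then [~R ~X ~B m-mmmm], then [W ~vvvv L pp]. */
    typedef struct {
        int r, x, b, w;   /* REX-like extension bits, already de-inverted */
        int mmmmm;        /* implied leading opcode bytes: 1=0F, 2=0F38, 3=0F3A */
        int vvvv;         /* extra (non-destructive) source register */
        int l;            /* vector length: 0 = 128-bit, 1 = 256-bit */
        int pp;           /* implied prefix: 0 = none, 1 = 66, 2 = F3, 3 = F2 */
    } Vex3;

    static Vex3 decode_vex3(uint8_t b2, uint8_t b3)
    {
        Vex3 v;
        v.r     = (~b2 >> 7) & 1;
        v.x     = (~b2 >> 6) & 1;
        v.b     = (~b2 >> 5) & 1;
        v.mmmmm = b2 & 0x1f;
        v.w     = (b3 >> 7) & 1;
        v.vvvv  = (~b3 >> 3) & 0xf;
        v.l     = (b3 >> 2) & 1;
        v.pp    = b3 & 3;
        return v;
    }

The two-byte form (0xC5) carries only R, vvvv, L and pp, with an implied 0F escape and W = 0, which is why the code below simply reuses that byte as vex3.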
Signed-off-by: Richard Henderson --- target-i386/translate.c | 68 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index e5cda94805f9..f824b9916f6e 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -37,6 +37,7 @@ #define PREFIX_LOCK 0x04 #define PREFIX_DATA 0x08 #define PREFIX_ADR 0x10 +#define PREFIX_VEX 0x20 #ifdef TARGET_X86_64 #define CODE64(s) ((s)->code64) @@ -98,6 +99,8 @@ typedef struct DisasContext { int code64; /* 64 bit code segment */ int rex_x, rex_b; #endif + int vex_l; /* vex vector length */ + int vex_v; /* vex vvvv register, without 1's compliment. */ int ss32; /* 32 bit stack segment */ CCOp cc_op; /* current CC operation */ bool cc_op_dirty; @@ -4264,6 +4267,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, x86_64_hregs = 0; #endif s->rip_offset = 0; /* for relative ip address */ + s->vex_l = 0; + s->vex_v = 0; next_byte: b = cpu_ldub_code(env, s->pc); s->pc++; @@ -4315,6 +4320,63 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; #endif + case 0xc5: /* 2-byte VEX */ + case 0xc4: /* 3-byte VEX */ + /* VEX prefixes cannot be used except in 32-bit mode. + Otherwise the instruction is LES or LDS. */ + if (s->code32 && !s->vm86) { + static const int pp_prefix[4] = { + 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ + }; + int vex3, vex2 = cpu_ldub_code(env, s->pc); + + if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) { + /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b, + otherwise the instruction is LES or LDS. */ + break; + } + s->pc++; + + /* 4.1.1-4.1.3: No preceeding lock, 66, f2, f3, or rex prefixes. */ + if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ + | PREFIX_LOCK | PREFIX_DATA)) { + goto illegal_op; + } +#ifdef TARGET_X86_64 + if (x86_64_hregs) { + goto illegal_op; + } +#endif + rex_r = (~vex2 >> 4) & 8; + if (b == 0xc5) { + vex3 = vex2; + b = cpu_ldub_code(env, s->pc++); + } else { +#ifdef TARGET_X86_64 + s->rex_x = (~vex2 >> 3) & 8; + s->rex_b = (~vex2 >> 2) & 8; +#endif + vex3 = cpu_ldub_code(env, s->pc++); + rex_w = (vex3 >> 7) & 1; + switch (vex2 & 0x1f) { + case 0x01: /* Implied 0f leading opcode bytes. */ + b = cpu_ldub_code(env, s->pc++) | 0x100; + break; + case 0x02: /* Implied 0f 38 leading opcode bytes. */ + b = 0x138; + break; + case 0x03: /* Implied 0f 3a leading opcode bytes. */ + b = 0x13a; + break; + default: /* Reserved for future use. */ + goto illegal_op; + } + } + s->vex_v = (~vex3 >> 3) & 0xf; + s->vex_l = (vex3 >> 2) & 1; + prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX; + } + break; } /* Post-process prefixes. */ @@ -5461,13 +5523,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 0xc4: /* les Gv */ - if (CODE64(s)) - goto illegal_op; + /* In CODE64 this is VEX3; see above. */ op = R_ES; goto do_lxx; case 0xc5: /* lds Gv */ - if (CODE64(s)) - goto illegal_op; + /* In CODE64 this is VEX2; see above. 
*/ op = R_DS; goto do_lxx; case 0x1b2: /* lss Gv */ From 111994ee05b810d81dc6abea7fac5280e48dc198 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 10 Jan 2013 12:06:59 -0800 Subject: [PATCH 45/61] target-i386: Implement MOVBE Signed-off-by: Richard Henderson --- target-i386/cpu.c | 16 +++++- target-i386/translate.c | 122 ++++++++++++++++++++++++++++++++-------- 2 files changed, 110 insertions(+), 28 deletions(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index dfcf86e86243..0f195337cb43 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -389,10 +389,15 @@ typedef struct x86_def_t { CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */ #define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | \ CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | CPUID_EXT_POPCNT | \ - CPUID_EXT_HYPERVISOR) + CPUID_EXT_MOVBE | CPUID_EXT_HYPERVISOR) /* missing: - CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_EST, - CPUID_EXT_TM2, CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_XSAVE */ + CPUID_EXT_PCLMULQDQ, CPUID_EXT_DTES64, CPUID_EXT_DSCPL, + CPUID_EXT_VMX, CPUID_EXT_SMX, CPUID_EXT_EST, CPUID_EXT_TM2, + CPUID_EXT_CID, CPUID_EXT_FMA, CPUID_EXT_XTPR, CPUID_EXT_PDCM, + CPUID_EXT_PCID, CPUID_EXT_DCA, CPUID_EXT_SSE41, CPUID_EXT_SSE42, + CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AES, + CPUID_EXT_XSAVE, CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, + CPUID_EXT_F16C, CPUID_EXT_RDRAND */ #define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \ CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \ CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT) @@ -402,6 +407,11 @@ typedef struct x86_def_t { CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A) #define TCG_SVM_FEATURES 0 #define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP) + /* missing: + CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_BMI1, CPUID_7_0_EBX_HLE, + CPUID_7_0_EBX_AVX2, CPUID_7_0_EBX_BMI2, CPUID_7_0_EBX_ERMS, + CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, CPUID_7_0_EBX_RDSEED, + CPUID_7_0_EBX_ADX */ /* built-in CPU model definitions */ diff --git a/target-i386/translate.c b/target-i386/translate.c index f824b9916f6e..5a91ff184a9f 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -3837,11 +3837,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, reg = ((modrm >> 3) & 7) | rex_r; gen_op_mov_reg_T0(OT_LONG, reg); break; + case 0x138: - if (s->prefix & PREFIX_REPNZ) - goto crc32; case 0x038: b = modrm; + if ((b & 0xf0) == 0xf0) { + goto do_0f_38_fx; + } modrm = cpu_ldub_code(env, s->pc++); rm = modrm & 7; reg = ((modrm >> 3) & 7) | rex_r; @@ -3914,36 +3916,106 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, set_cc_op(s, CC_OP_EFLAGS); } break; - case 0x338: /* crc32 */ - crc32: - b = modrm; + + case 0x238: + case 0x338: + do_0f_38_fx: + /* Various integer extensions at 0f 38 f[0-f]. */ + b = modrm | (b1 << 8); modrm = cpu_ldub_code(env, s->pc++); reg = ((modrm >> 3) & 7) | rex_r; - if (b != 0xf0 && b != 0xf1) - goto illegal_op; - if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) - goto illegal_op; + switch (b) { + case 0x3f0: /* crc32 Gd,Eb */ + case 0x3f1: /* crc32 Gd,Ey */ + do_crc32: + if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) { + goto illegal_op; + } + if ((b & 0xff) == 0xf0) { + ot = OT_BYTE; + } else if (s->dflag != 2) { + ot = (s->prefix & PREFIX_DATA ? 
OT_WORD : OT_LONG); + } else { + ot = OT_QUAD; + } - if (b == 0xf0) - ot = OT_BYTE; - else if (b == 0xf1 && s->dflag != 2) - if (s->prefix & PREFIX_DATA) - ot = OT_WORD; - else - ot = OT_LONG; - else - ot = OT_QUAD; + gen_op_mov_TN_reg(OT_LONG, 0, reg); + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + gen_helper_crc32(cpu_T[0], cpu_tmp2_i32, + cpu_T[0], tcg_const_i32(8 << ot)); - gen_op_mov_TN_reg(OT_LONG, 0, reg); - tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - gen_helper_crc32(cpu_T[0], cpu_tmp2_i32, - cpu_T[0], tcg_const_i32(8 << ot)); + ot = (s->dflag == 2) ? OT_QUAD : OT_LONG; + gen_op_mov_reg_T0(ot, reg); + break; - ot = (s->dflag == 2) ? OT_QUAD : OT_LONG; - gen_op_mov_reg_T0(ot, reg); + case 0x1f0: /* crc32 or movbe */ + case 0x1f1: + /* For these insns, the f3 prefix is supposed to have priority + over the 66 prefix, but that's not what we implement above + setting b1. */ + if (s->prefix & PREFIX_REPNZ) { + goto do_crc32; + } + /* FALLTHRU */ + case 0x0f0: /* movbe Gy,My */ + case 0x0f1: /* movbe My,Gy */ + if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) { + goto illegal_op; + } + if (s->dflag != 2) { + ot = (s->prefix & PREFIX_DATA ? OT_WORD : OT_LONG); + } else { + ot = OT_QUAD; + } + + /* Load the data incoming to the bswap. Note that the TCG + implementation of bswap requires the input be zero + extended. In the case of the loads, we simply know that + gen_op_ld_v via gen_ldst_modrm does that already. */ + if ((b & 1) == 0) { + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + } else { + switch (ot) { + case OT_WORD: + tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[reg]); + break; + default: + tcg_gen_ext32u_tl(cpu_T[0], cpu_regs[reg]); + break; + case OT_QUAD: + tcg_gen_mov_tl(cpu_T[0], cpu_regs[reg]); + break; + } + } + + switch (ot) { + case OT_WORD: + tcg_gen_bswap16_tl(cpu_T[0], cpu_T[0]); + break; + default: + tcg_gen_bswap32_tl(cpu_T[0], cpu_T[0]); + break; +#ifdef TARGET_X86_64 + case OT_QUAD: + tcg_gen_bswap64_tl(cpu_T[0], cpu_T[0]); + break; +#endif + } + + if ((b & 1) == 0) { + gen_op_mov_reg_T0(ot, reg); + } else { + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); + } + break; + + default: + goto illegal_op; + } break; + case 0x03a: case 0x13a: b = modrm; From 7073fbada733c8d10992f00772c9b9299d740e9b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 16:17:10 -0800 Subject: [PATCH 46/61] target-i386: Implement ANDN As this is the first of the BMI insns to be implemented, this carries quite a bit more baggage than normal. 
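As a plain C sketch of what ANDN computes (this is the BMI1 architectural definition, not the QEMU code; SRC1 is the VEX.vvvv operand By, SRC2 the r/m operand Ey):

    #include <stdint.h>

    /* ANDN Gy, By, Ey: dest = ~SRC1 & SRC2.  SF and ZF reflect the result,
       CF and OF are cleared, AF and PF are left undefined. */
    static uint64_t andn64(uint64_t src1, uint64_t src2, int *zf, int *sf)
    {
        uint64_t dst = ~src1 & src2;

        *zf = (dst == 0);
        *sf = (int)(dst >> 63);
        return dst;
    }

The CC_OP_LOGIC* flag treatment used below is compatible with this: CF and OF come out zero, while SF and ZF are derived from CC_DST.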
Signed-off-by: Richard Henderson --- target-i386/cpu.c | 10 +++++----- target-i386/translate.c | 19 +++++++++++++++++-- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 0f195337cb43..0cb64ab583c4 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -406,12 +406,12 @@ typedef struct x86_def_t { #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \ CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A) #define TCG_SVM_FEATURES 0 -#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP) +#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP \ + CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2) /* missing: - CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_BMI1, CPUID_7_0_EBX_HLE, - CPUID_7_0_EBX_AVX2, CPUID_7_0_EBX_BMI2, CPUID_7_0_EBX_ERMS, - CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, CPUID_7_0_EBX_RDSEED, - CPUID_7_0_EBX_ADX */ + CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2, + CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, + CPUID_7_0_EBX_RDSEED, CPUID_7_0_EBX_ADX */ /* built-in CPU model definitions */ diff --git a/target-i386/translate.c b/target-i386/translate.c index 5a91ff184a9f..01ff13154d59 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -2955,8 +2955,9 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = { [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps, (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */ - [0x38] = { SSE_SPECIAL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* SSSE3/SSE4 */ - [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3/SSE4 */ + /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX. */ + [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, + [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* MMX ops and their SSE extensions */ [0x60] = MMX_OP2(punpcklbw), @@ -4011,6 +4012,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } break; + case 0x0f2: /* andn Gy, By, Ey */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? OT_QUAD : OT_LONG; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]); + gen_op_mov_reg_T0(ot, reg); + gen_op_update1_cc(); + set_cc_op(s, CC_OP_LOGICB + ot); + break; + default: goto illegal_op; } From c7ab7565bc6d52cc140230aa4d0533d13d89c8b1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 16:21:33 -0800 Subject: [PATCH 47/61] target-i386: Implement BEXTR Signed-off-by: Richard Henderson --- target-i386/translate.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/target-i386/translate.c b/target-i386/translate.c index 01ff13154d59..d742fe36936d 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -4026,6 +4026,46 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, set_cc_op(s, CC_OP_LOGICB + ot); break; + case 0x0f7: /* bextr Gy, Ey, By */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? OT_QUAD : OT_LONG; + { + TCGv bound, zero; + + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + /* Extract START, and shift the operand. + Shifts larger than operand size get zeros. */ + tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]); + tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_A0); + + bound = tcg_const_tl(ot == OT_QUAD ? 
63 : 31); + zero = tcg_const_tl(0); + tcg_gen_movcond_tl(TCG_COND_LEU, cpu_T[0], cpu_A0, bound, + cpu_T[0], zero); + tcg_temp_free(zero); + + /* Extract the LEN into a mask. Lengths larger than + operand size get all ones. */ + tcg_gen_shri_tl(cpu_A0, cpu_regs[s->vex_v], 8); + tcg_gen_ext8u_tl(cpu_A0, cpu_A0); + tcg_gen_movcond_tl(TCG_COND_LEU, cpu_A0, cpu_A0, bound, + cpu_A0, bound); + tcg_temp_free(bound); + tcg_gen_movi_tl(cpu_T[1], 1); + tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_A0); + tcg_gen_subi_tl(cpu_T[1], cpu_T[1], 1); + tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + + gen_op_mov_reg_T0(ot, reg); + gen_op_update1_cc(); + set_cc_op(s, CC_OP_LOGICB + ot); + } + break; + default: goto illegal_op; } From bc4b43dc2fe88712ad921c05fc1ab9ebc4cb6778 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 16:44:37 -0800 Subject: [PATCH 48/61] target-i386: Implement BLSR, BLSMSK, BLSI Do all of group 17 at one time for ease. Signed-off-by: Richard Henderson --- target-i386/cc_helper.c | 18 ++++++++++++ target-i386/cc_helper_template.h | 18 ++++++++++++ target-i386/cpu.h | 5 ++++ target-i386/helper.c | 7 ++++- target-i386/translate.c | 48 ++++++++++++++++++++++++++++++++ 5 files changed, 95 insertions(+), 1 deletion(-) diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c index 218a9b519f9a..5ea6a0aeae2a 100644 --- a/target-i386/cc_helper.c +++ b/target-i386/cc_helper.c @@ -155,6 +155,13 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, case CC_OP_SARL: return compute_all_sarl(dst, src1); + case CC_OP_BMILGB: + return compute_all_bmilgb(dst, src1); + case CC_OP_BMILGW: + return compute_all_bmilgw(dst, src1); + case CC_OP_BMILGL: + return compute_all_bmilgl(dst, src1); + #ifdef TARGET_X86_64 case CC_OP_MULQ: return compute_all_mulq(dst, src1); @@ -176,6 +183,8 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, return compute_all_shlq(dst, src1); case CC_OP_SARQ: return compute_all_sarq(dst, src1); + case CC_OP_BMILGQ: + return compute_all_bmilgq(dst, src1); #endif } } @@ -254,6 +263,13 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, case CC_OP_SHLL: return compute_c_shll(dst, src1); + case CC_OP_BMILGB: + return compute_c_bmilgb(dst, src1); + case CC_OP_BMILGW: + return compute_c_bmilgw(dst, src1); + case CC_OP_BMILGL: + return compute_c_bmilgl(dst, src1); + #ifdef TARGET_X86_64 case CC_OP_ADDQ: return compute_c_addq(dst, src1); @@ -265,6 +281,8 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, return compute_c_sbbq(dst, src1, src2); case CC_OP_SHLQ: return compute_c_shlq(dst, src1); + case CC_OP_BMILGQ: + return compute_c_bmilgq(dst, src1); #endif } } diff --git a/target-i386/cc_helper_template.h b/target-i386/cc_helper_template.h index 87f47d2e972b..607311f19513 100644 --- a/target-i386/cc_helper_template.h +++ b/target-i386/cc_helper_template.h @@ -217,6 +217,24 @@ static int glue(compute_all_mul, SUFFIX)(DATA_TYPE dst, target_long src1) return cf | pf | af | zf | sf | of; } +static int glue(compute_all_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) +{ + int cf, pf, af, zf, sf, of; + + cf = (src1 == 0); + pf = 0; /* undefined */ + af = 0; /* undefined */ + zf = (dst == 0) * CC_Z; + sf = lshift(dst, 8 - DATA_BITS) & CC_S; + of = 0; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1) +{ + return src1 == 0; +} + #undef DATA_BITS #undef SIGN_MASK #undef DATA_TYPE diff --git a/target-i386/cpu.h b/target-i386/cpu.h 
index 1fa9dc82679a..960676bebd8e 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -636,6 +636,11 @@ typedef enum { CC_OP_SARL, CC_OP_SARQ, + CC_OP_BMILGB, /* Z,S via CC_DST, C = SRC==0; O=0; P,A undefined */ + CC_OP_BMILGW, + CC_OP_BMILGL, + CC_OP_BMILGQ, + CC_OP_NB, } CCOp; diff --git a/target-i386/helper.c b/target-i386/helper.c index 4bf9db7f7dab..74d600f48330 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -55,7 +55,7 @@ int cpu_x86_support_mca_broadcast(CPUX86State *env) /***********************************************************/ /* x86 debug */ -static const char *cc_op_str[] = { +static const char *cc_op_str[CC_OP_NB] = { "DYNAMIC", "EFLAGS", @@ -108,6 +108,11 @@ static const char *cc_op_str[] = { "SARW", "SARL", "SARQ", + + "BMILGB", + "BMILGW", + "BMILGL", + "BMILGQ", }; static void diff --git a/target-i386/translate.c b/target-i386/translate.c index d742fe36936d..2322d5c83836 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -209,6 +209,7 @@ static const uint8_t cc_op_live[CC_OP_NB] = { [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC, [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC, [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC, }; static void set_cc_op(DisasContext *s, CCOp op) @@ -988,6 +989,11 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, .mask = -1 }; + case CC_OP_BMILGB ... CC_OP_BMILGQ: + size = s->cc_op - CC_OP_BMILGB; + t0 = gen_ext_tl(reg, cpu_cc_src, size, false); + return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 }; + case CC_OP_EFLAGS: case CC_OP_SARB ... CC_OP_SARQ: /* CC_SRC & 1 */ @@ -4066,6 +4072,48 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } break; + case 0x0f3: + case 0x1f3: + case 0x2f3: + case 0x3f3: /* Group 17 */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? 
OT_QUAD : OT_LONG; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + + switch (reg & 7) { + case 1: /* blsr By,Ey */ + tcg_gen_neg_tl(cpu_T[1], cpu_T[0]); + tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + gen_op_mov_reg_T0(ot, s->vex_v); + gen_op_update2_cc(); + set_cc_op(s, CC_OP_BMILGB + ot); + break; + + case 2: /* blsmsk By,Ey */ + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); + tcg_gen_subi_tl(cpu_T[0], cpu_T[0], 1); + tcg_gen_xor_tl(cpu_T[0], cpu_T[0], cpu_cc_src); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + set_cc_op(s, CC_OP_BMILGB + ot); + break; + + case 3: /* blsi By, Ey */ + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); + tcg_gen_subi_tl(cpu_T[0], cpu_T[0], 1); + tcg_gen_and_tl(cpu_T[0], cpu_T[0], cpu_cc_src); + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + set_cc_op(s, CC_OP_BMILGB + ot); + break; + + default: + goto illegal_op; + } + break; + default: goto illegal_op; } From 02ea1e6b4fab803551bbea47eea29bc7709ba008 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 17:01:10 -0800 Subject: [PATCH 49/61] target-i386: Implement BZHI Signed-off-by: Richard Henderson --- target-i386/translate.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/target-i386/translate.c b/target-i386/translate.c index 2322d5c83836..2bb8d9f8c32a 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -4072,6 +4072,33 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } break; + case 0x0f5: /* bzhi Gy, Ey, By */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? OT_QUAD : OT_LONG; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]); + { + TCGv bound = tcg_const_tl(ot == OT_QUAD ? 63 : 31); + /* Note that since we're using BMILG (in order to get O + cleared) we need to store the inverse into C. 
*/ + tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, + cpu_T[1], bound); + tcg_gen_movcond_tl(TCG_COND_GT, cpu_T[1], cpu_T[1], + bound, bound, cpu_T[1]); + tcg_temp_free(bound); + } + tcg_gen_movi_tl(cpu_A0, -1); + tcg_gen_shl_tl(cpu_A0, cpu_A0, cpu_T[1]); + tcg_gen_andc_tl(cpu_T[0], cpu_T[0], cpu_A0); + gen_op_mov_reg_T0(ot, reg); + gen_op_update1_cc(); + set_cc_op(s, CC_OP_BMILGB + ot); + break; + case 0x0f3: case 0x1f3: case 0x2f3: From 5f1f4b177152286102475f9bffc359002a14d9c9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 18:06:18 -0800 Subject: [PATCH 50/61] target-i386: Implement MULX Signed-off-by: Richard Henderson --- target-i386/helper.h | 1 + target-i386/int_helper.c | 7 +++++++ target-i386/translate.c | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/target-i386/helper.h b/target-i386/helper.h index 4c46ab1b40d4..d75075474279 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -19,6 +19,7 @@ DEF_HELPER_2(imulq_EAX_T0, void, env, tl) DEF_HELPER_3(imulq_T0_T1, tl, env, tl, tl) DEF_HELPER_2(divq_EAX, void, env, tl) DEF_HELPER_2(idivq_EAX, void, env, tl) +DEF_HELPER_FLAGS_2(umulh, TCG_CALL_NO_RWG_SE, tl, tl, tl) #endif DEF_HELPER_2(aam, void, env, int) diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c index 84b812dccac3..4ec8cb78d213 100644 --- a/target-i386/int_helper.c +++ b/target-i386/int_helper.c @@ -385,6 +385,13 @@ void helper_mulq_EAX_T0(CPUX86State *env, target_ulong t0) CC_SRC = r1; } +target_ulong helper_umulh(target_ulong t0, target_ulong t1) +{ + uint64_t h, l; + mulu64(&l, &h, t0, t1); + return h; +} + void helper_imulq_EAX_T0(CPUX86State *env, target_ulong t0) { uint64_t r0, r1; diff --git a/target-i386/translate.c b/target-i386/translate.c index 2bb8d9f8c32a..3017d6316354 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -4099,6 +4099,45 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, set_cc_op(s, CC_OP_BMILGB + ot); break; + case 0x3f6: /* mulx By, Gy, rdx, Ey */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? 
OT_QUAD : OT_LONG; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + switch (ot) { + TCGv_i64 t0, t1; + default: + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); +#ifdef TARGET_X86_64 + tcg_gen_ext32u_i64(t0, cpu_T[0]); + tcg_gen_ext32u_i64(t1, cpu_regs[R_EDX]); +#else + tcg_gen_extu_i32_i64(t0, cpu_T[0]); + tcg_gen_extu_i32_i64(t0, cpu_regs[R_EDX]); +#endif + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_trunc_i64_tl(cpu_T[0], t0); + tcg_gen_shri_i64(t0, t0, 32); + tcg_gen_trunc_i64_tl(cpu_T[1], t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + gen_op_mov_reg_T0(OT_LONG, s->vex_v); + gen_op_mov_reg_T1(OT_LONG, reg); + break; +#ifdef TARGET_X86_64 + case OT_QUAD: + tcg_gen_mov_tl(cpu_T[1], cpu_regs[R_EDX]); + tcg_gen_mul_tl(cpu_regs[s->vex_v], cpu_T[0], cpu_T[1]); + gen_helper_umulh(cpu_regs[reg], cpu_T[0], cpu_T[1]); + break; +#endif + } + break; + case 0x0f3: case 0x1f3: case 0x2f3: From 0592f74a75ab695efd48a151219667adc0fa7cc4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 18:09:43 -0800 Subject: [PATCH 51/61] target-i386: Implement PDEP, PEXT Signed-off-by: Richard Henderson --- target-i386/helper.h | 3 +++ target-i386/int_helper.c | 32 ++++++++++++++++++++++++++++++++ target-i386/translate.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+) diff --git a/target-i386/helper.h b/target-i386/helper.h index d75075474279..81e0fbdd6d02 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -194,9 +194,12 @@ DEF_HELPER_3(fsave, void, env, tl, int) DEF_HELPER_3(frstor, void, env, tl, int) DEF_HELPER_3(fxsave, void, env, tl, int) DEF_HELPER_3(fxrstor, void, env, tl, int) + DEF_HELPER_1(bsf, tl, tl) DEF_HELPER_1(bsr, tl, tl) DEF_HELPER_2(lzcnt, tl, tl, int) +DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl) +DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl) /* MMX/SSE */ diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c index 4ec8cb78d213..527af402817b 100644 --- a/target-i386/int_helper.c +++ b/target-i386/int_helper.c @@ -488,6 +488,38 @@ target_ulong helper_bsr(target_ulong t0) return helper_lzcnt(t0, 0); } +#if TARGET_LONG_BITS == 32 +# define ctztl ctz32 +#else +# define ctztl ctz64 +#endif + +target_ulong helper_pdep(target_ulong src, target_ulong mask) +{ + target_ulong dest = 0; + int i, o; + + for (i = 0; mask != 0; i++) { + o = ctztl(mask); + mask &= mask - 1; + dest |= ((src >> i) & 1) << o; + } + return dest; +} + +target_ulong helper_pext(target_ulong src, target_ulong mask) +{ + target_ulong dest = 0; + int i, o; + + for (o = 0; mask != 0; o++) { + i = ctztl(mask); + mask &= mask - 1; + dest |= ((src >> i) & 1) << o; + } + return dest; +} + #define SHIFT 0 #include "shift_helper_template.h" #undef SHIFT diff --git a/target-i386/translate.c b/target-i386/translate.c index 3017d6316354..51016fedd58a 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -4138,6 +4138,42 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, } break; + case 0x3f5: /* pdep Gy, By, Ey */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? OT_QUAD : OT_LONG; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + /* Note that by zero-extending the mask operand, we + automatically handle zero-extending the result. 
*/ + if (s->dflag == 2) { + tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]); + } else { + tcg_gen_ext32u_tl(cpu_T[1], cpu_regs[s->vex_v]); + } + gen_helper_pdep(cpu_regs[reg], cpu_T[0], cpu_T[1]); + break; + + case 0x2f5: /* pext Gy, By, Ey */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? OT_QUAD : OT_LONG; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + /* Note that by zero-extending the mask operand, we + automatically handle zero-extending the result. */ + if (s->dflag == 2) { + tcg_gen_mov_tl(cpu_T[1], cpu_regs[s->vex_v]); + } else { + tcg_gen_ext32u_tl(cpu_T[1], cpu_regs[s->vex_v]); + } + gen_helper_pext(cpu_regs[reg], cpu_T[0], cpu_T[1]); + break; + case 0x0f3: case 0x1f3: case 0x2f3: From 4a554890e479a43568de8b5354d9ca8583f5ec7f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 18:12:13 -0800 Subject: [PATCH 52/61] target-i386: Implement SHLX, SARX, SHRX Signed-off-by: Richard Henderson --- target-i386/translate.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/target-i386/translate.c b/target-i386/translate.c index 51016fedd58a..c1a2886accb9 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -4174,6 +4174,37 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_helper_pext(cpu_regs[reg], cpu_T[0], cpu_T[1]); break; + case 0x1f7: /* shlx Gy, Ey, By */ + case 0x2f7: /* sarx Gy, Ey, By */ + case 0x3f7: /* shrx Gy, Ey, By */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = (s->dflag == 2 ? OT_QUAD : OT_LONG); + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + if (ot == OT_QUAD) { + tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63); + } else { + tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 31); + } + if (b == 0x1f7) { + tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } else if (b == 0x2f7) { + if (ot != OT_QUAD) { + tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]); + } + tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } else { + if (ot != OT_QUAD) { + tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]); + } + tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } + gen_op_mov_reg_T0(ot, reg); + break; + case 0x0f3: case 0x1f3: case 0x2f3: From e2c3c2c551bccd843135eab1ba202f8d2f86800b Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 16 Jan 2013 14:55:09 -0800 Subject: [PATCH 53/61] target-i386: Implement RORX Signed-off-by: Richard Henderson --- target-i386/translate.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/target-i386/translate.c b/target-i386/translate.c index c1a2886accb9..68e30e699e7c 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -4433,6 +4433,38 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val)); break; + + case 0x33a: + /* Various integer extensions at 0f 3a f[0-f]. */ + b = modrm | (b1 << 8); + modrm = cpu_ldub_code(env, s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + + switch (b) { + case 0x3f0: /* rorx Gy,Ey, Ib */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) + || !(s->prefix & PREFIX_VEX) + || s->vex_l != 0) { + goto illegal_op; + } + ot = s->dflag == 2 ? 
OT_QUAD : OT_LONG; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + b = cpu_ldub_code(env, s->pc++); + if (ot == OT_QUAD) { + tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63); + } else { + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, b & 31); + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + } + gen_op_mov_reg_T0(ot, reg); + break; + + default: + goto illegal_op; + } + break; + default: goto illegal_op; } From cd7f97cafdd80d6bd4950ccfdcd9acb7850184b2 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 23 Jan 2013 18:17:33 -0800 Subject: [PATCH 54/61] target-i386: Implement ADX extension Signed-off-by: Richard Henderson --- target-i386/cc_helper.c | 30 +++++++++++ target-i386/cpu.c | 4 +- target-i386/cpu.h | 4 ++ target-i386/helper.c | 4 ++ target-i386/translate.c | 109 ++++++++++++++++++++++++++++++++++++++-- 5 files changed, 146 insertions(+), 5 deletions(-) diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c index 5ea6a0aeae2a..6cf57a76b36a 100644 --- a/target-i386/cc_helper.c +++ b/target-i386/cc_helper.c @@ -75,6 +75,24 @@ const uint8_t parity_table[256] = { #endif +static target_ulong compute_all_adcx(target_ulong dst, target_ulong src1, + target_ulong src2) +{ + return (src1 & ~CC_C) | (dst * CC_C); +} + +static target_ulong compute_all_adox(target_ulong dst, target_ulong src1, + target_ulong src2) +{ + return (src1 & ~CC_O) | (src2 * CC_O); +} + +static target_ulong compute_all_adcox(target_ulong dst, target_ulong src1, + target_ulong src2) +{ + return (src1 & ~(CC_C | CC_O)) | (dst * CC_C) | (src2 * CC_O); +} + target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, target_ulong src2, int op) { @@ -162,6 +180,13 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, case CC_OP_BMILGL: return compute_all_bmilgl(dst, src1); + case CC_OP_ADCX: + return compute_all_adcx(dst, src1, src2); + case CC_OP_ADOX: + return compute_all_adox(dst, src1, src2); + case CC_OP_ADCOX: + return compute_all_adcox(dst, src1, src2); + #ifdef TARGET_X86_64 case CC_OP_MULQ: return compute_all_mulq(dst, src1); @@ -210,6 +235,7 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, case CC_OP_SARW: case CC_OP_SARL: case CC_OP_SARQ: + case CC_OP_ADOX: return src1 & 1; case CC_OP_INCB: @@ -228,6 +254,10 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, case CC_OP_MULQ: return src1 != 0; + case CC_OP_ADCX: + case CC_OP_ADCOX: + return dst; + case CC_OP_ADDB: return compute_c_addb(dst, src1); case CC_OP_ADDW: diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 0cb64ab583c4..5582e5f4e622 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -407,11 +407,11 @@ typedef struct x86_def_t { CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A) #define TCG_SVM_FEATURES 0 #define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP \ - CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2) + CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX) /* missing: CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2, CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, - CPUID_7_0_EBX_RDSEED, CPUID_7_0_EBX_ADX */ + CPUID_7_0_EBX_RDSEED */ /* built-in CPU model definitions */ diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 960676bebd8e..e0443d89172e 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -641,6 +641,10 @@ typedef enum { CC_OP_BMILGL, CC_OP_BMILGQ, + CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest. 
*/ + CC_OP_ADOX, /* CC_DST = O, CC_SRC = rest. */ + CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */ + CC_OP_NB, } CCOp; diff --git a/target-i386/helper.c b/target-i386/helper.c index 74d600f48330..66c36247337c 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -113,6 +113,10 @@ static const char *cc_op_str[CC_OP_NB] = { "BMILGW", "BMILGL", "BMILGQ", + + "ADCX", + "ADOX", + "ADCOX", }; static void diff --git a/target-i386/translate.c b/target-i386/translate.c index 68e30e699e7c..436658a33a5c 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -210,6 +210,9 @@ static const uint8_t cc_op_live[CC_OP_NB] = { [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC, [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC, [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC, + [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC, + [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2, + [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2, }; static void set_cc_op(DisasContext *s, CCOp op) @@ -994,6 +997,11 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) t0 = gen_ext_tl(reg, cpu_cc_src, size, false); return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 }; + case CC_OP_ADCX: + case CC_OP_ADCOX: + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst, + .mask = -1, .no_setcond = true }; + case CC_OP_EFLAGS: case CC_OP_SARB ... CC_OP_SARQ: /* CC_SRC & 1 */ @@ -1027,6 +1035,9 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg) gen_compute_eflags(s); /* FALLTHRU */ case CC_OP_EFLAGS: + case CC_OP_ADCX: + case CC_OP_ADOX: + case CC_OP_ADCOX: return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, .mask = CC_S }; default: @@ -1041,9 +1052,17 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg) /* compute eflags.O to reg */ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg) { - gen_compute_eflags(s); - return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, - .mask = CC_O }; + switch (s->cc_op) { + case CC_OP_ADOX: + case CC_OP_ADCOX: + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2, + .mask = -1, .no_setcond = true }; + + default: + gen_compute_eflags(s); + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, + .mask = CC_O }; + } } /* compute eflags.Z to reg */ @@ -1054,6 +1073,9 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) gen_compute_eflags(s); /* FALLTHRU */ case CC_OP_EFLAGS: + case CC_OP_ADCX: + case CC_OP_ADOX: + case CC_OP_ADCOX: return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, .mask = CC_Z }; default: @@ -4174,6 +4196,87 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, gen_helper_pext(cpu_regs[reg], cpu_T[0], cpu_T[1]); break; + case 0x1f6: /* adcx Gy, Ey */ + case 0x2f6: /* adox Gy, Ey */ + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) { + goto illegal_op; + } else { + TCGv carry_in, carry_out; + int end_op; + + ot = (s->dflag == 2 ? OT_QUAD : OT_LONG); + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + + /* Re-use the carry-out from a previous round. */ + TCGV_UNUSED(carry_in); + carry_out = (b == 0x1f6 ? 
cpu_cc_dst : cpu_cc_src2); + switch (s->cc_op) { + case CC_OP_ADCX: + if (b == 0x1f6) { + carry_in = cpu_cc_dst; + end_op = CC_OP_ADCX; + } else { + end_op = CC_OP_ADCOX; + } + break; + case CC_OP_ADOX: + if (b == 0x1f6) { + end_op = CC_OP_ADCOX; + } else { + carry_in = cpu_cc_src2; + end_op = CC_OP_ADOX; + } + break; + case CC_OP_ADCOX: + end_op = CC_OP_ADCOX; + carry_in = carry_out; + break; + default: + end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADCOX); + break; + } + /* If we can't reuse carry-out, get it out of EFLAGS. */ + if (TCGV_IS_UNUSED(carry_in)) { + if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) { + gen_compute_eflags(s); + } + carry_in = cpu_tmp0; + tcg_gen_shri_tl(carry_in, cpu_cc_src, + ctz32(b == 0x1f6 ? CC_C : CC_O)); + tcg_gen_andi_tl(carry_in, carry_in, 1); + } + + switch (ot) { +#ifdef TARGET_X86_64 + case OT_LONG: + /* If we know TL is 64-bit, and we want a 32-bit + result, just do everything in 64-bit arithmetic. */ + tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]); + tcg_gen_ext32u_i64(cpu_T[0], cpu_T[0]); + tcg_gen_add_i64(cpu_T[0], cpu_T[0], cpu_regs[reg]); + tcg_gen_add_i64(cpu_T[0], cpu_T[0], carry_in); + tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T[0]); + tcg_gen_shri_i64(carry_out, cpu_T[0], 32); + break; +#endif + default: + /* Otherwise compute the carry-out in two steps. */ + tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_regs[reg]); + tcg_gen_setcond_tl(TCG_COND_LTU, cpu_tmp4, + cpu_T[0], cpu_regs[reg]); + tcg_gen_add_tl(cpu_regs[reg], cpu_T[0], carry_in); + tcg_gen_setcond_tl(TCG_COND_LTU, carry_out, + cpu_regs[reg], cpu_T[0]); + tcg_gen_or_tl(carry_out, carry_out, cpu_tmp4); + break; + } + /* We began with all flags computed to CC_SRC, and we + have now placed the carry-out in CC_DST. All that + is left is to record the CC_OP. */ + set_cc_op(s, end_op); + } + break; + case 0x1f7: /* shlx Gy, Ey, By */ case 0x2f7: /* sarx Gy, Ey, By */ case 0x3f7: /* shrx Gy, Ey, By */ From f1300734cbca515d30953b2c87e259fa378ea301 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 21 Jan 2013 11:52:26 -0800 Subject: [PATCH 55/61] target-i386: Use clz/ctz for bsf/bsr helpers And mark the helpers as NO_RWG_SE. 
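
For illustration only (not part of the patch): a standalone C check of the identities
the rewritten helpers rely on, assuming a 64-bit target_ulong and GCC/Clang builtins.
bsf is a trailing-zero count, bsr is the bit index of the most significant set bit
(clz XOR 63), and a w-bit lzcnt is the 64-bit clz minus the 64 - w unused top bits.

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t t0 = 0x0000000000f00000ull;   /* bits 20..23 set */

        /* bsf: index of the least significant set bit */
        assert(__builtin_ctzll(t0) == 20);

        /* bsr: index of the most significant set bit, i.e. clz ^ 63 */
        assert((__builtin_clzll(t0) ^ 63) == 23);

        /* 32-bit lzcnt: the 64-bit clz over-counts by 64 - 32 bits */
        assert(__builtin_clzll(t0) - (64 - 32) == 8);
        return 0;
    }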
Signed-off-by: Richard Henderson --- target-i386/helper.h | 6 +++--- target-i386/int_helper.c | 45 ++++++++++------------------------------ 2 files changed, 14 insertions(+), 37 deletions(-) diff --git a/target-i386/helper.h b/target-i386/helper.h index 81e0fbdd6d02..e1ecdb81e9d0 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -195,9 +195,9 @@ DEF_HELPER_3(frstor, void, env, tl, int) DEF_HELPER_3(fxsave, void, env, tl, int) DEF_HELPER_3(fxrstor, void, env, tl, int) -DEF_HELPER_1(bsf, tl, tl) -DEF_HELPER_1(bsr, tl, tl) -DEF_HELPER_2(lzcnt, tl, tl, int) +DEF_HELPER_FLAGS_1(bsf, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_1(bsr, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_2(lzcnt, TCG_CALL_NO_RWG_SE, tl, tl, int) DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl) diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c index 527af402817b..7bec4ebdd60f 100644 --- a/target-i386/int_helper.c +++ b/target-i386/int_helper.c @@ -447,53 +447,30 @@ void helper_idivq_EAX(CPUX86State *env, target_ulong t0) } #endif +#if TARGET_LONG_BITS == 32 +# define ctztl ctz32 +# define clztl clz32 +#else +# define ctztl ctz64 +# define clztl clz64 +#endif + /* bit operations */ target_ulong helper_bsf(target_ulong t0) { - int count; - target_ulong res; - - res = t0; - count = 0; - while ((res & 1) == 0) { - count++; - res >>= 1; - } - return count; + return ctztl(t0); } target_ulong helper_lzcnt(target_ulong t0, int wordsize) { - int count; - target_ulong res, mask; - - if (wordsize > 0 && t0 == 0) { - return wordsize; - } - res = t0; - count = TARGET_LONG_BITS - 1; - mask = (target_ulong)1 << (TARGET_LONG_BITS - 1); - while ((res & mask) == 0) { - count--; - res <<= 1; - } - if (wordsize > 0) { - return wordsize - 1 - count; - } - return count; + return clztl(t0) - (TARGET_LONG_BITS - wordsize); } target_ulong helper_bsr(target_ulong t0) { - return helper_lzcnt(t0, 0); + return clztl(t0) ^ (TARGET_LONG_BITS - 1); } -#if TARGET_LONG_BITS == 32 -# define ctztl ctz32 -#else -# define ctztl ctz64 -#endif - target_ulong helper_pdep(target_ulong src, target_ulong mask) { target_ulong dest = 0; From 321c535105a182501b888f095f7ec4dbb5f3f6ae Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 21 Jan 2013 13:32:02 -0800 Subject: [PATCH 56/61] target-i386: Implement tzcnt and fix lzcnt We weren't computing flags for lzcnt at all. At the same time, adjust the implementation of bsf/bsr to avoid the local branch, using movcond instead. 
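
A minimal sketch (not QEMU code) of the zero-input handling the translator change
below arranges for, assuming 32-bit operands on a 64-bit target_ulong: tzcnt forces
every bit above the operand size to 1 so that a plain ctz of a zero input yields the
operand size, and lzcnt subtracts the unused top bits from a full-width clz.

    #include <stdint.h>

    static int tzcnt32_model(uint32_t x)
    {
        /* (target_ulong)-2 << (size - 1) with size == 32: sets bits 32..63 */
        uint64_t t = (uint64_t)x | ((uint64_t)-2 << 31);
        return __builtin_ctzll(t);          /* 32 when x == 0 */
    }

    static int lzcnt32_model(uint32_t x)
    {
        /* clz on the widened value over-counts by the 32 unused top bits;
           the x == 0 guard stands in for QEMU's clz64(0) == 64 */
        return (x ? __builtin_clzll((uint64_t)x) : 64) - 32;
    }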
Signed-off-by: Richard Henderson --- target-i386/helper.h | 5 +-- target-i386/int_helper.c | 11 ++--- target-i386/translate.c | 86 +++++++++++++++++++++++----------------- 3 files changed, 54 insertions(+), 48 deletions(-) diff --git a/target-i386/helper.h b/target-i386/helper.h index e1ecdb81e9d0..26a0cc80a4c5 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -195,9 +195,8 @@ DEF_HELPER_3(frstor, void, env, tl, int) DEF_HELPER_3(fxsave, void, env, tl, int) DEF_HELPER_3(fxrstor, void, env, tl, int) -DEF_HELPER_FLAGS_1(bsf, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_1(bsr, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_2(lzcnt, TCG_CALL_NO_RWG_SE, tl, tl, int) +DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(pdep, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl) diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c index 7bec4ebdd60f..3b56075a9d6f 100644 --- a/target-i386/int_helper.c +++ b/target-i386/int_helper.c @@ -456,19 +456,14 @@ void helper_idivq_EAX(CPUX86State *env, target_ulong t0) #endif /* bit operations */ -target_ulong helper_bsf(target_ulong t0) +target_ulong helper_ctz(target_ulong t0) { return ctztl(t0); } -target_ulong helper_lzcnt(target_ulong t0, int wordsize) +target_ulong helper_clz(target_ulong t0) { - return clztl(t0) - (TARGET_LONG_BITS - wordsize); -} - -target_ulong helper_bsr(target_ulong t0) -{ - return clztl(t0) ^ (TARGET_LONG_BITS - 1); + return clztl(t0); } target_ulong helper_pdep(target_ulong src, target_ulong mask) diff --git a/target-i386/translate.c b/target-i386/translate.c index 436658a33a5c..97c565f69779 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -7157,46 +7157,58 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_movi_tl(cpu_cc_dst, 0); } break; - case 0x1bc: /* bsf */ - case 0x1bd: /* bsr */ - { - int label1; - TCGv t0; - - ot = dflag + OT_WORD; - modrm = cpu_ldub_code(env, s->pc++); - reg = ((modrm >> 3) & 7) | rex_r; - gen_ldst_modrm(env, s,modrm, ot, OR_TMP0, 0); - gen_extu(ot, cpu_T[0]); - t0 = tcg_temp_local_new(); - tcg_gen_mov_tl(t0, cpu_T[0]); - if ((b & 1) && (prefixes & PREFIX_REPZ) && - (s->cpuid_ext3_features & CPUID_EXT3_ABM)) { - switch(ot) { - case OT_WORD: gen_helper_lzcnt(cpu_T[0], t0, - tcg_const_i32(16)); break; - case OT_LONG: gen_helper_lzcnt(cpu_T[0], t0, - tcg_const_i32(32)); break; - case OT_QUAD: gen_helper_lzcnt(cpu_T[0], t0, - tcg_const_i32(64)); break; - } - gen_op_mov_reg_T0(ot, reg); + case 0x1bc: /* bsf / tzcnt */ + case 0x1bd: /* bsr / lzcnt */ + ot = dflag + OT_WORD; + modrm = cpu_ldub_code(env, s->pc++); + reg = ((modrm >> 3) & 7) | rex_r; + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + gen_extu(ot, cpu_T[0]); + + /* Note that lzcnt and tzcnt are in different extensions. */ + if ((prefixes & PREFIX_REPZ) + && (b & 1 + ? s->cpuid_ext3_features & CPUID_EXT3_ABM + : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) { + int size = 8 << ot; + tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]); + if (b & 1) { + /* For lzcnt, reduce the target_ulong result by the + number of zeros that we expect to find at the top. 
*/ + gen_helper_clz(cpu_T[0], cpu_T[0]); + tcg_gen_subi_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - size); } else { - label1 = gen_new_label(); - tcg_gen_movi_tl(cpu_cc_dst, 0); - tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1); - if (b & 1) { - gen_helper_bsr(cpu_T[0], t0); - } else { - gen_helper_bsf(cpu_T[0], t0); - } - gen_op_mov_reg_T0(ot, reg); - tcg_gen_movi_tl(cpu_cc_dst, 1); - gen_set_label(label1); - set_cc_op(s, CC_OP_LOGICB + ot); + /* For tzcnt, a zero input must return the operand size: + force all bits outside the operand size to 1. */ + target_ulong mask = (target_ulong)-2 << (size - 1); + tcg_gen_ori_tl(cpu_T[0], cpu_T[0], mask); + gen_helper_ctz(cpu_T[0], cpu_T[0]); + } + /* For lzcnt/tzcnt, C and Z bits are defined and are + related to the result. */ + gen_op_update1_cc(); + set_cc_op(s, CC_OP_BMILGB + ot); + } else { + /* For bsr/bsf, only the Z bit is defined and it is related + to the input and not the result. */ + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + set_cc_op(s, CC_OP_LOGICB + ot); + if (b & 1) { + /* For bsr, return the bit index of the first 1 bit, + not the count of leading zeros. */ + gen_helper_clz(cpu_T[0], cpu_T[0]); + tcg_gen_xori_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - 1); + } else { + gen_helper_ctz(cpu_T[0], cpu_T[0]); } - tcg_temp_free(t0); + /* ??? The manual says that the output is undefined when the + input is zero, but real hardware leaves it unchanged, and + real programs appear to depend on that. */ + tcg_gen_movi_tl(cpu_tmp0, 0); + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[0], cpu_cc_dst, cpu_tmp0, + cpu_regs[reg], cpu_T[0]); } + gen_op_mov_reg_T0(ot, reg); break; /************************/ /* bcd */ From 436ff2d227588d42970c4f0ed1cdfcb87c872fba Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 29 Jan 2013 13:38:43 -0800 Subject: [PATCH 57/61] target-i386: Add CC_OP_CLR Special case xor with self. We need not even store the known zero into cc_src. Signed-off-by: Richard Henderson --- target-i386/cc_helper.c | 3 +++ target-i386/cpu.h | 2 ++ target-i386/helper.c | 2 ++ target-i386/translate.c | 17 ++++++++++++++--- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c index 6cf57a76b36a..9daa1a06b85b 100644 --- a/target-i386/cc_helper.c +++ b/target-i386/cc_helper.c @@ -102,6 +102,8 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, case CC_OP_EFLAGS: return src1; + case CC_OP_CLR: + return CC_Z; case CC_OP_MULB: return compute_all_mulb(dst, src1); @@ -228,6 +230,7 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, case CC_OP_LOGICW: case CC_OP_LOGICL: case CC_OP_LOGICQ: + case CC_OP_CLR: return 0; case CC_OP_EFLAGS: diff --git a/target-i386/cpu.h b/target-i386/cpu.h index e0443d89172e..493dda8bb644 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -645,6 +645,8 @@ typedef enum { CC_OP_ADOX, /* CC_DST = O, CC_SRC = rest. */ CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */ + CC_OP_CLR, /* Z set, all other flags clear. 
*/ + CC_OP_NB, } CCOp; diff --git a/target-i386/helper.c b/target-i386/helper.c index 66c36247337c..82a731c77de8 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -117,6 +117,8 @@ static const char *cc_op_str[CC_OP_NB] = { "ADCX", "ADOX", "ADCOX", + + "CLR", }; static void diff --git a/target-i386/translate.c b/target-i386/translate.c index 97c565f69779..007d32602a31 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -213,6 +213,7 @@ static const uint8_t cc_op_live[CC_OP_NB] = { [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC, [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2, [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2, + [CC_OP_CLR] = 0, }; static void set_cc_op(DisasContext *s, CCOp op) @@ -906,6 +907,11 @@ static void gen_compute_eflags(DisasContext *s) if (s->cc_op == CC_OP_EFLAGS) { return; } + if (s->cc_op == CC_OP_CLR) { + tcg_gen_movi_tl(cpu_cc_src, CC_Z); + set_cc_op(s, CC_OP_EFLAGS); + return; + } TCGV_UNUSED(zero); dst = cpu_cc_dst; @@ -974,6 +980,7 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) .reg2 = t1, .mask = -1, .use_reg2 = true }; case CC_OP_LOGICB ... CC_OP_LOGICQ: + case CC_OP_CLR: return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; case CC_OP_INCB ... CC_OP_INCQ: @@ -1040,6 +1047,8 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg) case CC_OP_ADCOX: return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, .mask = CC_S }; + case CC_OP_CLR: + return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; default: { int size = (s->cc_op - CC_OP_ADDB) & 3; @@ -1057,7 +1066,8 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg) case CC_OP_ADCOX: return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2, .mask = -1, .no_setcond = true }; - + case CC_OP_CLR: + return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; default: gen_compute_eflags(s); return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, @@ -1078,6 +1088,8 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) case CC_OP_ADCOX: return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, .mask = CC_Z }; + case CC_OP_CLR: + return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 }; default: { int size = (s->cc_op - CC_OP_ADDB) & 3; @@ -4890,10 +4902,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } else if (op == OP_XORL && rm == reg) { xor_zero: /* xor reg, reg optimisation */ + set_cc_op(s, CC_OP_CLR); gen_op_movl_T0_0(); - set_cc_op(s, CC_OP_LOGICB + ot); gen_op_mov_reg_T0(ot, reg); - gen_op_update1_cc(); break; } else { opreg = rm; From a41f62f592d9ecf97df4a12023760fe082b1ee68 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 30 Jan 2013 17:52:59 -0800 Subject: [PATCH 58/61] target-i386: Use movcond to implement shift flags. With this being all straight-line code, it can get deleted when the cc variables die. Signed-off-by: Richard Henderson --- target-i386/translate.c | 94 ++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 52 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 007d32602a31..74694d180b38 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1572,15 +1572,9 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c) static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, int is_right, int is_arith) { - target_ulong mask; - int shift_label; - TCGv t0, t1, t2; - - if (ot == OT_QUAD) { - mask = 0x3f; - } else { - mask = 0x1f; - } + target_ulong mask = (ot == OT_QUAD ? 
0x3f : 0x1f); + TCGv_i32 z32, s32, oldop; + TCGv z_tl; /* load */ if (op1 == OR_TMP0) { @@ -1589,25 +1583,22 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, gen_op_mov_TN_reg(ot, 0, op1); } - t0 = tcg_temp_local_new(); - t1 = tcg_temp_local_new(); - t2 = tcg_temp_local_new(); - - tcg_gen_andi_tl(t2, cpu_T[1], mask); + tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask); + tcg_gen_subi_tl(cpu_tmp0, cpu_T[1], 1); if (is_right) { if (is_arith) { gen_exts(ot, cpu_T[0]); - tcg_gen_mov_tl(t0, cpu_T[0]); - tcg_gen_sar_tl(cpu_T[0], cpu_T[0], t2); + tcg_gen_sar_tl(cpu_tmp0, cpu_T[0], cpu_tmp0); + tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]); } else { gen_extu(ot, cpu_T[0]); - tcg_gen_mov_tl(t0, cpu_T[0]); - tcg_gen_shr_tl(cpu_T[0], cpu_T[0], t2); + tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0); + tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]); } } else { - tcg_gen_mov_tl(t0, cpu_T[0]); - tcg_gen_shl_tl(cpu_T[0], cpu_T[0], t2); + tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0); + tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]); } /* store */ @@ -1617,50 +1608,49 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, gen_op_mov_reg_T0(ot, op1); } - /* Update eflags data because we cannot predict flags afterward. */ - gen_update_cc_op(s); - set_cc_op(s, CC_OP_DYNAMIC); - - tcg_gen_mov_tl(t1, cpu_T[0]); - - shift_label = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, shift_label); - - tcg_gen_addi_tl(t2, t2, -1); - tcg_gen_mov_tl(cpu_cc_dst, t1); - - if (is_right) { - if (is_arith) { - tcg_gen_sar_tl(cpu_cc_src, t0, t2); - } else { - tcg_gen_shr_tl(cpu_cc_src, t0, t2); - } + /* Store the results into the CC variables. If we know that the + variable must be dead, store unconditionally. Otherwise we'll + need to not disrupt the current contents. */ + z_tl = tcg_const_tl(0); + if (cc_op_live[s->cc_op] & USES_CC_DST) { + tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, cpu_T[1], z_tl, + cpu_T[0], cpu_cc_dst); + } else { + tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); + } + if (cc_op_live[s->cc_op] & USES_CC_SRC) { + tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, cpu_T[1], z_tl, + cpu_tmp0, cpu_cc_src); } else { - tcg_gen_shl_tl(cpu_cc_src, t0, t2); + tcg_gen_mov_tl(cpu_cc_src, cpu_tmp0); } + tcg_temp_free(z_tl); - if (is_right) { - tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot); + /* Get the two potential CC_OP values into temporaries. */ + tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot); + if (s->cc_op == CC_OP_DYNAMIC) { + oldop = cpu_cc_op; } else { - tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot); + tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op); + oldop = cpu_tmp3_i32; } - gen_set_label(shift_label); + /* Conditionally store the CC_OP value. */ + z32 = tcg_const_i32(0); + s32 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(s32, cpu_T[1]); + tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop); + tcg_temp_free_i32(z32); + tcg_temp_free_i32(s32); - tcg_temp_free(t0); - tcg_temp_free(t1); - tcg_temp_free(t2); + /* The CC_OP value is no longer predictable. */ + set_cc_op(s, CC_OP_DYNAMIC); } static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2, int is_right, int is_arith) { - int mask; - - if (ot == OT_QUAD) - mask = 0x3f; - else - mask = 0x1f; + int mask = (ot == OT_QUAD ? 0x3f : 0x1f); /* load */ if (op1 == OR_TMP0) From 34d80a55ff8517fd37bcfea5063b9797e2bd9132 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 30 Jan 2013 19:16:45 -0800 Subject: [PATCH 59/61] target-i386: Use movcond to implement rotate flags. 
With this being all straight-line code, it can get deleted when the cc variables die. Signed-off-by: Richard Henderson --- target-i386/translate.c | 237 ++++++++++++++++++++-------------------- 1 file changed, 121 insertions(+), 116 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 74694d180b38..6b109e853b23 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -1698,167 +1698,172 @@ static inline void tcg_gen_lshift(TCGv ret, TCGv arg1, target_long arg2) tcg_gen_shri_tl(ret, arg1, -arg2); } -static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, - int is_right) +static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, int is_right) { - target_ulong mask; - int label1, label2, data_bits; - TCGv t0, t1, t2, a0; - - /* XXX: inefficient, but we must use local temps */ - t0 = tcg_temp_local_new(); - t1 = tcg_temp_local_new(); - t2 = tcg_temp_local_new(); - a0 = tcg_temp_local_new(); - - if (ot == OT_QUAD) - mask = 0x3f; - else - mask = 0x1f; + target_ulong mask = (ot == OT_QUAD ? 0x3f : 0x1f); + TCGv_i32 t0, t1; /* load */ if (op1 == OR_TMP0) { - tcg_gen_mov_tl(a0, cpu_A0); - gen_op_ld_v(ot + s->mem_index, t0, a0); + gen_op_ld_T0_A0(ot + s->mem_index); } else { - gen_op_mov_v_reg(ot, t0, op1); + gen_op_mov_TN_reg(ot, 0, op1); } - tcg_gen_mov_tl(t1, cpu_T[1]); - - tcg_gen_andi_tl(t1, t1, mask); - - /* Must test zero case to avoid using undefined behaviour in TCG - shifts. */ - label1 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1); - - if (ot <= OT_WORD) - tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1); - else - tcg_gen_mov_tl(cpu_tmp0, t1); - - gen_extu(ot, t0); - tcg_gen_mov_tl(t2, t0); + tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask); - data_bits = 8 << ot; - /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX: - fix TCG definition) */ - if (is_right) { - tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0); - tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0); - tcg_gen_shl_tl(t0, t0, cpu_tmp0); - } else { - tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0); - tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0); - tcg_gen_shr_tl(t0, t0, cpu_tmp0); + switch (ot) { + case OT_BYTE: + /* Replicate the 8-bit input so that a 32-bit rotate works. */ + tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]); + tcg_gen_muli_tl(cpu_T[0], cpu_T[0], 0x01010101); + goto do_long; + case OT_WORD: + /* Replicate the 16-bit input so that a 32-bit rotate works. */ + tcg_gen_deposit_tl(cpu_T[0], cpu_T[0], cpu_T[0], 16, 16); + goto do_long; + do_long: +#ifdef TARGET_X86_64 + case OT_LONG: + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]); + if (is_right) { + tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32); + } else { + tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32); + } + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + break; +#endif + default: + if (is_right) { + tcg_gen_rotr_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } else { + tcg_gen_rotl_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + } + break; } - tcg_gen_or_tl(t0, t0, cpu_tmp4); - gen_set_label(label1); /* store */ if (op1 == OR_TMP0) { - gen_op_st_v(ot + s->mem_index, t0, a0); + gen_op_st_T0_A0(ot + s->mem_index); } else { - gen_op_mov_reg_v(ot, op1, t0); + gen_op_mov_reg_T0(ot, op1); } - - /* update eflags. It is needed anyway most of the time, do it always. */ - gen_compute_eflags(s); - assert(s->cc_op == CC_OP_EFLAGS); - label2 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2); + /* We'll need the flags computed into CC_SRC. 
*/ + gen_compute_eflags(s); - tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C)); - tcg_gen_xor_tl(cpu_tmp0, t2, t0); - tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1)); - tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O); - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0); + /* The value that was "rotated out" is now present at the other end + of the word. Compute C into CC_DST and O into CC_SRC2. Note that + since we've computed the flags into CC_SRC, these variables are + currently dead. */ if (is_right) { - tcg_gen_shri_tl(t0, t0, data_bits - 1); + tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1); + tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask); + } else { + tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask); + tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1); } - tcg_gen_andi_tl(t0, t0, CC_C); - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0); - - gen_set_label(label2); - - tcg_temp_free(t0); - tcg_temp_free(t1); - tcg_temp_free(t2); - tcg_temp_free(a0); + tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1); + tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst); + + /* Now conditionally store the new CC_OP value. If the shift count + is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live. + Otherwise reuse CC_OP_ADCOX which have the C and O flags split out + exactly as we computed above. */ + t0 = tcg_const_i32(0); + t1 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(t1, cpu_T[1]); + tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX); + tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS); + tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0, + cpu_tmp2_i32, cpu_tmp3_i32); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); + + /* The CC_OP value is no longer predictable. */ + set_cc_op(s, CC_OP_DYNAMIC); } static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2, int is_right) { - int mask; - int data_bits; - TCGv t0, t1, a0; - - /* XXX: inefficient, but we must use local temps */ - t0 = tcg_temp_local_new(); - t1 = tcg_temp_local_new(); - a0 = tcg_temp_local_new(); - - if (ot == OT_QUAD) - mask = 0x3f; - else - mask = 0x1f; + int mask = (ot == OT_QUAD ? 
0x3f : 0x1f); + int shift; /* load */ if (op1 == OR_TMP0) { - tcg_gen_mov_tl(a0, cpu_A0); - gen_op_ld_v(ot + s->mem_index, t0, a0); + gen_op_ld_T0_A0(ot + s->mem_index); } else { - gen_op_mov_v_reg(ot, t0, op1); + gen_op_mov_TN_reg(ot, 0, op1); } - gen_extu(ot, t0); - tcg_gen_mov_tl(t1, t0); - op2 &= mask; - data_bits = 8 << ot; if (op2 != 0) { - int shift = op2 & ((1 << (3 + ot)) - 1); - if (is_right) { - tcg_gen_shri_tl(cpu_tmp4, t0, shift); - tcg_gen_shli_tl(t0, t0, data_bits - shift); - } - else { - tcg_gen_shli_tl(cpu_tmp4, t0, shift); - tcg_gen_shri_tl(t0, t0, data_bits - shift); + switch (ot) { +#ifdef TARGET_X86_64 + case OT_LONG: + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]); + if (is_right) { + tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2); + } else { + tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2); + } + tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32); + break; +#endif + default: + if (is_right) { + tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], op2); + } else { + tcg_gen_rotli_tl(cpu_T[0], cpu_T[0], op2); + } + break; + case OT_BYTE: + mask = 7; + goto do_shifts; + case OT_WORD: + mask = 15; + do_shifts: + shift = op2 & mask; + if (is_right) { + shift = mask + 1 - shift; + } + gen_extu(ot, cpu_T[0]); + tcg_gen_shli_tl(cpu_tmp0, cpu_T[0], shift); + tcg_gen_shri_tl(cpu_T[0], cpu_T[0], mask + 1 - shift); + tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0); + break; } - tcg_gen_or_tl(t0, t0, cpu_tmp4); } /* store */ if (op1 == OR_TMP0) { - gen_op_st_v(ot + s->mem_index, t0, a0); + gen_op_st_T0_A0(ot + s->mem_index); } else { - gen_op_mov_reg_v(ot, op1, t0); + gen_op_mov_reg_T0(ot, op1); } if (op2 != 0) { - /* update eflags */ + /* Compute the flags into CC_SRC. */ gen_compute_eflags(s); - assert(s->cc_op == CC_OP_EFLAGS); - tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C)); - tcg_gen_xor_tl(cpu_tmp0, t1, t0); - tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1)); - tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O); - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0); + /* The value that was "rotated out" is now present at the other end + of the word. Compute C into CC_DST and O into CC_SRC2. Note that + since we've computed the flags into CC_SRC, these variables are + currently dead. */ if (is_right) { - tcg_gen_shri_tl(t0, t0, data_bits - 1); + tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1); + tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask); + } else { + tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask); + tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1); } - tcg_gen_andi_tl(t0, t0, CC_C); - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0); + tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1); + tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst); + set_cc_op(s, CC_OP_ADCOX); } - - tcg_temp_free(t0); - tcg_temp_free(t1); - tcg_temp_free(a0); } /* XXX: add faster immediate = 1 case */ From e2f515cf2f3795b9edb68eee42262e7c5f88fe98 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 19 Feb 2013 14:48:43 -0800 Subject: [PATCH 60/61] target-i386: Discard CC_OP computation in set_cc_op also The shift and rotate insns use movcond to set CC_OP, and thus achieve a conditional EFLAGS setting. By discarding CC_OP in a later flags setting insn, we can discard that movcond. 
Signed-off-by: Richard Henderson --- target-i386/translate.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index 6b109e853b23..b9a269299136 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -239,10 +239,18 @@ static void set_cc_op(DisasContext *s, CCOp op) tcg_gen_discard_tl(cpu_cc_srcT); } + if (op == CC_OP_DYNAMIC) { + /* The DYNAMIC setting is translator only, and should never be + stored. Thus we always consider it clean. */ + s->cc_op_dirty = false; + } else { + /* Discard any computed CC_OP value (see shifts). */ + if (s->cc_op == CC_OP_DYNAMIC) { + tcg_gen_discard_i32(cpu_cc_op); + } + s->cc_op_dirty = true; + } s->cc_op = op; - /* The DYNAMIC setting is translator only, and should never be - stored. Thus we always consider it clean. */ - s->cc_op_dirty = (op != CC_OP_DYNAMIC); } static void gen_update_cc_op(DisasContext *s) From f437d0a3c24e471a855da33a086fe529e09a06af Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 19 Feb 2013 21:06:31 -0800 Subject: [PATCH 61/61] target-i386: Use movcond to implement shiftd. With this being all straight-line code, it can get deleted when the cc variables die. Signed-off-by: Richard Henderson --- target-i386/translate.c | 247 +++++++++++++++++----------------------- 1 file changed, 106 insertions(+), 141 deletions(-) diff --git a/target-i386/translate.c b/target-i386/translate.c index b9a269299136..439d19efe063 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -72,7 +72,6 @@ static TCGv cpu_tmp0, cpu_tmp4; static TCGv_ptr cpu_ptr0, cpu_ptr1; static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32; static TCGv_i64 cpu_tmp1_i64; -static TCGv cpu_tmp5; static uint8_t gen_opc_cc_op[OPC_BUF_SIZE]; @@ -1577,12 +1576,55 @@ static void gen_inc(DisasContext *s1, int ot, int d, int c) tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); } +static void gen_shift_flags(DisasContext *s, int ot, TCGv result, TCGv shm1, + TCGv count, bool is_right) +{ + TCGv_i32 z32, s32, oldop; + TCGv z_tl; + + /* Store the results into the CC variables. If we know that the + variable must be dead, store unconditionally. Otherwise we'll + need to not disrupt the current contents. */ + z_tl = tcg_const_tl(0); + if (cc_op_live[s->cc_op] & USES_CC_DST) { + tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl, + result, cpu_cc_dst); + } else { + tcg_gen_mov_tl(cpu_cc_dst, result); + } + if (cc_op_live[s->cc_op] & USES_CC_SRC) { + tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl, + shm1, cpu_cc_src); + } else { + tcg_gen_mov_tl(cpu_cc_src, shm1); + } + tcg_temp_free(z_tl); + + /* Get the two potential CC_OP values into temporaries. */ + tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot); + if (s->cc_op == CC_OP_DYNAMIC) { + oldop = cpu_cc_op; + } else { + tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op); + oldop = cpu_tmp3_i32; + } + + /* Conditionally store the CC_OP value. */ + z32 = tcg_const_i32(0); + s32 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(s32, count); + tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop); + tcg_temp_free_i32(z32); + tcg_temp_free_i32(s32); + + /* The CC_OP value is no longer predictable. */ + set_cc_op(s, CC_OP_DYNAMIC); +} + static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, int is_right, int is_arith) { target_ulong mask = (ot == OT_QUAD ? 
0x3f : 0x1f); - TCGv_i32 z32, s32, oldop; - TCGv z_tl; /* load */ if (op1 == OR_TMP0) { @@ -1616,43 +1658,7 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1, gen_op_mov_reg_T0(ot, op1); } - /* Store the results into the CC variables. If we know that the - variable must be dead, store unconditionally. Otherwise we'll - need to not disrupt the current contents. */ - z_tl = tcg_const_tl(0); - if (cc_op_live[s->cc_op] & USES_CC_DST) { - tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, cpu_T[1], z_tl, - cpu_T[0], cpu_cc_dst); - } else { - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]); - } - if (cc_op_live[s->cc_op] & USES_CC_SRC) { - tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, cpu_T[1], z_tl, - cpu_tmp0, cpu_cc_src); - } else { - tcg_gen_mov_tl(cpu_cc_src, cpu_tmp0); - } - tcg_temp_free(z_tl); - - /* Get the two potential CC_OP values into temporaries. */ - tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot); - if (s->cc_op == CC_OP_DYNAMIC) { - oldop = cpu_cc_op; - } else { - tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op); - oldop = cpu_tmp3_i32; - } - - /* Conditionally store the CC_OP value. */ - z32 = tcg_const_i32(0); - s32 = tcg_temp_new_i32(); - tcg_gen_trunc_tl_i32(s32, cpu_T[1]); - tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop); - tcg_temp_free_i32(z32); - tcg_temp_free_i32(s32); - - /* The CC_OP value is no longer predictable. */ - set_cc_op(s, CC_OP_DYNAMIC); + gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, cpu_T[1], is_right); } static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2, @@ -1931,128 +1937,88 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1, /* XXX: add faster immediate case */ static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1, - int is_right, TCGv count) + bool is_right, TCGv count_in) { - int label1, label2, data_bits; - target_ulong mask; - TCGv t0, t1, t2, a0; - - t0 = tcg_temp_local_new(); - t1 = tcg_temp_local_new(); - t2 = tcg_temp_local_new(); - a0 = tcg_temp_local_new(); - - if (ot == OT_QUAD) - mask = 0x3f; - else - mask = 0x1f; + target_ulong mask = (ot == OT_QUAD ? 63 : 31); + TCGv count; /* load */ if (op1 == OR_TMP0) { - tcg_gen_mov_tl(a0, cpu_A0); - gen_op_ld_v(ot + s->mem_index, t0, a0); + gen_op_ld_T0_A0(ot + s->mem_index); } else { - gen_op_mov_v_reg(ot, t0, op1); + gen_op_mov_TN_reg(ot, 0, op1); } - tcg_gen_andi_tl(t2, count, mask); - tcg_gen_mov_tl(t1, cpu_T[1]); + count = tcg_temp_new(); + tcg_gen_andi_tl(count, count_in, mask); - /* Must test zero case to avoid using undefined behaviour in TCG - shifts. */ - label1 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1); - - tcg_gen_addi_tl(cpu_tmp5, t2, -1); - if (ot == OT_WORD) { - /* Note: we implement the Intel behaviour for shift count > 16 */ + switch (ot) { + case OT_WORD: + /* Note: we implement the Intel behaviour for shift count > 16. + This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A + portion by constructing it as a 32-bit value. 
*/ if (is_right) { - tcg_gen_andi_tl(t0, t0, 0xffff); - tcg_gen_shli_tl(cpu_tmp0, t1, 16); - tcg_gen_or_tl(t0, t0, cpu_tmp0); - tcg_gen_ext32u_tl(t0, t0); - - tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5); - - /* only needed if count > 16, but a test would complicate */ - tcg_gen_subfi_tl(cpu_tmp5, 32, t2); - tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5); - - tcg_gen_shr_tl(t0, t0, t2); - - tcg_gen_or_tl(t0, t0, cpu_tmp0); + tcg_gen_deposit_tl(cpu_tmp0, cpu_T[0], cpu_T[1], 16, 16); + tcg_gen_mov_tl(cpu_T[1], cpu_T[0]); + tcg_gen_mov_tl(cpu_T[0], cpu_tmp0); } else { - /* XXX: not optimal */ - tcg_gen_andi_tl(t0, t0, 0xffff); - tcg_gen_shli_tl(t1, t1, 16); - tcg_gen_or_tl(t1, t1, t0); - tcg_gen_ext32u_tl(t1, t1); - - tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5); - tcg_gen_subfi_tl(cpu_tmp0, 32, cpu_tmp5); - tcg_gen_shr_tl(cpu_tmp5, t1, cpu_tmp0); - tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp5); - - tcg_gen_shl_tl(t0, t0, t2); - tcg_gen_subfi_tl(cpu_tmp5, 32, t2); - tcg_gen_shr_tl(t1, t1, cpu_tmp5); - tcg_gen_or_tl(t0, t0, t1); + tcg_gen_deposit_tl(cpu_T[1], cpu_T[0], cpu_T[1], 16, 16); } - } else { - data_bits = 8 << ot; + /* FALLTHRU */ +#ifdef TARGET_X86_64 + case OT_LONG: + /* Concatenate the two 32-bit values and use a 64-bit shift. */ + tcg_gen_subi_tl(cpu_tmp0, count, 1); if (is_right) { - if (ot == OT_LONG) - tcg_gen_ext32u_tl(t0, t0); - - tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5); + tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[0], cpu_T[1]); + tcg_gen_shr_i64(cpu_tmp0, cpu_T[0], cpu_tmp0); + tcg_gen_shr_i64(cpu_T[0], cpu_T[0], count); + } else { + tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[1], cpu_T[0]); + tcg_gen_shl_i64(cpu_tmp0, cpu_T[0], cpu_tmp0); + tcg_gen_shl_i64(cpu_T[0], cpu_T[0], count); + tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32); + tcg_gen_shri_i64(cpu_T[0], cpu_T[0], 32); + } + break; +#endif + default: + tcg_gen_subi_tl(cpu_tmp0, count, 1); + if (is_right) { + tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0); - tcg_gen_shr_tl(t0, t0, t2); - tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2); - tcg_gen_shl_tl(t1, t1, cpu_tmp5); - tcg_gen_or_tl(t0, t0, t1); - + tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count); + tcg_gen_shr_tl(cpu_T[0], cpu_T[0], count); + tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_tmp4); } else { - if (ot == OT_LONG) - tcg_gen_ext32u_tl(t1, t1); - - tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5); - - tcg_gen_shl_tl(t0, t0, t2); - tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2); - tcg_gen_shr_tl(t1, t1, cpu_tmp5); - tcg_gen_or_tl(t0, t0, t1); + tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0); + if (ot == OT_WORD) { + /* Only needed if count > 16, for Intel behaviour. */ + tcg_gen_subfi_tl(cpu_tmp4, 33, count); + tcg_gen_shr_tl(cpu_tmp4, cpu_T[1], cpu_tmp4); + tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4); + } + + tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count); + tcg_gen_shl_tl(cpu_T[0], cpu_T[0], count); + tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp4); } + tcg_gen_movi_tl(cpu_tmp4, 0); + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[1], count, cpu_tmp4, + cpu_tmp4, cpu_T[1]); + tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]); + break; } - tcg_gen_mov_tl(t1, cpu_tmp4); - gen_set_label(label1); /* store */ if (op1 == OR_TMP0) { - gen_op_st_v(ot + s->mem_index, t0, a0); - } else { - gen_op_mov_reg_v(ot, op1, t0); - } - - /* Update eflags data because we cannot predict flags afterward. 
*/ - gen_update_cc_op(s); - set_cc_op(s, CC_OP_DYNAMIC); - - label2 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2); - - tcg_gen_mov_tl(cpu_cc_src, t1); - tcg_gen_mov_tl(cpu_cc_dst, t0); - if (is_right) { - tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot); + gen_op_st_T0_A0(ot + s->mem_index); } else { - tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot); + gen_op_mov_reg_T0(ot, op1); } - gen_set_label(label2); - tcg_temp_free(t0); - tcg_temp_free(t1); - tcg_temp_free(t2); - tcg_temp_free(a0); + gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, count, is_right); + tcg_temp_free(count); } static void gen_shift(DisasContext *s1, int op, int ot, int d, int s) @@ -8401,7 +8367,6 @@ static inline void gen_intermediate_code_internal(CPUX86State *env, cpu_tmp2_i32 = tcg_temp_new_i32(); cpu_tmp3_i32 = tcg_temp_new_i32(); cpu_tmp4 = tcg_temp_new(); - cpu_tmp5 = tcg_temp_new(); cpu_ptr0 = tcg_temp_new_ptr(); cpu_ptr1 = tcg_temp_new_ptr(); cpu_cc_srcT = tcg_temp_local_new();
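
As an aside on the 16-bit path above: the "Intel behaviour for shift count > 16"
comment in gen_shiftd_rm_T1 can be modelled in plain C as shifting the concatenation
A:B:A right by the masked count and keeping the low 16 bits. This is only an
illustrative sketch of the semantics the deposit/concatenate sequence implements,
not QEMU code.

    #include <stdint.h>

    /* shrdw dest=A, src=B, count=C */
    static uint16_t shrdw_model(uint16_t a, uint16_t b, unsigned count)
    {
        count &= 31;                      /* count is masked to 5 bits */
        if (count == 0) {
            return a;                     /* destination (and flags) unchanged */
        }
        /* A:B:A as a 48-bit value; the low 16 bits of the shift result
           become the destination */
        uint64_t aba = ((uint64_t)a << 32) | ((uint64_t)b << 16) | a;
        return (uint16_t)(aba >> count);
    }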