Skip to content

Commit

Permalink
Merge tag 'pull-tcg-20240507' of https://gitlab.com/rth7680/qemu into…
Browse files Browse the repository at this point in the history
… staging

tcg: Add write_aofs to GVecGen3i
tcg/i386: Simplify immediate 8-bit logical vector shifts
tcg/i386: Optimize setcond of TST{EQ,NE} with 0xffffffff
tcg/optimize: Optimize setcond with zmask
accel/tcg: Introduce CF_BP_PAGE
target/sh4: Update DisasContextBase.insn_start
gitlab: Drop --static from s390x linux-user build
gitlab: Streamline ubuntu-22.04-s390x

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmY6OoAdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV8FEwf7Bhs9bV2Kp4LxUzGq
# +dSHHc/WuCyIILLDQ4kZyXvILuI59wYhrWBUUTzBnAZ/tEf0oMG2y57F/lIcxz9w
# VvsFicMOhtjQ8iBEfl/rkkaYs9BLcxqMTAA3PxNBE6l3bzjcHSTkhey4MoPGRibn
# CkwaLzb2ebNjfgzC1IsNf/tyiMXl0tBQM7JVV4EztaOGEmqw8X0/PyVZDiC3WUNC
# tf9yqiNIlgGkn7rj3sT/rNdi4xlzQybgrb1MCFT6z5cqsW2bwqivRpxHi4yulHKI
# VhYA3kud+TX2ASukpibsSkA+9SbcH/qwOugPhPIu+KANsFUcVKL6Anzv6Ysl9kZ0
# +Wnbow==
# =FJCW
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 07 May 2024 07:28:16 AM PDT
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]

* tag 'pull-tcg-20240507' of https://gitlab.com/rth7680/qemu:
  gitlab: Streamline ubuntu-22.04-s390x
  gitlab: Drop --static from s390x linux-user build
  gitlab: Drop --disable-libssh from ubuntu-22.04-s390x.yml
  target/sh4: Update DisasContextBase.insn_start
  accel/tcg: Introduce CF_BP_PAGE
  tcg/optimize: Optimize setcond with zmask
  tcg/i386: Optimize setcond of TST{EQ,NE} with 0xffffffff
  tcg/i386: Simplify immediate 8-bit logical vector shifts
  tcg: Add write_aofs to GVecGen3i

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
  • Loading branch information
rth7680 committed May 7, 2024
2 parents e116b92 + f578b66 commit 571882c
Show file tree
Hide file tree
Showing 8 changed files with 173 additions and 65 deletions.
16 changes: 8 additions & 8 deletions .gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# setup by the scripts/ci/setup/build-environment.yml task
# "Install basic packages to build QEMU on Ubuntu 22.04"

ubuntu-22.04-s390x-all-linux-static:
ubuntu-22.04-s390x-all-linux:
extends: .custom_runner_template
needs: []
stage: build
Expand All @@ -15,13 +15,13 @@ ubuntu-22.04-s390x-all-linux-static:
script:
- mkdir build
- cd build
- ../configure --enable-debug --static --disable-system
- ../configure --enable-debug --disable-system --disable-tools --disable-docs
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
- make --output-sync -j`nproc`
- make --output-sync check-tcg
- make --output-sync -j`nproc` check

ubuntu-22.04-s390x-all:
ubuntu-22.04-s390x-all-system:
extends: .custom_runner_template
needs: []
stage: build
Expand All @@ -35,7 +35,7 @@ ubuntu-22.04-s390x-all:
script:
- mkdir build
- cd build
- ../configure --disable-libssh
- ../configure --disable-user
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
- make --output-sync -j`nproc`
- make --output-sync -j`nproc` check
Expand All @@ -57,7 +57,7 @@ ubuntu-22.04-s390x-alldbg:
script:
- mkdir build
- cd build
- ../configure --enable-debug --disable-libssh
- ../configure --enable-debug
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
- make clean
- make --output-sync -j`nproc`
Expand All @@ -80,7 +80,7 @@ ubuntu-22.04-s390x-clang:
script:
- mkdir build
- cd build
- ../configure --disable-libssh --cc=clang --cxx=clang++ --enable-sanitizers
- ../configure --cc=clang --cxx=clang++ --enable-sanitizers
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
- make --output-sync -j`nproc`
- make --output-sync -j`nproc` check
Expand All @@ -101,7 +101,7 @@ ubuntu-22.04-s390x-tci:
script:
- mkdir build
- cd build
- ../configure --disable-libssh --enable-tcg-interpreter
- ../configure --enable-tcg-interpreter
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
- make --output-sync -j`nproc`

Expand All @@ -122,7 +122,7 @@ ubuntu-22.04-s390x-notcg:
script:
- mkdir build
- cd build
- ../configure --disable-libssh --disable-tcg
- ../configure --disable-tcg
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
- make --output-sync -j`nproc`
- make --output-sync -j`nproc` check
2 changes: 1 addition & 1 deletion accel/tcg/cpu-exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ static bool check_for_breakpoints_slow(CPUState *cpu, vaddr pc,
* breakpoints are removed.
*/
if (match_page) {
*cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | 1;
*cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | CF_BP_PAGE | 1;
}
return false;
}
Expand Down
1 change: 1 addition & 0 deletions include/exec/translation-block.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ struct TranslationBlock {
#define CF_PARALLEL 0x00008000 /* Generate code for a parallel context */
#define CF_NOIRQ 0x00010000 /* Generate an uninterruptible TB */
#define CF_PCREL 0x00020000 /* Opcodes in TB are PC-relative */
#define CF_BP_PAGE 0x00040000 /* Breakpoint present in code page */
#define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
#define CF_CLUSTER_SHIFT 24

Expand Down
2 changes: 2 additions & 0 deletions include/tcg/tcg-op-gvec-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ typedef struct {
bool prefer_i64;
/* Load dest as a 3rd source operand. */
bool load_dest;
/* Write aofs as a 2nd dest operand. */
bool write_aofs;
} GVecGen3i;

typedef struct {
Expand Down
1 change: 1 addition & 0 deletions target/sh4/translate.c
Original file line number Diff line number Diff line change
Expand Up @@ -2189,6 +2189,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
*/
for (i = 1; i < max_insns; ++i) {
tcg_gen_insn_start(pc + i * 2, ctx->envflags);
ctx->base.insn_start = tcg_last_op();
}
}
#endif
Expand Down
76 changes: 28 additions & 48 deletions tcg/i386/tcg-target.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -1658,6 +1658,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
TCGArg dest, TCGArg arg1, TCGArg arg2,
int const_arg2, bool neg)
{
int cmp_rexw = rexw;
bool inv = false;
bool cleared;
int jcc;
Expand All @@ -1674,6 +1675,18 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
}
break;

case TCG_COND_TSTNE:
inv = true;
/* fall through */
case TCG_COND_TSTEQ:
/* If arg2 is 0xffffffff (all low 32 bits set), convert to a 32-bit LTU/GEU compare vs 1. */
if (const_arg2 && arg2 == 0xffffffffu) {
arg2 = 1;
cmp_rexw = 0;
goto do_ltu;
}
break;

case TCG_COND_LEU:
inv = true;
/* fall through */
Expand All @@ -1697,7 +1710,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
* We can then use NEG or INC to produce the desired result.
* This is always smaller than the SETCC expansion.
*/
tcg_out_cmp(s, TCG_COND_LTU, arg1, arg2, const_arg2, rexw);
tcg_out_cmp(s, TCG_COND_LTU, arg1, arg2, const_arg2, cmp_rexw);

/* X - X - C = -C = (C ? -1 : 0) */
tgen_arithr(s, ARITH_SBB + (neg ? rexw : 0), dest, dest);
Expand Down Expand Up @@ -1744,7 +1757,7 @@ static void tcg_out_setcond(TCGContext *s, int rexw, TCGCond cond,
cleared = true;
}

jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, rexw);
jcc = tcg_out_cmp(s, cond, arg1, arg2, const_arg2, cmp_rexw);
tcg_out_modrm(s, OPC_SETCC | jcc, 0, dest);

if (!cleared) {
Expand Down Expand Up @@ -3769,49 +3782,20 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
}
}

static void expand_vec_shi(TCGType type, unsigned vece, TCGOpcode opc,
static void expand_vec_shi(TCGType type, unsigned vece, bool right,
TCGv_vec v0, TCGv_vec v1, TCGArg imm)
{
TCGv_vec t1, t2;
uint8_t mask;

tcg_debug_assert(vece == MO_8);

t1 = tcg_temp_new_vec(type);
t2 = tcg_temp_new_vec(type);

/*
* Unpack to W, shift, and repack. Tricky bits:
* (1) Use punpck*bw x,x to produce DDCCBBAA,
* i.e. duplicate in other half of the 16-bit lane.
* (2) For right-shift, add 8 so that the high half of the lane
* becomes zero. For left-shift, and left-rotate, we must
* shift up and down again.
* (3) Step 2 leaves high half zero such that PACKUSWB
* (pack with unsigned saturation) does not modify
* the quantity.
*/
vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));

if (opc != INDEX_op_rotli_vec) {
imm += 8;
}
if (opc == INDEX_op_shri_vec) {
tcg_gen_shri_vec(MO_16, t1, t1, imm);
tcg_gen_shri_vec(MO_16, t2, t2, imm);
if (right) {
mask = 0xff >> imm;
tcg_gen_shri_vec(MO_16, v0, v1, imm);
} else {
tcg_gen_shli_vec(MO_16, t1, t1, imm);
tcg_gen_shli_vec(MO_16, t2, t2, imm);
tcg_gen_shri_vec(MO_16, t1, t1, 8);
tcg_gen_shri_vec(MO_16, t2, t2, 8);
mask = 0xff << imm;
tcg_gen_shli_vec(MO_16, v0, v1, imm);
}

vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8,
tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t2));
tcg_temp_free_vec(t1);
tcg_temp_free_vec(t2);
tcg_gen_and_vec(MO_8, v0, v0, tcg_constant_vec(type, MO_8, mask));
}

static void expand_vec_sari(TCGType type, unsigned vece,
Expand All @@ -3821,7 +3805,7 @@ static void expand_vec_sari(TCGType type, unsigned vece,

switch (vece) {
case MO_8:
/* Unpack to W, shift, and repack, as in expand_vec_shi. */
/* Unpack to 16-bit, shift, and repack. */
t1 = tcg_temp_new_vec(type);
t2 = tcg_temp_new_vec(type);
vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
Expand Down Expand Up @@ -3874,12 +3858,7 @@ static void expand_vec_rotli(TCGType type, unsigned vece,
{
TCGv_vec t;

if (vece == MO_8) {
expand_vec_shi(type, vece, INDEX_op_rotli_vec, v0, v1, imm);
return;
}

if (have_avx512vbmi2) {
if (vece != MO_8 && have_avx512vbmi2) {
vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece,
tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm);
return;
Expand Down Expand Up @@ -4155,10 +4134,11 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,

switch (opc) {
case INDEX_op_shli_vec:
expand_vec_shi(type, vece, false, v0, v1, a2);
break;
case INDEX_op_shri_vec:
expand_vec_shi(type, vece, opc, v0, v1, a2);
expand_vec_shi(type, vece, true, v0, v1, a2);
break;

case INDEX_op_sari_vec:
expand_vec_sari(type, vece, v0, v1, a2);
break;
Expand Down
110 changes: 110 additions & 0 deletions tcg/optimize.c
Original file line number Diff line number Diff line change
Expand Up @@ -2099,6 +2099,108 @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
return false;
}

/*
 * Simplify a setcond (neg == false) or negsetcond (neg == true) whose
 * second comparison operand is a constant, using the z_mask recorded
 * for the first operand.
 *
 * Two simplifications are attempted, in this order:
 *   1. If the first operand's z_mask is numerically below the constant,
 *      the unsigned/equality comparisons have a known outcome and the op
 *      is replaced by a constant move.
 *   2. If the first operand's z_mask is 0 or 1, the operand is already a
 *      boolean; comparisons against 0 or 1 reduce to a move, a negate,
 *      an add of -1, or an xor with 1.
 *
 * Returns the result of tcg_opt_gen_movi()/tcg_opt_gen_mov() when one of
 * those is emitted, and false in every other case.
 *
 * NOTE(review): false is also returned after @op has been rewritten in
 * place to neg/add/xor; confirm that callers tolerate continuing to
 * process an op that is no longer a setcond.
 */
static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
{
    uint64_t lhs_mask, rhs;
    TCGCond cond;
    bool flip, matches, is_i64;

    /* Nothing to do unless the right-hand side is a known constant. */
    if (!arg_is_const(op->args[2])) {
        return false;
    }

    lhs_mask = arg_info(op->args[1])->z_mask;
    rhs = arg_info(op->args[2])->val;
    cond = op->args[3];

    /* For a 32-bit operation only the low 32 bits take part. */
    if (ctx->type == TCG_TYPE_I32) {
        lhs_mask = (uint32_t)lhs_mask;
        rhs = (uint32_t)rhs;
    }

    /*
     * When the mask of the left operand is below the constant, the
     * left operand itself must be below the constant as well.
     */
    if (lhs_mask < rhs) {
        switch (cond) {
        case TCG_COND_EQ:
        case TCG_COND_GEU:
        case TCG_COND_GTU:
            /* Comparison is always false. */
            return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
        case TCG_COND_NE:
        case TCG_COND_LTU:
        case TCG_COND_LEU:
            /* Comparison is always true: 1 for setcond, -1 for negsetcond. */
            return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -1 : 1);
        default:
            break;
        }
    }

    /* The remaining transformation needs a left operand that is 0 or 1. */
    if (lhs_mask > 1) {
        return false;
    }

    /*
     * Decide whether the comparison yields the operand itself or its
     * logical inverse (flip), and whether the constant is the one for
     * which that holds (matches).
     */
    switch (cond) {
    case TCG_COND_EQ:
        flip = true;
        matches = (rhs == 0);
        break;
    case TCG_COND_NE:
        flip = false;
        matches = (rhs == 0);
        break;
    case TCG_COND_LTU:
    case TCG_COND_TSTEQ:
        flip = true;
        matches = (rhs == 1);
        break;
    case TCG_COND_GEU:
    case TCG_COND_TSTNE:
        flip = false;
        matches = (rhs == 1);
        break;
    default:
        return false;
    }
    if (!matches) {
        return false;
    }

    /* Plain setcond yielding the operand unchanged: just copy it. */
    if (!flip && !neg) {
        return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
    }

    switch (ctx->type) {
    case TCG_TYPE_I32:
        is_i64 = false;
        break;
    case TCG_TYPE_I64:
        is_i64 = true;
        break;
    default:
        g_assert_not_reached();
    }

    if (!flip) {
        /* negsetcond of a boolean: -a. */
        op->opc = is_i64 ? INDEX_op_neg_i64 : INDEX_op_neg_i32;
    } else if (neg) {
        /* negsetcond of the inverse: -(!a) == a - 1. */
        op->opc = is_i64 ? INDEX_op_add_i64 : INDEX_op_add_i32;
        op->args[2] = arg_new_constant(ctx, -1);
    } else {
        /* setcond of the inverse: !a == a ^ 1. */
        op->opc = is_i64 ? INDEX_op_xor_i64 : INDEX_op_xor_i32;
        op->args[2] = arg_new_constant(ctx, 1);
    }
    return false;
}

static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
{
TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc;
Expand Down Expand Up @@ -2200,6 +2302,10 @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
if (i >= 0) {
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
}

if (fold_setcond_zmask(ctx, op, false)) {
return true;
}
fold_setcond_tst_pow2(ctx, op, false);

ctx->z_mask = 1;
Expand All @@ -2214,6 +2320,10 @@ static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
if (i >= 0) {
return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
}

if (fold_setcond_zmask(ctx, op, true)) {
return true;
}
fold_setcond_tst_pow2(ctx, op, true);

/* Value is {0,-1} so all bits are repetitions of the sign. */
Expand Down

0 comments on commit 571882c

Please sign in to comment.