Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20190513' int…
Browse files Browse the repository at this point in the history
…o staging

Improve code generation for vector duplication.
Add vector expansions for shifts by non-constant scalar.
Add vector expansions for shifts by vector.
Add integer and vector expansions for absolute value.
Several patches in preparation for Altivec.
Bug fix for tcg/aarch64 vs min/max.

# gpg: Signature made Tue 14 May 2019 00:58:02 BST
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth/tags/pull-tcg-20190513: (31 commits)
  tcg/aarch64: Do not advertise minmax for MO_64
  target/xtensa: Use tcg_gen_abs_i32
  target/tricore: Use tcg_gen_abs_tl
  target/s390x: Use tcg_gen_abs_i64
  target/ppc: Use tcg_gen_abs_tl
  target/ppc: Use tcg_gen_abs_i32
  target/cris: Use tcg_gen_abs_tl
  target/arm: Use tcg_gen_abs_i64 and tcg_gen_gvec_abs
  tcg/aarch64: Support vector absolute value
  tcg/i386: Support vector absolute value
  tcg: Add support for vector absolute value
  tcg: Add support for integer absolute value
  tcg/i386: Support vector scalar shift opcodes
  tcg: Add gvec expanders for vector shift by scalar
  tcg/aarch64: Support vector variable shift opcodes
  tcg/i386: Support vector variable shift opcodes
  tcg: Add gvec expanders for variable shift
  tcg: Add INDEX_op_dupm_vec
  tcg/aarch64: Implement tcg_out_dupm_vec
  tcg/i386: Implement tcg_out_dupm_vec
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
  • Loading branch information
pm215 committed May 14, 2019
2 parents e24f44d + a7b6d28 commit e329ad2
Show file tree
Hide file tree
Showing 36 changed files with 2,020 additions and 473 deletions.
192 changes: 192 additions & 0 deletions accel/tcg/tcg-runtime-gvec.c
Expand Up @@ -398,6 +398,54 @@ void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
clear_high(d, oprsz, desc);
}

void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(int8_t)) {
int8_t aa = *(int8_t *)(a + i);
*(int8_t *)(d + i) = aa < 0 ? -aa : aa;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(int16_t)) {
int16_t aa = *(int16_t *)(a + i);
*(int16_t *)(d + i) = aa < 0 ? -aa : aa;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(int32_t)) {
int32_t aa = *(int32_t *)(a + i);
*(int32_t *)(d + i) = aa < 0 ? -aa : aa;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(int64_t)) {
int64_t aa = *(int64_t *)(a + i);
*(int64_t *)(d + i) = aa < 0 ? -aa : aa;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
Expand Down Expand Up @@ -725,6 +773,150 @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
clear_high(d, oprsz, desc);
}

void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
uint8_t sh = *(uint8_t *)(b + i) & 7;
*(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
uint8_t sh = *(uint16_t *)(b + i) & 15;
*(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
uint8_t sh = *(uint32_t *)(b + i) & 31;
*(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
uint8_t sh = *(uint64_t *)(b + i) & 63;
*(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
uint8_t sh = *(uint8_t *)(b + i) & 7;
*(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
uint8_t sh = *(uint16_t *)(b + i) & 15;
*(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
uint8_t sh = *(uint32_t *)(b + i) & 31;
*(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
uint8_t sh = *(uint64_t *)(b + i) & 63;
*(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(vec8)) {
uint8_t sh = *(uint8_t *)(b + i) & 7;
*(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(int16_t)) {
uint8_t sh = *(uint16_t *)(b + i) & 15;
*(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(vec32)) {
uint8_t sh = *(uint32_t *)(b + i) & 31;
*(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}

void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;

for (i = 0; i < oprsz; i += sizeof(vec64)) {
uint8_t sh = *(uint64_t *)(b + i) & 63;
*(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
}
clear_high(d, oprsz, desc);
}

/* If vectors are enabled, the compiler fills in -1 for true.
Otherwise, we must take care of this by hand. */
#ifdef CONFIG_VECTOR16
Expand Down
20 changes: 20 additions & 0 deletions accel/tcg/tcg-runtime.h
Expand Up @@ -225,6 +225,11 @@ DEF_HELPER_FLAGS_3(gvec_neg16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_neg32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_neg64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)

DEF_HELPER_FLAGS_3(gvec_abs8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_abs16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_abs32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_abs64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)

DEF_HELPER_FLAGS_3(gvec_not, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_and, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_or, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
Expand Down Expand Up @@ -254,6 +259,21 @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(gvec_shr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(gvec_sar8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
Expand Down
2 changes: 0 additions & 2 deletions target/arm/helper.h
Expand Up @@ -352,8 +352,6 @@ DEF_HELPER_2(neon_ceq_u8, i32, i32, i32)
DEF_HELPER_2(neon_ceq_u16, i32, i32, i32)
DEF_HELPER_2(neon_ceq_u32, i32, i32, i32)

DEF_HELPER_1(neon_abs_s8, i32, i32)
DEF_HELPER_1(neon_abs_s16, i32, i32)
DEF_HELPER_1(neon_clz_u8, i32, i32)
DEF_HELPER_1(neon_clz_u16, i32, i32)
DEF_HELPER_1(neon_cls_s8, i32, i32)
Expand Down
5 changes: 0 additions & 5 deletions target/arm/neon_helper.c
Expand Up @@ -1228,11 +1228,6 @@ NEON_VOP(ceq_u16, neon_u16, 2)
NEON_VOP(ceq_u32, neon_u32, 1)
#undef NEON_FN

#define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src
NEON_VOP1(abs_s8, neon_s8, 4)
NEON_VOP1(abs_s16, neon_s16, 2)
#undef NEON_FN

/* Count Leading Sign/Zero Bits. */
static inline int do_clz8(uint8_t x)
{
Expand Down
41 changes: 5 additions & 36 deletions target/arm/translate-a64.c
Expand Up @@ -9468,11 +9468,7 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u,
if (u) {
tcg_gen_neg_i64(tcg_rd, tcg_rn);
} else {
TCGv_i64 tcg_zero = tcg_const_i64(0);
tcg_gen_neg_i64(tcg_rd, tcg_rn);
tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
tcg_rn, tcg_rd);
tcg_temp_free_i64(tcg_zero);
tcg_gen_abs_i64(tcg_rd, tcg_rn);
}
break;
case 0x2f: /* FABS */
Expand Down Expand Up @@ -12366,11 +12362,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
}
break;
case 0xb:
if (u) { /* NEG */
if (u) { /* ABS, NEG */
gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
return;
} else {
gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
}
break;
return;
}

if (size == 3) {
Expand Down Expand Up @@ -12438,17 +12435,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
}
break;
case 0xb: /* ABS, NEG */
if (u) {
tcg_gen_neg_i32(tcg_res, tcg_op);
} else {
TCGv_i32 tcg_zero = tcg_const_i32(0);
tcg_gen_neg_i32(tcg_res, tcg_op);
tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
tcg_zero, tcg_op, tcg_res);
tcg_temp_free_i32(tcg_zero);
}
break;
case 0x2f: /* FABS */
gen_helper_vfp_abss(tcg_res, tcg_op);
break;
Expand Down Expand Up @@ -12561,23 +12547,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
tcg_temp_free_i32(tcg_zero);
break;
}
case 0xb: /* ABS, NEG */
if (u) {
TCGv_i32 tcg_zero = tcg_const_i32(0);
if (size) {
gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
} else {
gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
}
tcg_temp_free_i32(tcg_zero);
} else {
if (size) {
gen_helper_neon_abs_s16(tcg_res, tcg_op);
} else {
gen_helper_neon_abs_s8(tcg_res, tcg_op);
}
}
break;
case 0x4: /* CLS, CLZ */
if (u) {
if (size == 0) {
Expand Down
9 changes: 5 additions & 4 deletions target/arm/translate-sve.c
Expand Up @@ -3302,29 +3302,30 @@ static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)

static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
{
static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
static const GVecGen2s op[4] = {
{ .fni8 = tcg_gen_vec_sub8_i64,
.fniv = tcg_gen_sub_vec,
.fno = gen_helper_sve_subri_b,
.opc = INDEX_op_sub_vec,
.opt_opc = vecop_list,
.vece = MO_8,
.scalar_first = true },
{ .fni8 = tcg_gen_vec_sub16_i64,
.fniv = tcg_gen_sub_vec,
.fno = gen_helper_sve_subri_h,
.opc = INDEX_op_sub_vec,
.opt_opc = vecop_list,
.vece = MO_16,
.scalar_first = true },
{ .fni4 = tcg_gen_sub_i32,
.fniv = tcg_gen_sub_vec,
.fno = gen_helper_sve_subri_s,
.opc = INDEX_op_sub_vec,
.opt_opc = vecop_list,
.vece = MO_32,
.scalar_first = true },
{ .fni8 = tcg_gen_sub_i64,
.fniv = tcg_gen_sub_vec,
.fno = gen_helper_sve_subri_d,
.opc = INDEX_op_sub_vec,
.opt_opc = vecop_list,
.prefer_i64 = TCG_TARGET_REG_BITS == 64,
.vece = MO_64,
.scalar_first = true }
Expand Down

0 comments on commit e329ad2

Please sign in to comment.