Skip to content

Commit

Permalink
target/arm: Convert Neon 2-reg-misc pairwise ops to decodetree
Browse files Browse the repository at this point in the history
Convert the pairwise ops VPADDL and VPADAL in the 2-reg-misc grouping
to decodetree.

At this point we can get rid of the weird CPU_V001 #define that was
used to avoid having to explicitly list all the arguments being
passed to some TCG gen/helper functions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200616170844.13318-3-peter.maydell@linaro.org
  • Loading branch information
pm215 committed Jun 23, 2020
1 parent 353d2b8 commit 6106af3
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 33 deletions.
6 changes: 6 additions & 0 deletions target/arm/neon-dp.decode
Expand Up @@ -441,6 +441,12 @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
&2misc vm=%vm_dp vd=%vd_dp

VREV64 1111 001 11 . 11 .. 00 .... 0 0000 . . 0 .... @2misc

VPADDL_S 1111 001 11 . 11 .. 00 .... 0 0100 . . 0 .... @2misc
VPADDL_U 1111 001 11 . 11 .. 00 .... 0 0101 . . 0 .... @2misc

VPADAL_S 1111 001 11 . 11 .. 00 .... 0 1100 . . 0 .... @2misc
VPADAL_U 1111 001 11 . 11 .. 00 .... 0 1101 . . 0 .... @2misc
]

# Subgroup for size != 0b11
Expand Down
149 changes: 149 additions & 0 deletions target/arm/translate-neon.inc.c
Expand Up @@ -3020,3 +3020,152 @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a)
}
return true;
}

static bool do_2misc_pairwise(DisasContext *s, arg_2misc *a,
NeonGenWidenFn *widenfn,
NeonGenTwo64OpFn *opfn,
NeonGenTwo64OpFn *accfn)
{
/*
* Pairwise long operations: widen both halves of the pair,
* combine the pairs with the opfn, and then possibly accumulate
* into the destination with the accfn.
*/
int pass;

if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return false;
}

/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vm) & 0x10)) {
return false;
}

if ((a->vd | a->vm) & a->q) {
return false;
}

if (!widenfn) {
return false;
}

if (!vfp_access_check(s)) {
return true;
}

for (pass = 0; pass < a->q + 1; pass++) {
TCGv_i32 tmp;
TCGv_i64 rm0_64, rm1_64, rd_64;

rm0_64 = tcg_temp_new_i64();
rm1_64 = tcg_temp_new_i64();
rd_64 = tcg_temp_new_i64();
tmp = neon_load_reg(a->vm, pass * 2);
widenfn(rm0_64, tmp);
tcg_temp_free_i32(tmp);
tmp = neon_load_reg(a->vm, pass * 2 + 1);
widenfn(rm1_64, tmp);
tcg_temp_free_i32(tmp);
opfn(rd_64, rm0_64, rm1_64);
tcg_temp_free_i64(rm0_64);
tcg_temp_free_i64(rm1_64);

if (accfn) {
TCGv_i64 tmp64 = tcg_temp_new_i64();
neon_load_reg64(tmp64, a->vd + pass);
accfn(rd_64, tmp64, rd_64);
tcg_temp_free_i64(tmp64);
}
neon_store_reg64(rd_64, a->vd + pass);
tcg_temp_free_i64(rd_64);
}
return true;
}

static bool trans_VPADDL_S(DisasContext *s, arg_2misc *a)
{
static NeonGenWidenFn * const widenfn[] = {
gen_helper_neon_widen_s8,
gen_helper_neon_widen_s16,
tcg_gen_ext_i32_i64,
NULL,
};
static NeonGenTwo64OpFn * const opfn[] = {
gen_helper_neon_paddl_u16,
gen_helper_neon_paddl_u32,
tcg_gen_add_i64,
NULL,
};

return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
}

static bool trans_VPADDL_U(DisasContext *s, arg_2misc *a)
{
static NeonGenWidenFn * const widenfn[] = {
gen_helper_neon_widen_u8,
gen_helper_neon_widen_u16,
tcg_gen_extu_i32_i64,
NULL,
};
static NeonGenTwo64OpFn * const opfn[] = {
gen_helper_neon_paddl_u16,
gen_helper_neon_paddl_u32,
tcg_gen_add_i64,
NULL,
};

return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size], NULL);
}

static bool trans_VPADAL_S(DisasContext *s, arg_2misc *a)
{
static NeonGenWidenFn * const widenfn[] = {
gen_helper_neon_widen_s8,
gen_helper_neon_widen_s16,
tcg_gen_ext_i32_i64,
NULL,
};
static NeonGenTwo64OpFn * const opfn[] = {
gen_helper_neon_paddl_u16,
gen_helper_neon_paddl_u32,
tcg_gen_add_i64,
NULL,
};
static NeonGenTwo64OpFn * const accfn[] = {
gen_helper_neon_addl_u16,
gen_helper_neon_addl_u32,
tcg_gen_add_i64,
NULL,
};

return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
accfn[a->size]);
}

static bool trans_VPADAL_U(DisasContext *s, arg_2misc *a)
{
static NeonGenWidenFn * const widenfn[] = {
gen_helper_neon_widen_u8,
gen_helper_neon_widen_u16,
tcg_gen_extu_i32_i64,
NULL,
};
static NeonGenTwo64OpFn * const opfn[] = {
gen_helper_neon_paddl_u16,
gen_helper_neon_paddl_u32,
tcg_gen_add_i64,
NULL,
};
static NeonGenTwo64OpFn * const accfn[] = {
gen_helper_neon_addl_u16,
gen_helper_neon_addl_u32,
tcg_gen_add_i64,
NULL,
};

return do_2misc_pairwise(s, a, widenfn[a->size], opfn[a->size],
accfn[a->size]);
}
35 changes: 2 additions & 33 deletions target/arm/translate.c
Expand Up @@ -2934,8 +2934,6 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
gen_rfe(s, pc, load_cpu_field(spsr));
}

#define CPU_V001 cpu_V0, cpu_V0, cpu_V1

static int gen_neon_unzip(int rd, int rm, int size, int q)
{
TCGv_ptr pd, pm;
Expand Down Expand Up @@ -3117,16 +3115,6 @@ static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
tcg_temp_free_i32(src);
}

static inline void gen_neon_addl(int size)
{
switch (size) {
case 0: gen_helper_neon_addl_u16(CPU_V001); break;
case 1: gen_helper_neon_addl_u32(CPU_V001); break;
case 2: tcg_gen_add_i64(CPU_V001); break;
default: abort();
}
}

static void gen_neon_narrow_op(int op, int u, int size,
TCGv_i32 dest, TCGv_i64 src)
{
Expand Down Expand Up @@ -5092,29 +5080,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
}
switch (op) {
case NEON_2RM_VREV64:
/* handled by decodetree */
return 1;
case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
for (pass = 0; pass < q + 1; pass++) {
tmp = neon_load_reg(rm, pass * 2);
gen_neon_widen(cpu_V0, tmp, size, op & 1);
tmp = neon_load_reg(rm, pass * 2 + 1);
gen_neon_widen(cpu_V1, tmp, size, op & 1);
switch (size) {
case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
case 2: tcg_gen_add_i64(CPU_V001); break;
default: abort();
}
if (op >= NEON_2RM_VPADAL) {
/* Accumulate. */
neon_load_reg64(cpu_V1, rd + pass);
gen_neon_addl(size);
}
neon_store_reg64(cpu_V0, rd + pass);
}
break;
/* handled by decodetree */
return 1;
case NEON_2RM_VTRN:
if (size == 2) {
int n;
Expand Down

0 comments on commit 6106af3

Please sign in to comment.