Skip to content

Commit

Permalink
target/arm: Convert Neon 'load single structure to all lanes' to deco…
Browse files Browse the repository at this point in the history
…detree

Convert the Neon "load single structure to all lanes" insns to
decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200430181003.21682-13-peter.maydell@linaro.org
  • Loading branch information
pm215 committed May 4, 2020
1 parent a27b463 commit 3698747
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 53 deletions.
5 changes: 5 additions & 0 deletions target/arm/neon-ls.decode
Expand Up @@ -34,3 +34,8 @@

VLDST_multiple 1111 0100 0 . l:1 0 rn:4 .... itype:4 size:2 align:2 rm:4 \
vd=%vd_dp

# Neon load single element to all lanes
#
# Fields as used by trans_VLD_all_lanes():
#   rn    register supplying the initial load address
#   rm    passed with rn to the base-register update helper
#   n     number of structure elements/registers minus one (nregs = n + 1)
#   size  element size; size == 3 is accepted only together with n == 3
#         and a == 1, and is then treated as size 2
#   t     VLD1: how many Dregs to write; VLD2/3/4: register stride
#   a     alignment-related bit; some n/size/a combinations are rejected
#   vd    first destination D register

VLD_all_lanes 1111 0100 1 . 1 0 rn:4 .... 11 n:2 size:2 t:1 a:1 rm:4 \
vd=%vd_dp
73 changes: 73 additions & 0 deletions target/arm/translate-neon.inc.c
Expand Up @@ -398,3 +398,76 @@ static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
return true;
}

/*
 * Neon load single structure to all lanes.
 *
 * Returns false for encodings rejected here (Neon absent, D16-D31 named
 * when they don't exist, or an invalid n/size/a combination).  Returns
 * true otherwise, including when vfp_access_check() fails and no load
 * is generated.
 */
static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
{
    int nregs = a->n + 1;
    int esize = a->size;
    int dreg = a->vd;
    int i, stride, vec_size, elt_bytes;
    TCGv_i32 addr, tmp;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if ((a->vd & 0x10) && !dc_isar_feature(aa32_simd_r32, s)) {
        return false;
    }

    if (esize == 3) {
        /* size == 3 is only accepted for VLD4 with a == 1 ... */
        if (!(nregs == 4 && a->a != 0)) {
            return false;
        }
        /* ... where it means 32 bits at 16 byte alignment */
        esize = 2;
    } else if (a->a == 1) {
        /* With a == 1 some register-count/size combinations are invalid */
        switch (nregs) {
        case 1:
            if (esize == 0) {
                return false;
            }
            break;
        case 3:
            return false;
        default:
            break;
        }
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * VLD1 to all lanes: T bit indicates how many Dregs to write.
     * VLD2/3/4 to all lanes: T bit indicates register stride.
     */
    if (a->t) {
        stride = 2;
    } else {
        stride = 1;
    }
    vec_size = 8;
    if (nregs == 1) {
        vec_size *= stride;
    }
    elt_bytes = 1 << esize;

    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);

    for (i = 0; i < nregs; i++, dreg += stride) {
        /* Fetch one element, then replicate it across the destination */
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
                        s->be_data | esize);
        if (vec_size != 16 || !(dreg & 1)) {
            tcg_gen_gvec_dup_i32(esize, neon_reg_offset(dreg, 0),
                                 vec_size, vec_size, tmp);
        } else {
            /*
             * A 16 byte write is not possible when the destination is
             * unaligned (odd D register): fill the first Dreg and copy
             * it into the next one.
             */
            tcg_gen_gvec_dup_i32(esize, neon_reg_offset(dreg, 0),
                                 8, 8, tmp);
            tcg_gen_gvec_mov(0, neon_reg_offset(dreg + 1, 0),
                             neon_reg_offset(dreg, 0), 8, 8);
        }
        tcg_gen_addi_i32(addr, addr, elt_bytes);
    }

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    /* Total bytes loaded: one element per structure register */
    gen_neon_ldst_base_update(s, a->rm, a->rn, elt_bytes * nregs);

    return true;
}
55 changes: 2 additions & 53 deletions target/arm/translate.c
Expand Up @@ -3224,7 +3224,6 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
int size;
int reg;
int load;
int vec_size;
TCGv_i32 addr;
TCGv_i32 tmp;

Expand Down Expand Up @@ -3254,58 +3253,8 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
} else {
size = (insn >> 10) & 3;
if (size == 3) {
/* Load single element to all lanes. */
int a = (insn >> 4) & 1;
if (!load) {
return 1;
}
size = (insn >> 6) & 3;
nregs = ((insn >> 8) & 3) + 1;

if (size == 3) {
if (nregs != 4 || a == 0) {
return 1;
}
/* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
size = 2;
}
if (nregs == 1 && a == 1 && size == 0) {
return 1;
}
if (nregs == 3 && a == 1) {
return 1;
}
addr = tcg_temp_new_i32();
load_reg_var(s, addr, rn);

/* VLD1 to all lanes: bit 5 indicates how many Dregs to write.
* VLD2/3/4 to all lanes: bit 5 indicates register stride.
*/
stride = (insn & (1 << 5)) ? 2 : 1;
vec_size = nregs == 1 ? stride * 8 : 8;

tmp = tcg_temp_new_i32();
for (reg = 0; reg < nregs; reg++) {
gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
s->be_data | size);
if ((rd & 1) && vec_size == 16) {
/* We cannot write 16 bytes at once because the
* destination is unaligned.
*/
tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
8, 8, tmp);
tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0),
neon_reg_offset(rd, 0), 8, 8);
} else {
tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
vec_size, vec_size, tmp);
}
tcg_gen_addi_i32(addr, addr, 1 << size);
rd += stride;
}
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(addr);
stride = (1 << size) * nregs;
/* Load single element to all lanes -- handled by decodetree */
return 1;
} else {
/* Single element. */
int idx = (insn >> 4) & 0xf;
Expand Down

0 comments on commit 3698747

Please sign in to comment.