Skip to content

Commit

Permalink
target/arm: Convert Neon VCVT f16/f32 insns to decodetree
Browse files Browse the repository at this point in the history
Convert the Neon insns in the 2-reg-misc group which are
VCVT between f32 and f16 to decodetree.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200616170844.13318-7-peter.maydell@linaro.org
  • Loading branch information
pm215 committed Jun 23, 2020
1 parent 749e2be commit 654a517
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 62 deletions.
3 changes: 3 additions & 0 deletions target/arm/neon-dp.decode
Expand Up @@ -461,6 +461,9 @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
VQMOVN_U 1111 001 11 . 11 .. 10 .... 0 0101 1 . 0 .... @2misc_q0

VSHLL 1111 001 11 . 11 .. 10 .... 0 0110 0 . 0 .... @2misc_q0

VCVT_F16_F32 1111 001 11 . 11 .. 10 .... 0 1100 0 . 0 .... @2misc_q0
VCVT_F32_F16 1111 001 11 . 11 .. 10 .... 0 1110 0 . 0 .... @2misc_q0
]

# Subgroup for size != 0b11
Expand Down
96 changes: 96 additions & 0 deletions target/arm/translate-neon.inc.c
Expand Up @@ -3354,3 +3354,99 @@ static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
tcg_temp_free_i32(rm1);
return true;
}

static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
{
TCGv_ptr fpst;
TCGv_i32 ahp, tmp, tmp2, tmp3;

if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
!dc_isar_feature(aa32_fp16_spconv, s)) {
return false;
}

/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vm) & 0x10)) {
return false;
}

if ((a->vm & 1) || (a->size != 1)) {
return false;
}

if (!vfp_access_check(s)) {
return true;
}

fpst = get_fpstatus_ptr(true);
ahp = get_ahp_flag();
tmp = neon_load_reg(a->vm, 0);
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
tmp2 = neon_load_reg(a->vm, 1);
gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
tcg_gen_shli_i32(tmp2, tmp2, 16);
tcg_gen_or_i32(tmp2, tmp2, tmp);
tcg_temp_free_i32(tmp);
tmp = neon_load_reg(a->vm, 2);
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
tmp3 = neon_load_reg(a->vm, 3);
neon_store_reg(a->vd, 0, tmp2);
gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
tcg_gen_shli_i32(tmp3, tmp3, 16);
tcg_gen_or_i32(tmp3, tmp3, tmp);
neon_store_reg(a->vd, 1, tmp3);
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(ahp);
tcg_temp_free_ptr(fpst);

return true;
}

static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
{
TCGv_ptr fpst;
TCGv_i32 ahp, tmp, tmp2, tmp3;

if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
!dc_isar_feature(aa32_fp16_spconv, s)) {
return false;
}

/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_simd_r32, s) &&
((a->vd | a->vm) & 0x10)) {
return false;
}

if ((a->vd & 1) || (a->size != 1)) {
return false;
}

if (!vfp_access_check(s)) {
return true;
}

fpst = get_fpstatus_ptr(true);
ahp = get_ahp_flag();
tmp3 = tcg_temp_new_i32();
tmp = neon_load_reg(a->vm, 0);
tmp2 = neon_load_reg(a->vm, 1);
tcg_gen_ext16u_i32(tmp3, tmp);
gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
neon_store_reg(a->vd, 0, tmp3);
tcg_gen_shri_i32(tmp, tmp, 16);
gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
neon_store_reg(a->vd, 1, tmp);
tmp3 = tcg_temp_new_i32();
tcg_gen_ext16u_i32(tmp3, tmp2);
gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
neon_store_reg(a->vd, 2, tmp3);
tcg_gen_shri_i32(tmp2, tmp2, 16);
gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
neon_store_reg(a->vd, 3, tmp2);
tcg_temp_free_i32(ahp);
tcg_temp_free_ptr(fpst);

return true;
}
65 changes: 3 additions & 62 deletions target/arm/translate.c
Expand Up @@ -4860,7 +4860,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
int pass;
int u;
int vec_size;
TCGv_i32 tmp, tmp2, tmp3;
TCGv_i32 tmp, tmp2;

if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return 1;
Expand Down Expand Up @@ -4927,6 +4927,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case NEON_2RM_VZIP:
case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
case NEON_2RM_VSHLL:
case NEON_2RM_VCVT_F16_F32:
case NEON_2RM_VCVT_F32_F16:
/* handled by decodetree */
return 1;
case NEON_2RM_VTRN:
Expand All @@ -4942,67 +4944,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
goto elementwise;
}
break;
case NEON_2RM_VCVT_F16_F32:
{
TCGv_ptr fpst;
TCGv_i32 ahp;

if (!dc_isar_feature(aa32_fp16_spconv, s) ||
q || (rm & 1)) {
return 1;
}
fpst = get_fpstatus_ptr(true);
ahp = get_ahp_flag();
tmp = neon_load_reg(rm, 0);
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
tmp2 = neon_load_reg(rm, 1);
gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
tcg_gen_shli_i32(tmp2, tmp2, 16);
tcg_gen_or_i32(tmp2, tmp2, tmp);
tcg_temp_free_i32(tmp);
tmp = neon_load_reg(rm, 2);
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
tmp3 = neon_load_reg(rm, 3);
neon_store_reg(rd, 0, tmp2);
gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
tcg_gen_shli_i32(tmp3, tmp3, 16);
tcg_gen_or_i32(tmp3, tmp3, tmp);
neon_store_reg(rd, 1, tmp3);
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(ahp);
tcg_temp_free_ptr(fpst);
break;
}
case NEON_2RM_VCVT_F32_F16:
{
TCGv_ptr fpst;
TCGv_i32 ahp;
if (!dc_isar_feature(aa32_fp16_spconv, s) ||
q || (rd & 1)) {
return 1;
}
fpst = get_fpstatus_ptr(true);
ahp = get_ahp_flag();
tmp3 = tcg_temp_new_i32();
tmp = neon_load_reg(rm, 0);
tmp2 = neon_load_reg(rm, 1);
tcg_gen_ext16u_i32(tmp3, tmp);
gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
neon_store_reg(rd, 0, tmp3);
tcg_gen_shri_i32(tmp, tmp, 16);
gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
neon_store_reg(rd, 1, tmp);
tmp3 = tcg_temp_new_i32();
tcg_gen_ext16u_i32(tmp3, tmp2);
gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
neon_store_reg(rd, 2, tmp3);
tcg_gen_shri_i32(tmp2, tmp2, 16);
gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
neon_store_reg(rd, 3, tmp2);
tcg_temp_free_i32(ahp);
tcg_temp_free_ptr(fpst);
break;
}
case NEON_2RM_AESE: case NEON_2RM_AESMC:
if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
return 1;
Expand Down

0 comments on commit 654a517

Please sign in to comment.