Skip to content

Commit

Permalink
target/arm: Convert the VCVT-to-f16 insns to decodetree
Browse files Browse the repository at this point in the history
Convert the VCVTT and VCVTB instructions which convert from
f32 and f64 to f16 to decodetree.

Since we're no longer constrained to the old decoder's style
using cpu_F0s and cpu_F0d we can perform a direct 16 bit
store of the right half of the input single-precision register
rather than doing a load/modify/store sequence on the full
32 bits.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
  • Loading branch information
pm215 committed Jun 13, 2019
1 parent b623d80 commit cdfd14e
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 78 deletions.
62 changes: 62 additions & 0 deletions target/arm/translate-vfp.inc.c
Expand Up @@ -2095,3 +2095,65 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
tcg_temp_free_i64(vd);
return true;
}

static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
{
TCGv_ptr fpst;
TCGv_i32 ahp_mode;
TCGv_i32 tmp;

if (!dc_isar_feature(aa32_fp16_spconv, s)) {
return false;
}

if (!vfp_access_check(s)) {
return true;
}

fpst = get_fpstatus_ptr(false);
ahp_mode = get_ahp_flag();
tmp = tcg_temp_new_i32();

neon_load_reg32(tmp, a->vm);
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
tcg_temp_free_i32(ahp_mode);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
return true;
}

static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
{
TCGv_ptr fpst;
TCGv_i32 ahp_mode;
TCGv_i32 tmp;
TCGv_i64 vm;

if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
return false;
}

/* UNDEF accesses to D16-D31 if they don't exist. */
if (!dc_isar_feature(aa32_fp_d32, s) && (a->vm & 0x10)) {
return false;
}

if (!vfp_access_check(s)) {
return true;
}

fpst = get_fpstatus_ptr(false);
ahp_mode = get_ahp_flag();
tmp = tcg_temp_new_i32();
vm = tcg_temp_new_i64();

neon_load_reg64(vm, a->vm);
gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
tcg_temp_free_i64(vm);
tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t));
tcg_temp_free_i32(ahp_mode);
tcg_temp_free_ptr(fpst);
tcg_temp_free_i32(tmp);
return true;
}
79 changes: 1 addition & 78 deletions target/arm/translate.c
Expand Up @@ -2963,20 +2963,6 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn)
#define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5)
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)

/* Move between integer and VFP cores. */
static TCGv_i32 gen_vfp_mrs(void)
{
TCGv_i32 tmp = tcg_temp_new_i32();
tcg_gen_mov_i32(tmp, cpu_F0s);
return tmp;
}

static void gen_vfp_msr(TCGv_i32 tmp)
{
tcg_gen_mov_i32(cpu_F0s, tmp);
tcg_temp_free_i32(tmp);
}

static void gen_neon_dup_low16(TCGv_i32 var)
{
TCGv_i32 tmp = tcg_temp_new_i32();
Expand All @@ -3003,8 +2989,6 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
{
uint32_t rd, rn, rm, op, delta_d, delta_m, bank_mask;
int dp, veclen;
TCGv_i32 tmp;
TCGv_i32 tmp2;

if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
return 1;
Expand Down Expand Up @@ -3066,8 +3050,7 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
return 1;
case 15:
switch (rn) {
case 0 ... 5:
case 8 ... 11:
case 0 ... 11:
/* Already handled by decodetree */
return 1;
default:
Expand All @@ -3080,20 +3063,6 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
if (op == 15) {
/* rn is opcode, encoded as per VFP_SREG_N. */
switch (rn) {
case 0x06: /* vcvtb.f16.f32, vcvtb.f16.f64 */
case 0x07: /* vcvtt.f16.f32, vcvtt.f16.f64 */
if (dp) {
if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
return 1;
}
} else {
if (!dc_isar_feature(aa32_fp16_spconv, s)) {
return 1;
}
}
rd_is_dp = false;
break;

case 0x0c: /* vrintr */
case 0x0d: /* vrintz */
case 0x0e: /* vrintx */
Expand Down Expand Up @@ -3221,52 +3190,6 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
switch (op) {
case 15: /* extension space */
switch (rn) {
case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
{
TCGv_ptr fpst = get_fpstatus_ptr(false);
TCGv_i32 ahp = get_ahp_flag();
tmp = tcg_temp_new_i32();

if (dp) {
gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
fpst, ahp);
} else {
gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
fpst, ahp);
}
tcg_temp_free_i32(ahp);
tcg_temp_free_ptr(fpst);
gen_mov_F0_vreg(0, rd);
tmp2 = gen_vfp_mrs();
tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
tcg_gen_or_i32(tmp, tmp, tmp2);
tcg_temp_free_i32(tmp2);
gen_vfp_msr(tmp);
break;
}
case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
{
TCGv_ptr fpst = get_fpstatus_ptr(false);
TCGv_i32 ahp = get_ahp_flag();
tmp = tcg_temp_new_i32();
if (dp) {
gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
fpst, ahp);
} else {
gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
fpst, ahp);
}
tcg_temp_free_i32(ahp);
tcg_temp_free_ptr(fpst);
tcg_gen_shli_i32(tmp, tmp, 16);
gen_mov_F0_vreg(0, rd);
tmp2 = gen_vfp_mrs();
tcg_gen_ext16u_i32(tmp2, tmp2);
tcg_gen_or_i32(tmp, tmp, tmp2);
tcg_temp_free_i32(tmp2);
gen_vfp_msr(tmp);
break;
}
case 12: /* vrintr */
{
TCGv_ptr fpst = get_fpstatus_ptr(0);
Expand Down
6 changes: 6 additions & 0 deletions target/arm/vfp.decode
Expand Up @@ -187,3 +187,9 @@ VCVT_f32_f16 ---- 1110 1.11 0010 .... 1010 t:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_f64_f16 ---- 1110 1.11 0010 .... 1011 t:1 1.0 .... \
vd=%vd_dp vm=%vm_sp

# VCVTB and VCVTT to f16: Vd format is always vd_sp; Vm format depends on size bit
VCVT_f16_f32 ---- 1110 1.11 0011 .... 1010 t:1 1.0 .... \
vd=%vd_sp vm=%vm_sp
VCVT_f16_f64 ---- 1110 1.11 0011 .... 1011 t:1 1.0 .... \
vd=%vd_sp vm=%vm_dp

0 comments on commit cdfd14e

Please sign in to comment.