Skip to content

Commit

Permalink
target-arm: A64: Implement plain vector SIMD indexed element insns
Browse files Browse the repository at this point in the history
Implement all the SIMD vector x indexed element instructions
in the subcategory which are not 'long' ops.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
  • Loading branch information
pm215 committed Feb 20, 2014
1 parent 8731690 commit f5e51e7
Show file tree
Hide file tree
Showing 3 changed files with 275 additions and 1 deletion.
26 changes: 26 additions & 0 deletions target-arm/helper-a64.c
Expand Up @@ -123,6 +123,32 @@ uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
return float_rel_to_flags(float64_compare(x, y, fp_status));
}

/* Single-precision "multiply extended".
 *
 * Behaves like float32_mul() except for the zero-times-infinity case
 * (in either operand order): there the result is 2.0 whose sign is the
 * XOR of the two operand signs, and float32_mul() is not called.
 *
 * a, b:  the operands
 * fpstp: pointer to the float_status to use for the multiplication
 *
 * NOTE(review): the zero test is applied to the raw operands. If input
 * denormals are supposed to be flushed to zero under this float_status,
 * a denormal operand would not be recognised as zero here -- confirm
 * whether the inputs need squashing first.
 */
float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
{
    float_status *status = fpstp;
    uint32_t sign;

    if (!((float32_is_zero(a) && float32_is_infinity(b)) ||
          (float32_is_infinity(a) && float32_is_zero(b)))) {
        /* Ordinary case: plain multiplication */
        return float32_mul(a, b, status);
    }

    /* Bit 31 carries sign(a) XOR sign(b); bit 30 alone encodes +2.0 */
    sign = (float32_val(a) ^ float32_val(b)) & (1U << 31);
    return make_float32((1U << 30) | sign);
}

/* Double-precision "multiply extended".
 *
 * Behaves like float64_mul() except for the zero-times-infinity case
 * (in either operand order): there the result is 2.0 whose sign is the
 * XOR of the two operand signs, and float64_mul() is not called.
 *
 * a, b:  the operands
 * fpstp: pointer to the float_status to use for the multiplication
 *
 * NOTE(review): the zero test is applied to the raw operands. If input
 * denormals are supposed to be flushed to zero under this float_status,
 * a denormal operand would not be recognised as zero here -- confirm
 * whether the inputs need squashing first.
 */
float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
{
    float_status *status = fpstp;
    uint64_t sign;

    if (!((float64_is_zero(a) && float64_is_infinity(b)) ||
          (float64_is_infinity(a) && float64_is_zero(b)))) {
        /* Ordinary case: plain multiplication */
        return float64_mul(a, b, status);
    }

    /* Bit 63 carries sign(a) XOR sign(b); bit 62 alone encodes +2.0 */
    sign = (float64_val(a) ^ float64_val(b)) & (1ULL << 63);
    return make_float64((1ULL << 62) | sign);
}

uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
uint32_t rn, uint32_t numregs)
{
Expand Down
2 changes: 2 additions & 0 deletions target-arm/helper-a64.h
Expand Up @@ -27,3 +27,5 @@ DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
/* Double-precision compare helpers: two f64 operands plus a pointer
 * argument (the FP status), returning an i64.
 */
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
/* Table lookup helper: takes env, the current result, the indices, and
 * the rn / numregs values; returns the i64 result.
 */
DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
/* "Multiply extended" helpers (single and double precision): two FP
 * operands plus a pointer to the FP status; zero x infinity yields 2.0
 * with the XOR of the operand signs, everything else is a normal multiply.
 * NOTE(review): these use TCG_CALL_NO_RWG rather than the _SE variant,
 * presumably because the multiply can update the FP status -- confirm.
 */
DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
248 changes: 247 additions & 1 deletion target-arm/translate-a64.c
Expand Up @@ -7813,7 +7813,253 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
*/
static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
{
    /* Instructions in this group combine each element of Rn with one
     * selected ("indexed") element of Rm. They come in two flavours:
     *  - plain: the result element has the same width as the inputs;
     *  - long:  the result element is twice as wide as the inputs; these
     *    exist as INSN/INSN2 pairs distinguished by a 'part' specifier.
     * Only the plain flavour is translated here; long ops are decoded
     * far enough to be validated and are then reported through
     * unsupported_encoding().
     */
    bool is_q = extract32(insn, 30, 1);
    bool u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int l = extract32(insn, 21, 1);
    int m = extract32(insn, 20, 1);
    /* Rm is a 4 bit field in this encoding (normally it is 5 bits);
     * for some element sizes M is used as register bit 4, see below.
     */
    int rm = extract32(insn, 16, 4);
    int opcode = extract32(insn, 12, 4);
    int h = extract32(insn, 11, 1);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool widening = false;
    bool is_float = false;
    bool allocated;
    int elt_idx;
    TCGv_ptr fpst;

    /* Step 1: classify the opcode and check the U / size constraints */
    switch (opcode) {
    case 0x0: /* MLA */
    case 0x4: /* MLS */
        allocated = u;
        break;
    case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
    case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
    case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
        widening = true;
        allocated = true;
        break;
    case 0x3: /* SQDMLAL, SQDMLAL2 */
    case 0x7: /* SQDMLSL, SQDMLSL2 */
    case 0xb: /* SQDMULL, SQDMULL2 */
        widening = true;
        allocated = !u;
        break;
    case 0x8: /* MUL */
    case 0xc: /* SQDMULH */
    case 0xd: /* SQRDMULH */
        allocated = !u;
        break;
    case 0x1: /* FMLA */
    case 0x5: /* FMLS */
        /* U must be clear and the top bit of size must be set */
        is_float = true;
        allocated = !u && (size & 2) != 0;
        break;
    case 0x9: /* FMUL, FMULX */
        /* U selects FMULX; the top bit of size must be set */
        is_float = true;
        allocated = (size & 2) != 0;
        break;
    default:
        allocated = false;
        break;
    }

    if (!allocated) {
        unallocated_encoding(s);
        return;
    }

    /* Step 2: work out the element size, the element index and the
     * full Rm register number from size and the H:L:M bits.
     */
    if (is_float) {
        /* The low bit of size distinguishes double from single */
        size = (size & 1) ? 3 : 2;
        if (size == 3) {
            /* Doubles: index is H alone; L must be 0 and Q must be 1 */
            if (l || !is_q) {
                unallocated_encoding(s);
                return;
            }
            elt_idx = h;
        } else {
            elt_idx = (h << 1) | l;
        }
        rm |= (m << 4);
    } else if (size == 1) {
        /* 16 bit elements: M is part of the index, Rm stays 4 bits */
        elt_idx = (h << 2) | (l << 1) | m;
    } else if (size == 2) {
        elt_idx = (h << 1) | l;
        rm |= (m << 4);
    } else {
        unallocated_encoding(s);
        return;
    }

    /* Step 3: long ops are valid encodings but not implemented here */
    if (widening) {
        unsupported_encoding(s, insn);
        return;
    }

    /* Only the floating point ops need an FP status pointer */
    if (is_float) {
        fpst = get_fpstatus_ptr();
    } else {
        TCGV_UNUSED_PTR(fpst);
    }

    if (size == 3) {
        /* 64 bit floating point: two passes over 64 bit elements */
        TCGv_i64 tcg_idxelt = tcg_temp_new_i64();
        int pass;

        assert(is_float && is_q && !widening);

        read_vec_element(s, tcg_idxelt, rm, elt_idx, MO_64);

        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_elt = tcg_temp_new_i64();
            TCGv_i64 tcg_out = tcg_temp_new_i64();

            read_vec_element(s, tcg_elt, rn, pass, MO_64);

            if (opcode == 0x9) {
                /* FMUL, FMULX */
                if (u) {
                    gen_helper_vfp_mulxd(tcg_out, tcg_elt, tcg_idxelt, fpst);
                } else {
                    gen_helper_vfp_muld(tcg_out, tcg_elt, tcg_idxelt, fpst);
                }
            } else if (opcode == 0x1 || opcode == 0x5) {
                /* FMLA, FMLS */
                if (opcode == 0x5) {
                    /* FMLS: negate the Rn element first, then do the
                     * same fused multiply-add as FMLA.
                     */
                    gen_helper_vfp_negd(tcg_elt, tcg_elt);
                }
                read_vec_element(s, tcg_out, rd, pass, MO_64);
                gen_helper_vfp_muladdd(tcg_out, tcg_elt, tcg_idxelt,
                                       tcg_out, fpst);
            } else {
                g_assert_not_reached();
            }

            write_vec_element(s, tcg_out, rd, pass, MO_64);
            tcg_temp_free_i64(tcg_elt);
            tcg_temp_free_i64(tcg_out);
        }

        tcg_temp_free_i64(tcg_idxelt);
    } else {
        /* 32 bit floating point, or 16 or 32 bit integer. Long ops were
         * rejected above, so everything here is processed 32 bits of
         * the vector at a time.
         */
        const int passes = is_q ? 4 : 2;
        TCGv_i32 tcg_idxelt = tcg_temp_new_i32();
        int pass;

        read_vec_element_i32(s, tcg_idxelt, rm, elt_idx, size);

        if (size == 1) {
            /* For 16x16 ops, replicate the indexed element into both
             * halves of the 32 bit value so that the ordinary Neon
             * helpers (two 16 bit lanes per call) can be used.
             */
            tcg_gen_deposit_i32(tcg_idxelt, tcg_idxelt, tcg_idxelt, 16, 16);
        }

        for (pass = 0; pass < passes; pass++) {
            TCGv_i32 tcg_elt = tcg_temp_new_i32();
            TCGv_i32 tcg_out = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_elt, rn, pass, MO_32);

            switch (opcode) {
            case 0x0: /* MLA */
            case 0x4: /* MLS */
            case 0x8: /* MUL */
                /* All three start with the product Rn[elt] * Rm[index] */
                if (size == 1) {
                    gen_helper_neon_mul_u16(tcg_out, tcg_elt, tcg_idxelt);
                } else {
                    tcg_gen_mul_i32(tcg_out, tcg_elt, tcg_idxelt);
                }
                if (opcode != 0x8) {
                    /* MLA/MLS: reuse tcg_elt for the old Rd value and
                     * add the product to it, or subtract it from it.
                     */
                    read_vec_element_i32(s, tcg_elt, rd, pass, MO_32);
                    if (size == 1) {
                        if (opcode == 0x4) {
                            gen_helper_neon_sub_u16(tcg_out, tcg_elt, tcg_out);
                        } else {
                            gen_helper_neon_add_u16(tcg_out, tcg_elt, tcg_out);
                        }
                    } else {
                        if (opcode == 0x4) {
                            tcg_gen_sub_i32(tcg_out, tcg_elt, tcg_out);
                        } else {
                            tcg_gen_add_i32(tcg_out, tcg_elt, tcg_out);
                        }
                    }
                }
                break;
            case 0x1: /* FMLA */
            case 0x5: /* FMLS */
                if (opcode == 0x5) {
                    /* FMLS: negate the Rn element first, then do the
                     * same fused multiply-add as FMLA.
                     */
                    gen_helper_vfp_negs(tcg_elt, tcg_elt);
                }
                read_vec_element_i32(s, tcg_out, rd, pass, MO_32);
                gen_helper_vfp_muladds(tcg_out, tcg_elt, tcg_idxelt,
                                       tcg_out, fpst);
                break;
            case 0x9: /* FMUL, FMULX */
                if (u) {
                    gen_helper_vfp_mulxs(tcg_out, tcg_elt, tcg_idxelt, fpst);
                } else {
                    gen_helper_vfp_muls(tcg_out, tcg_elt, tcg_idxelt, fpst);
                }
                break;
            case 0xc: /* SQDMULH */
                if (size == 1) {
                    gen_helper_neon_qdmulh_s16(tcg_out, cpu_env,
                                               tcg_elt, tcg_idxelt);
                } else {
                    gen_helper_neon_qdmulh_s32(tcg_out, cpu_env,
                                               tcg_elt, tcg_idxelt);
                }
                break;
            case 0xd: /* SQRDMULH */
                if (size == 1) {
                    gen_helper_neon_qrdmulh_s16(tcg_out, cpu_env,
                                                tcg_elt, tcg_idxelt);
                } else {
                    gen_helper_neon_qrdmulh_s32(tcg_out, cpu_env,
                                                tcg_elt, tcg_idxelt);
                }
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element_i32(s, tcg_out, rd, pass, MO_32);
            tcg_temp_free_i32(tcg_elt);
            tcg_temp_free_i32(tcg_out);
        }

        tcg_temp_free_i32(tcg_idxelt);

        /* A 64 bit (non-Q) operation clears the high half of Rd */
        if (!is_q) {
            clear_vec_high(s, rd);
        }
    }

    if (!TCGV_IS_UNUSED_PTR(fpst)) {
        tcg_temp_free_ptr(fpst);
    }
}

/* C3.6.19 Crypto AES
Expand Down

0 comments on commit f5e51e7

Please sign in to comment.