Skip to content

Commit

Permalink
target-arm: add support for v8 VMULL.P64 instruction
Browse files Browse the repository at this point in the history
Add support for the VMULL.P64 polynomial 64x64 to 128 bit multiplication
instruction in the A32/T32 instruction sets; this is part of the v8
Crypto Extensions.

To do this we have to move the neon_pmull_64_{lo,hi} helpers from
helper-a64.c into neon_helper.c so they can be used by the AArch32
translator.

Inspired-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1401386724-26529-4-git-send-email-peter.maydell@linaro.org
  • Loading branch information
pm215 committed Jun 9, 2014
1 parent 526d009 commit 4e624ed
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 33 deletions.
1 change: 1 addition & 0 deletions linux-user/elfload.c
Expand Up @@ -468,6 +468,7 @@ static uint32_t get_elf_hwcap2(void)
uint32_t hwcaps = 0;

GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES);
GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP2_ARM_PMULL);
GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1);
GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2);
GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32);
Expand Down
1 change: 1 addition & 0 deletions target-arm/cpu.c
Expand Up @@ -319,6 +319,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
set_feature(env, ARM_FEATURE_V8_AES);
set_feature(env, ARM_FEATURE_V8_SHA1);
set_feature(env, ARM_FEATURE_V8_SHA256);
set_feature(env, ARM_FEATURE_V8_PMULL);
}
if (arm_feature(env, ARM_FEATURE_V7)) {
set_feature(env, ARM_FEATURE_VAPA);
Expand Down
1 change: 1 addition & 0 deletions target-arm/cpu.h
Expand Up @@ -637,6 +637,7 @@ enum arm_features {
ARM_FEATURE_EL3, /* has EL3 Secure monitor support */
ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */
ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */
ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */
};

static inline int arm_feature(CPUARMState *env, int feature)
Expand Down
30 changes: 0 additions & 30 deletions target-arm/helper-a64.c
Expand Up @@ -186,36 +186,6 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
return result;
}

/* 64 bit polynomial multiply, low half.
 * PolynomialMult(op1, op2) yields a 128 bit value built by XORing
 * together one shifted copy of op2 for every set bit in op1. This
 * helper returns bits [63:0] of that value; its sibling
 * neon_pmull_64_hi returns bits [127:64].
 */
uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
{
    uint64_t acc = 0;
    uint64_t shifted = op2;

    /* Consume op1 one bit at a time from the bottom; 'shifted' tracks
     * op2 << (index of the bit currently being examined), with any
     * bits pushed past bit 63 simply discarded.
     */
    while (op1) {
        if (op1 & 1) {
            acc ^= shifted;
        }
        shifted <<= 1;
        op1 >>= 1;
    }
    return acc;
}
/* 64 bit polynomial multiply, high half.
 * Returns bits [127:64] of the XOR-accumulated product of op1 and op2.
 */
uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
{
    uint64_t acc = 0;
    unsigned int spill;

    /* Bit b of op1 contributes (op2 << b); the part of that which lands
     * in the high word is op2 >> (64 - b). Iterate over spill = 64 - b
     * so the shift count is always in 1..63; bit 0 of op1 (spill == 64)
     * never reaches the high word and is skipped.
     */
    for (spill = 63; spill > 0; spill--) {
        if ((op1 >> (64 - spill)) & 1) {
            acc ^= op2 >> spill;
        }
    }
    return acc;
}

/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
Expand Down
2 changes: 0 additions & 2 deletions target-arm/helper-a64.h
Expand Up @@ -28,8 +28,6 @@ DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
Expand Down
3 changes: 3 additions & 0 deletions target-arm/helper.h
Expand Up @@ -525,6 +525,9 @@ DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_2(dc_zva, void, env, i64)

DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)

#ifdef TARGET_AARCH64
#include "helper-a64.h"
#endif
30 changes: 30 additions & 0 deletions target-arm/neon_helper.c
Expand Up @@ -2211,3 +2211,33 @@ void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm)
env->vfp.regs[rm] = make_float64(m0);
env->vfp.regs[rd] = make_float64(d0);
}

/* 64 bit polynomial multiply, low half.
 * PolynomialMult(op1, op2) yields a 128 bit value built by XORing
 * together one shifted copy of op2 for every set bit in op1. This
 * helper returns bits [63:0] of that value; its sibling
 * neon_pmull_64_hi returns bits [127:64].
 */
uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
{
    uint64_t acc = 0;
    uint64_t shifted = op2;

    /* Consume op1 one bit at a time from the bottom; 'shifted' tracks
     * op2 << (index of the bit currently being examined), with any
     * bits pushed past bit 63 simply discarded.
     */
    while (op1) {
        if (op1 & 1) {
            acc ^= shifted;
        }
        shifted <<= 1;
        op1 >>= 1;
    }
    return acc;
}
/* 64 bit polynomial multiply, high half.
 * Returns bits [127:64] of the XOR-accumulated product of op1 and op2.
 */
uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
{
    uint64_t acc = 0;
    unsigned int spill;

    /* Bit b of op1 contributes (op2 << b); the part of that which lands
     * in the high word is op2 >> (64 - b). Iterate over spill = 64 - b
     * so the shift count is always in 1..63; bit 0 of op1 (spill == 64)
     * never reaches the high word and is skipped.
     */
    for (spill = 63; spill > 0; spill--) {
        if ((op1 >> (64 - spill)) & 1) {
            acc ^= op2 >> spill;
        }
    }
    return acc;
}
26 changes: 25 additions & 1 deletion target-arm/translate.c
Expand Up @@ -5977,7 +5977,7 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
{0, 0, 0, 9}, /* VQDMLSL */
{0, 0, 0, 0}, /* Integer VMULL */
{0, 0, 0, 1}, /* VQDMULL */
{0, 0, 0, 15}, /* Polynomial VMULL */
{0, 0, 0, 0xa}, /* Polynomial VMULL */
{0, 0, 0, 7}, /* Reserved: always UNDEF */
};

Expand All @@ -5996,6 +5996,30 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
return 1;
}

/* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
* outside the loop below as it only performs a single pass.
*/
if (op == 14 && size == 2) {
TCGv_i64 tcg_rn, tcg_rm, tcg_rd;

if (!arm_feature(env, ARM_FEATURE_V8_PMULL)) {
return 1;
}
tcg_rn = tcg_temp_new_i64();
tcg_rm = tcg_temp_new_i64();
tcg_rd = tcg_temp_new_i64();
neon_load_reg64(tcg_rn, rn);
neon_load_reg64(tcg_rm, rm);
gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
neon_store_reg64(tcg_rd, rd);
gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
neon_store_reg64(tcg_rd, rd + 1);
tcg_temp_free_i64(tcg_rn);
tcg_temp_free_i64(tcg_rm);
tcg_temp_free_i64(tcg_rd);
return 0;
}

/* Avoid overlapping operands. Wide source operands are
always aligned so will never overlap with wide
destinations in problematic ways. */
Expand Down

0 comments on commit 4e624ed

Please sign in to comment.