Skip to content

Commit

Permalink
target/riscv/vector_helpers: do early exit when vstart >= vl
Browse files Browse the repository at this point in the history
We're going to make changes that will require each helper to be
responsible for the 'vstart' management, i.e. we will remove the
'vstart < vl' assumption that helpers have today.

Helpers are usually able to deal with vstart >= vl, i.e. doing nothing
aside from setting vstart = 0 at the end, but the tail update functions
will update the tail regardless of vstart being valid or not. Unifying
the tail update process in a single function that would handle the
vstart >= vl case isn't trivial (see [1] for more info).

This patch takes a blunt approach: do an early exit in every single
vector helper if vstart >= vl, unless the helper is guarded with
vstart_eq_zero in the translation. For those cases the helper is ready
to deal with cases where vl might be zero, i.e. throwing exceptions
based on it like vcpop_m() and vfirst_m().

Helpers that weren't changed:

- vcpop_m(), vfirst_m(), vmsetm(), GEN_VEXT_VIOTA_M(): these are guarded
  directly with vstart_eq_zero;

- GEN_VEXT_VCOMPRESS_VM(): guarded with vcompress_vm_check() that checks
  vstart_eq_zero;

- GEN_VEXT_RED(): guarded with either reduction_check() or
  reduction_widen_check(), both check vstart_eq_zero;

- GEN_VEXT_FRED(): guarded with either freduction_check() or
  freduction_widen_check(), both check vstart_eq_zero.

Another exception is vext_ldst_whole(), which operates on effective vector
length regardless of the current settings in vtype and vl.

[1] https://lore.kernel.org/qemu-riscv/1590234b-0291-432a-a0fa-c5a6876097bc@linux.alibaba.com/

Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-ID: <20240314175704.478276-7-dbarboza@ventanamicro.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
  • Loading branch information
danielhb authored and alistair23 committed Mar 22, 2024
1 parent 929e521 commit df4252b
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 0 deletions.
32 changes: 32 additions & 0 deletions target/riscv/vcrypto_helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ static inline void xor_round_key(AESState *round_state, AESState *round_key)
uint32_t total_elems = vext_get_total_elems(env, desc, 4); \
uint32_t vta = vext_vta(desc); \
\
VSTART_CHECK_EARLY_EXIT(env); \
\
for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \
AESState round_key; \
round_key.d[0] = *((uint64_t *)vs2 + H8(i * 2 + 0)); \
Expand All @@ -246,6 +248,8 @@ static inline void xor_round_key(AESState *round_state, AESState *round_key)
uint32_t total_elems = vext_get_total_elems(env, desc, 4); \
uint32_t vta = vext_vta(desc); \
\
VSTART_CHECK_EARLY_EXIT(env); \
\
for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \
AESState round_key; \
round_key.d[0] = *((uint64_t *)vs2 + H8(0)); \
Expand Down Expand Up @@ -305,6 +309,8 @@ void HELPER(vaeskf1_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
uint32_t total_elems = vext_get_total_elems(env, desc, 4);
uint32_t vta = vext_vta(desc);

VSTART_CHECK_EARLY_EXIT(env);

uimm &= 0b1111;
if (uimm > 10 || uimm == 0) {
uimm ^= 0b1000;
Expand Down Expand Up @@ -351,6 +357,8 @@ void HELPER(vaeskf2_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
uint32_t total_elems = vext_get_total_elems(env, desc, 4);
uint32_t vta = vext_vta(desc);

VSTART_CHECK_EARLY_EXIT(env);

uimm &= 0b1111;
if (uimm > 14 || uimm < 2) {
uimm ^= 0b1000;
Expand Down Expand Up @@ -457,6 +465,8 @@ void HELPER(vsha2ms_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
uint32_t total_elems;
uint32_t vta = vext_vta(desc);

VSTART_CHECK_EARLY_EXIT(env);

for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
if (sew == MO_32) {
vsha2ms_e32(((uint32_t *)vd) + i * 4, ((uint32_t *)vs1) + i * 4,
Expand Down Expand Up @@ -572,6 +582,8 @@ void HELPER(vsha2ch32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
uint32_t total_elems;
uint32_t vta = vext_vta(desc);

VSTART_CHECK_EARLY_EXIT(env);

for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i,
((uint32_t *)vs1) + 4 * i + 2);
Expand All @@ -590,6 +602,8 @@ void HELPER(vsha2ch64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
uint32_t total_elems;
uint32_t vta = vext_vta(desc);

VSTART_CHECK_EARLY_EXIT(env);

for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i,
((uint64_t *)vs1) + 4 * i + 2);
Expand All @@ -608,6 +622,8 @@ void HELPER(vsha2cl32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
uint32_t total_elems;
uint32_t vta = vext_vta(desc);

VSTART_CHECK_EARLY_EXIT(env);

for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i,
(((uint32_t *)vs1) + 4 * i));
Expand All @@ -626,6 +642,8 @@ void HELPER(vsha2cl64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env,
uint32_t total_elems;
uint32_t vta = vext_vta(desc);

VSTART_CHECK_EARLY_EXIT(env);

for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i,
(((uint64_t *)vs1) + 4 * i));
Expand Down Expand Up @@ -658,6 +676,8 @@ void HELPER(vsm3me_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr,
uint32_t *vs1 = vs1_vptr;
uint32_t *vs2 = vs2_vptr;

VSTART_CHECK_EARLY_EXIT(env);

for (int i = env->vstart / 8; i < env->vl / 8; i++) {
uint32_t w[24];
for (int j = 0; j < 8; j++) {
Expand Down Expand Up @@ -757,6 +777,8 @@ void HELPER(vsm3c_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm,
uint32_t *vs2 = vs2_vptr;
uint32_t v1[8], v2[8], v3[8];

VSTART_CHECK_EARLY_EXIT(env);

for (int i = env->vstart / 8; i < env->vl / 8; i++) {
for (int k = 0; k < 8; k++) {
v2[k] = bswap32(vd[H4(i * 8 + k)]);
Expand All @@ -780,6 +802,8 @@ void HELPER(vghsh_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr,
uint32_t vta = vext_vta(desc);
uint32_t total_elems = vext_get_total_elems(env, desc, 4);

VSTART_CHECK_EARLY_EXIT(env);

for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
uint64_t Y[2] = {vd[i * 2 + 0], vd[i * 2 + 1]};
uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])};
Expand Down Expand Up @@ -817,6 +841,8 @@ void HELPER(vgmul_vv)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env,
uint32_t vta = vext_vta(desc);
uint32_t total_elems = vext_get_total_elems(env, desc, 4);

VSTART_CHECK_EARLY_EXIT(env);

for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {
uint64_t Y[2] = {brev8(vd[i * 2 + 0]), brev8(vd[i * 2 + 1])};
uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])};
Expand Down Expand Up @@ -853,6 +879,8 @@ void HELPER(vsm4k_vi)(void *vd, void *vs2, uint32_t uimm5, CPURISCVState *env,
uint32_t esz = sizeof(uint32_t);
uint32_t total_elems = vext_get_total_elems(env, desc, esz);

VSTART_CHECK_EARLY_EXIT(env);

for (uint32_t i = group_start; i < group_end; ++i) {
uint32_t vstart = i * egs;
uint32_t vend = (i + 1) * egs;
Expand Down Expand Up @@ -909,6 +937,8 @@ void HELPER(vsm4r_vv)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
uint32_t esz = sizeof(uint32_t);
uint32_t total_elems = vext_get_total_elems(env, desc, esz);

VSTART_CHECK_EARLY_EXIT(env);

for (uint32_t i = group_start; i < group_end; ++i) {
uint32_t vstart = i * egs;
uint32_t vend = (i + 1) * egs;
Expand Down Expand Up @@ -943,6 +973,8 @@ void HELPER(vsm4r_vs)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
uint32_t esz = sizeof(uint32_t);
uint32_t total_elems = vext_get_total_elems(env, desc, esz);

VSTART_CHECK_EARLY_EXIT(env);

for (uint32_t i = group_start; i < group_end; ++i) {
uint32_t vstart = i * egs;
uint32_t vend = (i + 1) * egs;
Expand Down

0 comments on commit df4252b

Please sign in to comment.