Skip to content

Commit

Permalink
target/ppc: Optimize emulation of vsl and vsr instructions
Browse files Browse the repository at this point in the history
Optimization of altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For the vsl instruction, bits 125-127 of register vB are first saved
in variable sh. Then the highest sh bits of the lower doubleword
element of register vA are saved in variable shifted, so that those
bits are not lost when the shift operation is performed on the lower
doubleword element of register vA, which is the next step. After the
lower doubleword element has been shifted, the shift operation is
performed on the higher doubleword element of vA, and its lowest sh
bits (which are now 0) are replaced with the bits saved in shifted.

For the vsr instruction, bits 125-127 of register vB are first saved
in variable sh. Then the lowest sh bits of the higher doubleword
element of register vA are saved in variable shifted, in order not to
lose those bits when the shift operation is performed on the higher
doubleword element of register vA, which is the next step. After the
higher doubleword element has been shifted, the shift operation is
performed on the lower doubleword element of vA, and its highest sh
bits (which are now 0) are replaced with the bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
  • Loading branch information
Stefan Brankovic authored and dgibson committed Aug 21, 2019
1 parent 1cc7926 commit 4e6d092
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 39 deletions.
2 changes: 0 additions & 2 deletions target/ppc/helper.h
Expand Up @@ -217,8 +217,6 @@ DEF_HELPER_3(vrlb, void, avr, avr, avr)
/*
 * Helper declarations for a group of vector instructions (context lines of
 * the target/ppc/helper.h hunk).
 * NOTE(review): the hunk summary reports two deletions in this file and the
 * commit moves vsl/vsr to inline translation, so the vsl and vsr entries
 * below are presumably the two deleted lines - confirm against the diff
 * markers, which are not visible in this rendering.
 */
DEF_HELPER_3(vrlh, void, avr, avr, avr)
DEF_HELPER_3(vrlw, void, avr, avr, avr)
DEF_HELPER_3(vrld, void, avr, avr, avr)
DEF_HELPER_3(vsl, void, avr, avr, avr)
DEF_HELPER_3(vsr, void, avr, avr, avr)
DEF_HELPER_4(vsldoi, void, avr, avr, avr, i32)
DEF_HELPER_3(vextractub, void, avr, avr, i32)
DEF_HELPER_3(vextractuh, void, avr, avr, i32)
Expand Down
35 changes: 0 additions & 35 deletions target/ppc/int_helper.c
Expand Up @@ -1740,41 +1740,6 @@ VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO

/*
 * 128-bit vector shift helpers (helper_vsl / helper_vsr).
 *
 * The specification leaves the result undefined when the per-byte shift
 * counts in b differ. To match what real hardware appears to do, the
 * destination is only written when the low three bits of every byte of b
 * agree; otherwise r is left untouched.
 */
#define VSHIFT(suffix, leftp) \
void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
{ \
    const int count = b->VsrB(15) & 0x7; \
    uint64_t hi, lo; \
    int idx; \
 \
    /* Bail out without touching r if any byte disagrees on the count. */ \
    for (idx = 0; idx < ARRAY_SIZE(r->u8); idx++) { \
        if ((b->u8[idx] & 0x7) != count) { \
            return; \
        } \
    } \
 \
    /* A zero count is a plain copy; it also avoids a 64-bit shift below. */ \
    if (count == 0) { \
        *r = *a; \
        return; \
    } \
 \
    /* Read both halves up front so that r may alias a. */ \
    hi = a->VsrD(0); \
    lo = a->VsrD(1); \
 \
    if (leftp) { \
        /* Bits leaving the top of VsrD(1) enter the bottom of VsrD(0). */ \
        r->VsrD(0) = (hi << count) | (lo >> (64 - count)); \
        r->VsrD(1) = lo << count; \
    } else { \
        /* Bits leaving the bottom of VsrD(0) enter the top of VsrD(1). */ \
        r->VsrD(1) = (lo >> count) | (hi << (64 - count)); \
        r->VsrD(0) = hi >> count; \
    } \
}
VSHIFT(l, 1)
VSHIFT(r, 0)
#undef VSHIFT

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
int i;
Expand Down
101 changes: 99 additions & 2 deletions target/ppc/translate/vmx-impl.inc.c
Expand Up @@ -570,6 +570,103 @@ static void trans_lvsr(DisasContext *ctx)
tcg_temp_free(EA);
}

/*
 * vsl VRT,VRA,VRB - Vector Shift Left
 *
 * Shifting left 128 bit value of vA by value specified in bits 125-127 of vB.
 * Lowest 3 bits in each byte element of register vB must be identical or
 * result is undefined.
 *
 * The shift count is taken only from the low three bits of the lower
 * doubleword of vB; the other byte elements are not checked.
 */
static void trans_vsl(DisasContext *ctx)
{
    int VT = rD(ctx->opcode);
    int VA = rA(ctx->opcode);
    int VB = rB(ctx->opcode);
    TCGv_i64 avrA = tcg_temp_new_i64();
    TCGv_i64 avrB = tcg_temp_new_i64();
    TCGv_i64 sh = tcg_temp_new_i64();
    TCGv_i64 shifted = tcg_temp_new_i64();
    TCGv_i64 tmp = tcg_temp_new_i64();

    /* Place bits 125-127 of vB in sh (0..7). */
    get_avr64(avrB, VB, false);
    tcg_gen_andi_i64(sh, avrB, 0x07ULL);

    /*
     * Save highest sh bits of lower doubleword element of vA in variable
     * shifted and perform shift on lower doubleword.
     *
     * The carry is (avrA >> (64 - sh)). A single variable shift would use a
     * count of 64 when sh == 0, which is out of range for a 64-bit TCG shift
     * and yields an unspecified result. Splitting it into a constant shift
     * by 32 followed by a variable shift by (32 - sh), i.e. 25..32, keeps
     * every count in range and naturally produces 0 when sh == 0, so no
     * extra masking of the carry is required.
     */
    get_avr64(avrA, VA, false);
    tcg_gen_subfi_i64(tmp, 32, sh);
    tcg_gen_shri_i64(shifted, avrA, 32);
    tcg_gen_shr_i64(shifted, shifted, tmp);
    tcg_gen_shl_i64(avrA, avrA, sh);
    set_avr64(VT, avrA, false);

    /*
     * Perform shift on higher doubleword element of vA and replace lowest
     * sh bits with shifted. The higher doubleword of vA is read only now,
     * after the lower doubleword of vT has been written; the two halves are
     * distinct, so this stays correct when VT == VA.
     */
    get_avr64(avrA, VA, true);
    tcg_gen_shl_i64(avrA, avrA, sh);
    tcg_gen_or_i64(avrA, avrA, shifted);
    set_avr64(VT, avrA, true);

    tcg_temp_free_i64(avrA);
    tcg_temp_free_i64(avrB);
    tcg_temp_free_i64(sh);
    tcg_temp_free_i64(shifted);
    tcg_temp_free_i64(tmp);
}

/*
 * vsr VRT,VRA,VRB - Vector Shift Right
 *
 * Shifting right 128 bit value of vA by value specified in bits 125-127 of vB.
 * Lowest 3 bits in each byte element of register vB must be identical or
 * result is undefined.
 *
 * The shift count is taken only from the low three bits of the lower
 * doubleword of vB; the other byte elements are not checked.
 */
static void trans_vsr(DisasContext *ctx)
{
    int VT = rD(ctx->opcode);
    int VA = rA(ctx->opcode);
    int VB = rB(ctx->opcode);
    TCGv_i64 avrA = tcg_temp_new_i64();
    TCGv_i64 avrB = tcg_temp_new_i64();
    TCGv_i64 sh = tcg_temp_new_i64();
    TCGv_i64 shifted = tcg_temp_new_i64();
    TCGv_i64 tmp = tcg_temp_new_i64();

    /* Place bits 125-127 of vB in sh (0..7). */
    get_avr64(avrB, VB, false);
    tcg_gen_andi_i64(sh, avrB, 0x07ULL);

    /*
     * Save lowest sh bits of higher doubleword element of vA in variable
     * shifted and perform shift on higher doubleword.
     *
     * The carry is (avrA << (64 - sh)). A single variable shift would use a
     * count of 64 when sh == 0, which is out of range for a 64-bit TCG shift
     * and yields an unspecified result. Splitting it into a constant shift
     * by 32 followed by a variable shift by (32 - sh), i.e. 25..32, keeps
     * every count in range and naturally produces 0 when sh == 0, so no
     * extra masking of the carry is required.
     */
    get_avr64(avrA, VA, true);
    tcg_gen_subfi_i64(tmp, 32, sh);
    tcg_gen_shli_i64(shifted, avrA, 32);
    tcg_gen_shl_i64(shifted, shifted, tmp);
    tcg_gen_shr_i64(avrA, avrA, sh);
    set_avr64(VT, avrA, true);

    /*
     * Perform shift on lower doubleword element of vA and replace highest
     * sh bits with shifted. The lower doubleword of vA is read only now,
     * after the higher doubleword of vT has been written; the two halves are
     * distinct, so this stays correct when VT == VA.
     */
    get_avr64(avrA, VA, false);
    tcg_gen_shr_i64(avrA, avrA, sh);
    tcg_gen_or_i64(avrA, avrA, shifted);
    set_avr64(VT, avrA, false);

    tcg_temp_free_i64(avrA);
    tcg_temp_free_i64(avrB);
    tcg_temp_free_i64(sh);
    tcg_temp_free_i64(shifted);
    tcg_temp_free_i64(tmp);
}

/*
 * NOTE(review): trailing context lines of the hunk; presumably these
 * register the vmulo* vector instructions. The meaning of the two numeric
 * arguments is not visible here - confirm against the GEN_VXFORM definition.
 */
GEN_VXFORM(vmuloub, 4, 0);
GEN_VXFORM(vmulouh, 4, 1);
GEN_VXFORM(vmulouw, 4, 2);
Expand Down Expand Up @@ -682,11 +779,11 @@ GEN_VXFORM(vrld, 2, 3);
GEN_VXFORM(vrldmi, 2, 3);
GEN_VXFORM_DUAL(vrld, PPC_NONE, PPC2_ALTIVEC_207, \
vrldmi, PPC_NONE, PPC2_ISA300)
/*
 * NOTE(review): vsl appears twice with the same (2, 7) arguments, once via
 * GEN_VXFORM and once via GEN_VXFORM_TRANS. The hunk summary reports two
 * deletions in this file, so the GEN_VXFORM line is presumably the removed
 * one and GEN_VXFORM_TRANS its replacement, hooking up trans_vsl - confirm
 * against the diff markers, which are not visible in this rendering.
 */
GEN_VXFORM(vsl, 2, 7);
GEN_VXFORM_TRANS(vsl, 2, 7);
GEN_VXFORM(vrldnm, 2, 7);
GEN_VXFORM_DUAL(vsl, PPC_ALTIVEC, PPC_NONE, \
vrldnm, PPC_NONE, PPC2_ISA300)
/*
 * NOTE(review): same before/after pattern for vsr with arguments (2, 11);
 * the GEN_VXFORM_TRANS line presumably hooks up trans_vsr - confirm.
 */
GEN_VXFORM(vsr, 2, 11);
GEN_VXFORM_TRANS(vsr, 2, 11);
GEN_VXFORM_ENV(vpkuhum, 7, 0);
GEN_VXFORM_ENV(vpkuwum, 7, 1);
GEN_VXFORM_ENV(vpkudum, 7, 17);
Expand Down

0 comments on commit 4e6d092

Please sign in to comment.