Skip to content

Commit

Permalink
target-ppc: Altivec 2.07: Quadword Addition and Subtraction
Browse files Browse the repository at this point in the history
This patch adds the Vector Quadword Addition and Subtraction instructions
introduced in Power ISA Version 2.07:

  - Vector Add Unsigned Quadword Modulo (vadduqm)
  - Vector Add & Write Carry Unsigned Quadword (vaddcuq)
  - Vector Add Extended Unsigned Quadword (vaddeuqm)
  - Vector Add Extended & Write Carry Unsigned Quadword (vaddecuq)
  - Vector Subtract Unsigned Quadword Modulo (vsubuqm)
  - Vector Subtract & Write Carry Unsigned Quadword (vsubcuq)
  - Vector Subtract Extended Unsigned Quadword (vsubeuqm)
  - Vector Subtract Extended & Write Carry Unsigned Quadword (vsubecuq)

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
  • Loading branch information
Tom Musta authored and agraf committed Mar 5, 2014
1 parent 2fdf78e commit b41da4e
Show file tree
Hide file tree
Showing 3 changed files with 211 additions and 0 deletions.
8 changes: 8 additions & 0 deletions target-ppc/helper.h
Expand Up @@ -211,6 +211,14 @@ DEF_HELPER_4(vadduws, void, env, avr, avr, avr)
DEF_HELPER_4(vsububs, void, env, avr, avr, avr)
DEF_HELPER_4(vsubuhs, void, env, avr, avr, avr)
DEF_HELPER_4(vsubuws, void, env, avr, avr, avr)
/*
 * Vector quadword (128-bit) add/subtract helpers, Power ISA 2.07.
 * The modulo and write-carry forms take three avr operands; the
 * "extended" forms (vadde*, vsube*) take a fourth avr operand, of which
 * the helpers only use the least-significant bit (the carry-in).
 */
DEF_HELPER_3(vadduqm, void, avr, avr, avr)
DEF_HELPER_4(vaddecuq, void, avr, avr, avr, avr)
DEF_HELPER_4(vaddeuqm, void, avr, avr, avr, avr)
DEF_HELPER_3(vaddcuq, void, avr, avr, avr)
DEF_HELPER_3(vsubuqm, void, avr, avr, avr)
DEF_HELPER_4(vsubecuq, void, avr, avr, avr, avr)
DEF_HELPER_4(vsubeuqm, void, avr, avr, avr, avr)
DEF_HELPER_3(vsubcuq, void, avr, avr, avr)
DEF_HELPER_3(vrlb, void, avr, avr, avr)
DEF_HELPER_3(vrlh, void, avr, avr, avr)
DEF_HELPER_3(vrlw, void, avr, avr, avr)
Expand Down
185 changes: 185 additions & 0 deletions target-ppc/int_helper.c
Expand Up @@ -1568,6 +1568,191 @@ VGENERIC_DO(popcntd, u64)

#undef VGENERIC_DO

/*
 * Initializer for a 128-bit vector value equal to 1, expressed through the
 * two u64[] lanes.  The lane that receives the 1 is chosen by host byte
 * order (index 1 on big-endian hosts, index 0 otherwise) -- presumably the
 * same lane that LO_IDX names; LO_IDX is defined outside this section.
 */
#ifndef HOST_WORDS_BIGENDIAN
#define QW_ONE { .u64 = { 1, 0 } }
#else
#define QW_ONE { .u64 = { 0, 1 } }
#endif

#ifndef CONFIG_INT128

/*
 * Bitwise complement of a quadword.
 *
 * t: destination; both u64 lanes are overwritten.
 * a: source value (passed by value, so t may refer to the caller's copy
 *    of the same register).
 */
static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    int lane;

    for (lane = 0; lane < 2; lane++) {
        t->u64[lane] = ~a.u64[lane];
    }
}

/*
 * Unsigned comparison of two quadwords.
 *
 * Returns -1 if a < b, 1 if a > b and 0 if they are equal.  The HI_IDX
 * lane is compared first; the LO_IDX lane only breaks ties.
 */
static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.u64[HI_IDX] != b.u64[HI_IDX]) {
        return (a.u64[HI_IDX] < b.u64[HI_IDX]) ? -1 : 1;
    }

    if (a.u64[LO_IDX] != b.u64[LO_IDX]) {
        return (a.u64[LO_IDX] < b.u64[LO_IDX]) ? -1 : 1;
    }

    return 0;
}

/*
 * Quadword addition modulo 2^128: *t = a + b.
 *
 * a and b are passed by value, so the caller may pass the object t points
 * to as one of the operands.
 */
static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    /*
     * The low lanes overflow exactly when b.lo exceeds the headroom left
     * above a.lo, and that headroom is ~a.lo.
     */
    const int lo_carry = (~a.u64[LO_IDX] < b.u64[LO_IDX]);

    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] + lo_carry;
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
}

/*
 * Quadword addition with carry-out: *t = a + b (modulo 2^128).
 *
 * Returns 1 when the full 128-bit sum overflowed, 0 otherwise.
 */
static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t inverted_a;
    const int lo_carry = (~a.u64[LO_IDX] < b.u64[LO_IDX]);

    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] + lo_carry;
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];

    /* a + b overflows 128 bits exactly when ~a < b (unsigned). */
    avr_qw_not(&inverted_a, a);
    return avr_qw_cmpu(inverted_a, b) < 0;
}

#endif

/*
 * Vector Add Unsigned Quadword Modulo: *r = *a + *b (modulo 2^128).
 *
 * Uses the native 128-bit type when CONFIG_INT128 is defined and the
 * two-lane software adder otherwise.
 */
void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifndef CONFIG_INT128
    avr_qw_add(r, *a, *b);
#else
    r->u128 = b->u128 + a->u128;
#endif
}

/*
 * Vector Add Extended Unsigned Quadword Modulo:
 * *r = *a + *b + carry_in (modulo 2^128), where carry_in is the
 * least-significant bit of *c.
 */
void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else
    ppc_avr_t acc = *a;

    /* Fold the carry-in into the first operand, then add the second. */
    if (c->u64[LO_IDX] & 1) {
        ppc_avr_t carry_in;

        carry_in.u64[HI_IDX] = 0;
        carry_in.u64[LO_IDX] = 1;
        avr_qw_add(&acc, acc, carry_in);
    }

    avr_qw_add(r, acc, *b);
#endif
}

/*
 * Vector Add & Write Carry Unsigned Quadword.
 *
 * Writes to *r the carry out of the 128-bit unsigned sum *a + *b:
 * the result is 1 if the sum overflows 128 bits and 0 otherwise.
 * The sum overflows exactly when ~a < b (unsigned).
 *
 * r: destination; may point at the same object as a or b.
 * a, b: the two addends.
 */
void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;
    int carry;

    avr_qw_not(&not_a, *a);

    /*
     * Evaluate the carry before storing anything into *r.  Nothing in the
     * signature prevents r from pointing at the same register as b; writing
     * r's high lane first would then zero b's high lane before it is
     * compared and yield a wrong carry.
     */
    carry = (avr_qw_cmpu(not_a, *b) < 0);

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry;
#endif
}

/*
 * Vector Add Extended & Write Carry Unsigned Quadword.
 *
 * Writes to *r the carry out (0 or 1) of *a + *b + carry_in, where
 * carry_in is the least-significant bit of *c.
 */
void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    const int carry_in = (c->u128 & 1) != 0;
    int carry_out = (~a->u128 < b->u128);

    /* a + b alone did not overflow; check whether the carry-in tips it. */
    if (carry_in && !carry_out) {
        const int any_nonzero = (a->u128 != 0) || (b->u128 != 0);

        carry_out = ((a->u128 + b->u128 + 1) == 0) && any_nonzero;
    }

    r->u128 = carry_out;
#else
    ppc_avr_t sum;
    const int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = avr_qw_addc(&sum, *a, *b);

    /* a + b alone did not overflow; check whether adding one does. */
    if (carry_in && !carry_out) {
        ppc_avr_t one = QW_ONE;

        carry_out = avr_qw_addc(&sum, sum, one);
    }

    r->u64[LO_IDX] = carry_out;
    r->u64[HI_IDX] = 0;
#endif
}

/*
 * Vector Subtract Unsigned Quadword Modulo: *r = *a - *b (modulo 2^128).
 *
 * Without a native 128-bit type the difference is formed as
 * a + ~b + 1 (two's-complement negation of b).
 */
void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifndef CONFIG_INT128
    ppc_avr_t one = QW_ONE;
    ppc_avr_t not_b;
    ppc_avr_t partial;

    avr_qw_not(&not_b, *b);
    avr_qw_add(&partial, *a, not_b);
    avr_qw_add(r, partial, one);
#else
    r->u128 = a->u128 - b->u128;
#endif
}

/*
 * Vector Subtract Extended Unsigned Quadword Modulo:
 * *r = *a + ~*b + carry_in (modulo 2^128), where carry_in is the
 * least-significant bit of *c.
 */
void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifndef CONFIG_INT128
    ppc_avr_t not_b;
    ppc_avr_t partial;
    ppc_avr_t carry_in;

    avr_qw_not(&not_b, *b);
    avr_qw_add(&partial, *a, not_b);

    /* Widen the single carry-in bit to a quadword and add it last. */
    carry_in.u64[LO_IDX] = c->u64[LO_IDX] & 1;
    carry_in.u64[HI_IDX] = 0;
    avr_qw_add(r, partial, carry_in);
#else
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#endif
}

/*
 * Vector Subtract & Write Carry Unsigned Quadword.
 *
 * Writes to *r the carry out (0 or 1) of the 128-bit sum *a + ~*b + 1.
 *
 * That sum equals a - b + 2^128, so it carries out of bit 127 exactly when
 * a >= b (unsigned).  This is the same condition the longer form
 * "(a > b) || (a + ~b == all-ones)" expresses, since a + ~b is all-ones
 * only when a == b.  Comparing directly avoids the temporary and the
 * mixed signed/unsigned lane comparison against -1ull.
 *
 * r: destination; may point at the same object as a or b (the carry is
 *    computed before *r is written).
 */
void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (a->u128 >= b->u128);
#else
    int carry = (avr_qw_cmpu(*a, *b) >= 0);

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry;
#endif
}

/*
 * Vector Subtract Extended & Write Carry Unsigned Quadword.
 *
 * Writes to *r the carry out (0 or 1) of *a + ~*b + carry_in, where
 * carry_in is the least-significant bit of *c.  The carry is set when
 * a > b, or when carry_in is set and a + ~b is all-ones.
 */
void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < ~b->u128);

    if (!carry_out && (c->u128 & 1)) {
        carry_out = (a->u128 + ~b->u128 == (__uint128_t)-1);
    }

    r->u128 = carry_out;
#else
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);

    if (!carry_out && (c->u64[LO_IDX] & 1)) {
        ppc_avr_t partial;

        avr_qw_not(&partial, *b);
        avr_qw_add(&partial, *a, partial);

        /* Both lanes are all-ones exactly when their AND is all-ones. */
        carry_out = ((partial.u64[HI_IDX] & partial.u64[LO_IDX]) == -1ull);
    }

    r->u64[LO_IDX] = carry_out;
    r->u64[HI_IDX] = 0;
#endif
}


#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
Expand Down
18 changes: 18 additions & 0 deletions target-ppc/translate.c
Expand Up @@ -7043,6 +7043,18 @@ GEN_VXFORM_ENV(vsubuws, 0, 26);
GEN_VXFORM_ENV(vsubsbs, 0, 28);
GEN_VXFORM_ENV(vsubshs, 0, 29);
GEN_VXFORM_ENV(vsubsws, 0, 30);
/*
 * Translation stubs for the Power ISA 2.07 quadword add/subtract
 * instructions.  The modulo and write-carry forms use GEN_VXFORM; the
 * extended forms take a third source operand and use GEN_VXFORM3.
 * Each extended pair (vaddeuqm/vaddecuq, vsubeuqm/vsubecuq) is given the
 * same numeric arguments and is then combined with GEN_VXFORM_DUAL --
 * presumably because the two share an opcode slot; the macro definitions
 * are outside this section, so confirm there.
 */
GEN_VXFORM(vadduqm, 0, 4);
GEN_VXFORM(vaddcuq, 0, 5);
GEN_VXFORM3(vaddeuqm, 30, 0);
GEN_VXFORM3(vaddecuq, 30, 0);
GEN_VXFORM_DUAL(vaddeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
vaddecuq, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM(vsubuqm, 0, 20);
GEN_VXFORM(vsubcuq, 0, 21);
GEN_VXFORM3(vsubeuqm, 31, 0);
GEN_VXFORM3(vsubecuq, 31, 0);
GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
vsubecuq, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM(vrlb, 2, 0);
GEN_VXFORM(vrlh, 2, 1);
GEN_VXFORM(vrlw, 2, 2);
Expand Down Expand Up @@ -10488,6 +10500,12 @@ GEN_VXFORM(vsubuws, 0, 26),
GEN_VXFORM(vsubsbs, 0, 28),
GEN_VXFORM(vsubshs, 0, 29),
GEN_VXFORM(vsubsws, 0, 30),
/*
 * Quadword add/subtract entries (Power ISA 2.07).  The numeric arguments
 * match the ones used for the corresponding translation stubs; each
 * extended pair is registered through a single GEN_VXFORM_DUAL entry
 * flagged PPC2_ALTIVEC_207.
 */
GEN_VXFORM_207(vadduqm, 0, 4),
GEN_VXFORM_207(vaddcuq, 0, 5),
GEN_VXFORM_DUAL(vaddeuqm, vaddecuq, 30, 0xFF, PPC_NONE, PPC2_ALTIVEC_207),
GEN_VXFORM_207(vsubuqm, 0, 20),
GEN_VXFORM_207(vsubcuq, 0, 21),
GEN_VXFORM_DUAL(vsubeuqm, vsubecuq, 31, 0xFF, PPC_NONE, PPC2_ALTIVEC_207),
GEN_VXFORM(vrlb, 2, 0),
GEN_VXFORM(vrlh, 2, 1),
GEN_VXFORM(vrlw, 2, 2),
Expand Down

0 comments on commit b41da4e

Please sign in to comment.