target/arm: Fix short-vector increment behaviour
For VFP short vectors, the VFP registers are divided into a
series of banks: for single-precision these are s0-s7, s8-s15,
s16-s23 and s24-s31; for double-precision they are d0-d3,
d4-d7, ... d28-d31. Some banks are "scalar" meaning that
use of a register within them triggers a pure-scalar or
mixed vector-scalar operation rather than a full vector
operation. The scalar banks are s0-s7, d0-d3 and d16-d19.
When using a bank as part of a vector operation, we
iterate through it, increasing the register number by
the specified stride each time, and wrapping around to
the beginning of the bank.
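
For orientation, here is a minimal standalone sketch (illustrative names, not the QEMU code itself) of the single-precision bank test and the intended wrap-around walk:

#include <stdbool.h>
#include <stdio.h>

/* Single precision: 8 registers per bank, so the low 3 bits index the
 * position within the bank and the remaining bits name the bank.
 * Only s0-s7 (bank bits all clear) is a scalar bank.
 */
static bool sreg_is_scalar(int reg)
{
    return (reg & 0x18) == 0;
}

/* Intended semantics: step by 'stride' but stay inside the bank. */
static int sreg_advance(int reg, int stride)
{
    return ((reg + stride) & 0x7) | (reg & ~0x7);
}

int main(void)
{
    int reg = 14;   /* s14, in the non-scalar bank s8-s15 */

    printf("s14 in scalar bank? %d\n", sreg_is_scalar(reg));   /* 0 */
    for (int i = 0; i < 4; i++) {
        printf("s%d\n", reg);            /* s14, s15, s8, s9 */
        reg = sreg_advance(reg, 1);      /* stride 1 */
    }
    return 0;
}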

Unfortunately our calculation of the "increment" part of this
was incorrect:
 vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask)
will only do the intended thing if bank_mask has exactly
one set high bit. For instance for doubles (bank_mask = 0xc),
if we start with vd = 6 and delta_d = 2 then vd is updated
to 12 rather than the intended 4.
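
To make that arithmetic concrete, here is a tiny standalone check (not part of the patch) that evaluates the old expression for this case:

#include <stdio.h>

int main(void)
{
    int vd = 6, delta_d = 2;        /* d6, stride 2 */
    int bank_mask = 0xc;            /* double-precision bank mask */

    /* The old update expression; only valid when bank_mask is a single set bit. */
    vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);

    printf("d%d\n", vd);            /* prints d12; the intended wrap-around result is d4 */
    return 0;
}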

This only causes problems in the unlikely case that the
starting register is not the first in its bank: if the
register number doesn't have to wrap around then the
expression happens to give the right answer.

Fix this bug by abstracting out the "check whether register
is in a scalar bank" and "advance register within bank"
operations to utility functions which use the right
bit masking operations.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
pm215 committed Jun 13, 2019
1 parent 3111bfc commit 18cf951
Showing 1 changed file with 60 additions and 40 deletions.
100 changes: 60 additions & 40 deletions target/arm/translate-vfp.inc.c
@@ -1139,6 +1139,42 @@ typedef void VFPGen3OpDPFn(TCGv_i64 vd,
typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm);
typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm);

/*
* Return true if the specified S reg is in a scalar bank
* (ie if it is s0..s7)
*/
static inline bool vfp_sreg_is_scalar(int reg)
{
return (reg & 0x18) == 0;
}

/*
* Return true if the specified D reg is in a scalar bank
* (ie if it is d0..d3 or d16..d19)
*/
static inline bool vfp_dreg_is_scalar(int reg)
{
return (reg & 0xc) == 0;
}

/*
* Advance the S reg number forwards by delta within its bank
* (ie increment the low 3 bits but leave the rest the same)
*/
static inline int vfp_advance_sreg(int reg, int delta)
{
return ((reg + delta) & 0x7) | (reg & ~0x7);
}

/*
* Advance the D reg number forwards by delta within its bank
* (ie increment the low 2 bits but leave the rest the same)
*/
static inline int vfp_advance_dreg(int reg, int delta)
{
return ((reg + delta) & 0x3) | (reg & ~0x3);
}

/*
* Perform a 3-operand VFP data processing instruction. fn is the
* callback to do the actual operation; this function deals with the
@@ -1149,7 +1185,6 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
{
uint32_t delta_m = 0;
uint32_t delta_d = 0;
uint32_t bank_mask = 0;
int veclen = s->vec_len;
TCGv_i32 f0, f1, fd;
TCGv_ptr fpst;
@@ -1164,16 +1199,14 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
}

if (veclen > 0) {
bank_mask = 0x18;

/* Figure out what type of vector operation this is. */
if ((vd & bank_mask) == 0) {
if (vfp_sreg_is_scalar(vd)) {
/* scalar */
veclen = 0;
} else {
delta_d = s->vec_stride + 1;

if ((vm & bank_mask) == 0) {
if (vfp_sreg_is_scalar(vm)) {
/* mixed scalar/vector */
delta_m = 0;
} else {
@@ -1204,11 +1237,11 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,

/* Set up the operands for the next iteration */
veclen--;
vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
vn = ((vn + delta_d) & (bank_mask - 1)) | (vn & bank_mask);
vd = vfp_advance_sreg(vd, delta_d);
vn = vfp_advance_sreg(vn, delta_d);
neon_load_reg32(f0, vn);
if (delta_m) {
vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
vm = vfp_advance_sreg(vm, delta_m);
neon_load_reg32(f1, vm);
}
}
@@ -1226,7 +1259,6 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
{
uint32_t delta_m = 0;
uint32_t delta_d = 0;
uint32_t bank_mask = 0;
int veclen = s->vec_len;
TCGv_i64 f0, f1, fd;
TCGv_ptr fpst;
@@ -1246,16 +1278,14 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
}

if (veclen > 0) {
bank_mask = 0xc;

/* Figure out what type of vector operation this is. */
if ((vd & bank_mask) == 0) {
if (vfp_dreg_is_scalar(vd)) {
/* scalar */
veclen = 0;
} else {
delta_d = (s->vec_stride >> 1) + 1;

if ((vm & bank_mask) == 0) {
if (vfp_dreg_is_scalar(vm)) {
/* mixed scalar/vector */
delta_m = 0;
} else {
@@ -1285,11 +1315,11 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
}
/* Set up the operands for the next iteration */
veclen--;
vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
vn = ((vn + delta_d) & (bank_mask - 1)) | (vn & bank_mask);
vd = vfp_advance_dreg(vd, delta_d);
vn = vfp_advance_dreg(vn, delta_d);
neon_load_reg64(f0, vn);
if (delta_m) {
vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
vm = vfp_advance_dreg(vm, delta_m);
neon_load_reg64(f1, vm);
}
}
@@ -1306,7 +1336,6 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
{
uint32_t delta_m = 0;
uint32_t delta_d = 0;
uint32_t bank_mask = 0;
int veclen = s->vec_len;
TCGv_i32 f0, fd;

@@ -1320,16 +1349,14 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
}

if (veclen > 0) {
bank_mask = 0x18;

/* Figure out what type of vector operation this is. */
if ((vd & bank_mask) == 0) {
if (vfp_sreg_is_scalar(vd)) {
/* scalar */
veclen = 0;
} else {
delta_d = s->vec_stride + 1;

if ((vm & bank_mask) == 0) {
if (vfp_sreg_is_scalar(vm)) {
/* mixed scalar/vector */
delta_m = 0;
} else {
@@ -1355,16 +1382,16 @@ static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
if (delta_m == 0) {
/* single source one-many */
while (veclen--) {
vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
vd = vfp_advance_sreg(vd, delta_d);
neon_store_reg32(fd, vd);
}
break;
}

/* Set up the operands for the next iteration */
veclen--;
vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
vd = vfp_advance_sreg(vd, delta_d);
vm = vfp_advance_sreg(vm, delta_m);
neon_load_reg32(f0, vm);
}

@@ -1378,7 +1405,6 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
{
uint32_t delta_m = 0;
uint32_t delta_d = 0;
uint32_t bank_mask = 0;
int veclen = s->vec_len;
TCGv_i64 f0, fd;

@@ -1397,16 +1423,14 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
}

if (veclen > 0) {
bank_mask = 0xc;

/* Figure out what type of vector operation this is. */
if ((vd & bank_mask) == 0) {
if (vfp_dreg_is_scalar(vd)) {
/* scalar */
veclen = 0;
} else {
delta_d = (s->vec_stride >> 1) + 1;

if ((vm & bank_mask) == 0) {
if (vfp_dreg_is_scalar(vm)) {
/* mixed scalar/vector */
delta_m = 0;
} else {
@@ -1432,16 +1456,16 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
if (delta_m == 0) {
/* single source one-many */
while (veclen--) {
vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
vd = vfp_advance_dreg(vd, delta_d);
neon_store_reg64(fd, vd);
}
break;
}

/* Set up the operands for the next iteration */
veclen--;
vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
vm = ((vm + delta_m) & (bank_mask - 1)) | (vm & bank_mask);
vd = vfp_advance_dreg(vd, delta_d);
vm = vfp_advance_dreg(vm, delta_m);
neon_load_reg64(f0, vm);
}

@@ -1783,7 +1807,6 @@ static bool trans_VFM_dp(DisasContext *s, arg_VFM_sp *a)
static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
{
uint32_t delta_d = 0;
uint32_t bank_mask = 0;
int veclen = s->vec_len;
TCGv_i32 fd;
uint32_t n, i, vd;
@@ -1804,9 +1827,8 @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
}

if (veclen > 0) {
bank_mask = 0x18;
/* Figure out what type of vector operation this is. */
if ((vd & bank_mask) == 0) {
if (vfp_sreg_is_scalar(vd)) {
/* scalar */
veclen = 0;
} else {
@@ -1835,7 +1857,7 @@ static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)

/* Set up the operands for the next iteration */
veclen--;
vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
vd = vfp_advance_sreg(vd, delta_d);
}

tcg_temp_free_i32(fd);
@@ -1845,7 +1867,6 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
{
uint32_t delta_d = 0;
uint32_t bank_mask = 0;
int veclen = s->vec_len;
TCGv_i64 fd;
uint32_t n, i, vd;
@@ -1871,9 +1892,8 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
}

if (veclen > 0) {
bank_mask = 0xc;
/* Figure out what type of vector operation this is. */
if ((vd & bank_mask) == 0) {
if (vfp_dreg_is_scalar(vd)) {
/* scalar */
veclen = 0;
} else {
@@ -1902,7 +1922,7 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)

/* Set up the operands for the next iteration */
veclen--;
vd = ((vd + delta_d) & (bank_mask - 1)) | (vd & bank_mask);
vd = vfp_advance_dreg(vd, delta_d);
}

tcg_temp_free_i64(fd);
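For completeness, a quick standalone check (the two D-register helpers copied from the patch above, compiled outside QEMU) confirming that the example from the commit message now wraps as intended:

#include <stdbool.h>
#include <stdio.h>

/* As introduced by the patch. */
static inline bool vfp_dreg_is_scalar(int reg)
{
    return (reg & 0xc) == 0;
}

static inline int vfp_advance_dreg(int reg, int delta)
{
    return ((reg + delta) & 0x3) | (reg & ~0x3);
}

int main(void)
{
    /* d6 sits in the non-scalar bank d4-d7; stepping by 2 wraps to d4. */
    printf("d6 in scalar bank? %d\n", vfp_dreg_is_scalar(6));   /* 0 */
    printf("next after d6:     d%d\n", vfp_advance_dreg(6, 2)); /* d4, not d12 */
    return 0;
}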
