Skip to content
Closed
20 changes: 0 additions & 20 deletions src/hotspot/cpu/aarch64/register_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,23 +58,3 @@ const char* PRegister::PRegisterImpl::name() const {
};
return is_valid() ? names[encoding()] : "pnoreg";
}

// convenience methods for splitting 8-way vector register sequences
// in half -- needed because vector operations can normally only
// benefit from 4-way instruction parallelism

// Return the first four registers of an 8-way vector register sequence.
VSeq<4> vs_front(const VSeq<8>& v) {
  int start = v.base();
  int step = v.delta();
  return VSeq<4>(start, step);
}

// Return the last four registers of an 8-way vector register sequence.
VSeq<4> vs_back(const VSeq<8>& v) {
  int step = v.delta();
  int start = v.base() + step * 4;
  return VSeq<4>(start, step);
}

// Return the even-index registers of an 8-way vector register sequence.
VSeq<4> vs_even(const VSeq<8>& v) {
  int start = v.base();
  int step = 2 * v.delta();
  return VSeq<4>(start, step);
}

// Return the odd-index registers of an 8-way vector register sequence.
// n.b. the first odd element lies delta registers above the base --
// `base + 1` is only correct when delta == 1, so compute it from the
// sequence's own delta.
VSeq<4> vs_odd(const VSeq<8>& v) {
  return VSeq<4>(v.base() + v.delta(), v.delta() * 2);
}
93 changes: 78 additions & 15 deletions src/hotspot/cpu/aarch64/register_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,19 +436,20 @@ enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
// inputs into front and back halves or odd and even halves (see
// convenience methods below).

// helper macro for computing register masks
#define VS_MASK_BIT(base, delta, i) (1 << (base + delta * i))

template<int N> class VSeq {
static_assert(N >= 2, "vector sequence length must be greater than 1");
static_assert(N <= 8, "vector sequence length must not exceed 8");
static_assert((N & (N - 1)) == 0, "vector sequence length must be power of two");
private:
int _base; // index of first register in sequence
int _delta; // increment to derive successive indices
public:
VSeq(FloatRegister base_reg, int delta = 1) : VSeq(base_reg->encoding(), delta) { }
VSeq(int base, int delta = 1) : _base(base), _delta(delta) {
assert (_base >= 0, "invalid base register");
assert (_delta >= 0, "invalid register delta");
assert ((_base + (N - 1) * _delta) < 32, "range exceeded");
assert (_base >= 0 && _base <= 31, "invalid base register");
assert ((_base + (N - 1) * _delta) >= 0, "register range underflow");
assert ((_base + (N - 1) * _delta) < 32, "register range overflow");
}
// indexed access to sequence
FloatRegister operator [](int i) const {
Expand All @@ -457,27 +458,89 @@ template<int N> class VSeq {
}
int mask() const {
int m = 0;
int bit = 1 << _base;
for (int i = 0; i < N; i++) {
m |= bit << (i * _delta);
m |= VS_MASK_BIT(_base, _delta, i);
}
return m;
}
int base() const { return _base; }
int delta() const { return _delta; }
bool is_constant() const { return _delta == 0; }
};

// declare convenience methods for splitting vector register sequences

VSeq<4> vs_front(const VSeq<8>& v);
VSeq<4> vs_back(const VSeq<8>& v);
VSeq<4> vs_even(const VSeq<8>& v);
VSeq<4> vs_odd(const VSeq<8>& v);

// methods for use in asserts to check VSeq inputs and oupts are
// methods for use in asserts to check VSeq inputs and outputs are
// either disjoint or equal

template<int N, int M> bool vs_disjoint(const VSeq<N>& n, const VSeq<M>& m) { return (n.mask() & m.mask()) == 0; }
template<int N> bool vs_same(const VSeq<N>& n, const VSeq<N>& m) { return n.mask() == m.mask(); }

// method for use in asserts to check whether registers appearing in
// an output sequence will be written before they are read from an
// input sequence.

// Check whether any register of output sequence vout is written by an
// earlier operation before a later operation reads it from input
// sequence vin. Operation i is assumed to read vin[i] before (or as)
// it writes vout[i], so only a write at step j clashing with a read
// at step k > j counts as a hazard. Returns true if such a hazard
// exists, false otherwise. Intended for use in asserts.
template<int N> bool vs_write_before_read(const VSeq<N>& vout, const VSeq<N>& vin) {
  int b_in = vin.base();
  int d_in = vin.delta();
  int b_out = vout.base();
  int d_out = vout.delta();

  int mask_read = vin.mask(); // all pending reads
  int mask_write = 0;         // no writes as yet

  for (int i = 0; i < N - 1; i++) {
    // operation i consumes read i now, so it is no longer pending --
    // unless this is a constant input sequence, in which case the
    // same register is still read by every remaining operation
    if (d_in != 0) {
      mask_read ^= VS_MASK_BIT(b_in, d_in, i);
    }
    // record the write performed by operation i
    mask_write |= VS_MASK_BIT(b_out, d_out, i);
    // check whether any write so far clashes with a pending read;
    // checking after the write ensures the pair (write i, read i+1)
    // is covered for every i, including i == N - 2
    if ((mask_write & mask_read) != 0) {
      return true;
    }
  }
  // the final write (step N - 1) has no reads after it, so it cannot
  // clash -- no write happens before a read
  return false;
}

// convenience methods for splitting 8-way or 4-way vector register
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// convenience methods for splitting 8-way of 4-way vector register
// convenience methods for splitting 8-way or 4-way vector register

// sequences in half -- needed because vector operations can normally
// benefit from 4-way instruction parallelism or, occasionally, 2-way
// parallelism

// Return the front half of an even-length vector register sequence.
template<int N>
VSeq<N/2> vs_front(const VSeq<N>& v) {
  static_assert(N > 0 && ((N & 1) == 0), "sequence length must be even");
  int start = v.base();
  int step = v.delta();
  return VSeq<N/2>(start, step);
}

// Return the back half of an even-length vector register sequence.
template<int N>
VSeq<N/2> vs_back(const VSeq<N>& v) {
  static_assert(N > 0 && ((N & 1) == 0), "sequence length must be even");
  int step = v.delta();
  int start = v.base() + (N / 2) * step;
  return VSeq<N/2>(start, step);
}

// Return the even-index half of an even-length vector register
// sequence (elements 0, 2, 4, ...).
template<int N>
VSeq<N/2> vs_even(const VSeq<N>& v) {
  static_assert(N > 0 && ((N & 1) == 0), "sequence length must be even");
  int step = v.delta();
  return VSeq<N/2>(v.base(), 2 * step);
}

// Return the odd-index half of an even-length vector register
// sequence (elements 1, 3, 5, ...).
template<int N>
VSeq<N/2> vs_odd(const VSeq<N>& v) {
  static_assert(N > 0 && ((N & 1) == 0), "sequence length must be even");
  int step = v.delta();
  return VSeq<N/2>(v.base() + step, 2 * step);
}

// convenience method to construct a vector register sequence that
// indexes its elements in reverse order to the original

// Return a sequence that indexes the same registers as the original
// but in reverse order, starting at the last element and stepping by
// the negated delta.
template<int N>
VSeq<N> vs_reverse(const VSeq<N>& v) {
  int step = v.delta();
  int last = v.base() + (N - 1) * step;
  return VSeq<N>(last, -step);
}

#endif // CPU_AARCH64_REGISTER_AARCH64_HPP
Loading