Skip to content
Permalink
Browse files

8241597: x86: Remove MMX support

Reviewed-by: kvn, redestad
  • Loading branch information
Vladimir Ivanov
Vladimir Ivanov committed Mar 27, 2020
1 parent d0a6722 commit 2df18266b9a15d95fc9289a79add8eb6721a6381
@@ -1163,19 +1163,6 @@ void Assembler::emit_operand(XMMRegister reg, Address adr) {
}
}

// MMX operations

// Emits the operand bytes for an instruction that pairs the MMX register
// `reg` with the memory operand `adr`. The address must not need a REX
// prefix for its base or index (asserted); the actual encoding is done by
// the emit_operand overload that takes a Register as its first argument.
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  Register reg_as_gpr = (Register)reg;
  emit_operand(reg_as_gpr,
               adr._base, adr._index, adr._scale,
               adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
//
// Same encoding as emit_operand(MMXRegister, Address), but with the two
// parameters in the opposite order so that callers holding (Address, MMX)
// can call it without swapping their arguments.
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  Register reg_as_gpr = (Register)reg;
  emit_operand(reg_as_gpr,
               adr._base, adr._index, adr._scale,
               adr._disp, adr._rspec);
}


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
@@ -2003,11 +1990,6 @@ void Assembler::divss(XMMRegister dst, XMMRegister src) {
emit_int16(0x5E, (0xC0 | encode));
}

// Emits the two opcode bytes 0x0F 0x77 (the EMMS instruction).
// The MMX-support assert is wrapped in NOT_LP64, so it is presumably only
// active in 32-bit builds - confirm against the macro definition.
void Assembler::emms() {
NOT_LP64(assert(VM_Version::supports_mmx(), ""));
emit_int16(0x0F, 0x77);
}

// Emits the single opcode byte 0xF4 (HLT).
void Assembler::hlt() {
  const unsigned char hlt_opcode = (unsigned char)0xF4;
  emit_int8(hlt_opcode);
}
@@ -2771,24 +2753,6 @@ void Assembler::movlpd(XMMRegister dst, Address src) {
emit_operand(dst, src);
}

// MMX movq, register <- memory form.
// Asserts MMX support, emits the opcode bytes 0x0F 0x6F, then the operand
// encoding for (dst, src). The opcode must be emitted before the operand.
void Assembler::movq( MMXRegister dst, Address src ) {
assert( VM_Version::supports_mmx(), "" );
emit_int16(0x0F, 0x6F);
emit_operand(dst, src);
}

// MMX movq, memory <- register form.
// Asserts MMX support, emits the opcode bytes 0x0F 0x7F, then the operand
// encoding. Note that emit_operand is called with the arguments in the same
// (Address, MMXRegister) order as this function's own parameters; see the
// workaround note below before changing that.
void Assembler::movq( Address dst, MMXRegister src ) {
assert( VM_Version::supports_mmx(), "" );
emit_int16(0x0F, 0x7F);
// Workaround for a gcc (3.2.1-7a) bug.
// With only an emit_operand(MMX, Address) overload available, that gcc
// version turns this call into a tail jump and, while reversing the
// parameters for it, completely obliterates dst. Providing an overload
// that needs no argument swap at the tail jump avoids the bug.
emit_operand(dst, src);
}

void Assembler::movq(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
@@ -6861,6 +6825,11 @@ void Assembler::vzeroupper_uncached() {
#ifndef _LP64
// 32bit only pieces of the assembler

// Emits the two opcode bytes 0x0F 0x77 (the EMMS instruction) after
// asserting that the CPU reports MMX support.
// NOTE(review): this definition sits in the "#ifndef _LP64" (32-bit only)
// part of the file, so the NOT_LP64(...) wrapper around the assert looks
// redundant here - confirm and consider dropping it.
void Assembler::emms() {
NOT_LP64(assert(VM_Version::supports_mmx(), ""));
emit_int16(0x0F, 0x77);
}

// Variant defined in the 32-bit-only section of the assembler: it simply
// forwards to vzeroupper_uncached().
void Assembler::vzeroupper() {
vzeroupper_uncached();
}
@@ -766,11 +766,6 @@ class Assembler : public AbstractAssembler {

void emit_operand(XMMRegister reg, Address adr);

void emit_operand(MMXRegister reg, Address adr);

// workaround gcc (3.2.1-7) bug
void emit_operand(Address adr, MMXRegister reg);

// Immediate-to-memory forms
void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);

@@ -1168,7 +1163,6 @@ class Assembler : public AbstractAssembler {
void divss(XMMRegister dst, Address src);
void divss(XMMRegister dst, XMMRegister src);

void emms();

#ifndef _LP64
private:
@@ -1178,6 +1172,8 @@ class Assembler : public AbstractAssembler {
void emit_farith(int b1, int b2, int i);

public:
void emms();

void fabs();

void fadd(int i);
@@ -1543,12 +1539,7 @@ class Assembler : public AbstractAssembler {
void movq(Register dst, Register src);
void movq(Register dst, Address src);
void movq(Address dst, Register src);
#endif

void movq(Address dst, MMXRegister src );
void movq(MMXRegister dst, Address src );

#ifdef _LP64
// These dummies prevent using movq from converting a zero (like NULL) into Register
// by giving the compiler two choices it can't resolve

@@ -129,16 +129,6 @@ REGISTER_DEFINITION(Register, r12_heapbase);
REGISTER_DEFINITION(Register, r15_thread);
#endif // AMD64

// Definitions for the MMX register constants: the "no register" sentinel
// mnoreg followed by mmx0 .. mmx7.
REGISTER_DEFINITION(MMXRegister, mnoreg );
REGISTER_DEFINITION(MMXRegister, mmx0 );
REGISTER_DEFINITION(MMXRegister, mmx1 );
REGISTER_DEFINITION(MMXRegister, mmx2 );
REGISTER_DEFINITION(MMXRegister, mmx3 );
REGISTER_DEFINITION(MMXRegister, mmx4 );
REGISTER_DEFINITION(MMXRegister, mmx5 );
REGISTER_DEFINITION(MMXRegister, mmx6 );
REGISTER_DEFINITION(MMXRegister, mmx7 );

REGISTER_DEFINITION(KRegister, knoreg);
REGISTER_DEFINITION(KRegister, k0);
REGISTER_DEFINITION(KRegister, k1);
@@ -130,17 +130,10 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg, (-1));
// Register handles are pointers to *Impl classes that are only forward
// declared at this point; the pointer value itself carries the encoding.
class XMMRegisterImpl;
class MMXRegisterImpl;

typedef XMMRegisterImpl* XMMRegister;
// Use MMXRegister as shortcut
typedef MMXRegisterImpl* MMXRegister;

// Builds an XMMRegister handle whose pointer value equals `encoding`.
inline XMMRegister as_XMMRegister(int encoding) {
  const intptr_t raw = static_cast<intptr_t>(encoding);
  return reinterpret_cast<XMMRegister>(raw);
}

// Builds an MMXRegister handle whose pointer value equals `encoding`.
inline MMXRegister as_MMXRegister(int encoding) {
  const intptr_t raw = static_cast<intptr_t>(encoding);
  return reinterpret_cast<MMXRegister>(raw);
}

// The implementation of XMM registers for the IA32 architecture
class XMMRegisterImpl: public AbstractRegisterImpl {
@@ -212,17 +205,6 @@ CONSTANT_REGISTER_DECLARATION(XMMRegister, xmm31, (31));
// can't be described in oopMaps and therefore can't be used by the compilers (at least
// where deopt might want to see them).

// The MMX registers, for P3 and up chips
// mnoreg is the "no register" sentinel (encoding -1); mmx0 .. mmx7 carry
// the encodings 0 .. 7.
CONSTANT_REGISTER_DECLARATION(MMXRegister, mnoreg , (-1));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx0 , ( 0));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx1 , ( 1));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx2 , ( 2));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx3 , ( 3));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx4 , ( 4));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx5 , ( 5));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx6 , ( 6));
CONSTANT_REGISTER_DECLARATION(MMXRegister, mmx7 , ( 7));

// Use KRegister as shortcut
class KRegisterImpl;
typedef KRegisterImpl* KRegister;
@@ -795,55 +795,6 @@ class StubGenerator: public StubCodeGenerator {
__ BIND(L_exit);
}

// Forward copy of 8-byte elements through the MMX registers: 64 bytes
// (mmx0 .. mmx7) per iteration of the main loop, then one qword at a time
// for whatever is left. Always finishes with emms().
//
// Inputs:
// from - source array address
// to_from - destination array address - from
// qword_count - 8-bytes element count
// NOTE(review): this line used to say "negative", but the code below counts
// the value down with subl/decrement and loops while it stays >= 0 / > 0,
// i.e. it is treated as a plain element count - confirm against callers.
//
// Both `from` and `qword_count` are modified by the generated code.
//
void mmx_copy_forward(Register from, Register to_from, Register qword_count) {
assert( VM_Version::supports_mmx(), "supported cpu only" );
Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
// Copy 64-byte chunks
// Enter at the loop test so that counts below 8 skip the unrolled body.
__ jmpb(L_copy_64_bytes);
__ align(OptoLoopAlignment);
__ BIND(L_copy_64_bytes_loop);
// Eight loads and eight stores, interleaved. The destination is addressed
// as from + to_from, so only `from` has to be advanced. Keep the order as
// is (presumably chosen for instruction scheduling).
__ movq(mmx0, Address(from, 0));
__ movq(mmx1, Address(from, 8));
__ movq(mmx2, Address(from, 16));
__ movq(Address(from, to_from, Address::times_1, 0), mmx0);
__ movq(mmx3, Address(from, 24));
__ movq(Address(from, to_from, Address::times_1, 8), mmx1);
__ movq(mmx4, Address(from, 32));
__ movq(Address(from, to_from, Address::times_1, 16), mmx2);
__ movq(mmx5, Address(from, 40));
__ movq(Address(from, to_from, Address::times_1, 24), mmx3);
__ movq(mmx6, Address(from, 48));
__ movq(Address(from, to_from, Address::times_1, 32), mmx4);
__ movq(mmx7, Address(from, 56));
__ movq(Address(from, to_from, Address::times_1, 40), mmx5);
__ movq(Address(from, to_from, Address::times_1, 48), mmx6);
__ movq(Address(from, to_from, Address::times_1, 56), mmx7);
__ addptr(from, 64);
__ BIND(L_copy_64_bytes);
// Loop test: take another 64-byte round while at least 8 qwords remain.
__ subl(qword_count, 8);
__ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
// Undo the last subtraction; what remains is the leftover qword count.
__ addl(qword_count, 8);
__ jccb(Assembler::zero, L_exit);
//
// length is too short, just copy qwords
//
__ BIND(L_copy_8_bytes);
__ movq(mmx0, Address(from, 0));
__ movq(Address(from, to_from, Address::times_1), mmx0);
__ addptr(from, 8);
__ decrement(qword_count);
__ jcc(Assembler::greater, L_copy_8_bytes);
__ BIND(L_exit);
// Reached on every path, so emms is always emitted after the MMX moves.
__ emms();
}

address generate_disjoint_copy(BasicType t, bool aligned,
Address::ScaleFactor sf,
address* entry, const char *name,
@@ -918,7 +869,7 @@ class StubGenerator: public StubCodeGenerator {
__ subl(count, 1<<(shift-1));
__ BIND(L_skip_align2);
}
if (!VM_Version::supports_mmx()) {
if (!UseXMMForArrayCopy) {
__ mov(rax, count); // save 'count'
__ shrl(count, shift); // bytes count
__ addptr(to_from, from);// restore 'to'
@@ -935,18 +886,14 @@ class StubGenerator: public StubCodeGenerator {
__ movl(Address(from, to_from, Address::times_1, 0), rax);
__ addptr(from, 4);
__ subl(count, 1<<shift);
}
}
__ BIND(L_copy_64_bytes);
__ mov(rax, count);
__ shrl(rax, shift+1); // 8 bytes chunk count
//
// Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
// Copy 8-byte chunks through XMM registers, 8 per iteration of the loop
//
if (UseXMMForArrayCopy) {
xmm_copy_forward(from, to_from, rax);
} else {
mmx_copy_forward(from, to_from, rax);
}
xmm_copy_forward(from, to_from, rax);
}
// copy trailing dword
__ BIND(L_copy_4_bytes);
@@ -979,9 +926,6 @@ class StubGenerator: public StubCodeGenerator {
}
}

if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
__ emms();
}
__ movl(count, Address(rsp, 12+12)); // reread 'count'
bs->arraycopy_epilogue(_masm, decorators, t, from, to, count);

@@ -1117,7 +1061,7 @@ class StubGenerator: public StubCodeGenerator {
__ jcc(Assembler::below, L_copy_4_bytes);
}

if (!VM_Version::supports_mmx()) {
if (!UseXMMForArrayCopy) {
__ std();
__ mov(rax, count); // Save 'count'
__ mov(rdx, to); // Save 'to'
@@ -1143,20 +1087,12 @@ class StubGenerator: public StubCodeGenerator {
__ align(OptoLoopAlignment);
// Move 8 bytes
__ BIND(L_copy_8_bytes_loop);
if (UseXMMForArrayCopy) {
__ movq(xmm0, Address(from, count, sf, 0));
__ movq(Address(to, count, sf, 0), xmm0);
} else {
__ movq(mmx0, Address(from, count, sf, 0));
__ movq(Address(to, count, sf, 0), mmx0);
}
__ movq(xmm0, Address(from, count, sf, 0));
__ movq(Address(to, count, sf, 0), xmm0);
__ BIND(L_copy_8_bytes);
__ subl(count, 2<<shift);
__ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
__ addl(count, 2<<shift);
if (!UseXMMForArrayCopy) {
__ emms();
}
}
__ BIND(L_copy_4_bytes);
// copy prefix qword
@@ -1190,9 +1126,6 @@ class StubGenerator: public StubCodeGenerator {
}
}

if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
__ emms();
}
__ movl2ptr(count, Address(rsp, 12+12)); // reread count
bs->arraycopy_epilogue(_masm, decorators, t, from, to, count);

@@ -1232,12 +1165,8 @@ class StubGenerator: public StubCodeGenerator {
// UnsafeCopyMemory page error: continue after ucm
UnsafeCopyMemoryMark ucmm(this, true, true);
__ subptr(to, from); // to --> to_from
if (VM_Version::supports_mmx()) {
if (UseXMMForArrayCopy) {
xmm_copy_forward(from, to_from, count);
} else {
mmx_copy_forward(from, to_from, count);
}
if (UseXMMForArrayCopy) {
xmm_copy_forward(from, to_from, count);
} else {
__ jmpb(L_copy_8_bytes);
__ align(OptoLoopAlignment);
@@ -1250,9 +1179,6 @@ class StubGenerator: public StubCodeGenerator {
__ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
}
}
if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
__ emms();
}
inc_copy_counter_np(T_LONG);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ vzeroupper();
@@ -1298,14 +1224,9 @@ class StubGenerator: public StubCodeGenerator {

__ align(OptoLoopAlignment);
__ BIND(L_copy_8_bytes_loop);
if (VM_Version::supports_mmx()) {
if (UseXMMForArrayCopy) {
__ movq(xmm0, Address(from, count, Address::times_8));
__ movq(Address(to, count, Address::times_8), xmm0);
} else {
__ movq(mmx0, Address(from, count, Address::times_8));
__ movq(Address(to, count, Address::times_8), mmx0);
}
if (UseXMMForArrayCopy) {
__ movq(xmm0, Address(from, count, Address::times_8));
__ movq(Address(to, count, Address::times_8), xmm0);
} else {
__ fild_d(Address(from, count, Address::times_8));
__ fistp_d(Address(to, count, Address::times_8));
@@ -1315,9 +1236,6 @@ class StubGenerator: public StubCodeGenerator {
__ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);

}
if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
__ emms();
}
inc_copy_counter_np(T_LONG);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ xorptr(rax, rax); // return 0
@@ -734,7 +734,7 @@ void VM_Version::get_processor_features() {
char buf[512];
int res = jio_snprintf(buf, sizeof(buf),
"(%u cores per cpu, %u threads per core) family %d model %d stepping %d"
"%s%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s%s",
"%s%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s",

cores_per_cpu(), threads_per_core(),
cpu_family(), _model, _stepping,
@@ -758,10 +758,9 @@ void VM_Version::get_processor_features() {
(supports_clmul() ? ", clmul" : ""),
(supports_erms() ? ", erms" : ""),
(supports_rtm() ? ", rtm" : ""),
(supports_mmx_ext() ? ", mmxext" : ""),
(supports_3dnow_prefetch() ? ", 3dnowpref" : ""),

(supports_lzcnt() ? ", lzcnt": ""),

(supports_sse4a() ? ", sse4a": ""),
(supports_ht() ? ", ht": ""),
(supports_tsc() ? ", tsc": ""),
@@ -771,8 +770,8 @@ void VM_Version::get_processor_features() {
(supports_bmi2() ? ", bmi2" : ""),
(supports_adx() ? ", adx" : ""),
(supports_evex() ? ", avx512f" : ""),

(supports_avx512dq() ? ", avx512dq" : ""),

(supports_avx512pf() ? ", avx512pf" : ""),
(supports_avx512er() ? ", avx512er" : ""),
(supports_avx512cd() ? ", avx512cd" : ""),
@@ -782,8 +781,8 @@ void VM_Version::get_processor_features() {
(supports_avx512_vpclmulqdq() ? ", avx512_vpclmulqdq" : ""),
(supports_avx512_vbmi() ? ", avx512_vbmi" : ""),
(supports_avx512_vbmi2() ? ", avx512_vbmi2" : ""),

(supports_avx512_vaes() ? ", avx512_vaes" : ""),

(supports_avx512_vnni() ? ", avx512_vnni" : ""),
(supports_sha() ? ", sha" : ""),
(supports_fma() ? ", fma" : ""),

0 comments on commit 2df1826

Please sign in to comment.