-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86] movsd/movss/movd/movq - add support for constant comments #78601
Conversation
@llvm/pr-subscribers-lld @llvm/pr-subscribers-llvm-globalisel Author: Simon Pilgrim (RKSimon) Changes: If we're loading a constant value, print the constant (and the zero upper elements) instead of just the shuffle mask. This did require me to move the shuffle mask handling into addConstantComments, as we can't handle this in the MC layer. Patch is 668.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78601.diff 109 Files Affected:
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index 20b37d5a99902b..619328af12719d 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -1212,15 +1212,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VMOVSDZrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
- [[fallthrough]];
-
- case X86::MOVSDrm_alt:
- case X86::MOVSDrm:
- case X86::VMOVSDrm_alt:
- case X86::VMOVSDrm:
- case X86::VMOVSDZrm:
- case X86::VMOVSDZrm_alt:
- DecodeScalarMoveMask(2, nullptr == Src2Name, ShuffleMask);
+ DecodeScalarMoveMask(2, false, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -1229,15 +1221,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VMOVSSZrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
- [[fallthrough]];
-
- case X86::MOVSSrm:
- case X86::MOVSSrm_alt:
- case X86::VMOVSSrm:
- case X86::VMOVSSrm_alt:
- case X86::VMOVSSZrm:
- case X86::VMOVSSZrm_alt:
- DecodeScalarMoveMask(4, nullptr == Src2Name, ShuffleMask);
+ DecodeScalarMoveMask(4, false, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -1248,22 +1232,10 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VMOVZPQILo2PQIrr:
case X86::VMOVZPQILo2PQIZrr:
Src1Name = getRegName(MI->getOperand(1).getReg());
- [[fallthrough]];
-
- case X86::MOVQI2PQIrm:
- case X86::VMOVQI2PQIrm:
- case X86::VMOVQI2PQIZrm:
DecodeZeroMoveLowMask(2, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::MOVDI2PDIrm:
- case X86::VMOVDI2PDIrm:
- case X86::VMOVDI2PDIZrm:
- DecodeZeroMoveLowMask(4, ShuffleMask);
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
-
case X86::EXTRQI:
if (MI->getOperand(2).isImm() &&
MI->getOperand(3).isImm())
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 2d5ccbfdfc765f..cc615a3fac34fd 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1806,6 +1806,96 @@ static void addConstantComments(const MachineInstr *MI,
break;
}
+ case X86::MOVSDrm:
+ case X86::MOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSDZrm:
+ case X86::VMOVSSZrm:
+ case X86::MOVSDrm_alt:
+ case X86::MOVSSrm_alt:
+ case X86::VMOVSDrm_alt:
+ case X86::VMOVSSrm_alt:
+ case X86::VMOVSDZrm_alt:
+ case X86::VMOVSSZrm_alt:
+ case X86::MOVDI2PDIrm:
+ case X86::MOVQI2PQIrm:
+ case X86::VMOVDI2PDIrm:
+ case X86::VMOVQI2PQIrm:
+ case X86::VMOVDI2PDIZrm:
+ case X86::VMOVQI2PQIZrm: {
+ assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
+ "Unexpected number of operands!");
+ int SclWidth = 32;
+ int VecWidth = 128;
+
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Invalid opcode");
+ case X86::MOVSDrm:
+ case X86::VMOVSDrm:
+ case X86::VMOVSDZrm:
+ case X86::MOVSDrm_alt:
+ case X86::VMOVSDrm_alt:
+ case X86::VMOVSDZrm_alt:
+ case X86::MOVQI2PQIrm:
+ case X86::VMOVQI2PQIrm:
+ case X86::VMOVQI2PQIZrm:
+ SclWidth = 64;
+ VecWidth = 128;
+ break;
+ case X86::MOVSSrm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSSZrm:
+ case X86::MOVSSrm_alt:
+ case X86::VMOVSSrm_alt:
+ case X86::VMOVSSZrm_alt:
+ case X86::MOVDI2PDIrm:
+ case X86::VMOVDI2PDIrm:
+ case X86::VMOVDI2PDIZrm:
+ SclWidth = 32;
+ VecWidth = 128;
+ break;
+ }
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ const MachineOperand &DstOp = MI->getOperand(0);
+ CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
+
+ if (auto *C =
+ X86::getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
+ if (SclWidth == C->getType()->getScalarSizeInBits()) {
+ if (auto *CI = dyn_cast<ConstantInt>(C)) {
+ CS << "[";
+ printConstant(CI->getValue(), CS);
+ for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) {
+ CS << ",0";
+ }
+ CS << "]";
+ OutStreamer.AddComment(CS.str());
+ break; // early-out
+ }
+ if (auto *CF = dyn_cast<ConstantFP>(C)) {
+ CS << "[";
+ printConstant(CF->getValue(), CS);
+ APFloat ZeroFP = APFloat::getZero(CF->getValue().getSemantics());
+ for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) {
+ CS << ",";
+ printConstant(ZeroFP, CS);
+ }
+ CS << "]";
+ OutStreamer.AddComment(CS.str());
+ break; // early-out
+ }
+ }
+ }
+
+ // We didn't find a constant load, fallback to a shuffle mask decode.
+ CS << (SclWidth == 32 ? "mem[0],zero,zero,zero" : "mem[0],zero");
+ OutStreamer.AddComment(CS.str());
+ break;
+ }
+
#define MOV_CASE(Prefix, Suffix) \
case X86::Prefix##MOVAPD##Suffix##rm: \
case X86::Prefix##MOVAPS##Suffix##rm: \
diff --git a/llvm/test/CodeGen/X86/2008-09-25-sseregparm-1.ll b/llvm/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
index a2dd55767a7efa..6288f7e1d039c9 100644
--- a/llvm/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
+++ b/llvm/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
@@ -5,7 +5,7 @@
define inreg double @foo1() nounwind {
; CHECK-LABEL: foo1:
; CHECK: # %bb.0:
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; CHECK-NEXT: retl
ret double 1.0
}
@@ -13,7 +13,7 @@ define inreg double @foo1() nounwind {
define inreg float @foo2() nounwind {
; CHECK-LABEL: foo2:
; CHECK: # %bb.0:
-; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: retl
ret float 1.0
}
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll b/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll
index 1e08c804af586e..a9b2037e9947a1 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll
@@ -8,7 +8,7 @@
define void @test_float(ptr %a , float %b) {
; CHECK64_SMALL-LABEL: test_float:
; CHECK64_SMALL: # %bb.0: # %entry
-; CHECK64_SMALL-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK64_SMALL-NEXT: movss {{.*#+}} xmm1 = [5.5E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK64_SMALL-NEXT: addss %xmm0, %xmm1
; CHECK64_SMALL-NEXT: movd %xmm1, %eax
; CHECK64_SMALL-NEXT: movl %eax, (%rdi)
@@ -26,7 +26,7 @@ define void @test_float(ptr %a , float %b) {
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK32-NEXT: movss {{.*#+}} xmm0 = [5.5E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK32-NEXT: movd %ecx, %xmm1
; CHECK32-NEXT: addss %xmm0, %xmm1
; CHECK32-NEXT: movd %xmm1, %ecx
diff --git a/llvm/test/CodeGen/X86/asm-reg-type-mismatch-avx512.ll b/llvm/test/CodeGen/X86/asm-reg-type-mismatch-avx512.ll
index 053ca11b95a50c..56b05418afa946 100644
--- a/llvm/test/CodeGen/X86/asm-reg-type-mismatch-avx512.ll
+++ b/llvm/test/CodeGen/X86/asm-reg-type-mismatch-avx512.ll
@@ -5,7 +5,7 @@ define i64 @test1() nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
-; CHECK-NEXT: vmovq {{.*#+}} xmm16 = mem[0],zero
+; CHECK-NEXT: vmovq 0, %xmm16
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovq %xmm16, %rax
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/atomic-fp.ll b/llvm/test/CodeGen/X86/atomic-fp.ll
index d933ffec623b94..1094edd19af438 100644
--- a/llvm/test/CodeGen/X86/atomic-fp.ll
+++ b/llvm/test/CodeGen/X86/atomic-fp.ll
@@ -207,28 +207,28 @@ define dso_local void @fadd_32g() nounwind {
;
; X86-SSE2-LABEL: fadd_32g:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT: addss glob32, %xmm0
; X86-SSE2-NEXT: movss %xmm0, glob32
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fadd_32g:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT: vaddss glob32, %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, glob32
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fadd_32g:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT: addss glob32(%rip), %xmm0
; X64-SSE-NEXT: movss %xmm0, glob32(%rip)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_32g:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddss glob32(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, glob32(%rip)
; X64-AVX-NEXT: retq
@@ -319,14 +319,14 @@ define dso_local void @fadd_64g() nounwind {
;
; X64-SSE-LABEL: fadd_64g:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-SSE-NEXT: addsd glob64(%rip), %xmm0
; X64-SSE-NEXT: movsd %xmm0, glob64(%rip)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_64g:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddsd glob64(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, glob64(%rip)
; X64-AVX-NEXT: retq
@@ -368,14 +368,14 @@ define dso_local void @fadd_32imm() nounwind {
;
; X86-SSE2-LABEL: fadd_32imm:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT: addss -559038737, %xmm0
; X86-SSE2-NEXT: movss %xmm0, -559038737
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fadd_32imm:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT: vaddss -559038737, %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, -559038737
; X86-AVX-NEXT: retl
@@ -383,7 +383,7 @@ define dso_local void @fadd_32imm() nounwind {
; X64-SSE-LABEL: fadd_32imm:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
-; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT: addss (%rax), %xmm0
; X64-SSE-NEXT: movss %xmm0, (%rax)
; X64-SSE-NEXT: retq
@@ -391,7 +391,7 @@ define dso_local void @fadd_32imm() nounwind {
; X64-AVX-LABEL: fadd_32imm:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
-; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddss (%rax), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, (%rax)
; X64-AVX-NEXT: retq
@@ -483,7 +483,7 @@ define dso_local void @fadd_64imm() nounwind {
; X64-SSE-LABEL: fadd_64imm:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
-; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-SSE-NEXT: addsd (%rax), %xmm0
; X64-SSE-NEXT: movsd %xmm0, (%rax)
; X64-SSE-NEXT: retq
@@ -491,7 +491,7 @@ define dso_local void @fadd_64imm() nounwind {
; X64-AVX-LABEL: fadd_64imm:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
-; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddsd (%rax), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, (%rax)
; X64-AVX-NEXT: retq
@@ -534,7 +534,7 @@ define dso_local void @fadd_32stack() nounwind {
; X86-SSE2-LABEL: fadd_32stack:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %eax
-; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT: addss (%esp), %xmm0
; X86-SSE2-NEXT: movss %xmm0, (%esp)
; X86-SSE2-NEXT: popl %eax
@@ -543,7 +543,7 @@ define dso_local void @fadd_32stack() nounwind {
; X86-AVX-LABEL: fadd_32stack:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
-; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT: vaddss (%esp), %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, (%esp)
; X86-AVX-NEXT: popl %eax
@@ -551,14 +551,14 @@ define dso_local void @fadd_32stack() nounwind {
;
; X64-SSE-LABEL: fadd_32stack:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT: addss -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_32stack:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddss -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: retq
@@ -650,14 +650,14 @@ define dso_local void @fadd_64stack() nounwind {
;
; X64-SSE-LABEL: fadd_64stack:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-SSE-NEXT: addsd -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_64stack:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512-cmp.ll b/llvm/test/CodeGen/X86/avx512-cmp.ll
index 919edb334b3671..0c3d9d6f7277c4 100644
--- a/llvm/test/CodeGen/X86/avx512-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-cmp.ll
@@ -70,7 +70,7 @@ define float @test5(float %p) #0 {
; ALL-NEXT: retq
; ALL-NEXT: LBB3_1: ## %if.end
; ALL-NEXT: vcmpltss %xmm0, %xmm1, %k1
-; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ALL-NEXT: vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; ALL-NEXT: vmovss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1}
; ALL-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/avx512-fma-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-fma-intrinsics.ll
index def5ad51d732c2..c5a994e6846a45 100644
--- a/llvm/test/CodeGen/X86/avx512-fma-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-fma-intrinsics.ll
@@ -1150,19 +1150,12 @@ define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <1
; This test case used to crash due to combineFMA not bitcasting results of isFNEG.
define <4 x float> @foo() {
-; X86-LABEL: foo:
-; X86: # %bb.0: # %entry
-; X86-NEXT: vmovss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: vfmsub213ss {rd-sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x38,0xab,0xc0]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: foo:
-; X64: # %bb.0: # %entry
-; X64-NEXT: vmovss (%rax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
-; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: vfmsub213ss {rd-sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x38,0xab,0xc0]
-; X64-NEXT: retq # encoding: [0xc3]
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
+; CHECK-NEXT: vfmsub213ss {rd-sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x38,0xab,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
entry:
%0 = load <4 x float>, ptr undef, align 16
%sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 4a2dd7673f4e76..abfe3e6428e663 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -2175,7 +2175,7 @@ define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind {
; KNL-NEXT: movzwl %ax, %eax
; KNL-NEXT: vmovd %eax, %xmm1
; KNL-NEXT: vcvtph2ps %xmm1, %xmm1
-; KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; KNL-NEXT: vmovss {{.*#+}} xmm2 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0]
; KNL-NEXT: vucomiss %xmm2, %xmm1
; KNL-NEXT: setb %al
; KNL-NEXT: andl $1, %eax
@@ -2217,7 +2217,7 @@ define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind {
; SKX-NEXT: movzwl %ax, %eax
; SKX-NEXT: vmovd %eax, %xmm1
; SKX-NEXT: vcvtph2ps %xmm1, %xmm1
-; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SKX-NEXT: vmovss {{.*#+}} xmm2 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0]
; SKX-NEXT: vucomiss %xmm2, %xmm1
; SKX-NEXT: setb %al
; SKX-NEXT: kmovd %eax, %k0
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index a5a4bf1e53631e..6c9c28bc9e55e1 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -10104,10 +10104,10 @@ define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c) {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
-; X86-NEXT: vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
-; X86-NEXT: ## xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: vmovss (%ecx), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x09]
-; X86-NEXT: ## xmm1 = mem[0],zero,zero,zero
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
+; X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x09]
; X86-NEXT: vfmadd213ss %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xc8]
; X86-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
@@ -10117,10 +10117,10 @@ define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c) {
;
; X64-LABEL: fmadd_ss_mask_memfold:
; X64: ## %bb.0:
-; X64-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
-; X64-NEXT: ## xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: vmovss (%rsi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0e]
-; X64-NEXT: ## xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
+; X64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0e]
; X64-NEXT: vfmadd213ss %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xc8]
; X64-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
; X64-NEXT: kmovw %edx, %k1 ## encoding: ...
[truncated]
|
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes: If we're loading a constant value, print the constant (and the zero upper elements) instead of just the shuffle mask. This did require me to move the shuffle mask handling into addConstantComments, as we can't handle this in the MC layer. Patch is 668.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78601.diff 109 Files Affected:
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index 20b37d5a99902be..619328af12719d1 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -1212,15 +1212,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VMOVSDZrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
- [[fallthrough]];
-
- case X86::MOVSDrm_alt:
- case X86::MOVSDrm:
- case X86::VMOVSDrm_alt:
- case X86::VMOVSDrm:
- case X86::VMOVSDZrm:
- case X86::VMOVSDZrm_alt:
- DecodeScalarMoveMask(2, nullptr == Src2Name, ShuffleMask);
+ DecodeScalarMoveMask(2, false, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -1229,15 +1221,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VMOVSSZrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
- [[fallthrough]];
-
- case X86::MOVSSrm:
- case X86::MOVSSrm_alt:
- case X86::VMOVSSrm:
- case X86::VMOVSSrm_alt:
- case X86::VMOVSSZrm:
- case X86::VMOVSSZrm_alt:
- DecodeScalarMoveMask(4, nullptr == Src2Name, ShuffleMask);
+ DecodeScalarMoveMask(4, false, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -1248,22 +1232,10 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VMOVZPQILo2PQIrr:
case X86::VMOVZPQILo2PQIZrr:
Src1Name = getRegName(MI->getOperand(1).getReg());
- [[fallthrough]];
-
- case X86::MOVQI2PQIrm:
- case X86::VMOVQI2PQIrm:
- case X86::VMOVQI2PQIZrm:
DecodeZeroMoveLowMask(2, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::MOVDI2PDIrm:
- case X86::VMOVDI2PDIrm:
- case X86::VMOVDI2PDIZrm:
- DecodeZeroMoveLowMask(4, ShuffleMask);
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
-
case X86::EXTRQI:
if (MI->getOperand(2).isImm() &&
MI->getOperand(3).isImm())
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 2d5ccbfdfc765fa..cc615a3fac34fd3 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1806,6 +1806,96 @@ static void addConstantComments(const MachineInstr *MI,
break;
}
+ case X86::MOVSDrm:
+ case X86::MOVSSrm:
+ case X86::VMOVSDrm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSDZrm:
+ case X86::VMOVSSZrm:
+ case X86::MOVSDrm_alt:
+ case X86::MOVSSrm_alt:
+ case X86::VMOVSDrm_alt:
+ case X86::VMOVSSrm_alt:
+ case X86::VMOVSDZrm_alt:
+ case X86::VMOVSSZrm_alt:
+ case X86::MOVDI2PDIrm:
+ case X86::MOVQI2PQIrm:
+ case X86::VMOVDI2PDIrm:
+ case X86::VMOVQI2PQIrm:
+ case X86::VMOVDI2PDIZrm:
+ case X86::VMOVQI2PQIZrm: {
+ assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
+ "Unexpected number of operands!");
+ int SclWidth = 32;
+ int VecWidth = 128;
+
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Invalid opcode");
+ case X86::MOVSDrm:
+ case X86::VMOVSDrm:
+ case X86::VMOVSDZrm:
+ case X86::MOVSDrm_alt:
+ case X86::VMOVSDrm_alt:
+ case X86::VMOVSDZrm_alt:
+ case X86::MOVQI2PQIrm:
+ case X86::VMOVQI2PQIrm:
+ case X86::VMOVQI2PQIZrm:
+ SclWidth = 64;
+ VecWidth = 128;
+ break;
+ case X86::MOVSSrm:
+ case X86::VMOVSSrm:
+ case X86::VMOVSSZrm:
+ case X86::MOVSSrm_alt:
+ case X86::VMOVSSrm_alt:
+ case X86::VMOVSSZrm_alt:
+ case X86::MOVDI2PDIrm:
+ case X86::VMOVDI2PDIrm:
+ case X86::VMOVDI2PDIZrm:
+ SclWidth = 32;
+ VecWidth = 128;
+ break;
+ }
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ const MachineOperand &DstOp = MI->getOperand(0);
+ CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
+
+ if (auto *C =
+ X86::getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
+ if (SclWidth == C->getType()->getScalarSizeInBits()) {
+ if (auto *CI = dyn_cast<ConstantInt>(C)) {
+ CS << "[";
+ printConstant(CI->getValue(), CS);
+ for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) {
+ CS << ",0";
+ }
+ CS << "]";
+ OutStreamer.AddComment(CS.str());
+ break; // early-out
+ }
+ if (auto *CF = dyn_cast<ConstantFP>(C)) {
+ CS << "[";
+ printConstant(CF->getValue(), CS);
+ APFloat ZeroFP = APFloat::getZero(CF->getValue().getSemantics());
+ for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) {
+ CS << ",";
+ printConstant(ZeroFP, CS);
+ }
+ CS << "]";
+ OutStreamer.AddComment(CS.str());
+ break; // early-out
+ }
+ }
+ }
+
+ // We didn't find a constant load, fallback to a shuffle mask decode.
+ CS << (SclWidth == 32 ? "mem[0],zero,zero,zero" : "mem[0],zero");
+ OutStreamer.AddComment(CS.str());
+ break;
+ }
+
#define MOV_CASE(Prefix, Suffix) \
case X86::Prefix##MOVAPD##Suffix##rm: \
case X86::Prefix##MOVAPS##Suffix##rm: \
diff --git a/llvm/test/CodeGen/X86/2008-09-25-sseregparm-1.ll b/llvm/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
index a2dd55767a7efae..6288f7e1d039c9c 100644
--- a/llvm/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
+++ b/llvm/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
@@ -5,7 +5,7 @@
define inreg double @foo1() nounwind {
; CHECK-LABEL: foo1:
; CHECK: # %bb.0:
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; CHECK-NEXT: retl
ret double 1.0
}
@@ -13,7 +13,7 @@ define inreg double @foo1() nounwind {
define inreg float @foo2() nounwind {
; CHECK-LABEL: foo2:
; CHECK: # %bb.0:
-; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: retl
ret float 1.0
}
diff --git a/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll b/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll
index 1e08c804af586e3..a9b2037e9947a1b 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll
@@ -8,7 +8,7 @@
define void @test_float(ptr %a , float %b) {
; CHECK64_SMALL-LABEL: test_float:
; CHECK64_SMALL: # %bb.0: # %entry
-; CHECK64_SMALL-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK64_SMALL-NEXT: movss {{.*#+}} xmm1 = [5.5E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK64_SMALL-NEXT: addss %xmm0, %xmm1
; CHECK64_SMALL-NEXT: movd %xmm1, %eax
; CHECK64_SMALL-NEXT: movl %eax, (%rdi)
@@ -26,7 +26,7 @@ define void @test_float(ptr %a , float %b) {
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK32-NEXT: movss {{.*#+}} xmm0 = [5.5E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK32-NEXT: movd %ecx, %xmm1
; CHECK32-NEXT: addss %xmm0, %xmm1
; CHECK32-NEXT: movd %xmm1, %ecx
diff --git a/llvm/test/CodeGen/X86/asm-reg-type-mismatch-avx512.ll b/llvm/test/CodeGen/X86/asm-reg-type-mismatch-avx512.ll
index 053ca11b95a50c7..56b05418afa9464 100644
--- a/llvm/test/CodeGen/X86/asm-reg-type-mismatch-avx512.ll
+++ b/llvm/test/CodeGen/X86/asm-reg-type-mismatch-avx512.ll
@@ -5,7 +5,7 @@ define i64 @test1() nounwind {
; CHECK-LABEL: test1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: #APP
-; CHECK-NEXT: vmovq {{.*#+}} xmm16 = mem[0],zero
+; CHECK-NEXT: vmovq 0, %xmm16
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovq %xmm16, %rax
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/atomic-fp.ll b/llvm/test/CodeGen/X86/atomic-fp.ll
index d933ffec623b945..1094edd19af4380 100644
--- a/llvm/test/CodeGen/X86/atomic-fp.ll
+++ b/llvm/test/CodeGen/X86/atomic-fp.ll
@@ -207,28 +207,28 @@ define dso_local void @fadd_32g() nounwind {
;
; X86-SSE2-LABEL: fadd_32g:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT: addss glob32, %xmm0
; X86-SSE2-NEXT: movss %xmm0, glob32
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fadd_32g:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT: vaddss glob32, %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, glob32
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fadd_32g:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT: addss glob32(%rip), %xmm0
; X64-SSE-NEXT: movss %xmm0, glob32(%rip)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_32g:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddss glob32(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, glob32(%rip)
; X64-AVX-NEXT: retq
@@ -319,14 +319,14 @@ define dso_local void @fadd_64g() nounwind {
;
; X64-SSE-LABEL: fadd_64g:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-SSE-NEXT: addsd glob64(%rip), %xmm0
; X64-SSE-NEXT: movsd %xmm0, glob64(%rip)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_64g:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddsd glob64(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, glob64(%rip)
; X64-AVX-NEXT: retq
@@ -368,14 +368,14 @@ define dso_local void @fadd_32imm() nounwind {
;
; X86-SSE2-LABEL: fadd_32imm:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT: addss -559038737, %xmm0
; X86-SSE2-NEXT: movss %xmm0, -559038737
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fadd_32imm:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT: vaddss -559038737, %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, -559038737
; X86-AVX-NEXT: retl
@@ -383,7 +383,7 @@ define dso_local void @fadd_32imm() nounwind {
; X64-SSE-LABEL: fadd_32imm:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
-; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT: addss (%rax), %xmm0
; X64-SSE-NEXT: movss %xmm0, (%rax)
; X64-SSE-NEXT: retq
@@ -391,7 +391,7 @@ define dso_local void @fadd_32imm() nounwind {
; X64-AVX-LABEL: fadd_32imm:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
-; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddss (%rax), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, (%rax)
; X64-AVX-NEXT: retq
@@ -483,7 +483,7 @@ define dso_local void @fadd_64imm() nounwind {
; X64-SSE-LABEL: fadd_64imm:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
-; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-SSE-NEXT: addsd (%rax), %xmm0
; X64-SSE-NEXT: movsd %xmm0, (%rax)
; X64-SSE-NEXT: retq
@@ -491,7 +491,7 @@ define dso_local void @fadd_64imm() nounwind {
; X64-AVX-LABEL: fadd_64imm:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
-; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddsd (%rax), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, (%rax)
; X64-AVX-NEXT: retq
@@ -534,7 +534,7 @@ define dso_local void @fadd_32stack() nounwind {
; X86-SSE2-LABEL: fadd_32stack:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: pushl %eax
-; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-SSE2-NEXT: addss (%esp), %xmm0
; X86-SSE2-NEXT: movss %xmm0, (%esp)
; X86-SSE2-NEXT: popl %eax
@@ -543,7 +543,7 @@ define dso_local void @fadd_32stack() nounwind {
; X86-AVX-LABEL: fadd_32stack:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
-; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X86-AVX-NEXT: vaddss (%esp), %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, (%esp)
; X86-AVX-NEXT: popl %eax
@@ -551,14 +551,14 @@ define dso_local void @fadd_32stack() nounwind {
;
; X64-SSE-LABEL: fadd_32stack:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-SSE-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-SSE-NEXT: addss -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_32stack:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddss -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: retq
@@ -650,14 +650,14 @@ define dso_local void @fadd_64stack() nounwind {
;
; X64-SSE-LABEL: fadd_64stack:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-SSE-NEXT: addsd -{{[0-9]+}}(%rsp), %xmm0
; X64-SSE-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_64stack:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X64-AVX-NEXT: vaddsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512-cmp.ll b/llvm/test/CodeGen/X86/avx512-cmp.ll
index 919edb334b3671b..0c3d9d6f7277c40 100644
--- a/llvm/test/CodeGen/X86/avx512-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-cmp.ll
@@ -70,7 +70,7 @@ define float @test5(float %p) #0 {
; ALL-NEXT: retq
; ALL-NEXT: LBB3_1: ## %if.end
; ALL-NEXT: vcmpltss %xmm0, %xmm1, %k1
-; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ALL-NEXT: vmovss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; ALL-NEXT: vmovss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1}
; ALL-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/avx512-fma-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-fma-intrinsics.ll
index def5ad51d732c29..c5a994e6846a455 100644
--- a/llvm/test/CodeGen/X86/avx512-fma-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-fma-intrinsics.ll
@@ -1150,19 +1150,12 @@ define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <1
; This test case used to crash due to combineFMA not bitcasting results of isFNEG.
define <4 x float> @foo() {
-; X86-LABEL: foo:
-; X86: # %bb.0: # %entry
-; X86-NEXT: vmovss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
-; X86-NEXT: # xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: vfmsub213ss {rd-sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x38,0xab,0xc0]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: foo:
-; X64: # %bb.0: # %entry
-; X64-NEXT: vmovss (%rax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
-; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: vfmsub213ss {rd-sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x38,0xab,0xc0]
-; X64-NEXT: retq # encoding: [0xc3]
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
+; CHECK-NEXT: vfmsub213ss {rd-sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x38,0xab,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
entry:
%0 = load <4 x float>, ptr undef, align 16
%sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 4a2dd7673f4e767..abfe3e6428e6632 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -2175,7 +2175,7 @@ define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind {
; KNL-NEXT: movzwl %ax, %eax
; KNL-NEXT: vmovd %eax, %xmm1
; KNL-NEXT: vcvtph2ps %xmm1, %xmm1
-; KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; KNL-NEXT: vmovss {{.*#+}} xmm2 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0]
; KNL-NEXT: vucomiss %xmm2, %xmm1
; KNL-NEXT: setb %al
; KNL-NEXT: andl $1, %eax
@@ -2217,7 +2217,7 @@ define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind {
; SKX-NEXT: movzwl %ax, %eax
; SKX-NEXT: vmovd %eax, %xmm1
; SKX-NEXT: vcvtph2ps %xmm1, %xmm1
-; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SKX-NEXT: vmovss {{.*#+}} xmm2 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0]
; SKX-NEXT: vucomiss %xmm2, %xmm1
; SKX-NEXT: setb %al
; SKX-NEXT: kmovd %eax, %k0
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index a5a4bf1e53631ee..6c9c28bc9e55e11 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -10104,10 +10104,10 @@ define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c) {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
-; X86-NEXT: vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
-; X86-NEXT: ## xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: vmovss (%ecx), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x09]
-; X86-NEXT: ## xmm1 = mem[0],zero,zero,zero
+; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
+; X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x09]
; X86-NEXT: vfmadd213ss %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xc8]
; X86-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
@@ -10117,10 +10117,10 @@ define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c) {
;
; X64-LABEL: fmadd_ss_mask_memfold:
; X64: ## %bb.0:
-; X64-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
-; X64-NEXT: ## xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: vmovss (%rsi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0e]
-; X64-NEXT: ## xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
+; X64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0e]
; X64-NEXT: vfmadd213ss %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xc8]
; X64-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
; X64-NEXT: kmovw %ed...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
9944860
to
c628cea
Compare
c628cea
to
b3a35cf
Compare
@@ -5,7 +5,7 @@ define i64 @test1() nounwind { | |||
; CHECK-LABEL: test1: | |||
; CHECK: # %bb.0: # %entry | |||
; CHECK-NEXT: #APP | |||
; CHECK-NEXT: vmovq {{.*#+}} xmm16 = mem[0],zero | |||
; CHECK-NEXT: vmovq 0, %xmm16 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I understand the patch just changes the comments, why codegen affected here too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The instruction comes from inline assembly; when we had the comments in MC we could handle them, but not after moving to MI.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I misunderstood it. The 0
is a memory address; I took it as an immediate.
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
; CHECK-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The change seems unrelated, need to rebase?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's an ordering thing — moving to MI from MC means the constant/shuffle comment gets appended before the EVEX pass (see #78585, where I had to clean this up).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A few changes due to the MC-MI diff, there's no clean way to get the ConstantPool data in MC (we already have to do something similar for all the shuffles that load a mask)
@@ -5,7 +5,7 @@ define i64 @test1() nounwind { | |||
; CHECK-LABEL: test1: | |||
; CHECK: # %bb.0: # %entry | |||
; CHECK-NEXT: #APP | |||
; CHECK-NEXT: vmovq {{.*#+}} xmm16 = mem[0],zero | |||
; CHECK-NEXT: vmovq 0, %xmm16 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The instruction comes from inline assembly, when we had the comments in MC we could handle them, but not after moving to MI.
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero | ||
; CHECK-NEXT: # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's an ordering thing — moving to MI from MC means the constant/shuffle comment gets appended before the EVEX pass (see #78585, where I had to clean this up).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
If we're loading a constant value, print the constant (and the zero upper elements) instead of just the shuffle mask. This did require me to move the shuffle mask handling into addConstantComments, as we can't handle this in the MC layer.
b3a35cf
to
ecfbb33
Compare
If we're loading a constant value, print the constant (and the zero upper elements) instead of just the shuffle mask
This did require me to move the shuffle mask handling into addConstantComments as we can't handle this in the MC layer