Skip to content

Commit

Permalink
[X86] printZeroUpperMove - add support for constant vectors.
Browse files Browse the repository at this point in the history
Allows cases where movss/movsd etc. are loading constant (ConstantDataSequential) sub-vectors, ensuring we pad with the correct number of zero upper elements by making repeated printConstant calls to print zeroes in a matching int/fp format.
  • Loading branch information
RKSimon committed Jan 22, 2024
1 parent 4e64ed9 commit 74ab795
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 70 deletions.
61 changes: 26 additions & 35 deletions llvm/lib/Target/X86/X86MCInstLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1484,50 +1484,55 @@ static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
return Comment;
}

// Writes an APInt to the comment stream CS. Values of at most 64 bits are
// printed as a single unsigned number; wider values are printed word by word
// as "(w0,w1,...)". When PrintZero is true, 0 is printed in place of the value
// (one 0 per word in the wide case), so the shape of the output is unchanged.
// NOTE(review): this text is a rendered diff whose +/- markers were lost, so
// removed and added lines appear back to back; the lines that do not mention
// PrintZero are presumably the pre-commit versions that the commit deletes.
static void printConstant(const APInt &Val, raw_ostream &CS) {
static void printConstant(const APInt &Val, raw_ostream &CS,
bool PrintZero = false) {
if (Val.getBitWidth() <= 64) {
CS << Val.getZExtValue();
CS << (PrintZero ? 0ULL : Val.getZExtValue());
} else {
// print multi-word constant as (w0,w1)
CS << "(";
for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
if (i > 0)
CS << ",";
CS << Val.getRawData()[i];
CS << (PrintZero ? 0ULL : Val.getRawData()[i]);
}
CS << ")";
}
}

// Writes an APFloat to the comment stream CS by formatting it into a local
// SmallString with toString(Str, 0, 0). When PrintZero is true, a zero built
// from Flt's own semantics is formatted instead, so the zero is rendered in
// the same floating-point format as the value would have been.
// NOTE(review): this text is a rendered diff whose +/- markers were lost; the
// unconditional Flt.toString call directly under the "Force scientific
// notation" comment is presumably the pre-commit line, replaced by the
// if/else that follows it.
static void printConstant(const APFloat &Flt, raw_ostream &CS) {
static void printConstant(const APFloat &Flt, raw_ostream &CS,
bool PrintZero = false) {
SmallString<32> Str;
// Force scientific notation to distinguish from integers.
Flt.toString(Str, 0, 0);
if (PrintZero)
APFloat::getZero(Flt.getSemantics()).toString(Str, 0, 0);
else
Flt.toString(Str, 0, 0);
CS << Str;
}

static void printConstant(const Constant *COp, unsigned BitWidth,
raw_ostream &CS) {
raw_ostream &CS, bool PrintZero = false) {
if (isa<UndefValue>(COp)) {
CS << "u";
} else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
printConstant(CI->getValue(), CS);
printConstant(CI->getValue(), CS, PrintZero);
} else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
printConstant(CF->getValueAPF(), CS);
printConstant(CF->getValueAPF(), CS, PrintZero);
} else if (auto *CDS = dyn_cast<ConstantDataSequential>(COp)) {
Type *EltTy = CDS->getElementType();
bool IsInteger = EltTy->isIntegerTy();
bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy();
unsigned EltBits = EltTy->getPrimitiveSizeInBits();
unsigned E = std::min(BitWidth / EltBits, CDS->getNumElements());
assert((BitWidth % EltBits) == 0 && "Broadcast element size mismatch");
assert((BitWidth % EltBits) == 0 && "Element size mismatch");
for (unsigned I = 0; I != E; ++I) {
if (I != 0)
CS << ",";
if (IsInteger)
printConstant(CDS->getElementAsAPInt(I), CS);
printConstant(CDS->getElementAsAPInt(I), CS, PrintZero);
else if (IsFP)
printConstant(CDS->getElementAsAPFloat(I), CS);
printConstant(CDS->getElementAsAPFloat(I), CS, PrintZero);
else
CS << "?";
}
Expand All @@ -1545,31 +1550,17 @@ static void printZeroUpperMove(const MachineInstr *MI, MCStreamer &OutStreamer,
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";

if (auto *C = X86::getConstantFromPool(*MI, 1)) {
int CstEltSize = C->getType()->getScalarSizeInBits();
if (SclWidth == CstEltSize) {
if (auto *CI = dyn_cast<ConstantInt>(C)) {
CS << "[";
printConstant(CI->getValue(), CS);
for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) {
CS << ",0";
}
CS << "]";
OutStreamer.AddComment(CS.str());
return; // early-out
}

if (auto *CF = dyn_cast<ConstantFP>(C)) {
CS << "[";
printConstant(CF->getValue(), CS);
APFloat ZeroFP = APFloat::getZero(CF->getValue().getSemantics());
for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) {
CS << ",";
printConstant(ZeroFP, CS);
}
CS << "]";
OutStreamer.AddComment(CS.str());
return; // early-out
if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
isa<ConstantDataSequential>(C)) {
CS << "[";
printConstant(C, SclWidth, CS);
for (int I = 1, E = VecWidth / SclWidth; I < E; ++I) {
CS << ",";
printConstant(C, SclWidth, CS, true);
}
CS << "]";
OutStreamer.AddComment(CS.str());
return; // early-out
}
}

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/combine-concatvectors.ll
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ define void @concat_of_broadcast_v2f64_v4f64() {
; AVX1-NEXT: movq %rcx, 46348(%rax)
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm0 = [1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT: vmovups %ymm0, 48296(%rax)
; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [7.812501848093234E-3,0.0E+0]
; AVX1-NEXT: vmovsd %xmm0, 47372(%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
Expand Down
64 changes: 32 additions & 32 deletions llvm/test/CodeGen/X86/nontemporal-4.ll
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ define void @test_constant_v4f32_align1(ptr %dst) nounwind {
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [2.0000004731118679E+0,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: retq
;
Expand Down Expand Up @@ -124,7 +124,7 @@ define void @test_constant_v4i32_align1(ptr %dst) nounwind {
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [2.1219957909652723E-314,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: retq
;
Expand Down Expand Up @@ -168,7 +168,7 @@ define void @test_constant_v8i16_align1(ptr %dst) nounwind {
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [4.1720559249406128E-309,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: retq
;
Expand Down Expand Up @@ -212,7 +212,7 @@ define void @test_constant_v16i8_align1(ptr %dst) nounwind {
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [7.9499288951273625E-275,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: retq
;
Expand Down Expand Up @@ -278,11 +278,11 @@ define void @test_constant_v8f32_align1(ptr %dst) nounwind {
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-5.1200036668777466E+2,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: retq
;
Expand Down Expand Up @@ -346,7 +346,7 @@ define void @test_constant_v4i64_align1(ptr %dst) nounwind {
; SSE4A-NEXT: movntiq %rax, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: retq
;
Expand Down Expand Up @@ -406,11 +406,11 @@ define void @test_constant_v8i32_align1(ptr %dst) nounwind {
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: retq
;
Expand Down Expand Up @@ -470,11 +470,11 @@ define void @test_constant_v16i16_align1(ptr %dst) nounwind {
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: retq
;
Expand Down Expand Up @@ -534,11 +534,11 @@ define void @test_constant_v32i8_align1(ptr %dst) nounwind {
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-3.826728214441238E+279,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-1.6485712323024388E+202,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: retq
;
Expand Down Expand Up @@ -803,19 +803,19 @@ define void @test_constant_v16f32_align1(ptr %dst) nounwind {
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-5.1200036668777466E+2,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-1.3107209417724609E+5,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-2.0971535092773438E+6,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT: retq
;
Expand Down Expand Up @@ -911,15 +911,15 @@ define void @test_constant_v8i64_align1(ptr %dst) nounwind {
; SSE4A-NEXT: movntiq %rax, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT: retq
;
Expand Down Expand Up @@ -1011,19 +1011,19 @@ define void @test_constant_v16i32_align1(ptr %dst) nounwind {
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT: retq
;
Expand Down Expand Up @@ -1115,19 +1115,19 @@ define void @test_constant_v32i16_align1(ptr %dst) nounwind {
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [NaN,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-1.6853227412070812E+308,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-1.2358925997317751E+308,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT: retq
;
Expand Down Expand Up @@ -1219,19 +1219,19 @@ define void @test_constant_v64i8_align1(ptr %dst) nounwind {
; SSE4A: # %bb.0:
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 8(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-3.826728214441238E+279,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, (%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 24(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-1.6485712323024388E+202,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 16(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 40(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-7.1020783099933495E+124,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 32(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movntsd %xmm0, 56(%rdi)
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE4A-NEXT: movsd {{.*#+}} xmm0 = [-3.0595730451167367E+47,0.0E+0]
; SSE4A-NEXT: movntsd %xmm0, 48(%rdi)
; SSE4A-NEXT: retq
;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/pr13577.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ declare x86_fp80 @copysignl(x86_fp80, x86_fp80) nounwind readnone
define float @pr26070() {
; CHECK-LABEL: pr26070:
; CHECK: ## %bb.0:
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: retq
%c = call float @copysignf(float 1.0, float undef) readnone
ret float %c
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/ret-mmx.ll
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ define <2 x i32> @t3() nounwind {
define double @t4() nounwind {
; CHECK-LABEL: t4:
; CHECK: ## %bb.0:
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1,0,0,0]
; CHECK-NEXT: retq
ret double bitcast (<2 x i32> <i32 1, i32 0> to double)
}
Expand Down

0 comments on commit 74ab795

Please sign in to comment.