51 changes: 16 additions & 35 deletions lldb/unittests/Core/RichManglingContextTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,12 @@ using namespace lldb_private;
TEST(RichManglingContextTest, Basic) {
RichManglingContext RMC;
ConstString mangled("_ZN3foo3barEv");
EXPECT_TRUE(RMC.FromItaniumName(mangled));

EXPECT_TRUE(RMC.FromItaniumName(mangled));
EXPECT_FALSE(RMC.IsCtorOrDtor());

RMC.ParseFunctionDeclContextName();
EXPECT_EQ("foo", RMC.GetBufferRef());

RMC.ParseFunctionBaseName();
EXPECT_EQ("bar", RMC.GetBufferRef());

RMC.ParseFullName();
EXPECT_EQ("foo::bar()", RMC.GetBufferRef());
EXPECT_EQ("foo", RMC.ParseFunctionDeclContextName());
EXPECT_EQ("bar", RMC.ParseFunctionBaseName());
EXPECT_EQ("foo::bar()", RMC.ParseFullName());
}

TEST(RichManglingContextTest, FromCxxMethodName) {
Expand All @@ -41,19 +35,12 @@ TEST(RichManglingContextTest, FromCxxMethodName) {
ConstString demangled("foo::bar()");
EXPECT_TRUE(CxxMethodRMC.FromCxxMethodName(demangled));

EXPECT_TRUE(ItaniumRMC.IsCtorOrDtor() == CxxMethodRMC.IsCtorOrDtor());

ItaniumRMC.ParseFunctionDeclContextName();
CxxMethodRMC.ParseFunctionDeclContextName();
EXPECT_TRUE(ItaniumRMC.GetBufferRef() == CxxMethodRMC.GetBufferRef());

ItaniumRMC.ParseFunctionBaseName();
CxxMethodRMC.ParseFunctionBaseName();
EXPECT_TRUE(ItaniumRMC.GetBufferRef() == CxxMethodRMC.GetBufferRef());

ItaniumRMC.ParseFullName();
CxxMethodRMC.ParseFullName();
EXPECT_TRUE(ItaniumRMC.GetBufferRef() == CxxMethodRMC.GetBufferRef());
EXPECT_EQ(ItaniumRMC.IsCtorOrDtor(), CxxMethodRMC.IsCtorOrDtor());
EXPECT_EQ(ItaniumRMC.ParseFunctionDeclContextName(),
CxxMethodRMC.ParseFunctionDeclContextName());
EXPECT_EQ(ItaniumRMC.ParseFunctionBaseName(),
CxxMethodRMC.ParseFunctionBaseName());
EXPECT_EQ(ItaniumRMC.ParseFullName(), CxxMethodRMC.ParseFullName());

// Construct with a random name.
{
Expand All @@ -68,8 +55,7 @@ TEST(RichManglingContextTest, FromCxxMethodName) {
ConstString("void * operator new(unsigned __int64)")));

// We expect its context is empty.
CxxMethodRMC.ParseFunctionDeclContextName();
EXPECT_TRUE(CxxMethodRMC.GetBufferRef().empty());
EXPECT_TRUE(CxxMethodRMC.ParseFunctionDeclContextName().empty());
}
}

Expand All @@ -79,16 +65,13 @@ TEST(RichManglingContextTest, SwitchProvider) {
llvm::StringRef demangled = "foo::bar()";

EXPECT_TRUE(RMC.FromItaniumName(ConstString(mangled)));
RMC.ParseFullName();
EXPECT_EQ("foo::bar()", RMC.GetBufferRef());
EXPECT_EQ("foo::bar()", RMC.ParseFullName());

EXPECT_TRUE(RMC.FromCxxMethodName(ConstString(demangled)));
RMC.ParseFullName();
EXPECT_EQ("foo::bar()", RMC.GetBufferRef());
EXPECT_EQ("foo::bar()", RMC.ParseFullName());

EXPECT_TRUE(RMC.FromItaniumName(ConstString(mangled)));
RMC.ParseFullName();
EXPECT_EQ("foo::bar()", RMC.GetBufferRef());
EXPECT_EQ("foo::bar()", RMC.ParseFullName());
}

TEST(RichManglingContextTest, IPDRealloc) {
Expand Down Expand Up @@ -116,13 +99,11 @@ TEST(RichManglingContextTest, IPDRealloc) {

// Demangle the short one.
EXPECT_TRUE(RMC.FromItaniumName(ConstString(ShortMangled)));
RMC.ParseFullName();
const char *ShortDemangled = RMC.GetBufferRef().data();
const char *ShortDemangled = RMC.ParseFullName().data();

// Demangle the long one.
EXPECT_TRUE(RMC.FromItaniumName(ConstString(LongMangled)));
RMC.ParseFullName();
const char *LongDemangled = RMC.GetBufferRef().data();
const char *LongDemangled = RMC.ParseFullName().data();

// Make sure a new buffer was allocated or the default buffer was extended.
bool AllocatedNewBuffer = (ShortDemangled != LongDemangled);
Expand Down
9 changes: 8 additions & 1 deletion llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2929,8 +2929,15 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit(
VTracker = nullptr;

// No scopes? No variable locations.
if (!LS.getCurrentFunctionScope())
if (!LS.getCurrentFunctionScope()) {
// FIXME: this is a sticking plaster to prevent a memory leak, these
// pointers will be automagically freed by being unique pointers, shortly.
for (unsigned int I = 0; I < MaxNumBlocks; ++I) {
delete[] MInLocs[I];
delete[] MOutLocs[I];
}
return false;
}

// Build map from block number to the last scope that uses the block.
SmallVector<unsigned, 16> EjectionMap;
Expand Down
32 changes: 0 additions & 32 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43108,38 +43108,6 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
}
}

// If this extract is from a loaded vector value and will be used as an
// integer, that requires a potentially expensive XMM -> GPR transfer.
// Additionally, if we can convert to a scalar integer load, that will likely
// be folded into a subsequent integer op.
// Note: Unlike the related fold for this in DAGCombiner, this is not limited
// to a single-use of the loaded vector. For the reasons above, we
// expect this to be profitable even if it creates an extra load.
bool LikelyUsedAsVector = any_of(N->uses(), [](SDNode *Use) {
return Use->getOpcode() == ISD::STORE ||
Use->getOpcode() == ISD::INSERT_VECTOR_ELT ||
Use->getOpcode() == ISD::SCALAR_TO_VECTOR;
});
auto *LoadVec = dyn_cast<LoadSDNode>(InputVector);
if (LoadVec && CIdx && ISD::isNormalLoad(LoadVec) && VT.isInteger() &&
SrcVT.getVectorElementType() == VT && DCI.isAfterLegalizeDAG() &&
!LikelyUsedAsVector) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue NewPtr =
TLI.getVectorElementPointer(DAG, LoadVec->getBasePtr(), SrcVT, EltIdx);
unsigned PtrOff = VT.getSizeInBits() * CIdx->getZExtValue() / 8;
MachinePointerInfo MPI = LoadVec->getPointerInfo().getWithOffset(PtrOff);
Align Alignment = commonAlignment(LoadVec->getAlign(), PtrOff);
SDValue Load =
DAG.getLoad(VT, dl, LoadVec->getChain(), NewPtr, MPI, Alignment,
LoadVec->getMemOperand()->getFlags(), LoadVec->getAAInfo());
SDValue Chain = Load.getValue(1);
SDValue From[] = {SDValue(N, 0), SDValue(LoadVec, 1)};
SDValue To[] = {Load, Chain};
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
return SDValue(N, 0);
}

return SDValue();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
define <4 x i32> @test(<4 x i32>* %p) {
; CHECK-LABEL: test:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpl $3, 8(%rdi)
; CHECK-NEXT: je .LBB0_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: movaps (%rdi), %xmm0
; CHECK-NEXT: extractps $2, %xmm0, %eax
; CHECK-NEXT: cmpl $3, %eax
; CHECK-NEXT: je .LBB0_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: retq
%v = load <4 x i32>, <4 x i32>* %p
%e = extractelement <4 x i32> %v, i32 2
Expand Down
18 changes: 12 additions & 6 deletions llvm/test/CodeGen/X86/avx512-cvt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,18 @@ define <2 x float> @sltof2f32(<2 x i64> %a) {
define <4 x float> @slto4f32_mem(<4 x i64>* %a) {
; NODQ-LABEL: slto4f32_mem:
; NODQ: # %bb.0:
; NODQ-NEXT: vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
; NODQ-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; NODQ-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; NODQ-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
; NODQ-NEXT: vmovdqu (%rdi), %xmm0
; NODQ-NEXT: vmovdqu 16(%rdi), %xmm1
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; NODQ-NEXT: retq
;
Expand Down
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/X86/bitcast-vector-bool.ll
Original file line number Diff line number Diff line change
Expand Up @@ -542,8 +542,10 @@ define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovb2m %zmm0, %k0
; AVX512-NEXT: kmovq %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: addl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX512-NEXT: vmovd %xmm0, %ecx
; AVX512-NEXT: vpextrd $1, %xmm0, %eax
; AVX512-NEXT: addl %ecx, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = icmp slt <64 x i8> %a0, zeroinitializer
Expand Down
12 changes: 5 additions & 7 deletions llvm/test/CodeGen/X86/extractelement-load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -301,35 +301,33 @@ define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* no
ret void
}

; A scalar load is favored over a XMM->GPR register transfer in this example.

define i32 @multi_use_load_scalarization(<4 x i32>* %p) {
; X32-SSE2-LABEL: multi_use_load_scalarization:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT: movl (%ecx), %eax
; X32-SSE2-NEXT: movdqu (%ecx), %xmm0
; X32-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; X32-SSE2-NEXT: movd %xmm0, %eax
; X32-SSE2-NEXT: psubd %xmm1, %xmm0
; X32-SSE2-NEXT: movdqa %xmm0, (%ecx)
; X32-SSE2-NEXT: retl
;
; X64-SSSE3-LABEL: multi_use_load_scalarization:
; X64-SSSE3: # %bb.0:
; X64-SSSE3-NEXT: movl (%rdi), %eax
; X64-SSSE3-NEXT: movdqu (%rdi), %xmm0
; X64-SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
; X64-SSSE3-NEXT: movd %xmm0, %eax
; X64-SSSE3-NEXT: psubd %xmm1, %xmm0
; X64-SSSE3-NEXT: movdqa %xmm0, (%rdi)
; X64-SSSE3-NEXT: retq
;
; X64-AVX-LABEL: multi_use_load_scalarization:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl (%rdi), %eax
; X64-AVX-NEXT: vmovdqu (%rdi), %xmm0
; X64-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovdqa %xmm0, (%rdi)
; X64-AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm1
; X64-AVX-NEXT: vmovdqa %xmm1, (%rdi)
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: retq
%v = load <4 x i32>, <4 x i32>* %p, align 1
%v1 = add <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1>
Expand Down
72 changes: 36 additions & 36 deletions llvm/test/CodeGen/X86/oddsubvector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -161,46 +161,46 @@ define <16 x i32> @PR42819(<8 x i32>* %a0) {
define void @PR42833() {
; SSE2-LABEL: PR42833:
; SSE2: # %bb.0:
; SSE2-NEXT: movl b(%rip), %eax
; SSE2-NEXT: movdqa c+144(%rip), %xmm0
; SSE2-NEXT: movdqa c+128(%rip), %xmm1
; SSE2-NEXT: addl c+128(%rip), %eax
; SSE2-NEXT: movdqa c+144(%rip), %xmm1
; SSE2-NEXT: movdqa c+128(%rip), %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: addl b(%rip), %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: paddd %xmm1, %xmm3
; SSE2-NEXT: paddd %xmm0, %xmm3
; SSE2-NEXT: movdqa d+144(%rip), %xmm4
; SSE2-NEXT: psubd %xmm0, %xmm4
; SSE2-NEXT: paddd %xmm0, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: paddd %xmm1, %xmm5
; SSE2-NEXT: psubd %xmm1, %xmm4
; SSE2-NEXT: paddd %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: paddd %xmm0, %xmm5
; SSE2-NEXT: movss {{.*#+}} xmm5 = xmm3[0],xmm5[1,2,3]
; SSE2-NEXT: movdqa %xmm0, c+144(%rip)
; SSE2-NEXT: movdqa %xmm1, c+144(%rip)
; SSE2-NEXT: movaps %xmm5, c+128(%rip)
; SSE2-NEXT: movdqa c+160(%rip), %xmm0
; SSE2-NEXT: movdqa c+160(%rip), %xmm1
; SSE2-NEXT: movdqa c+176(%rip), %xmm3
; SSE2-NEXT: movdqa d+160(%rip), %xmm5
; SSE2-NEXT: movdqa d+176(%rip), %xmm6
; SSE2-NEXT: movdqa d+128(%rip), %xmm7
; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
; SSE2-NEXT: psubd %xmm1, %xmm7
; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT: psubd %xmm0, %xmm7
; SSE2-NEXT: psubd %xmm3, %xmm6
; SSE2-NEXT: psubd %xmm0, %xmm5
; SSE2-NEXT: psubd %xmm1, %xmm5
; SSE2-NEXT: movdqa %xmm5, d+160(%rip)
; SSE2-NEXT: movdqa %xmm6, d+176(%rip)
; SSE2-NEXT: movdqa %xmm4, d+144(%rip)
; SSE2-NEXT: movdqa %xmm7, d+128(%rip)
; SSE2-NEXT: paddd %xmm3, %xmm3
; SSE2-NEXT: paddd %xmm0, %xmm0
; SSE2-NEXT: movdqa %xmm0, c+160(%rip)
; SSE2-NEXT: paddd %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm1, c+160(%rip)
; SSE2-NEXT: movdqa %xmm3, c+176(%rip)
; SSE2-NEXT: retq
;
; SSE42-LABEL: PR42833:
; SSE42: # %bb.0:
; SSE42-NEXT: movl b(%rip), %eax
; SSE42-NEXT: movdqa c+144(%rip), %xmm0
; SSE42-NEXT: movdqa c+128(%rip), %xmm1
; SSE42-NEXT: addl c+128(%rip), %eax
; SSE42-NEXT: movd %xmm1, %eax
; SSE42-NEXT: addl b(%rip), %eax
; SSE42-NEXT: movd %eax, %xmm2
; SSE42-NEXT: paddd %xmm1, %xmm2
; SSE42-NEXT: movdqa d+144(%rip), %xmm3
Expand Down Expand Up @@ -232,20 +232,20 @@ define void @PR42833() {
;
; AVX1-LABEL: PR42833:
; AVX1: # %bb.0:
; AVX1-NEXT: movl b(%rip), %eax
; AVX1-NEXT: addl c+128(%rip), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vmovdqa c+128(%rip), %xmm1
; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpaddd %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vmovdqa c+128(%rip), %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: addl b(%rip), %eax
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm2
; AVX1-NEXT: vmovdqa c+144(%rip), %xmm3
; AVX1-NEXT: vpaddd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vmovdqa d+144(%rip), %xmm2
; AVX1-NEXT: vpsubd c+144(%rip), %xmm2, %xmm2
; AVX1-NEXT: vmovups %ymm0, c+128(%rip)
; AVX1-NEXT: vpinsrd $0, %eax, %xmm1, %xmm0
; AVX1-NEXT: vmovups %ymm1, c+128(%rip)
; AVX1-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0
; AVX1-NEXT: vmovdqa d+128(%rip), %xmm1
; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmovdqa d+176(%rip), %xmm1
Expand Down Expand Up @@ -314,20 +314,20 @@ define void @PR42833() {
;
; XOP-LABEL: PR42833:
; XOP: # %bb.0:
; XOP-NEXT: movl b(%rip), %eax
; XOP-NEXT: addl c+128(%rip), %eax
; XOP-NEXT: vmovd %eax, %xmm0
; XOP-NEXT: vmovdqa c+128(%rip), %xmm1
; XOP-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; XOP-NEXT: vpaddd %xmm1, %xmm1, %xmm2
; XOP-NEXT: vmovdqa c+128(%rip), %xmm0
; XOP-NEXT: vmovd %xmm0, %eax
; XOP-NEXT: addl b(%rip), %eax
; XOP-NEXT: vmovd %eax, %xmm1
; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; XOP-NEXT: vpaddd %xmm0, %xmm0, %xmm2
; XOP-NEXT: vmovdqa c+144(%rip), %xmm3
; XOP-NEXT: vpaddd %xmm3, %xmm3, %xmm3
; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; XOP-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
; XOP-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
; XOP-NEXT: vmovdqa d+144(%rip), %xmm2
; XOP-NEXT: vpsubd c+144(%rip), %xmm2, %xmm2
; XOP-NEXT: vmovups %ymm0, c+128(%rip)
; XOP-NEXT: vpinsrd $0, %eax, %xmm1, %xmm0
; XOP-NEXT: vmovups %ymm1, c+128(%rip)
; XOP-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0
; XOP-NEXT: vmovdqa d+128(%rip), %xmm1
; XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; XOP-NEXT: vmovdqa d+176(%rip), %xmm1
Expand Down
17 changes: 11 additions & 6 deletions llvm/test/CodeGen/X86/pr45378.ll
Original file line number Diff line number Diff line change
Expand Up @@ -76,23 +76,28 @@ define i1 @parseHeaders2_scalar_and(i64 * %ptr) nounwind {
; SSE2-LABEL: parseHeaders2_scalar_and:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqu (%rdi), %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, (%rdi)
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE2-NEXT: movq %xmm0, %rcx
; SSE2-NEXT: testq %rcx, %rax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: parseHeaders2_scalar_and:
; SSE41: # %bb.0:
; SSE41-NEXT: movq (%rdi), %rax
; SSE41-NEXT: testq %rax, 8(%rdi)
; SSE41-NEXT: movdqu (%rdi), %xmm0
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: pextrq $1, %xmm0, %rcx
; SSE41-NEXT: testq %rcx, %rax
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX-LABEL: parseHeaders2_scalar_and:
; AVX: # %bb.0:
; AVX-NEXT: movq (%rdi), %rax
; AVX-NEXT: testq %rax, 8(%rdi)
; AVX-NEXT: vmovdqu (%rdi), %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: vpextrq $1, %xmm0, %rcx
; AVX-NEXT: testq %rcx, %rax
; AVX-NEXT: sete %al
; AVX-NEXT: retq
%vptr = bitcast i64 * %ptr to <2 x i64> *
Expand Down
31 changes: 17 additions & 14 deletions llvm/test/CodeGen/X86/scalar_widen_div.ll
Original file line number Diff line number Diff line change
Expand Up @@ -403,29 +403,32 @@ define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: jle .LBB12_3
; CHECK-NEXT: # %bb.1: # %bb.nph
; CHECK-NEXT: movl %edx, %r11d
; CHECK-NEXT: movl %edx, %r9d
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB12_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl 8(%rdi,%rcx), %r8d
; CHECK-NEXT: movl (%rdi,%rcx), %r9d
; CHECK-NEXT: movl 4(%rdi,%rcx), %eax
; CHECK-NEXT: movdqa (%rdi,%rcx), %xmm0
; CHECK-NEXT: movdqa (%rsi,%rcx), %xmm1
; CHECK-NEXT: pextrd $1, %xmm0, %eax
; CHECK-NEXT: pextrd $1, %xmm1, %r8d
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl 4(%rsi,%rcx)
; CHECK-NEXT: movl %eax, %r10d
; CHECK-NEXT: movl %r9d, %eax
; CHECK-NEXT: idivl %r8d
; CHECK-NEXT: movl %eax, %r8d
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movd %xmm1, %r10d
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl (%rsi,%rcx)
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: pinsrd $1, %r10d, %xmm0
; CHECK-NEXT: movl %r8d, %eax
; CHECK-NEXT: idivl %r10d
; CHECK-NEXT: movd %eax, %xmm2
; CHECK-NEXT: pinsrd $1, %r8d, %xmm2
; CHECK-NEXT: pextrd $2, %xmm0, %eax
; CHECK-NEXT: pextrd $2, %xmm1, %r8d
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl 8(%rsi,%rcx)
; CHECK-NEXT: idivl %r8d
; CHECK-NEXT: movl %eax, 8(%rdi,%rcx)
; CHECK-NEXT: movq %xmm0, (%rdi,%rcx)
; CHECK-NEXT: movq %xmm2, (%rdi,%rcx)
; CHECK-NEXT: addq $16, %rcx
; CHECK-NEXT: decl %r11d
; CHECK-NEXT: decl %r9d
; CHECK-NEXT: jne .LBB12_2
; CHECK-NEXT: .LBB12_3: # %for.end
; CHECK-NEXT: retq
Expand Down
214 changes: 130 additions & 84 deletions llvm/test/CodeGen/X86/shrink_vmul.ll

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vec_cast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ define <3 x i16> @h(<3 x i32> %a) nounwind {
; CHECK-WIN-LABEL: h:
; CHECK-WIN: # %bb.0:
; CHECK-WIN-NEXT: movdqa (%rcx), %xmm0
; CHECK-WIN-NEXT: movl (%rcx), %eax
; CHECK-WIN-NEXT: movd %xmm0, %eax
; CHECK-WIN-NEXT: pextrw $2, %xmm0, %edx
; CHECK-WIN-NEXT: pextrw $4, %xmm0, %ecx
; CHECK-WIN-NEXT: # kill: def $ax killed $ax killed $eax
Expand Down
591 changes: 375 additions & 216 deletions llvm/test/CodeGen/X86/vec_int_to_fp.ll

Large diffs are not rendered by default.